Difference between revisions of "PM: Transition System Miner"

From OnnoWiki
Jump to navigation Jump to search
(Created page with "Siap! Berikut script **Transition System Miner** dengan **PM4Py** yang membaca **file XES terlampir** (`/mnt/data/BPIC_2012_A.xes`), menambang **Transition System (TS)**, lalu...")
 
 
Line 1: Line 1:
Siap! Berikut script **Transition System Miner** dengan **PM4Py** yang membaca **file XES terlampir** (`/mnt/data/BPIC_2012_A.xes`), menambang **Transition System (TS)**, lalu:
+
Siap! Berikut script '''Transition System Miner''' dengan '''PM4Py''' yang membaca '''file XES terlampir''' (`/mnt/data/BPIC_2012_A.xes`), menambang '''Transition System (TS)''', lalu:
  
* Cetak ringkasan jumlah **state** dan **transition**
+
* Cetak ringkasan jumlah '''state''' dan '''transition'''
* Simpan **gambar TS** (`transition_system.png`)
+
* Simpan '''gambar TS''' (`transition_system.png`)
* Ekspor daftar **state** (`ts_states.csv`) dan **transition** (`ts_transitions.csv`)
+
* Ekspor daftar '''state''' (`ts_states.csv`) dan '''transition''' (`ts_transitions.csv`)
  
> **Install dulu (sekali saja):**
+
'''Install dulu (sekali saja):'''
>
 
> ```bash
 
> pip install pm4py pandas graphviz
 
> # pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
 
> # sudo apt-get install graphviz
 
> ```
 
  
---
 
  
### `transition_system_miner.py`
+
pip install pm4py pandas graphviz
 +
# pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
 +
# sudo apt-get install graphviz
  
```python
 
#!/usr/bin/env python3
 
# -*- coding: utf-8 -*-
 
  
import argparse
+
==transition_system_miner.py==
import sys
 
from pathlib import Path
 
import pandas as pd
 
  
from pm4py.objects.log.importer.xes import importer as xes_importer
+
#!/usr/bin/env python3
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
+
# -*- coding: utf-8 -*-
from pm4py.visualization.transition_system import visualizer as ts_visualizer
+
 +
import argparse
 +
import sys
 +
from pathlib import Path
 +
import pandas as pd
 +
 +
from pm4py.objects.log.importer.xes import importer as xes_importer
 +
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
 +
from pm4py.visualization.transition_system import visualizer as ts_visualizer
  
 +
def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
 +
    # 1) Load event log
 +
    log = xes_importer.apply(str(xes_path))
 +
 +
    # 2) Discover Transition System
 +
    # Parameter kunci (ubah jika perlu)
 +
    parameters = {
 +
        "case_glue": "case:concept:name",
 +
        "activity_key": "concept:name",
 +
        "timestamp_key": "time:timestamp",
 +
    }
 +
    ts = ts_discovery.apply(log, parameters=parameters)
 +
 +
    # 3) Visualisasi & simpan ke file
 +
    gviz = ts_visualizer.apply(ts)
 +
    ts_visualizer.save(gviz, str(image_out))
 +
 +
    # 4) Ekspor states & transitions ke CSV
 +
    # Struktur objek TS di PM4Py: ts.states (set of State), ts.transitions (set of Transition)
 +
    # State biasanya memiliki 'name' (id) dan 'label' (representasi state)
 +
    states_rows = []
 +
    for s in ts.states:
 +
        sid = getattr(s, "name", None)
 +
        slabel = getattr(s, "label", None)
 +
        # fallback agar tetap terisi
 +
        if sid is None:
 +
            sid = str(s)
 +
        if slabel is None:
 +
            slabel = str(s)
 +
        states_rows.append({"state_id": sid, "state_label": slabel})
 +
 +
    trans_rows = []
 +
    for t in ts.transitions:
 +
        # transition memiliki source (from), target (to), label (activity/event class)
 +
        src = getattr(t, "from_state", getattr(t, "from", None))
 +
        dst = getattr(t, "to_state", getattr(t, "to", None))
 +
        lab = getattr(t, "label", None)
 +
 +
        # Ambil id/label state sumber & tujuan
 +
        def state_id_label(state_obj):
 +
            if state_obj is None:
 +
                return None, None
 +
            sid = getattr(state_obj, "name", None) or str(state_obj)
 +
            slb = getattr(state_obj, "label", None) or str(state_obj)
 +
            return sid, slb
 +
 +
        src_id, src_label = state_id_label(src)
 +
        dst_id, dst_label = state_id_label(dst)
 +
 +
        trans_rows.append({
 +
            "source_id": src_id,
 +
            "source_label": src_label,
 +
            "target_id": dst_id,
 +
            "target_label": dst_label,
 +
            "transition_label": lab if lab is not None else ""
 +
        })
 +
 +
    pd.DataFrame(states_rows).to_csv(states_out, index=False)
 +
    pd.DataFrame(trans_rows).to_csv(trans_out, index=False)
 +
 +
    # 5) Ringkasan
 +
    print("=== Transition System Summary ===")
 +
    print(f"States    : {len(states_rows)}")
 +
    print(f"Transitions: {len(trans_rows)}")
 +
    print(f"Gambar    : {image_out}")
 +
    print(f"States CSV : {states_out}")
 +
    print(f"Trans CSV  : {trans_out}")
 +
 +
 +
def main():
 +
    ap = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
 +
    ap.add_argument("xes_path", type=str, help="Path ke file .xes")
 +
    ap.add_argument("--img", type=str, default="transition_system.png", help="Output image (PNG)")
 +
    ap.add_argument("--states_csv", type=str, default="ts_states.csv", help="Output CSV daftar state")
 +
    ap.add_argument("--trans_csv", type=str, default="ts_transitions.csv", help="Output CSV daftar transition")
 +
    args = ap.parse_args()
 +
 +
    xes_path = Path(args.xes_path)
 +
    if not xes_path.exists():
 +
        print(f"[ERROR] File tidak ditemukan: {xes_path}", file=sys.stderr)
 +
        sys.exit(1)
 +
 +
    discover_transition_system(
 +
        xes_path=xes_path,
 +
        image_out=Path(args.img),
 +
        states_out=Path(args.states_csv),
 +
        trans_out=Path(args.trans_csv),
 +
    )
 +
 +
 +
if __name__ == "__main__":
 +
    main()
  
def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
 
    # 1) Load event log
 
    log = xes_importer.apply(str(xes_path))
 
  
    # 2) Discover Transition System
+
==Cara menjalankan (pakai file terlampir)==
    # Parameter kunci (ubah jika perlu)
 
    parameters = {
 
        "case_glue": "case:concept:name",
 
        "activity_key": "concept:name",
 
        "timestamp_key": "time:timestamp",
 
    }
 
    ts = ts_discovery.apply(log, parameters=parameters)
 
  
    # 3) Visualisasi & simpan ke file
+
python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
    gviz = ts_visualizer.apply(ts)
+
  --img ts_BPICA.png \
    ts_visualizer.save(gviz, str(image_out))
+
  --states_csv ts_BPICA_states.csv \
 +
  --trans_csv ts_BPICA_transitions.csv
  
    # 4) Ekspor states & transitions ke CSV
 
    # Struktur objek TS di PM4Py: ts.states (set of State), ts.transitions (set of Transition)
 
    # State biasanya memiliki 'name' (id) dan 'label' (representasi state)
 
    states_rows = []
 
    for s in ts.states:
 
        sid = getattr(s, "name", None)
 
        slabel = getattr(s, "label", None)
 
        # fallback agar tetap terisi
 
        if sid is None:
 
            sid = str(s)
 
        if slabel is None:
 
            slabel = str(s)
 
        states_rows.append({"state_id": sid, "state_label": slabel})
 
  
    trans_rows = []
+
==Opsi & catatan==
    for t in ts.transitions:
 
        # transition memiliki source (from), target (to), label (activity/event class)
 
        src = getattr(t, "from_state", getattr(t, "from", None))
 
        dst = getattr(t, "to_state", getattr(t, "to", None))
 
        lab = getattr(t, "label", None)
 
  
        # Ambil id/label state sumber & tujuan
+
* '''Kolom kunci''' diset ke standar PM4Py:
        def state_id_label(state_obj):
 
            if state_obj is None:
 
                return None, None
 
            sid = getattr(state_obj, "name", None) or str(state_obj)
 
            slb = getattr(state_obj, "label", None) or str(state_obj)
 
            return sid, slb
 
 
 
        src_id, src_label = state_id_label(src)
 
        dst_id, dst_label = state_id_label(dst)
 
 
 
        trans_rows.append({
 
            "source_id": src_id,
 
            "source_label": src_label,
 
            "target_id": dst_id,
 
            "target_label": dst_label,
 
            "transition_label": lab if lab is not None else ""
 
        })
 
 
 
    pd.DataFrame(states_rows).to_csv(states_out, index=False)
 
    pd.DataFrame(trans_rows).to_csv(trans_out, index=False)
 
 
 
    # 5) Ringkasan
 
    print("=== Transition System Summary ===")
 
    print(f"States    : {len(states_rows)}")
 
    print(f"Transitions: {len(trans_rows)}")
 
    print(f"Gambar    : {image_out}")
 
    print(f"States CSV : {states_out}")
 
    print(f"Trans CSV  : {trans_out}")
 
 
 
 
 
def main():
 
    ap = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
 
    ap.add_argument("xes_path", type=str, help="Path ke file .xes")
 
    ap.add_argument("--img", type=str, default="transition_system.png", help="Output image (PNG)")
 
    ap.add_argument("--states_csv", type=str, default="ts_states.csv", help="Output CSV daftar state")
 
    ap.add_argument("--trans_csv", type=str, default="ts_transitions.csv", help="Output CSV daftar transition")
 
    args = ap.parse_args()
 
 
 
    xes_path = Path(args.xes_path)
 
    if not xes_path.exists():
 
        print(f"[ERROR] File tidak ditemukan: {xes_path}", file=sys.stderr)
 
        sys.exit(1)
 
 
 
    discover_transition_system(
 
        xes_path=xes_path,
 
        image_out=Path(args.img),
 
        states_out=Path(args.states_csv),
 
        trans_out=Path(args.trans_csv),
 
    )
 
 
 
 
 
if __name__ == "__main__":
 
    main()
 
```
 
 
 
---
 
 
 
### Cara menjalankan (pakai file terlampir)
 
 
 
```bash
 
python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
 
  --img ts_BPICA.png \
 
  --states_csv ts_BPICA_states.csv \
 
  --trans_csv ts_BPICA_transitions.csv
 
```
 
 
 
---
 
 
 
### Opsi & catatan
 
 
 
* **Kolom kunci** diset ke standar PM4Py:
 
 
   `case:concept:name`, `concept:name`, `time:timestamp`.
 
   `case:concept:name`, `concept:name`, `time:timestamp`.
 
   Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
 
   Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
* **Graphviz** diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
+
* '''Graphviz''' diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
 
* Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
 
* Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
 
+
** Filter variant Top-K, atau
  * Filter variant Top-K, atau
+
** Filter rentang tanggal tertentu.
  * Filter rentang tanggal tertentu.
+
* Jika ingin '''lihat''' langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).
* Jika ingin **lihat** langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).
 
 
 
Mau saya tambahkan versi dengan **filter Top-K variants** atau **prefix-length limit** biar graf tidak terlalu padat?
 

Latest revision as of 15:58, 13 September 2025

Siap! Berikut script Transition System Miner dengan PM4Py yang membaca file XES terlampir (`/mnt/data/BPIC_2012_A.xes`), menambang Transition System (TS), lalu:

  • Cetak ringkasan jumlah state dan transition
  • Simpan gambar TS (`transition_system.png`)
  • Ekspor daftar state (`ts_states.csv`) dan transition (`ts_transitions.csv`)

Install dulu (sekali saja):


pip install pm4py pandas graphviz
# pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
# sudo apt-get install graphviz


transition_system_miner.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
from pathlib import Path
import pandas as pd

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
from pm4py.visualization.transition_system import visualizer as ts_visualizer
def _state_id_label(state_obj):
    """Return an ``(id, label)`` pair for a transition-system state.

    ``None`` yields ``(None, None)``. Otherwise the ``name`` and ``label``
    attributes are used when present, each falling back to ``str(state_obj)``
    so that neither value is ever left empty for a real state.
    """
    if state_obj is None:
        return None, None
    sid = getattr(state_obj, "name", None)
    slabel = getattr(state_obj, "label", None)
    # Fall back only on a missing (None) value so that the ids written for
    # states and for transition endpoints always agree with each other.
    if sid is None:
        sid = str(state_obj)
    if slabel is None:
        slabel = str(state_obj)
    return sid, slabel


def _transition_label(transition):
    """Return the label of a transition, or ``""`` when none is available.

    ``label`` is tried first, then ``name``.
    """
    label = getattr(transition, "label", None)
    if label is None:
        # NOTE(review): PM4Py Transition objects appear to carry the activity
        # in `name` rather than `label` -- confirm against the installed version.
        label = getattr(transition, "name", None)
    return label if label is not None else ""


def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path) -> None:
    """Mine a transition system from an XES log and export it.

    The log at ``xes_path`` is imported, a transition system is discovered
    from it, the rendered graph is saved to ``image_out``, and the states and
    transitions are written as CSV files. A short summary is printed at the end.

    Args:
        xes_path: Path to the input ``.xes`` event log.
        image_out: Destination of the rendered transition-system image.
        states_out: Destination CSV with columns ``state_id``, ``state_label``.
        trans_out: Destination CSV with columns ``source_id``,
            ``source_label``, ``target_id``, ``target_label``,
            ``transition_label``.
    """
    # 1) Load the event log.
    log = xes_importer.apply(str(xes_path))

    # 2) Discover the transition system.
    # NOTE(review): these plain keys may not be the parameter keys PM4Py
    # actually looks up (it may then silently use its defaults) -- confirm
    # against the PM4Py parameter documentation before relying on them.
    parameters = {
        "case_glue": "case:concept:name",
        "activity_key": "concept:name",
        "timestamp_key": "time:timestamp",
    }
    ts = ts_discovery.apply(log, parameters=parameters)

    # 3) Render and save the image.
    gviz = ts_visualizer.apply(ts)
    ts_visualizer.save(gviz, str(image_out))

    # 4) Export states and transitions to CSV.
    states_rows = []
    for state in ts.states:
        sid, slabel = _state_id_label(state)
        states_rows.append({"state_id": sid, "state_label": slabel})

    trans_rows = []
    for transition in ts.transitions:
        # Endpoints are read from from_state/to_state, with from/to as a fallback.
        src = getattr(transition, "from_state", getattr(transition, "from", None))
        dst = getattr(transition, "to_state", getattr(transition, "to", None))
        src_id, src_label = _state_id_label(src)
        dst_id, dst_label = _state_id_label(dst)
        trans_rows.append({
            "source_id": src_id,
            "source_label": src_label,
            "target_id": dst_id,
            "target_label": dst_label,
            "transition_label": _transition_label(transition),
        })

    # Explicit column lists keep the CSV header present even when there are no rows.
    state_columns = ["state_id", "state_label"]
    trans_columns = ["source_id", "source_label", "target_id", "target_label", "transition_label"]
    pd.DataFrame(states_rows, columns=state_columns).to_csv(states_out, index=False)
    pd.DataFrame(trans_rows, columns=trans_columns).to_csv(trans_out, index=False)

    # 5) Summary.
    print("=== Transition System Summary ===")
    print(f"States     : {len(states_rows)}")
    print(f"Transitions: {len(trans_rows)}")
    print(f"Gambar     : {image_out}")
    print(f"States CSV : {states_out}")
    print(f"Trans CSV  : {trans_out}")


def main(argv=None):
    """Command-line entry point for the transition system miner.

    Parses the XES path and the three output paths, verifies that the input
    is an existing regular file, and runs ``discover_transition_system``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.

    Exits with status 1 (after printing an error to stderr) when the input
    path is not an existing file.
    """
    ap = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
    ap.add_argument("xes_path", type=str, help="Path ke file .xes")
    ap.add_argument("--img", type=str, default="transition_system.png", help="Output image (PNG)")
    ap.add_argument("--states_csv", type=str, default="ts_states.csv", help="Output CSV daftar state")
    ap.add_argument("--trans_csv", type=str, default="ts_transitions.csv", help="Output CSV daftar transition")
    args = ap.parse_args(argv)

    xes_path = Path(args.xes_path)
    # is_file() rather than exists(): a directory path exists but cannot be
    # read as an XES log, so reject it here with the same clear error.
    if not xes_path.is_file():
        print(f"[ERROR] File tidak ditemukan: {xes_path}", file=sys.stderr)
        sys.exit(1)

    discover_transition_system(
        xes_path=xes_path,
        image_out=Path(args.img),
        states_out=Path(args.states_csv),
        trans_out=Path(args.trans_csv),
    )


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Cara menjalankan (pakai file terlampir)

python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
  --img ts_BPICA.png \
  --states_csv ts_BPICA_states.csv \
  --trans_csv ts_BPICA_transitions.csv


Opsi & catatan

  • Kolom kunci diset ke standar PM4Py:
 `case:concept:name`, `concept:name`, `time:timestamp`.
 Jika log Anda memakai nama kolom berbeda, ubah di `parameters`.
  • Graphviz diperlukan agar file PNG bisa disimpan. Jika belum ada, install `graphviz` (OS) selain paket Python-nya.
  • Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
    • Filter variant Top-K, atau
    • Filter rentang tanggal tertentu.
  • Jika ingin lihat langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).