PM: Transition System Miner

From OnnoWiki
Revision as of 15:58, 13 September 2025 by Onnowpurbo (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Siap! Berikut script Transition System Miner dengan PM4Py yang membaca file XES terlampir (`/mnt/data/BPIC_2012_A.xes`), menambang Transition System (TS), lalu:

  • Cetak ringkasan jumlah state dan transition
  • Simpan gambar TS (`transition_system.png`)
  • Ekspor daftar state (`ts_states.csv`) dan transition (`ts_transitions.csv`)

Install dulu (sekali saja):


pip install pm4py pandas graphviz
# pastikan sistem punya Graphviz binary (dot). Linux (Debian/Ubuntu):
# sudo apt-get install graphviz


transition_system_miner.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import sys
from pathlib import Path
import pandas as pd

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.transition_system import algorithm as ts_discovery
from pm4py.visualization.transition_system import visualizer as ts_visualizer
def _state_id_label(state_obj):
    """Return an ``(id, label)`` pair for a transition-system state.

    The ``name`` attribute is used as the id and ``label`` as the label; when
    either is missing, ``str(state_obj)`` is used so the CSV cell is never
    empty. ``None`` input yields ``(None, None)``.
    """
    if state_obj is None:
        return None, None
    sid = getattr(state_obj, "name", None)
    slabel = getattr(state_obj, "label", None)
    if sid is None:
        sid = str(state_obj)
    if slabel is None:
        slabel = str(state_obj)
    return sid, slabel


def _transition_row(transition):
    """Build one CSV row (a dict) describing a transition-system transition."""
    # Accept both the ``from_state``/``to_state`` and ``from``/``to`` spellings.
    src = getattr(transition, "from_state", getattr(transition, "from", None))
    dst = getattr(transition, "to_state", getattr(transition, "to", None))

    # Prefer ``label`` when present; otherwise fall back to ``name``, which is
    # where PM4Py's Transition object keeps the activity (see PM4Py docs).
    # Reading only ``label`` left this column empty for every row.
    label = getattr(transition, "label", None)
    if label is None:
        label = getattr(transition, "name", None)

    src_id, src_label = _state_id_label(src)
    dst_id, dst_label = _state_id_label(dst)
    return {
        "source_id": src_id,
        "source_label": src_label,
        "target_id": dst_id,
        "target_label": dst_label,
        "transition_label": label if label is not None else "",
    }


def discover_transition_system(xes_path: Path, image_out: Path, states_out: Path, trans_out: Path):
    """Mine a transition system from an XES log and export it.

    Steps: import the log, discover the transition system, render it to
    ``image_out``, write the states to ``states_out`` and the transitions to
    ``trans_out`` (both CSV), then print a short summary to stdout.

    Args:
        xes_path: Path to the input ``.xes`` event log.
        image_out: Destination of the rendered transition-system image.
        states_out: Destination CSV with columns ``state_id``, ``state_label``.
        trans_out: Destination CSV with columns ``source_id``,
            ``source_label``, ``target_id``, ``target_label``,
            ``transition_label``.
    """
    # Local import: keeps the new dependency inside this block.
    from pm4py.util import constants as pm4py_constants

    # 1) Load the event log.
    log = xes_importer.apply(str(xes_path))

    # 2) Discover the transition system.
    # PM4Py looks parameters up by its own constant keys; plain strings such
    # as "activity_key" are not recognised and would be silently ignored.
    # NOTE(review): the view-based miner appears to read only the activity
    # key; the other two are kept for parity with the original settings.
    parameters = {
        pm4py_constants.PARAMETER_CONSTANT_CASEID_KEY: "case:concept:name",
        pm4py_constants.PARAMETER_CONSTANT_ACTIVITY_KEY: "concept:name",
        pm4py_constants.PARAMETER_CONSTANT_TIMESTAMP_KEY: "time:timestamp",
    }
    ts = ts_discovery.apply(log, parameters=parameters)

    # Make sure the output directories exist before anything is written.
    for out_path in (image_out, states_out, trans_out):
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)

    # 3) Render the visualisation and save it to disk.
    gviz = ts_visualizer.apply(ts)
    ts_visualizer.save(gviz, str(image_out))

    # 4) Export states and transitions to CSV.
    states_rows = []
    for state in ts.states:
        sid, slabel = _state_id_label(state)
        states_rows.append({"state_id": sid, "state_label": slabel})

    trans_rows = [_transition_row(t) for t in ts.transitions]

    pd.DataFrame(states_rows).to_csv(states_out, index=False)
    pd.DataFrame(trans_rows).to_csv(trans_out, index=False)

    # 5) Summary.
    print("=== Transition System Summary ===")
    print(f"States     : {len(states_rows)}")
    print(f"Transitions: {len(trans_rows)}")
    print(f"Gambar     : {image_out}")
    print(f"States CSV : {states_out}")
    print(f"Trans CSV  : {trans_out}")


def main():
    """Command-line entry point.

    Parses the XES path and the three optional output paths, checks that the
    input is an existing regular file, and runs
    ``discover_transition_system``. Exits with status 1 and a message on
    stderr when the input file is missing.
    """
    ap = argparse.ArgumentParser(description="Transition System Miner using PM4Py (from XES)")
    ap.add_argument("xes_path", type=str, help="Path ke file .xes")
    ap.add_argument("--img", type=str, default="transition_system.png", help="Output image (PNG)")
    ap.add_argument("--states_csv", type=str, default="ts_states.csv", help="Output CSV daftar state")
    ap.add_argument("--trans_csv", type=str, default="ts_transitions.csv", help="Output CSV daftar transition")
    args = ap.parse_args()

    xes_path = Path(args.xes_path)
    # is_file() rather than exists(): a directory would pass exists() and
    # only fail later, inside the XES importer, with a less helpful error.
    if not xes_path.is_file():
        print(f"[ERROR] File tidak ditemukan: {xes_path}", file=sys.stderr)
        sys.exit(1)

    discover_transition_system(
        xes_path=xes_path,
        image_out=Path(args.img),
        states_out=Path(args.states_csv),
        trans_out=Path(args.trans_csv),
    )


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Cara menjalankan (pakai file terlampir)

python transition_system_miner.py /mnt/data/BPIC_2012_A.xes \
  --img ts_BPICA.png \
  --states_csv ts_BPICA_states.csv \
  --trans_csv ts_BPICA_transitions.csv


Opsi & catatan

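  • Kolom kunci diset ke standar PM4Py:
 `case:concept:name`, `concept:name`, `time:timestamp`.
 Jika log Anda memakai nama atribut berbeda, ubah nilainya di `parameters`. Perhatikan bahwa PM4Py hanya mengenali kunci parameter dalam bentuk konstanta dari `pm4py.util.constants` (mis. `pm4py:param:activity_key`); kunci string biasa seperti `"activity_key"` diabaikan, sehingga nilai default yang dipakai.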
  • Graphviz diperlukan agar file PNG bisa disimpan. Paket Python `graphviz` saja tidak cukup: program Graphviz (binary `dot`) juga harus terpasang di sistem operasi.
  • Untuk log besar, TS bisa sangat besar. Anda bisa mulai dari subset (filtering case/variant) sebelum menambang TS:
    • Filter variant Top-K, atau
    • Filter rentang tanggal tertentu.
  • Jika ingin lihat langsung (open viewer), ganti `ts_visualizer.save(...)` menjadi `ts_visualizer.view(gviz)` (akan membuka jendela viewer apabila environment mendukung).