result_analysis

Class to generate the output json file "result_simulated_log(experiment_name)" with some analysis on the simulated log. See the table below for the full list of information provided as output.

'''
Class to generate the output json file "result_simulated_log(experiment_name)" with some analysis on the simulated log.
Example of analysis:

| Name             | Description                                  |
|:----------------:|:---------------------------------------------|
| total_events     | Total events in the log                      |
| total_traces     | Total traces in the log                      |
| *A*_frequency    | Total occurrences of activity *A* in the log |
| duration         | Total duration of the simulation             |
| start_simulation | Start date of the simulation                 |
| end_simulation   | End date of the simulation                   |
'''
15
import glob
import json
import os
from datetime import datetime, timedelta

import pandas as pd
import pm4py

from parameters import Parameters

class Result(object):
    """Summarise simulated logs and persist the analysis as JSON.

    Scans ``<cwd>/output/<folder>/simulated_log_*.csv`` and, for each log,
    produces ``output/<folder>/result_simulated_log_*.json`` containing:

    * ``total_events`` / ``total_traces``
    * ``<activity>_frequency`` for every activity in the parameters
    * ``duration``, ``start_simulation``, ``end_simulation``
    * per-role totals with a per-resource breakdown
    """

    def __init__(self, folder: str, params: "Parameters"):
        # Experiment folder under ./output containing the simulated logs.
        self._folder = folder
        self._all_file = glob.glob("{}/output/{}/simulated_log_*.csv".format(os.getcwd(), self._folder))
        self._params = params

    def analysis_log(self, sim):
        '''
        Compute the analysis over a single simulated log and write it to
        a result_*.json file next to the input.

        :param sim: path to one simulated_log_*.csv file.
        '''
        sim_df = pd.read_csv(sim, sep=',')
        analysis = dict(self.general_analysis(sim_df))

        # Per-role event totals plus a per-resource breakdown.
        # TRIGGER_TIMER is a synthetic role for timer events, not a
        # real participant, so it is skipped.
        for role in self._params.ROLE_CAPACITY:
            if role != 'TRIGGER_TIMER':
                analysis[role] = {"total": len(sim_df[sim_df['role'] == role])}
                for resource in self._params.ROLE_CAPACITY[role][0]:
                    analysis[role][resource] = len(sim_df[sim_df['resource'] == resource])

        self._write_json(analysis, sim)

    def general_analysis(self, sim_df):
        '''
        Return the log-wide statistics: event/trace counts, per-activity
        frequencies, total duration and start/end timestamps.
        '''
        analysis = dict()
        analysis['total_events'] = len(sim_df)
        analysis['total_traces'] = int(sim_df['id_case'].nunique())
        for act in self._params.PROCESSING_TIME:
            analysis[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
        # Rows are assumed chronological: first start_time and last end_time
        # delimit the simulation.
        # NOTE(review): timestamps must carry microseconds ('%f') or
        # strptime raises ValueError -- confirm the simulator always emits them.
        ts_format = '%Y-%m-%d %H:%M:%S.%f'
        seconds = (datetime.strptime(sim_df['end_time'].iloc[-1], ts_format)
                   - datetime.strptime(sim_df['start_time'].iloc[0], ts_format)).total_seconds()
        analysis['duration'] = str(timedelta(seconds=seconds))
        analysis['start_simulation'] = sim_df['start_time'].iloc[0]
        analysis['end_simulation'] = sim_df['end_time'].iloc[-1]

        return analysis

    def _analyse(self, type='single'):
        # 'type' shadows the builtin; the name is kept so keyword callers
        # are not broken.
        if type == 'single':
            self.analysis_log(self._all_file[0])
            self._csv_to_xes(self._all_file[0])
        else:
            for file in self._all_file:
                self.analysis_log(file)

    def _write_json(self, analysis, sim):
        '''Serialize *analysis* as output/<folder>/result_<log name>.json.'''
        try:
            filename = 'output/{}/result_{}.json'.format(self._folder, os.path.splitext(os.path.basename(sim))[0])
            with open(filename, 'w') as json_file:
                # Fixed indent width: the original used indent=len(analysis),
                # which tied JSON indentation to the number of keys.
                json.dump(analysis, json_file, indent=4)
        except Exception as e:
            print(f"Error: {e}")

    def _csv_to_xes(self, sim):
        '''Convert the simulated CSV log to an XES event log alongside it.'''
        sim_df = pd.read_csv(sim, sep=',')
        sim_df = pm4py.format_dataframe(sim_df, case_id='id_case', activity_key='activity',
                                        timestamp_key='end_time')
        event_log = pm4py.convert_to_event_log(sim_df)
        pm4py.write_xes(event_log, 'output/{}/{}.xes'.format(self._folder, os.path.splitext(os.path.basename(sim))[0]))
class Result(object):
    """Post-process simulated logs: compute summary statistics and write
    them to ``output/<folder>/result_<log name>.json``.

    The report holds total events/traces, per-activity frequencies,
    duration, start/end timestamps, and per-role/per-resource counts.
    """

    def __init__(self, folder: str, params: "Parameters"):
        # Collect every simulated log produced for this experiment folder.
        self._folder = folder
        self._all_file = glob.glob("{}/output/{}/simulated_log_*.csv".format(os.getcwd(), self._folder))
        self._params = params

    def analysis_log(self, sim):
        '''
        Compute the analysis over a single simulated log and persist it.

        :param sim: path to one simulated_log_*.csv file.
        '''
        sim_df = pd.read_csv(sim, sep=',')
        analysis = dict(self.general_analysis(sim_df))

        # TRIGGER_TIMER is a synthetic role used for timer events; skip it.
        for role in self._params.ROLE_CAPACITY:
            if role == 'TRIGGER_TIMER':
                continue
            role_stats = {"total": len(sim_df[sim_df['role'] == role])}
            for resource in self._params.ROLE_CAPACITY[role][0]:
                role_stats[resource] = len(sim_df[sim_df['resource'] == resource])
            analysis[role] = role_stats

        self._write_json(analysis, sim)

    def general_analysis(self, sim_df):
        '''
        Return log-wide statistics: event/trace counts, per-activity
        frequencies, duration, and simulation start/end timestamps.
        '''
        ts_format = '%Y-%m-%d %H:%M:%S.%f'
        analysis = dict()
        analysis['total_events'] = len(sim_df)
        analysis['total_traces'] = int(sim_df['id_case'].nunique())
        for act in self._params.PROCESSING_TIME:
            analysis[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
        # Assumes rows are chronological; microseconds required by '%f' --
        # NOTE(review): confirm the simulator always emits fractional seconds.
        span = (datetime.strptime(sim_df['end_time'].iloc[-1], ts_format)
                - datetime.strptime(sim_df['start_time'].iloc[0], ts_format))
        analysis['duration'] = str(timedelta(seconds=span.total_seconds()))
        analysis['start_simulation'] = sim_df['start_time'].iloc[0]
        analysis['end_simulation'] = sim_df['end_time'].iloc[-1]

        return analysis

    def _analyse(self, type='single'):
        # 'type' shadows the builtin; name kept for backward compatibility.
        if type == 'single':
            self.analysis_log(self._all_file[0])
            self._csv_to_xes(self._all_file[0])
        else:
            for file in self._all_file:
                self.analysis_log(file)

    def _write_json(self, analysis, sim):
        '''Serialize *analysis* as output/<folder>/result_<log name>.json.'''
        try:
            filename = 'output/{}/result_{}.json'.format(self._folder, os.path.splitext(os.path.basename(sim))[0])
            with open(filename, 'w') as json_file:
                # Constant indent (the extracted code used indent=len(analysis)).
                json.dump(analysis, json_file, indent=4)
        except Exception as e:
            print(f"Error: {e}")

    def _csv_to_xes(self, sim):
        '''Convert the simulated CSV log to an XES event log alongside it.'''
        sim_df = pd.read_csv(sim, sep=',')
        sim_df = pm4py.format_dataframe(sim_df, case_id='id_case', activity_key='activity',
                                        timestamp_key='end_time')
        event_log = pm4py.convert_to_event_log(sim_df)
        pm4py.write_xes(event_log, 'output/{}/{}.xes'.format(self._folder, os.path.splitext(os.path.basename(sim))[0]))
def __init__(self, folder: str, params: "Parameters"):
    '''
    :param folder: experiment folder under ./output holding the logs.
    :param params: simulation Parameters object.
    '''
    self._folder = folder
    # All simulated logs produced for this experiment.
    self._all_file = glob.glob("{}/output/{}/simulated_log_*.csv".format(os.getcwd(), self._folder))
    self._params = params
def analysis_log(self, sim):
    '''
    Compute the analysis over a single simulated log and write the
    resulting report to JSON via ``self._write_json``.

    :param sim: path (or file-like object) of one simulated log CSV.
    '''
    sim_df = pd.read_csv(sim, sep=',')
    analysis = dict(self.general_analysis(sim_df))

    # Per-role totals with per-resource breakdown; TRIGGER_TIMER is a
    # synthetic timer role and is excluded from the report.
    for role in self._params.ROLE_CAPACITY:
        if role != 'TRIGGER_TIMER':
            analysis[role] = {"total": len(sim_df[sim_df['role'] == role])}
            for resource in self._params.ROLE_CAPACITY[role][0]:
                analysis[role][resource] = len(sim_df[sim_df['resource'] == resource])

    self._write_json(analysis, sim)

Method to compute the analysis over a single simulated log.

def general_analysis(self, sim_df):
    '''
    Return log-wide statistics for a simulated log DataFrame: total
    events and traces, per-activity frequencies, total duration, and
    simulation start/end timestamps.
    '''
    ts_format = '%Y-%m-%d %H:%M:%S.%f'
    analysis = dict()
    analysis['total_events'] = len(sim_df)
    analysis['total_traces'] = int(sim_df['id_case'].nunique())
    for act in self._params.PROCESSING_TIME:
        analysis[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
    # Rows are assumed chronological: first start_time and last end_time
    # delimit the run. NOTE(review): '%f' requires microseconds in the
    # timestamps -- confirm the simulator always emits them.
    span = (datetime.strptime(sim_df['end_time'].iloc[-1], ts_format)
            - datetime.strptime(sim_df['start_time'].iloc[0], ts_format))
    analysis['duration'] = str(timedelta(seconds=span.total_seconds()))
    analysis['start_simulation'] = sim_df['start_time'].iloc[0]
    analysis['end_simulation'] = sim_df['end_time'].iloc[-1]

    return analysis