result_analysis
Class to generate the output json file "result_simulated_log(experiment_name)" with some analysis on the simulated log. See below for the full list of information provided as output.
'''
Class to generate the output json file "result_simulated_log(experiment_name)" with some analysis on the simulated log.
Example of analysis:

| Name | Description |
|:------------:|:-------------------------- |
| total_events | Total events in the log |
| total_traces | Total traces in the log |
| *A*_frequency | Total occurrences of activity *A* in the log |
| total_duration | Total duration of simulation |
| start_date | Start date of the simulation |
| end_date | End date of the simulation |

'''


import glob
import os
import pandas as pd
import json
from parameters import Parameters
import pm4py
from datetime import datetime, timedelta


class Result(object):
    """Analyse the simulated logs of one experiment folder.

    Collects the general statistics of each log (total events, total
    traces, per-activity frequency, duration, start/end time) plus the
    per-role and per-resource workload counts, and writes the result to
    ``output/<folder>/result_<log name>.json``.
    """

    # Timestamp layout produced by the simulator in the CSV logs.
    _TIMESTAMP_FMT = '%Y-%m-%d %H:%M:%S.%f'

    def __init__(self, folder: str, params: Parameters):
        self._folder = folder
        # All the simulated logs generated for this experiment.
        self._all_file = glob.glob("{}/output/{}/simulated_log_*.csv".format(os.getcwd(), self._folder))
        self._params = params

    @staticmethod
    def _base_name(sim: str) -> str:
        # File name of the log without directory and extension,
        # e.g. ".../simulated_log_3.csv" -> "simulated_log_3".
        return os.path.splitext(os.path.basename(sim))[0]

    def analysis_log(self, sim):
        '''
        Compute the analysis over the single log file *sim* and write it
        to the corresponding result JSON file.
        '''
        analysis = dict()
        sim_df = pd.read_csv(sim, sep=',')
        analysis.update(self.general_analysis(sim_df))

        # Workload per role and, inside each role, per resource.
        # TRIGGER_TIMER is a synthetic role for timer events, not a
        # real participant, so it is skipped.
        for role in self._params.ROLE_CAPACITY.keys():
            if role != 'TRIGGER_TIMER':
                analysis[role] = {"total": len(sim_df[sim_df['role'] == role])}
                for resource in self._params.ROLE_CAPACITY[role][0]:
                    analysis[role][resource] = len(sim_df[sim_df['resource'] == resource])

        self._write_json(analysis, sim)

    def general_analysis(self, sim_df):
        '''
        Return a dict with the global statistics of the log *sim_df*:
        total events, total traces, per-activity frequency, duration and
        start/end timestamps of the simulation.
        '''
        analysis = dict()
        analysis['total_events'] = len(sim_df)
        analysis['total_traces'] = len(set(sim_df['id_case']))
        for act in self._params.PROCESSING_TIME.keys():
            analysis[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
        # Duration = first start to last end; rows are assumed to be in
        # chronological order (TODO confirm against the simulator output).
        start = datetime.strptime(sim_df['start_time'].iloc[0], self._TIMESTAMP_FMT)
        end = datetime.strptime(sim_df['end_time'].iloc[-1], self._TIMESTAMP_FMT)
        analysis['duration'] = str(timedelta(seconds=(end - start).total_seconds()))
        analysis['start_simulation'] = sim_df['start_time'].iloc[0]
        analysis['end_simulation'] = sim_df['end_time'].iloc[-1]

        return analysis

    def _analyse(self, type='single'):
        '''
        Analyse the first log only ('single', also converting it to XES)
        or every simulated log found in the experiment folder.
        '''
        if type == 'single':
            self.analysis_log(self._all_file[0])
            self._csv_to_xes(self._all_file[0])
        else:
            for file in self._all_file:
                self.analysis_log(file)

    def _write_json(self, analysis, sim):
        '''Write *analysis* to result_<log name>.json; errors are only reported.'''
        try:
            filename = 'output/{}/result_{}.json'.format(self._folder, self._base_name(sim))
            with open(filename, 'w') as json_file:
                # Fixed indent: the original indent=len(analysis) made the
                # indentation width depend on the number of keys.
                json.dump(analysis, json_file, indent=2)
        except Exception as e:
            # Best effort: a failed write must not abort the whole analysis.
            print(f"Error: {e}")

    def _csv_to_xes(self, sim):
        '''Convert the CSV log *sim* to an XES event log in the same folder.'''
        sim_df = pd.read_csv(sim, sep=',')
        sim_df = pm4py.format_dataframe(sim_df, case_id='id_case', activity_key='activity',
                                        timestamp_key='end_time')
        event_log = pm4py.convert_to_event_log(sim_df)
        pm4py.write_xes(event_log, 'output/{}/{}.xes'.format(self._folder, self._base_name(sim)))
class
Result:
class Result(object):
    """Produce JSON summaries of the simulated logs of an experiment folder."""

    def __init__(self, folder: str, params: Parameters):
        self._folder = folder
        # Every simulated log CSV produced for this experiment.
        self._all_file = glob.glob(f"{os.getcwd()}/output/{self._folder}/simulated_log_*.csv")
        self._params = params

    def analysis_log(self, sim):
        '''
        Method to compute the analysis over the single log
        '''
        frame = pd.read_csv(sim, sep=',')
        report = dict(self.general_analysis(frame))

        # Per-role totals plus one counter per resource; the synthetic
        # TRIGGER_TIMER role is left out of the report.
        for role, capacity in self._params.ROLE_CAPACITY.items():
            if role == 'TRIGGER_TIMER':
                continue
            role_stats = {"total": len(frame[frame['role'] == role])}
            for resource in capacity[0]:
                role_stats[resource] = len(frame[frame['resource'] == resource])
            report[role] = role_stats

        self._write_json(report, sim)

    def general_analysis(self, sim_df):
        '''Collect the global statistics of the simulated log.'''
        stats = dict()
        stats['total_events'] = len(sim_df)
        stats['total_traces'] = len(set(sim_df['id_case']))
        for act in self._params.PROCESSING_TIME.keys():
            stats[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
        fmt = '%Y-%m-%d %H:%M:%S.%f'
        first_start = datetime.strptime(sim_df['start_time'].iloc[0], fmt)
        last_end = datetime.strptime(sim_df['end_time'].iloc[-1], fmt)
        stats['duration'] = str(timedelta(seconds=(last_end - first_start).total_seconds()))
        stats['start_simulation'] = sim_df['start_time'].iloc[0]
        stats['end_simulation'] = sim_df['end_time'].iloc[-1]

        return stats

    def _analyse(self, type='single'):
        '''Run the analysis over one log ('single') or over all of them.'''
        if type != 'single':
            for file in self._all_file:
                self.analysis_log(file)
        else:
            self.analysis_log(self._all_file[0])
            self._csv_to_xes(self._all_file[0])

    def _write_json(self, analysis, sim):
        '''Persist *analysis* next to the log; failures are only printed.'''
        try:
            base = os.path.splitext(os.path.basename(sim))[0]
            target = f'output/{self._folder}/result_{base}.json'
            with open(target, 'w') as out:
                json.dump(analysis, out, indent=len(analysis))
        except Exception as e:
            print(f"Error: {e}")

    def _csv_to_xes(self, sim):
        '''Export the CSV log as an XES event log.'''
        frame = pm4py.format_dataframe(pd.read_csv(sim, sep=','),
                                       case_id='id_case',
                                       activity_key='activity',
                                       timestamp_key='end_time')
        base = os.path.splitext(os.path.basename(sim))[0]
        pm4py.write_xes(pm4py.convert_to_event_log(frame),
                        f'output/{self._folder}/{base}.xes')
Result(folder: str, params: parameters.Parameters)
def
analysis_log(self, sim):
def analysis_log(self, sim):
    '''
    Method to compute the analysis over the single log
    '''
    log_df = pd.read_csv(sim, sep=',')
    summary = {}
    summary.update(self.general_analysis(log_df))

    # Add the per-role totals and one counter per resource; the synthetic
    # TRIGGER_TIMER role is skipped.
    for role in self._params.ROLE_CAPACITY:
        if role == 'TRIGGER_TIMER':
            continue
        summary[role] = {"total": len(log_df[log_df['role'] == role])}
        for resource in self._params.ROLE_CAPACITY[role][0]:
            summary[role][resource] = len(log_df[log_df['resource'] == resource])

    self._write_json(summary, sim)
Method to compute the analysis over a single simulated log.
def
general_analysis(self, sim_df):
def general_analysis(self, sim_df):
    '''Return a dict with the global statistics of the simulated log.'''
    stats = {}
    stats['total_events'] = len(sim_df)
    stats['total_traces'] = len(set(sim_df['id_case']))
    # One frequency counter per activity known to the simulation parameters.
    for act in self._params.PROCESSING_TIME.keys():
        stats[act + "_frequency"] = len(sim_df[sim_df['activity'] == act])
    fmt = '%Y-%m-%d %H:%M:%S.%f'
    first_start = datetime.strptime(sim_df['start_time'].iloc[0], fmt)
    last_end = datetime.strptime(sim_df['end_time'].iloc[-1], fmt)
    # Duration spans from the first start to the last end timestamp;
    # rows are assumed chronological (TODO confirm with the simulator).
    stats['duration'] = str(timedelta(seconds=(last_end - first_start).total_seconds()))
    stats['start_simulation'] = sim_df['start_time'].iloc[0]
    stats['end_simulation'] = sim_df['end_time'].iloc[-1]

    return stats