diff --git a/analytics/elasticity/fancy.py b/analytics/elasticity/elasticityPhases.py similarity index 98% rename from analytics/elasticity/fancy.py rename to analytics/elasticity/elasticityPhases.py index b0c4c7b75afd14ac33636a537e67473247c6669c..c74087c829b5af58043b175b550b72750fffe281 100644 --- a/analytics/elasticity/fancy.py +++ b/analytics/elasticity/elasticityPhases.py @@ -33,6 +33,7 @@ plt.rcParams["font.family"] = "Times New Roman" sns.set(style="darkgrid", font='serif', font_scale=0.8) +############ Configuration ################ cbMapping = { "4" : "low", @@ -71,7 +72,7 @@ labelMapping = cbMapping inputPath = "../dataframes/cb-read-dataframes/" - +############## Execution ############ def load_dfs(threads): iteration = "{}{}-threads".format(inputPath, threads) diff --git a/analytics/elasticity/extractDataFrame.py b/analytics/elasticity/extractDataFrame.py index 2cdbf180e029feaacb3c6971e3ece5086943b145..c9f7e6a9ddd041633dd1beebeeaacd7ec9fcdd23 100644 --- a/analytics/elasticity/extractDataFrame.py +++ b/analytics/elasticity/extractDataFrame.py @@ -16,9 +16,6 @@ class TimeSeriesAggregator: def __init__(self): self.parser = None self.input_path = None - self.output_path = None - self.marker_pos_x = None - self.label_text = None self.agg_latency = None self.agg_throughput = None self.cols_latency = None @@ -29,14 +26,13 @@ class TimeSeriesAggregator: self.max_t = 0 self.min_l = 0 self.max_l = 0 - self.plotAverage = False + def parser_setup(self): """ sets up a parser that allows to read parameters when executing """ - #TODO: add output format as parameter: pdf/png and align plotting call # define input directory with --input/-i and the output path with --output/-o on which # the resulting plots shall be stored @@ -45,15 +41,8 @@ class TimeSeriesAggregator: help='path of the folder with the load.txt') #parser.add_argument('--output', '-o', type=str, nargs=1, required=True, # help='path to the store timeseries plot') - # optional arguments for marker position at the x axis and the label_text if a marker is given - parser.add_argument('--marker_pos_x', '-pos', type=int, nargs=1, default=None, required=False, - help='fixing a marker at given x position') - parser.add_argument('--label', '-l', type=str, nargs=1, default=None, required=False, - help='label text to display at the marker position (argument marker_pos_x') - # optional argument: run number (the specified run will be shown in the whole context) - parser.add_argument('--run', '-r', type=int, choices=range(0, 10), default=-1, required=False) - parser.add_argument('--plotAverage', '-pa', type=bool, default=False, required=False) - # fix the parser as main parser object of the TimeSeriesAggregator object + + self.parser = parser def parse_input(self): @@ -70,21 +59,8 @@ class TimeSeriesAggregator: #self.output_path = args.output[0] #if not os.path.isdir(self.output_path): # os.makedirs(self.output_path) - # set the marker and label_text if given, otherwise None - if args.marker_pos_x is not None: - self.marker_pos_x = args.marker_pos_x[0] - else: - self.marker_pos_x = None - if args.label is not None: - self.label_text = args.label[0] - else: - self.label_text = None - # get the run number, if given - if args.run is not None: - self.run = args.run - - if args.plotAverage: - self.plotAverage = True + + def extract_input(self): @@ -140,46 +116,21 @@ class TimeSeriesAggregator: self.agg_latency = agg_frame_latency self.agg_throughput = agg_frame_throughput - def plot_all_timeseries(self): + def extractDataframe(self): """ - processes the collected data and generates plots out of the given data + extract the dataframe and save to disk """ - # THROUGHPUT - # indicate the start of the plotting process - print("Plotting timeseries data...") - - - #TODO: only clean outliers if values are really outliers, cleaning all values by default will corrupt the data, i.e. determination of an outliers needs to be defined - #TODO: move to its own function - # cleaning of outliers - #for column in self.agg_throughput.columns: - # self.agg_throughput = self.remove_outliers(self.agg_throughput, column) - # determine the min and max values of throughput to adjust the scaling of the graphs later - - self.min_t = self.agg_throughput.iloc[:, :-2].min().min() self.max_t = self.agg_throughput.iloc[:, :-2].max().max() timestamps_t = self.agg_throughput.index - self.agg_throughput.to_pickle("C:/mowgli/cb-write-dataframes/128-threads") - + self.agg_throughput.to_pickle(os.path.join(self.input_path,"throughputDataframe")) + self.agg_latency.to_pickle(os.path.join(self.input_path,"latencyDataframe")) - @staticmethod - def remove_outliers(df_in, col_name): - """ - removes data point outliers from the set and adjusts them with help of the interquartile range - :param df_in: the input data frame from which shall be cleaned - :param col_name: the current column name: in which column the calculation shall be performed - :return: a cleaned data frame - """ - q1 = df_in[col_name].quantile(0.25) - q3 = df_in[col_name].quantile(0.75) - iqr = q3 - q1 # Interquartile range - fence_low = q1 - 1.5 * iqr - fence_high = q3 + 1.5 * iqr - df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)] - return df_out + print(self.agg_throughput) + print(self.agg_latency) + def process(self): """ @@ -188,7 +139,7 @@ class TimeSeriesAggregator: self.parser_setup() self.parse_input() self.extract_input() - self.plot_all_timeseries() + self.extractDataframe() if __name__ == "__main__":