Commit 4e6dbcb6 authored by Daniel Seybold

some usability fixes for elasticity plotting

parent f4992fd9
Pipeline #92908 passed with stage in 13 minutes and 14 seconds
@@ -33,6 +33,7 @@ plt.rcParams["font.family"] = "Times New Roman"
sns.set(style="darkgrid", font='serif', font_scale=0.8)
############ Configuration ################
cbMapping = {
"4" : "low",
@@ -71,7 +72,7 @@ labelMapping = cbMapping
inputPath = "../dataframes/cb-read-dataframes/"
############## Execution ############
def load_dfs(threads):
iteration = "{}{}-threads".format(inputPath, threads)
......
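For the plotting script above, load_dfs builds the per-thread-count path by concatenating inputPath and the thread count (e.g. ../dataframes/cb-read-dataframes/128-threads). A minimal sketch of how such a path could be read back, assuming (as the aggregator below suggests) that the files are pickled pandas DataFrames; everything beyond the path construction is illustrative and not taken from the commit:

import pandas as pd

inputPath = "../dataframes/cb-read-dataframes/"

def load_dfs(threads):
    # same path construction as in the diff; the read_pickle call is an assumption
    # based on the DataFrame.to_pickle() calls added to the aggregator below
    iteration = "{}{}-threads".format(inputPath, threads)
    return pd.read_pickle(iteration)

# hypothetical usage: df = load_dfs(128)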
@@ -16,9 +16,6 @@ class TimeSeriesAggregator:
def __init__(self):
self.parser = None
self.input_path = None
self.output_path = None
self.marker_pos_x = None
self.label_text = None
self.agg_latency = None
self.agg_throughput = None
self.cols_latency = None
@@ -29,14 +26,13 @@ class TimeSeriesAggregator:
self.max_t = 0
self.min_l = 0
self.max_l = 0
self.plotAverage = False
def parser_setup(self):
"""
sets up a parser that allows to read parameters when executing
"""
#TODO: add output format as parameter: pdf/png and align plotting call
# define input directory with --input/-i and the output path with --output/-o on which
# the resulting plots shall be stored
@@ -45,15 +41,8 @@
help='path of the folder with the load.txt')
#parser.add_argument('--output', '-o', type=str, nargs=1, required=True,
# help='path to the store timeseries plot')
# optional arguments for marker position at the x axis and the label_text if a marker is given
parser.add_argument('--marker_pos_x', '-pos', type=int, nargs=1, default=None, required=False,
help='fixing a marker at given x position')
parser.add_argument('--label', '-l', type=str, nargs=1, default=None, required=False,
help='label text to display at the marker position (argument marker_pos_x)')
# optional argument: run number (the specified run will be shown in the whole context)
parser.add_argument('--run', '-r', type=int, choices=range(0, 10), default=-1, required=False)
parser.add_argument('--plotAverage', '-pa', type=bool, default=False, required=False)
# fix the parser as main parser object of the TimeSeriesAggregator object
self.parser = parser
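One note on the --plotAverage switch above: argparse's type=bool converts any non-empty string to True (so --plotAverage False still yields True), which is why boolean switches are usually declared with action='store_true'. A self-contained sketch of an equivalent parser; the argument names mirror the diff, while the store_true variant is a suggested alternative rather than part of the commit:

import argparse

def build_parser():
    # illustrative stand-alone version of parser_setup()
    parser = argparse.ArgumentParser(description='timeseries aggregation (sketch)')
    parser.add_argument('--input', '-i', type=str, nargs=1, required=True,
                        help='path of the folder with the load.txt')
    parser.add_argument('--run', '-r', type=int, choices=range(0, 10), default=-1, required=False)
    # store_true gives a flag that is False unless passed, avoiding the type=bool pitfall
    parser.add_argument('--plotAverage', '-pa', action='store_true', required=False)
    return parser

args = build_parser().parse_args(['--input', './data', '--run', '2', '--plotAverage'])
print(args.input[0], args.run, args.plotAverage)   # ./data 2 True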
def parse_input(self):
@@ -70,21 +59,8 @@
#self.output_path = args.output[0]
#if not os.path.isdir(self.output_path):
# os.makedirs(self.output_path)
# set the marker and label_text if given, otherwise None
if args.marker_pos_x is not None:
self.marker_pos_x = args.marker_pos_x[0]
else:
self.marker_pos_x = None
if args.label is not None:
self.label_text = args.label[0]
else:
self.label_text = None
# get the run number, if given
if args.run is not None:
self.run = args.run
if args.plotAverage:
self.plotAverage = True
def extract_input(self):
@@ -140,46 +116,21 @@ class TimeSeriesAggregator:
self.agg_latency = agg_frame_latency
self.agg_throughput = agg_frame_throughput
def plot_all_timeseries(self):
def extractDataframe(self):
"""
processes the collected data and generates plots out of the given data
extract the dataframe and save to disk
"""
# THROUGHPUT
# indicate the start of the plotting process
print("Plotting timeseries data...")
#TODO: only clean outliers if values are really outliers, cleaning all values by default will corrupt the data, i.e. determination of an outliers needs to be defined
#TODO: move to its own function
# cleaning of outliers
#for column in self.agg_throughput.columns:
# self.agg_throughput = self.remove_outliers(self.agg_throughput, column)
# determine the min and max values of throughput to adjust the scaling of the graphs later
self.min_t = self.agg_throughput.iloc[:, :-2].min().min()
self.max_t = self.agg_throughput.iloc[:, :-2].max().max()
timestamps_t = self.agg_throughput.index
self.agg_throughput.to_pickle("C:/mowgli/cb-write-dataframes/128-threads")
self.agg_throughput.to_pickle(os.path.join(self.input_path,"throughputDataframe"))
self.agg_latency.to_pickle(os.path.join(self.input_path,"latencyDataframe"))
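The two lines computing min_t/max_t above take a column-wise minimum/maximum and then reduce again to a single scalar, skipping the last two columns of the aggregated frame (what those trailing columns hold is not visible in the diff, so treating them as derived values is an assumption). A toy illustration of the double min()/max() pattern:

import pandas as pd

# toy frame: two per-run throughput columns plus two trailing columns that are excluded
frame = pd.DataFrame({
    "run_0": [100, 120, 90],
    "run_1": [110, 95, 130],
    "avg":   [105, 107.5, 110],
    "std":   [10.0, 17.7, 28.3],
})

min_t = frame.iloc[:, :-2].min().min()   # per-column mins (90, 95) -> 90
max_t = frame.iloc[:, :-2].max().max()   # per-column maxs (120, 130) -> 130
print(min_t, max_t)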
@staticmethod
def remove_outliers(df_in, col_name):
"""
removes data point outliers from the set and adjusts them with help of the interquartile range
:param df_in: the input data frame from which shall be cleaned
:param col_name: the current column name: in which column the calculation shall be performed
:return: a cleaned data frame
"""
q1 = df_in[col_name].quantile(0.25)
q3 = df_in[col_name].quantile(0.75)
iqr = q3 - q1 # Interquartile range
fence_low = q1 - 1.5 * iqr
fence_high = q3 + 1.5 * iqr
df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
return df_out
print(self.agg_throughput)
print(self.agg_latency)
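The remove_outliers helper above applies the standard 1.5 * IQR fences (Tukey's rule): rows whose value falls below q1 - 1.5*IQR or above q3 + 1.5*IQR are dropped, which is exactly the concern raised in the TODO about cleaning values that are not genuine outliers. A standalone worked example of the same filter on a toy column (not part of the commit):

import pandas as pd

df = pd.DataFrame({"throughput": [980, 995, 1000, 1005, 1010, 4000]})  # 4000 is the outlier

q1 = df["throughput"].quantile(0.25)
q3 = df["throughput"].quantile(0.75)
iqr = q3 - q1
fence_low = q1 - 1.5 * iqr
fence_high = q3 + 1.5 * iqr

cleaned = df.loc[(df["throughput"] > fence_low) & (df["throughput"] < fence_high)]
print(cleaned)   # the 4000 row is removed, the ~1000 rows remain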
def process(self):
"""
@@ -188,7 +139,7 @@ class TimeSeriesAggregator:
self.parser_setup()
self.parse_input()
self.extract_input()
self.plot_all_timeseries()
self.extractDataframe()
if __name__ == "__main__":
......
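After this commit, process() wires the steps together as parser_setup -> parse_input -> extract_input -> extractDataframe, so the aggregator only pickles the aggregated dataframes and the plotting happens in the separate plotting scripts. A hypothetical driver, assuming the class is importable from a module called timeseries_aggregator (the module name and CLI values are assumptions):

# hypothetical driver mirroring the __main__ block
from timeseries_aggregator import TimeSeriesAggregator

if __name__ == "__main__":
    aggregator = TimeSeriesAggregator()
    # reads --input/-i (and optionally --run/-r, --plotAverage/-pa), aggregates the
    # load.txt timeseries and writes throughputDataframe/latencyDataframe into the input folder
    aggregator.process()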