
Commit d83f8602 authored by Julia Abramenko

- Added the possibility to have a scatter plot for only one run (latency only)
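The diff below adds exactly this single-run filter for the latency outlier scatter. As a rough standalone sketch of the intended behaviour (not the TimeSeriesAggregator class itself), it assumes that latency_outliers maps run labels such as "l0" to lists of (timestamp, latency) points and that color_dict maps those labels to colours; the toy data and the run variable are made up for illustration:

```python
import matplotlib.pyplot as plt

# Assumed data shapes (hypothetical example values):
# latency_outliers maps run labels ("l0", "l1", ...) to lists of (x, y) outlier points,
# color_dict maps the same labels to plot colours.
latency_outliers = {"l0": [(1, 120), (4, 310)], "l1": [(2, 95), (5, 280)]}
color_dict = {"l0": "tab:blue", "l1": "tab:orange"}
run = 1  # -1 would mean "plot outliers for all runs", mirroring self.run == -1 in the diff

for key, item in latency_outliers.items():
    # skip every run except the selected one, unless all runs are requested
    if run != -1 and key != "l{}".format(run):
        continue
    x, y = zip(*item)
    plt.scatter(x, y, color=color_dict[key], label="outlier {}".format(key))

plt.legend(loc='lower center', ncol=3, borderpad=1, prop={'size': 6})
plt.show()
```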

parent e47c397c
Pipeline #50712 failed in 8 minutes and 56 seconds
@@ -146,22 +146,22 @@ class TimeSeriesAggregator:
"""
processes the collected data and generates plots out of the given data
"""
# THROUGHPUT
# indicate the start of the plotting process
print("Plotting throughput data...")
# cleaning of outliers
# for column in self.agg_throughput.columns:
# self.agg_throughput = self.remove_outliers(self.agg_throughput, column)
for column in self.agg_throughput.columns[:-2]:
self.agg_throughput = self.remove_outliers_throughput(self.agg_throughput, column)
# determine the min and max values of throughput to adjust the scaling of the graphs later
self.min_t = self.agg_throughput.iloc[:, :-2].min().min()
self.max_t = self.agg_throughput.iloc[:, :-2].max().max()
timestamps_t = self.agg_throughput.index
ax = self.agg_throughput.plot()
for column in self.agg_throughput.columns:
plt.plot(self.agg_throughput.index, self.agg_throughput[column], label=column)
# limit the area shown in the plot, depending on min and max values, adjusted by 300 for better view on the plot
plt.ylim(self.min_t - 300, self.max_t + 300)
# remove the line for standard deviation as it is painted as an area around the mean
ax = plt.gca()
ax.lines.remove(ax.lines[-1])
if self.run != -1:
for i in range(self.files, -1, -1):
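The hunk above cleans each per-run throughput column with the new IQR helper and then derives the y-axis limits from the cleaned values. A minimal sketch of that scaling step, under the assumption (suggested by the `[:-2]` slicing) that the last two columns hold aggregates such as the mean and standard deviation; the data frame is invented for illustration:

```python
import pandas as pd
import matplotlib.pyplot as plt

# Invented throughput frame; the assumption is that the last two columns
# hold aggregates (e.g. mean and standard deviation), hence the [:-2] slicing.
df = pd.DataFrame({
    "t0": [900, 950, 980, 1020],
    "t1": [910, 940, 970, 990],
    "mean": [905, 945, 975, 1005],
    "std": [5, 5, 5, 15],
})

run_cols = df.columns[:-2]          # per-run columns only, as in the diff
min_t = df[run_cols].min().min()    # global minimum over all runs
max_t = df[run_cols].max().max()    # global maximum over all runs

df[run_cols].plot()
plt.ylim(min_t - 300, max_t + 300)  # same +/-300 padding as in the diff
plt.show()
```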
@@ -234,15 +234,16 @@ class TimeSeriesAggregator:
        # add theoretical outliers in the graph
        # if self.outlier_scatter:
        #     if self.run != -1:
        #         item = self.latency_outliers[self.agg_latency.columns[self.run]]
        #         x, y = zip(*item)
        #         plt.scatter(x, y, color=color_dict[x], label='outlier {}'.format(x))
        #     else:
        for key, item in self.latency_outliers.items():
            x, y = zip(*item)
            plt.scatter(x, y, color=color_dict[key], label="outlier {}".format(key))
        if self.outlier_scatter:
            if self.run != -1:
                for key, item in self.latency_outliers.items():
                    if key == "l{}".format(self.run):
                        x, y = zip(*item)
                        plt.scatter(x, y, color=color_dict[key], label="outlier {}".format(key))
            else:
                for key, item in self.latency_outliers.items():
                    x, y = zip(*item)
                    plt.scatter(x, y, color=color_dict[key], label="outlier {}".format(key))
        # modify default legend
        ax.legend(loc='lower center', ncol=3, borderpad=1, prop={'size': 6})
        # adjust size of the figure
@@ -275,6 +276,21 @@ class TimeSeriesAggregator:
        df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
        return df_out

    def remove_outliers_throughput(self, df_in, col_name):
        """
        removes outlier data points from the set with the help of the interquartile range
        :param df_in: the input data frame that shall be cleaned
        :param col_name: the name of the column in which the calculation shall be performed
        :return: a cleaned data frame
        """
        q1 = df_in[col_name].quantile(0.25)
        q3 = df_in[col_name].quantile(0.75)
        iqr = q3 - q1  # interquartile range
        fence_low = q1 - 1.5 * iqr
        fence_high = q3 + 1.5 * iqr
        df_out = df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]
        return df_out

    def process(self):
        """
        main method. Executes the methods in the correct order and terminates after running
@@ -282,7 +298,7 @@ class TimeSeriesAggregator:
        """
        self.parser_setup()
        self.parse_input()
        self.extract_input()
        # self.plot_throughput()
        self.plot_throughput()
        self.plot_latency()
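Both remove_outliers and the new remove_outliers_throughput keep only rows that lie inside the 1.5 × IQR fences. A minimal standalone sketch of that filtering on a plain pandas DataFrame; the function name, column name, and values here are illustrative only:

```python
import pandas as pd

def remove_outliers_iqr(df_in, col_name):
    """Keep only rows whose col_name value lies inside the 1.5 * IQR fences."""
    q1 = df_in[col_name].quantile(0.25)
    q3 = df_in[col_name].quantile(0.75)
    iqr = q3 - q1  # interquartile range
    fence_low = q1 - 1.5 * iqr
    fence_high = q3 + 1.5 * iqr
    return df_in.loc[(df_in[col_name] > fence_low) & (df_in[col_name] < fence_high)]

# Illustrative data: 500 falls far outside the fences and is dropped.
df = pd.DataFrame({"throughput": [10, 12, 11, 13, 500, 12, 11]})
print(remove_outliers_iqr(df, "throughput"))
```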