"""Plot benchmark throughput over time together with scale-out event markers.

For every thread configuration listed in ``charts`` the script loads a
pickled throughput dataframe and a JSON file with scale-out events from
``inputPath``, draws the throughput as a seaborn line plot, adds vertical
marker lines for the scale-out phases and stores the figure as
``fancy_single.pdf`` inside ``inputPath``.
"""
import argparse
import glob
import itertools
import json
import os
import random

import matplotlib
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# sns.color_palette().as_hex()

# STYLE SETTINGS
colors = [
    # '#1f77b4',
    # '#2ca02c',
    '#d62728',
    '#ff7f0e',
    '#9467bd',
    '#8c564b',
    '#e377c2',
    '#7f7f7f',
    '#bcbd22',
    '#17becf',
]
# Two independent colour cycles: `palette` feeds the throughput lines and the
# first two marker lines, `palette_end` feeds the "scale-out complete" markers.
palette = itertools.cycle(sns.color_palette(colors))
palette_end = itertools.cycle(sns.color_palette(colors))

# NOTE(review): sns.set(font='serif') below assigns the font family again, so
# this setting probably has no visible effect - confirm before relying on it.
plt.rcParams["font.family"] = "Times New Roman"
sns.set(style="darkgrid", font='serif', font_scale=0.8)

############ Configuration ################

# Legend label for each thread count of the "cb" benchmarks.
cbMapping = {
    "4": "low",
    "32": "optimal",
    "128": "overload",
}

# Legend label for each thread count of the "ca" benchmarks.
caMapping = {
    "4": "low",
    "16": "optimal",
    "64": "overload",
}

# Benchmark selection: enable exactly one charts/labelMapping/inputPath group.
#
# ca-write-dataframes
# charts = ["4", "16", "64"]
# labelMapping = caMapping
# inputPath = "../dataframes/ca-write-dataframes/"
#
# ca-read-dataframes
# charts = ["4", "16", "64"]
# labelMapping = caMapping
# inputPath = "../dataframes/ca-read-dataframes/"
#
# cb-write-dataframes
# charts = ["4", "32", "128"]
# labelMapping = cbMapping
# inputPath = "../dataframes/cb-write-dataframes/"
#
# cb-read-dataframes
# charts = ["4", "32", "128"]
charts = ["128"]
labelMapping = cbMapping
inputPath = "../dataframes/cb-read-dataframes/"


############## Execution ############

def load_dfs(threads):
    """Load the throughput dataframe of one thread configuration.

    Reads the pickle ``<inputPath><threads>-threads``, drops the
    pre-aggregated ``Mean`` / ``Standard Deviation`` columns, reshapes the
    remaining columns into long form (``timestamp``, ``iteration``,
    ``bandwidth``), tags every row with the configured legend label and
    removes null rows as well as the outer 5 % on both ends of the bandwidth
    distribution.

    Args:
        threads: Thread count as used in the file name and as key of
            ``labelMapping`` (e.g. ``"128"``).

    Returns:
        Long-form ``pandas.DataFrame`` with the columns ``timestamp``,
        ``iteration``, ``bandwidth`` and ``threads``.

    Raises:
        KeyError: If ``threads`` has no entry in ``labelMapping``.
    """
    iteration = "{}{}-threads".format(inputPath, threads)
    print("processing {}".format(iteration))

    # Load dumped dataframe.
    # NOTE: unpickling can execute arbitrary code - only load trusted dumps.
    df = pd.read_pickle(iteration)

    # Remove unnecessary columns (std and mean are determined by seaborn)
    df = df.drop(columns=['Mean', 'Standard Deviation'])
    df['timestamp'] = df.index

    # Transform dataframe group columns into single column (seaborn req)
    df = df.melt(id_vars=['timestamp'], var_name='iteration', value_name='bandwidth')

    print(labelMapping)
    lineLabel = labelMapping[threads]
    df['threads'] = " {} ".format(lineLabel)

    # Drop rows with null values
    df = df.dropna()

    # Keep only values strictly between the 5th and 95th percentile. Both
    # bounds are taken from the same, untrimmed distribution; computing the
    # lower bound after the upper cut would shift it.
    lower_bound = df.bandwidth.quantile(.05)
    upper_bound = df.bandwidth.quantile(.95)
    df = df[(df.bandwidth > lower_bound) & (df.bandwidth < upper_bound)]
    return df


def load_events(threads):
    """Load the scale-out events of one thread configuration.

    Reads ``<inputPath><threads>-events.json`` into a dataframe and adds the
    columns ``diffVM``, ``diffDBMS`` and ``threads``.

    Args:
        threads: Thread count as used in the file name; stored unchanged in
            the ``threads`` column.

    Returns:
        ``pandas.DataFrame`` built from the JSON content plus the added
        columns.

    Raises:
        KeyError: If the JSON data has no ``endVM`` or ``DBMSReady`` column.
    """
    iteration = "{}{}-events.json".format(inputPath, threads)
    with open(iteration, encoding="utf-8") as json_file:
        data = json.load(json_file)
    df = pd.DataFrame.from_dict(data)

    # diff(axis=1) subtracts from every column the column directly to its
    # left, so each duration depends on the column order of the JSON file.
    # NOTE(review): presumably the columns are ordered chronologically with
    # the start event first - verify against the event dump.
    column_diffs = df.diff(axis=1)
    df["diffVM"] = column_diffs["endVM"]
    df["diffDBMS"] = column_diffs["DBMSReady"]
    df["threads"] = threads
    return df


dfs = []
events = []

# LOADING PHASE
for threads in charts:
    events.append(load_events(threads))
    dfs.append(load_dfs(threads))

# PLOTTING PHASE
dfs = pd.concat(dfs, ignore_index=True)
events = pd.concat(events, ignore_index=False)
print(events)

# Timestamp (x-axis value) at which the scale-out is triggered.
scale_begin = 180
vm_end = events["diffVM"].mean()

dfs['threads'] = dfs['threads'].astype(str)

# Plot linecharts.
# The shared colour cycle is handed to seaborn on purpose; presumably seaborn
# takes the line colours from it so the next(palette) calls below continue
# with colours that are not used by a throughput line yet.
ax = sns.lineplot(x="timestamp", y="bandwidth", hue="threads", palette=palette, data=dfs)

# scaling lines
plt.axvline(scale_begin, color=next(palette), linestyle=':', label='scale-out trigger')
plt.axvline(scale_begin + vm_end, color=next(palette), linestyle=':', label='VM ready')

# Use the average scale-out time per thread configuration for the end marker:
# trigger time + mean VM duration + mean DBMS duration.
events_by_threads = events.groupby(["threads"], sort=False)
end_timestamps = (
    events_by_threads["diffDBMS"].mean()
    + scale_begin
    + events_by_threads["diffVM"].mean()
)
print(end_timestamps)

# Ending timestamp viz for each thread config
for end_timestamp in end_timestamps:
    plt.axvline(end_timestamp, color=next(palette_end), linestyle=':',
                label="scale-out complete")

ax.set_ylabel('average throughput in ops/s')
ax.set_xlabel('runtime in s')
# no title as title will be set via latex

# Remove the legend title. Some seaborn releases emit the hue variable name
# as the first legend entry; drop it only when it is really there, so that a
# real data series is never removed from the legend.
handles, labels = ax.get_legend_handles_labels()
if labels and labels[0] == "threads":
    handles, labels = handles[1:], labels[1:]
ax.legend(handles=handles, labels=labels)

plt.ylim(0, 16000)

# store the created picture
# save file under the predetermined directory
output_file = os.path.join(inputPath, "fancy_single.pdf")
plt.savefig(output_file, format='pdf')
plt.close()