From 939f5bdd96b03a3c835741fb6af085951910f625 Mon Sep 17 00:00:00 2001 From: Johannes Mey <johannes.mey@tu-dresden.de> Date: Thu, 13 Feb 2020 15:17:44 +0100 Subject: [PATCH] update diagram drawing, remove unfinished toolwise option --- .../trainbenchmark-reporting/build.gradle | 7 - .../trainbenchmark-reporting/combined.R | 94 ++--------- .../trainbenchmark-reporting/individual.R | 34 ++-- .../trainbenchmark-reporting/toolwise.R | 154 ------------------ .../trainbenchmark-reporting/toolwise.py | 33 ---- .../trainbenchmark-reporting/toolwise.sh | 4 - 6 files changed, 35 insertions(+), 291 deletions(-) delete mode 100644 trainbenchmark/trainbenchmark-reporting/toolwise.R delete mode 100644 trainbenchmark/trainbenchmark-reporting/toolwise.py delete mode 100755 trainbenchmark/trainbenchmark-reporting/toolwise.sh diff --git a/trainbenchmark/trainbenchmark-reporting/build.gradle b/trainbenchmark/trainbenchmark-reporting/build.gradle index f9eceadfa..d0d04dbb1 100644 --- a/trainbenchmark/trainbenchmark-reporting/build.gradle +++ b/trainbenchmark/trainbenchmark-reporting/build.gradle @@ -17,10 +17,3 @@ task plotIndividual(type: Exec) { commandLine 'Rscript', 'individual.R' dependsOn doMerge } - -task plotToolwise(type: Exec) { - group = 'Benchmark' - description = 'Plots the individual TrainBenchmark results per tool' - commandLine './toolwise.sh' - dependsOn doMerge -} diff --git a/trainbenchmark/trainbenchmark-reporting/combined.R b/trainbenchmark/trainbenchmark-reporting/combined.R index ddd13d8ee..4015a4a00 100644 --- a/trainbenchmark/trainbenchmark-reporting/combined.R +++ b/trainbenchmark/trainbenchmark-reporting/combined.R @@ -130,7 +130,7 @@ for (scenario in c("inject", "repair")) { p = ggplot(df) + #na.omit(df)) + aes(x = as.factor(Model), y = Time) + - labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements", y = "Execution times [ms]") + + labs(title = paste("All", paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Queries in Sequence\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") + geom_point(aes(col = Tool, shape = Tool), size = 2.0) + scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10), @@ -174,96 +174,36 @@ for (scenario in c("inject", "repair")) { scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e-3,3e4), expand = c(0, 0)) + theme_bw() + theme( - plot.title = element_blank(), # element_text(hjust = 0.5), + plot.title = element_text(hjust = 0.5), # text = element_text(family="Open Sans", size = 10), # legend.key = element_blank(), legend.title = element_blank(), - strip.text.x = element_blank(), - legend.position = "none", # "none", # + # strip.text.x = element_blank(), + legend.position = "bottom", # "none", # axis.text = element_text(size = 7) #, panel.grid.minor = element_blank() ) fnTmp <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, "-tmp.pdf", sep="") fn <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, ".pdf", sep="") - if (phase == "Read") { - p = p + scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e0,3e4), expand = c(0, 0)) + - scale_colour_manual(values = c( "#56B4E9", "#56B4E9", # Sky blue - Name Lookup - "#009E73", "#009E73", # Bluish green - Intrinsic References - "#0072B2", "#0072B2", # Blue - Manual Serialization - "#E69F00", "#E69F00", # Orange - RelAST - "#D55E00", # Vermillion - Tinkergraph - "#CC79A7", # ReddishPurple - VIATRA - "#F0E442" # Yellow - ), - labels = c( - "Name Lookup / Reflection-based", - "Name Lookup (Incremental) / Reflection-based", - "Intrinsic References / Modified Reflection-based", - "Intrinsic References (Incremental) / Modified Reflection-based", - "Relational RAGs / Hand-written", - "Relational RAGs (Incremental) / Hand-written", - "Relational RAGs / Generated", - "Relational RAGs (Incremental) / Generated" - ) - ) + - scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10), - labels = c( - "Name Lookup / Reflection-based", - "Name Lookup (Incremental) / Reflection-based", - "Intrinsic References / Modified Reflection-based", - "Intrinsic References (Incremental) / Modified Reflection-based", - "Relational RAGs / Hand-written", - "Relational RAGs (Incremental) / Hand-written", - "Relational RAGs / Generated", - "Relational RAGs (Incremental) / Generated" - ), - guide=guide_legend(ncol=2,nrow=4) - ) + - theme( - plot.title = element_blank(), # element_text(hjust = 0.5), - # text = element_text(family="Open Sans", size = 10), - # legend.key = element_blank(), - legend.title = element_blank(), - strip.text.x = element_blank(), - legend.position = "none", # "none", # - axis.text = element_text(size = 9) - #, panel.grid.minor = element_blank() - ) - p <- p + - # reverse ticks - annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) + - # remove clipping - coord_cartesian(clip = "off") + - # add space between ticks and labels - theme(axis.text.y = element_text(margin = margin(r = 2))) - ggsave( + + p <- p + + # reverse ticks + # annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) + + # remove clipping + # coord_cartesian(clip = "off") + + # add space between ticks and labels + # theme(axis.text.y = element_text(margin = margin(r = 4))) + + ggsave( plot = p, filename = fnTmp, - width = 160, - height = 100, + width = 297, + height = 210, units = "mm" - ) - } else { + ) - - p <- p + - # reverse ticks - annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) + - # remove clipping - coord_cartesian(clip = "off") + - # add space between ticks and labels - theme(axis.text.y = element_text(margin = margin(r = 4))) - - ggsave( - plot = p, - filename = fnTmp, - width = 78, - height = 104, - units = "mm" - ) - } embed_fonts(fnTmp, outfile=fn) file.remove(fnTmp) diff --git a/trainbenchmark/trainbenchmark-reporting/individual.R b/trainbenchmark/trainbenchmark-reporting/individual.R index e4e985b25..3245c26b4 100644 --- a/trainbenchmark/trainbenchmark-reporting/individual.R +++ b/trainbenchmark/trainbenchmark-reporting/individual.R @@ -129,7 +129,7 @@ for (scenario in c("inject", "repair")) { for (cpage in 1:6) { p = ggplot(df) + #na.omit(df)) + aes(x = as.factor(Model), y = Time) + - labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements") +#, y = "Execution times [ms]") + + labs(title = paste("Individual", workloads[cpage], paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Query\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") + geom_point(aes(col = Tool, shape = Tool), size = 2.0) + scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10), labels = c( "Name Lookup", @@ -173,34 +173,36 @@ for (scenario in c("inject", "repair")) { facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") + theme_bw() + theme( - plot.title = element_blank(), # element_text(hjust = 0.5), + plot.title = element_text(hjust = 0.5), # text = element_text(family="Open Sans", size = 10), # legend.key = element_blank(), legend.title = element_blank(), - strip.text.x = element_blank(), - legend.position = "none", # "none", # + # strip.text.x = element_blank(), + legend.position = "bottom", # "none", # axis.text = element_text(size = 7) #, panel.grid.minor = element_blank() - ) - p <- p + - # reverse ticks - annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) + - # remove clipping - coord_cartesian(clip = "off") + - # add space between ticks and labels - theme(axis.text.y = element_text(margin = margin(r = 2))) + ) + # p <- p + + # reverse ticks + # annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) + + # remove clipping + # coord_cartesian(clip = "off") + + # add space between ticks and labels + # theme(axis.text.y = element_text(margin = margin(r = 2))) fnTmp <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, "-tmp.pdf", sep = "") fn <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, ".pdf", sep = "") ggsave( plot = p, filename = fnTmp, - width = 78, - height = 104, + width = 297, + height = 210, units = "mm" ) - embed_fonts(fnTmp, outfile = fn) + + embed_fonts(fnTmp, outfile=fn) file.remove(fnTmp) - } + } + } } diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.R b/trainbenchmark/trainbenchmark-reporting/toolwise.R deleted file mode 100644 index 3d0db01f3..000000000 --- a/trainbenchmark/trainbenchmark-reporting/toolwise.R +++ /dev/null @@ -1,154 +0,0 @@ -library(data.table) -library(reshape2) -library(plyr) -library(ggplot2) -library(ggrepel) -library(arules) -library(ggforce) - -source('util.R') - -args = commandArgs(trailingOnly=TRUE) -if (length(args)==0) { - stop("At least one argument must be supplied (tool-name).\n", call.=FALSE) -} -toolName = args[1] - -# prepare output directory -output_dir = paste("../diagrams/merged", toolName, sep="/") -if (!(dir.exists(output_dir))) { - dir.create(output_dir) -} - -# constants -workloads = c( - "PosLength", "SwitchMonitored", - "RouteSensor", "SwitchSet", - "ConnectedSegments", "SemaphoreNeighbor" -) -phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck") -phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck") - -sizes = list() # 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 -sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M") - -runList = read.csv(paste("../results/tools/", toolName, "/run-list.csv", sep=""), colClasses=c(rep("character",1))) - -# load the data -tsvs = list.files(paste("../results/tools/", toolName, "/all-runs/", sep=""), pattern = "times-.*\\.csv", full.names = T, recursive = T) - -l = lapply(tsvs, read.csv) -times = rbindlist(l) - -# preprocess the data -times$Tool = factor(times$Tool, levels = runList$Runs) -keep_descriptions_first_char(times) - -times$Model = gsub("\\D+", "", times$Model) -times$Model = as.numeric(times$Model) -times$Time = times$Time / 10^6 -# make the phases a factor with a fixed set of values to help dcasting -# (e.g. Batch measurements do not have Transformation and Recheck attributes, -# hence accessing the "Transformation" attribute would throw an error) -times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck")) - -times.wide = dcast(data = times, - formula = Tool + Workload + Description + Model + Run ~ Phase, - value.var = "Time", - drop = T, - fun.aggregate = mean -) - -# calculate aggregated values -times.derived = times.wide -times.derived$Read.and.Check = times.derived$Read + times.derived$Check -times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck - -# calculate the median value of runs -times.aggregated.runs = ddply( - .data = times.derived, - .variables = c("Tool", "Workload", "Description", "Model"), - .fun = colwise(median), - .progress = "text" -) -# drop the "Run" column -times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run)) - -times.processed = melt( - data = times.aggregated.runs, - id.vars = c("Tool", "Workload", "Description", "Model"), - measure.vars = phases, - variable.name = "Phase", - value.name = "Time" -) - -# beautify plotted record: -# 1. change dots to spaces -# 2. make sure that the phases are still factors -times.plot = times.processed -times.plot$Phase = gsub('\\.', ' ', times.plot$Phase) -times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified) -times.plot$Workload = factor(times.plot$Workload, levels = workloads) - -### line charts -for (phase in phasesPrettified) { - phase.filename = gsub(' ', '-', phase) - workloadSizes = sizes[["Repair"]] - - # filter the dataframe to the current phase - df = times.plot[times.plot$Phase == phase, ] - - # do not visualize empty data sets - if (nrow(df) == 0) { - print(paste("No rows to visualize for phase", phase)) - next - } - - # x axis labels - xbreaks = unique(df$Model) - currentWorkloadSizes = head(workloadSizes, n=length(xbreaks)) - xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "") - - # drop every other models size - maxLabel = max(log2(max(df$Model)), 2) - if (maxLabel %% 2) { - start = 3 - } else { - start = 2 - } - filter = seq(start, maxLabel, by=2) - - xlabels[filter] = "" - - # y axis labels - yaxis = nice_y_axis() - ybreaks = yaxis$ybreaks - ylabels = yaxis$ylabels - - p = ggplot(df) + #na.omit(df)) + - aes(x = as.factor(Model), y = Time) + - labs(title = paste("Individual query execution time,", phase, "phase, ", toolName), x = "Model size\n#Elements", y = "Execution times [ms]") + - geom_point(aes(col = Tool, shape = Tool), size = 2.0) + - scale_shape_manual(values = seq(0, 15)) + - geom_line(aes(col = Tool, group = Tool), size = 0.5) + - scale_x_discrete(breaks = xbreaks, labels = xlabels) + - scale_y_log10(breaks = ybreaks, labels = ylabels) + - guides(color = guide_legend(ncol = 4)) + - theme_bw() + - theme( - plot.title = element_text(hjust = 0.5), - text = element_text(size = 10), - legend.key = element_blank(), - legend.title = element_blank(), - legend.position = "bottom", - axis.text = element_text(size = 9) - ) - print(p) - for (cpage in 1:6) { - ggsave( - plot = p + facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free"), - filename = paste(output_dir, "/", toolName, "-", phase.filename, "-",workloads[cpage], ".pdf", sep=""), - width = 250, height = 150, units = "mm" - ) - } -} diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.py b/trainbenchmark/trainbenchmark-reporting/toolwise.py deleted file mode 100644 index 93a275357..000000000 --- a/trainbenchmark/trainbenchmark-reporting/toolwise.py +++ /dev/null @@ -1,33 +0,0 @@ -import argparse -import json -import logging -import os.path -import subprocess - - -FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s' -logger = logging.getLogger('toolwise') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Plot results per tool.') - parser.add_argument( - "-v", "--verbose", help="Print debug messages.", action="store_true") - parser.add_argument( - "-f", "--file-config", default='merge_results.json', help="Config file to use.") - args = parser.parse_args() - logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO) - # load config file - with open('merge_results.json') as fdr: - content = json.load(fdr) - # update with local version, if existing - directory, basename = os.path.split(os.path.abspath('merge_results.json')) - local_config_file = os.path.join(directory, 'local-' + basename) - if os.path.exists(local_config_file): - with open(local_config_file) as fdr: - content.update(json.load(fdr)) - else: - logger.debug('No local config file found.') - for tool in content.get('toolwise', []): - logging.info('Processing %s now.', tool) - subprocess.call(["Rscript", "toolwise.R", tool]) diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.sh b/trainbenchmark/trainbenchmark-reporting/toolwise.sh deleted file mode 100755 index e64f70cd8..000000000 --- a/trainbenchmark/trainbenchmark-reporting/toolwise.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -python toolwise.py --verbose -# --file-config <file> -# --verbose -- GitLab