From 939f5bdd96b03a3c835741fb6af085951910f625 Mon Sep 17 00:00:00 2001
From: Johannes Mey <johannes.mey@tu-dresden.de>
Date: Thu, 13 Feb 2020 15:17:44 +0100
Subject: [PATCH] update diagram drawing, remove unfinished toolwise option

---
 .../trainbenchmark-reporting/build.gradle     |   7 -
 .../trainbenchmark-reporting/combined.R       |  94 ++---------
 .../trainbenchmark-reporting/individual.R     |  34 ++--
 .../trainbenchmark-reporting/toolwise.R       | 154 ------------------
 .../trainbenchmark-reporting/toolwise.py      |  33 ----
 .../trainbenchmark-reporting/toolwise.sh      |   4 -
 6 files changed, 35 insertions(+), 291 deletions(-)
 delete mode 100644 trainbenchmark/trainbenchmark-reporting/toolwise.R
 delete mode 100644 trainbenchmark/trainbenchmark-reporting/toolwise.py
 delete mode 100755 trainbenchmark/trainbenchmark-reporting/toolwise.sh

diff --git a/trainbenchmark/trainbenchmark-reporting/build.gradle b/trainbenchmark/trainbenchmark-reporting/build.gradle
index f9eceadfa..d0d04dbb1 100644
--- a/trainbenchmark/trainbenchmark-reporting/build.gradle
+++ b/trainbenchmark/trainbenchmark-reporting/build.gradle
@@ -17,10 +17,3 @@ task plotIndividual(type: Exec) {
     commandLine 'Rscript', 'individual.R'
     dependsOn doMerge
 }
-
-task plotToolwise(type: Exec) {
-    group = 'Benchmark'
-    description = 'Plots the individual TrainBenchmark results per tool'
-    commandLine './toolwise.sh'
-    dependsOn doMerge
-}
diff --git a/trainbenchmark/trainbenchmark-reporting/combined.R b/trainbenchmark/trainbenchmark-reporting/combined.R
index ddd13d8ee..4015a4a00 100644
--- a/trainbenchmark/trainbenchmark-reporting/combined.R
+++ b/trainbenchmark/trainbenchmark-reporting/combined.R
@@ -130,7 +130,7 @@ for (scenario in c("inject", "repair")) {
 
     p = ggplot(df) + #na.omit(df)) +
       aes(x = as.factor(Model), y = Time) +
-      labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements", y = "Execution times [ms]") +
+      labs(title = paste("All", paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Queries in Sequence\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") +
 
       geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
       scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
@@ -174,96 +174,36 @@ for (scenario in c("inject", "repair")) {
       scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e-3,3e4), expand = c(0, 0)) +
       theme_bw() +
       theme(
-        plot.title = element_blank(), # element_text(hjust = 0.5),
+        plot.title = element_text(hjust = 0.5),
         # text = element_text(family="Open Sans", size = 10),
         # legend.key = element_blank(),
         legend.title = element_blank(),
-        strip.text.x = element_blank(),
-        legend.position = "none", # "none", #
+        # strip.text.x = element_blank(),
+        legend.position = "bottom", # "none", #
         axis.text = element_text(size = 7)
         #, panel.grid.minor = element_blank()
       )
     fnTmp <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, "-tmp.pdf", sep="")
     fn <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, ".pdf", sep="")
-    if (phase == "Read") {
-      p = p + scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e0,3e4), expand = c(0, 0)) +
-          scale_colour_manual(values = c( "#56B4E9", "#56B4E9", # Sky blue - Name Lookup
-          "#009E73", "#009E73", # Bluish green - Intrinsic References
-          "#0072B2", "#0072B2", # Blue - Manual Serialization
-          "#E69F00", "#E69F00", # Orange - RelAST
-          "#D55E00", # Vermillion - Tinkergraph
-          "#CC79A7", # ReddishPurple - VIATRA
-          "#F0E442"  # Yellow
-          ),
-          labels = c(
-          "Name Lookup / Reflection-based",
-          "Name Lookup (Incremental) / Reflection-based",
-          "Intrinsic References / Modified Reflection-based",
-          "Intrinsic References (Incremental) / Modified Reflection-based",
-          "Relational RAGs / Hand-written",
-          "Relational RAGs (Incremental) / Hand-written",
-          "Relational RAGs / Generated",
-          "Relational RAGs (Incremental) / Generated"
-          )
-          ) +
-          scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
-          labels = c(
-          "Name Lookup / Reflection-based",
-          "Name Lookup (Incremental) / Reflection-based",
-          "Intrinsic References / Modified Reflection-based",
-          "Intrinsic References (Incremental) / Modified Reflection-based",
-          "Relational RAGs / Hand-written",
-          "Relational RAGs (Incremental) / Hand-written",
-          "Relational RAGs / Generated",
-          "Relational RAGs (Incremental) / Generated"
-          ),
-          guide=guide_legend(ncol=2,nrow=4)
-          ) +
-          theme(
-          plot.title = element_blank(), # element_text(hjust = 0.5),
-          # text = element_text(family="Open Sans", size = 10),
-          # legend.key = element_blank(),
-          legend.title = element_blank(),
-          strip.text.x = element_blank(),
-          legend.position = "none", # "none", #
-          axis.text = element_text(size = 9)
-          #, panel.grid.minor = element_blank()
-          )
 
-      p <- p +
-      # reverse ticks
-          annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
-      # remove clipping
-          coord_cartesian(clip = "off") +
-      # add space between ticks and labels
-          theme(axis.text.y = element_text(margin = margin(r = 2)))
 
-      ggsave(
+
+    # p <- p +   # disabled together with the layers below; a dangling "+" would make the ggsave() call its right-hand operand
+    # reverse ticks
+    #     annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) +
+    # remove clipping
+    #     coord_cartesian(clip = "off") +
+    # add space between ticks and labels
+    #       theme(axis.text.y = element_text(margin = margin(r = 4)))
+
+    ggsave(
       plot = p,
       filename = fnTmp,
-      width = 160,
-      height = 100,
+      width = 297,
+      height = 210,
       units = "mm"
-      )
-    } else {
+    )
 
-
-      p <- p +
-      # reverse ticks
-          annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) +
-      # remove clipping
-          coord_cartesian(clip = "off") +
-      # add space between ticks and labels
-            theme(axis.text.y = element_text(margin = margin(r = 4)))
-
-      ggsave(
-        plot = p,
-        filename = fnTmp,
-        width = 78,
-        height = 104,
-        units = "mm"
-      )
-    }
     embed_fonts(fnTmp, outfile=fn)
     file.remove(fnTmp)
 
diff --git a/trainbenchmark/trainbenchmark-reporting/individual.R b/trainbenchmark/trainbenchmark-reporting/individual.R
index e4e985b25..3245c26b4 100644
--- a/trainbenchmark/trainbenchmark-reporting/individual.R
+++ b/trainbenchmark/trainbenchmark-reporting/individual.R
@@ -129,7 +129,7 @@ for (scenario in c("inject", "repair")) {
     for (cpage in 1:6) {
       p = ggplot(df) + #na.omit(df)) +
         aes(x = as.factor(Model), y = Time) +
-        labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements") +#, y = "Execution times [ms]") +
+        labs(title = paste("Individual", workloads[cpage], paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Query\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") +
         geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
         scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
                             labels = c( "Name Lookup",
@@ -173,34 +173,36 @@ for (scenario in c("inject", "repair")) {
         facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") +
         theme_bw() +
         theme(
-          plot.title = element_blank(), # element_text(hjust = 0.5),
+          plot.title = element_text(hjust = 0.5),
           # text = element_text(family="Open Sans", size = 10),
           # legend.key = element_blank(),
           legend.title = element_blank(),
-          strip.text.x = element_blank(),
-          legend.position = "none", # "none", #
+          # strip.text.x = element_blank(),
+          legend.position = "bottom", # "none", #
           axis.text = element_text(size = 7)
           #, panel.grid.minor = element_blank()
-        )
-        p <- p +
-        # reverse ticks
-            annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
-        # remove clipping
-            coord_cartesian(clip = "off") +
-        # add space between ticks and labels
-            theme(axis.text.y = element_text(margin = margin(r = 2)))
+      )
+      # p <- p +
+      # reverse ticks
+      #    annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
+      # remove clipping
+      #    coord_cartesian(clip = "off") +
+      # add space between ticks and labels
+      #    theme(axis.text.y = element_text(margin = margin(r = 2)))
 
         fnTmp <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, "-tmp.pdf", sep = "")
         fn <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, ".pdf", sep = "")
         ggsave(
         plot = p,
         filename = fnTmp,
-        width = 78,
-        height = 104,
+        width = 297,
+        height = 210,
         units = "mm"
         )
-        embed_fonts(fnTmp, outfile = fn)
+
+        embed_fonts(fnTmp, outfile=fn)
         file.remove(fnTmp)
-        }
+
     }
+  }
 }
diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.R b/trainbenchmark/trainbenchmark-reporting/toolwise.R
deleted file mode 100644
index 3d0db01f3..000000000
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.R
+++ /dev/null
@@ -1,154 +0,0 @@
-library(data.table)
-library(reshape2)
-library(plyr)
-library(ggplot2)
-library(ggrepel)
-library(arules)
-library(ggforce)
-
-source('util.R')
-
-args = commandArgs(trailingOnly=TRUE)
-if (length(args)==0) {
-  stop("At least one argument must be supplied (tool-name).\n", call.=FALSE)
-}
-toolName = args[1]
-
-# prepare output directory
-output_dir = paste("../diagrams/merged", toolName, sep="/")
-if (!(dir.exists(output_dir))) {
-    dir.create(output_dir)
-}
-
-# constants
-workloads = c(
-  "PosLength",  "SwitchMonitored",
-  "RouteSensor", "SwitchSet",
-  "ConnectedSegments", "SemaphoreNeighbor"
-)
-phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck")
-phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck")
-
-sizes = list()      # 1     2      4      8      16      32      64      128     256     512     1024    2048   4096
-sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M")
-
-runList = read.csv(paste("../results/tools/", toolName, "/run-list.csv", sep=""), colClasses=c(rep("character",1)))
-
-# load the data
-tsvs = list.files(paste("../results/tools/", toolName, "/all-runs/", sep=""), pattern = "times-.*\\.csv", full.names = T, recursive = T)
-
-l = lapply(tsvs, read.csv)
-times = rbindlist(l)
-
-# preprocess the data
-times$Tool = factor(times$Tool, levels = runList$Runs)
-keep_descriptions_first_char(times)
-
-times$Model = gsub("\\D+", "", times$Model)
-times$Model = as.numeric(times$Model)
-times$Time = times$Time / 10^6
-# make the phases a factor with a fixed set of values to help dcasting
-# (e.g. Batch measurements do not have Transformation and Recheck attributes,
-# hence accessing the "Transformation" attribute would throw an error)
-times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
-
-times.wide = dcast(data = times,
-                   formula = Tool + Workload + Description + Model + Run ~ Phase,
-                   value.var = "Time",
-                   drop = T,
-                   fun.aggregate = mean
-)
-
-# calculate aggregated values
-times.derived = times.wide
-times.derived$Read.and.Check = times.derived$Read + times.derived$Check
-times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
-
-# calculate the median value of runs
-times.aggregated.runs = ddply(
-  .data = times.derived,
-  .variables = c("Tool", "Workload", "Description", "Model"),
-  .fun = colwise(median),
-  .progress = "text"
-)
-# drop the "Run" column
-times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
-
-times.processed = melt(
-  data = times.aggregated.runs,
-  id.vars = c("Tool", "Workload", "Description", "Model"),
-  measure.vars = phases,
-  variable.name = "Phase",
-  value.name = "Time"
-)
-
-# beautify plotted record:
-# 1. change dots to spaces
-# 2. make sure that the phases are still factors
-times.plot = times.processed
-times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
-times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
-times.plot$Workload = factor(times.plot$Workload, levels = workloads)
-
-### line charts
-for (phase in phasesPrettified) {
-  phase.filename = gsub(' ', '-', phase)
-  workloadSizes = sizes[["Repair"]]
-
-  # filter the dataframe to the current phase
-  df = times.plot[times.plot$Phase == phase, ]
-
-  # do not visualize empty data sets
-  if (nrow(df) == 0) {
-    print(paste("No rows to visualize for phase", phase))
-    next
-  }
-
-  # x axis labels
-  xbreaks = unique(df$Model)
-  currentWorkloadSizes = head(workloadSizes, n=length(xbreaks))
-  xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
-
-  # drop every other models size
-  maxLabel = max(log2(max(df$Model)), 2)
-  if (maxLabel %% 2) {
-    start = 3
-  } else {
-    start = 2
-  }
-  filter = seq(start, maxLabel, by=2)
-
-  xlabels[filter] = ""
-
-  # y axis labels
-  yaxis = nice_y_axis()
-  ybreaks = yaxis$ybreaks
-  ylabels = yaxis$ylabels
-
-  p = ggplot(df) + #na.omit(df)) +
-    aes(x = as.factor(Model), y = Time) +
-    labs(title = paste("Individual query execution time,", phase, "phase, ", toolName), x = "Model size\n#Elements", y = "Execution times [ms]") +
-    geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
-    scale_shape_manual(values = seq(0, 15)) +
-    geom_line(aes(col = Tool, group = Tool), size = 0.5) +
-    scale_x_discrete(breaks = xbreaks, labels = xlabels) +
-    scale_y_log10(breaks = ybreaks, labels = ylabels) +
-    guides(color = guide_legend(ncol = 4)) +
-    theme_bw() +
-    theme(
-      plot.title = element_text(hjust = 0.5),
-      text = element_text(size = 10),
-      legend.key = element_blank(),
-      legend.title = element_blank(),
-      legend.position = "bottom",
-      axis.text = element_text(size = 9)
-    )
-  print(p)
-  for (cpage in 1:6) {
-    ggsave(
-      plot = p + facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free"),
-      filename = paste(output_dir, "/", toolName, "-", phase.filename, "-",workloads[cpage], ".pdf", sep=""),
-      width = 250, height = 150, units = "mm"
-    )
-  }
-}
diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.py b/trainbenchmark/trainbenchmark-reporting/toolwise.py
deleted file mode 100644
index 93a275357..000000000
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import argparse
-import json
-import logging
-import os.path
-import subprocess
-
-
-FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
-logger = logging.getLogger('toolwise')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Plot results per tool.')
-    parser.add_argument(
-        "-v", "--verbose", help="Print debug messages.", action="store_true")
-    parser.add_argument(
-        "-f", "--file-config", default='merge_results.json', help="Config file to use.")
-    args = parser.parse_args()
-    logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
-    # load config file
-    with open('merge_results.json') as fdr:
-        content = json.load(fdr)
-    # update with local version, if existing
-    directory, basename = os.path.split(os.path.abspath('merge_results.json'))
-    local_config_file = os.path.join(directory, 'local-' + basename)
-    if os.path.exists(local_config_file):
-        with open(local_config_file) as fdr:
-            content.update(json.load(fdr))
-    else:
-        logger.debug('No local config file found.')
-    for tool in content.get('toolwise', []):
-        logging.info('Processing %s now.', tool)
-        subprocess.call(["Rscript", "toolwise.R", tool])
diff --git a/trainbenchmark/trainbenchmark-reporting/toolwise.sh b/trainbenchmark/trainbenchmark-reporting/toolwise.sh
deleted file mode 100755
index e64f70cd8..000000000
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-python toolwise.py --verbose
-# --file-config <file>
-# --verbose
-- 
GitLab