update diagram drawing, remove unfinished toolwise option

939f5bdd · Johannes Mey · 21b8c45a · 939f5bdd · 939f5bdd · 939f5bdd
Commit 939f5bdd authored 5 years ago by Johannes Mey
--- a/trainbenchmark/trainbenchmark-reporting/build.gradle
+++ b/trainbenchmark/trainbenchmark-reporting/build.gradle
@@ -17,10 +17,3 @@ task plotIndividual(type: Exec) {
    commandLine 'Rscript', 'individual.R'
    dependsOn doMerge
 }
-task plotToolwise(type: Exec) {
-    group = 'Benchmark'
-    description = 'Plots the individual TrainBenchmark results per tool'
-    commandLine './toolwise.sh'
-    dependsOn doMerge
-}
--- a/trainbenchmark/trainbenchmark-reporting/combined.R
+++ b/trainbenchmark/trainbenchmark-reporting/combined.R
@@ -130,7 +130,7 @@ for (scenario in c("inject", "repair")) {
    p = ggplot(df) + #na.omit(df)) +
      aes(x = as.factor(Model), y = Time) +
-      labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements", y = "Execution times [ms]") +
+      labs(title = paste("All", paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Queries in Sequence\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") +
      geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
      scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
@@ -174,96 +174,36 @@ for (scenario in c("inject", "repair")) {
      scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e-3,3e4), expand = c(0, 0)) +
      theme_bw() +
      theme(
-        plot.title = element_blank(), # element_text(hjust = 0.5),
+        plot.title = element_text(hjust = 0.5),
        # text = element_text(family="Open Sans", size = 10),
        # legend.key = element_blank(),
        legend.title = element_blank(),
-        strip.text.x = element_blank(),
+        # strip.text.x = element_blank(),
-        legend.position = "none", # "none", #
+        legend.position = "bottom", # "none", #
        axis.text = element_text(size = 7)
        #, panel.grid.minor = element_blank()
      )
    fnTmp <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, "-tmp.pdf", sep="")
    fn <- paste("../diagrams/recent/benchmark/", scenario, "/", phase.filename, "-", scenario, ".pdf", sep="")
-    if (phase == "Read") {
-      p = p + scale_y_log10(breaks = ybreaks, minor_breaks=NULL, labels = ylabels, limits = c(3e0,3e4), expand = c(0, 0)) +
-          scale_colour_manual(values = c( "#56B4E9", "#56B4E9", # Sky blue - Name Lookup
-          "#009E73", "#009E73", # Bluish green - Intrinsic References
-          "#0072B2", "#0072B2", # Blue - Manual Serialization
-          "#E69F00", "#E69F00", # Orange - RelAST
-          "#D55E00", # Vermillion - Tinkergraph
-          "#CC79A7", # ReddishPurple - VIATRA
-          "#F0E442"  # Yellow
-          ),
-          labels = c(
-          "Name Lookup / Reflection-based",
-          "Name Lookup (Incremental) / Reflection-based",
-          "Intrinsic References / Modified Reflection-based",
-          "Intrinsic References (Incremental) / Modified Reflection-based",
-          "Relational RAGs / Hand-written",
-          "Relational RAGs (Incremental) / Hand-written",
-          "Relational RAGs / Generated",
-          "Relational RAGs (Incremental) / Generated"
-          )
-          ) +
-          scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
-          labels = c(
-          "Name Lookup / Reflection-based",
-          "Name Lookup (Incremental) / Reflection-based",
-          "Intrinsic References / Modified Reflection-based",
-          "Intrinsic References (Incremental) / Modified Reflection-based",
-          "Relational RAGs / Hand-written",
-          "Relational RAGs (Incremental) / Hand-written",
-          "Relational RAGs / Generated",
-          "Relational RAGs (Incremental) / Generated"
-          ),
-          guide=guide_legend(ncol=2,nrow=4)
-          ) +
-          theme(
-          plot.title = element_blank(), # element_text(hjust = 0.5),
-          # text = element_text(family="Open Sans", size = 10),
-          # legend.key = element_blank(),
-          legend.title = element_blank(),
-          strip.text.x = element_blank(),
-          legend.position = "none", # "none", #
-          axis.text = element_text(size = 9)
-          #, panel.grid.minor = element_blank()
-          )
-      p <- p +
-      # reverse ticks
-          annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
-      # remove clipping
-          coord_cartesian(clip = "off") +
-      # add space between ticks and labels
-          theme(axis.text.y = element_text(margin = margin(r = 2)))
-      ggsave(
-      plot = p,
-      filename = fnTmp,
-      width = 160,
-      height = 100,
-      units = "mm"
-      )
-    } else {
-    p <- p +
+    # p <- p +  # disabled with the chain below: a dangling '+' would continue onto ggsave(...) and add its return value to p
    # reverse ticks
-          annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) +
+    #     annotation_logticks(sides = "l", short = unit(- 0.07, "cm"), mid = unit(- 0.14, "cm"), long = unit(- 0.21, "cm")) +
    # remove clipping
-          coord_cartesian(clip = "off") +
+    #     coord_cartesian(clip = "off") +
    # add space between ticks and labels
-            theme(axis.text.y = element_text(margin = margin(r = 4)))
+    #       theme(axis.text.y = element_text(margin = margin(r = 4)))
    ggsave(
      plot = p,
      filename = fnTmp,
-        width = 78,
+      width = 297,
-        height = 104,
+      height = 210,
      units = "mm"
    )
-    }
    embed_fonts(fnTmp, outfile=fn)
    file.remove(fnTmp)

--- a/trainbenchmark/trainbenchmark-reporting/individual.R
+++ b/trainbenchmark/trainbenchmark-reporting/individual.R
@@ -129,7 +129,7 @@ for (scenario in c("inject", "repair")) {
    for (cpage in 1:6) {
      p = ggplot(df) + #na.omit(df)) +
        aes(x = as.factor(Model), y = Time) +
-        labs(title = paste("Individual query execution time,", phase, "phase"), x = element_blank(), y = element_blank()) +#, x = "Model size\n#Elements") +#, y = "Execution times [ms]") +
+        labs(title = paste("Individual", workloads[cpage], paste(toupper(substring(scenario, 1,1)), substring(scenario, 2), sep="", collapse=" "), "Query\n", phase, "Phase"), x = "Number of elements", y = "Execution times [ms]") +
        geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
        scale_shape_manual( values = c(1,16, 0,15, 2,17, 5,18, 8,10),
                            labels = c( "Name Lookup",
@@ -173,34 +173,36 @@ for (scenario in c("inject", "repair")) {
        facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") +
        theme_bw() +
        theme(
-          plot.title = element_blank(), # element_text(hjust = 0.5),
+          plot.title = element_text(hjust = 0.5),
          # text = element_text(family="Open Sans", size = 10),
          # legend.key = element_blank(),
          legend.title = element_blank(),
-          strip.text.x = element_blank(),
+          # strip.text.x = element_blank(),
-          legend.position = "none", # "none", #
+          legend.position = "bottom", # "none", #
          axis.text = element_text(size = 7)
          #, panel.grid.minor = element_blank()
      )
-        p <- p +
+      # p <- p +
      # reverse ticks
-            annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
+      #    annotation_logticks(sides = "l", short = unit(- 0.06, "cm"), mid = unit(- 0.10, "cm"), long = unit(- 0.14, "cm")) +
      # remove clipping
-            coord_cartesian(clip = "off") +
+      #    coord_cartesian(clip = "off") +
      # add space between ticks and labels
-            theme(axis.text.y = element_text(margin = margin(r = 2)))
+      #    theme(axis.text.y = element_text(margin = margin(r = 2)))
        fnTmp <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, "-tmp.pdf", sep = "")
        fn <- paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, ".pdf", sep = "")
        ggsave(
        plot = p,
        filename = fnTmp,
-        width = 78,
+        width = 297,
-        height = 104,
+        height = 210,
        units = "mm"
        )
        embed_fonts(fnTmp, outfile=fn)
        file.remove(fnTmp)
    }
  }
 }
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.R
+++ b/trainbenchmark/trainbenchmark-reporting/toolwise.R
-library(data.table)
-library(reshape2)
-library(plyr)
-library(ggplot2)
-library(ggrepel)
-library(arules)
-library(ggforce)
-source('util.R')
-args = commandArgs(trailingOnly=TRUE)
-if (length(args)==0) {
-  stop("At least one argument must be supplied (tool-name).\n", call.=FALSE)
-}
-toolName = args[1]
-# prepare output directory
-output_dir = paste("../diagrams/merged", toolName, sep="/")
-if (!(dir.exists(output_dir))) {
-    dir.create(output_dir)
-}
-# constants
-workloads = c(
-  "PosLength",  "SwitchMonitored",
-  "RouteSensor", "SwitchSet",
-  "ConnectedSegments", "SemaphoreNeighbor"
-)
-phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck")
-phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck")
-sizes = list()      # 1     2      4      8      16      32      64      128     256     512     1024    2048   4096
-sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M")
-runList = read.csv(paste("../results/tools/", toolName, "/run-list.csv", sep=""), colClasses=c(rep("character",1)))
-# load the data
-tsvs = list.files(paste("../results/tools/", toolName, "/all-runs/", sep=""), pattern = "times-.*\\.csv", full.names = T, recursive = T)
-l = lapply(tsvs, read.csv)
-times = rbindlist(l)
-# preprocess the data
-times$Tool = factor(times$Tool, levels = runList$Runs)
-keep_descriptions_first_char(times)
-times$Model = gsub("\\D+", "", times$Model)
-times$Model = as.numeric(times$Model)
-times$Time = times$Time / 10^6
-# make the phases a factor with a fixed set of values to help dcasting
-# (e.g. Batch measurements do not have Transformation and Recheck attributes,
-# hence accessing the "Transformation" attribute would throw an error)
-times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
-times.wide = dcast(data = times,
-                   formula = Tool + Workload + Description + Model + Run ~ Phase,
-                   value.var = "Time",
-                   drop = T,
-                   fun.aggregate = mean
-)
-# calculate aggregated values
-times.derived = times.wide
-times.derived$Read.and.Check = times.derived$Read + times.derived$Check
-times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
-# calculate the median value of runs
-times.aggregated.runs = ddply(
-  .data = times.derived,
-  .variables = c("Tool", "Workload", "Description", "Model"),
-  .fun = colwise(median),
-  .progress = "text"
-)
-# drop the "Run" column
-times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
-times.processed = melt(
-  data = times.aggregated.runs,
-  id.vars = c("Tool", "Workload", "Description", "Model"),
-  measure.vars = phases,
-  variable.name = "Phase",
-  value.name = "Time"
-)
-# beautify plotted record:
-# 1. change dots to spaces
-# 2. make sure that the phases are still factors
-times.plot = times.processed
-times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
-times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
-times.plot$Workload = factor(times.plot$Workload, levels = workloads)
-### line charts
-for (phase in phasesPrettified) {
-  phase.filename = gsub(' ', '-', phase)
-  workloadSizes = sizes[["Repair"]]
-  # filter the dataframe to the current phase
-  df = times.plot[times.plot$Phase == phase, ]
-  # do not visualize empty data sets
-  if (nrow(df) == 0) {
-    print(paste("No rows to visualize for phase", phase))
-    next
-  }
-  # x axis labels
-  xbreaks = unique(df$Model)
-  currentWorkloadSizes = head(workloadSizes, n=length(xbreaks))
-  xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
-  # drop every other models size
-  maxLabel = max(log2(max(df$Model)), 2)
-  if (maxLabel %% 2) {
-    start = 3
-  } else {
-    start = 2
-  }
-  filter = seq(start, maxLabel, by=2)
-  xlabels[filter] = ""
-  # y axis labels
-  yaxis = nice_y_axis()
-  ybreaks = yaxis$ybreaks
-  ylabels = yaxis$ylabels
-  p = ggplot(df) + #na.omit(df)) +
-    aes(x = as.factor(Model), y = Time) +
-    labs(title = paste("Individual query execution time,", phase, "phase, ", toolName), x = "Model size\n#Elements", y = "Execution times [ms]") +
-    geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
-    scale_shape_manual(values = seq(0, 15)) +
-    geom_line(aes(col = Tool, group = Tool), size = 0.5) +
-    scale_x_discrete(breaks = xbreaks, labels = xlabels) +
-    scale_y_log10(breaks = ybreaks, labels = ylabels) +
-    guides(color = guide_legend(ncol = 4)) +
-    theme_bw() +
-    theme(
-      plot.title = element_text(hjust = 0.5),
-      text = element_text(size = 10),
-      legend.key = element_blank(),
-      legend.title = element_blank(),
-      legend.position = "bottom",
-      axis.text = element_text(size = 9)
-    )
-  print(p)
-  for (cpage in 1:6) {
-    ggsave(
-      plot = p + facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free"),
-      filename = paste(output_dir, "/", toolName, "-", phase.filename, "-",workloads[cpage], ".pdf", sep=""),
-      width = 250, height = 150, units = "mm"
-    )
-  }
-}
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.py
+++ b/trainbenchmark/trainbenchmark-reporting/toolwise.py
-import argparse
-import json
-import logging
-import os.path
-import subprocess
-FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
-logger = logging.getLogger('toolwise')
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Plot results per tool.')
-    parser.add_argument(
-        "-v", "--verbose", help="Print debug messages.", action="store_true")
-    parser.add_argument(
-        "-f", "--file-config", default='merge_results.json', help="Config file to use.")
-    args = parser.parse_args()
-    logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
-    # load config file
-    with open('merge_results.json') as fdr:
-        content = json.load(fdr)
-    # update with local version, if existing
-    directory, basename = os.path.split(os.path.abspath('merge_results.json'))
-    local_config_file = os.path.join(directory, 'local-' + basename)
-    if os.path.exists(local_config_file):
-        with open(local_config_file) as fdr:
-            content.update(json.load(fdr))
-    else:
-        logger.debug('No local config file found.')
-    for tool in content.get('toolwise', []):
-        logging.info('Processing %s now.', tool)
-        subprocess.call(["Rscript", "toolwise.R", tool])
--- a/trainbenchmark/trainbenchmark-reporting/toolwise.sh
+++ b/trainbenchmark/trainbenchmark-reporting/toolwise.sh
-#!/bin/bash
-python toolwise.py --verbose
-# --file-config <file>
-# --verbose