Skip to content
Snippets Groups Projects
Select Git revision
  • 25723ff15d4accac1c07df4b1690acfdef6c0608
  • clf default protected
  • kinetic
  • hydro
  • indigo
  • obsolete/master
  • groovy
  • 0.3.2
  • 0.3.1
  • 0.3.0
  • 0.1.35
  • 0.2.4
  • 0.2.3
  • 0.2.2
  • 0.2.1
  • 0.1.34
  • 0.1.33
  • 0.1.32
  • 0.1.31
  • 0.1.30
  • 0.1.29
  • 0.1.28
  • 0.1.27
  • 0.2.0
  • 0.1.26
  • 0.1.25
  • 0.1.24
27 results

CMakeLists.txt

Blame
  • individual.R 5.20 KiB
    library(data.table)
    library(reshape2)
    library(plyr)
    library(ggplot2)
    library(ggrepel)
    library(arules)
    library(ggforce)
    
    source('util.R')
    
    # constants

    # names of the benchmark workloads
    workloads <- c(
      "PosLength",
      "SwitchMonitored",
      "RouteSensor",
      "SwitchSet",
      "ConnectedSegments",
      "SemaphoreNeighbor"
    )

    # phase names as used for column names (dots) ...
    phases <- c(
      "Read",
      "Check",
      "Read.and.Check",
      "Transformation",
      "Recheck",
      "Transformation.and.Recheck"
    )
    # ... and, in the same order, as shown on the charts (spaces)
    phasesPrettified <- c(
      "Read",
      "Check",
      "Read and Check",
      "Transformation",
      "Recheck",
      "Transformation and Recheck"
    )

    # element-count labels; the i-th entry belongs to model size 2^(i-1),
    # i.e. to the sizes 1, 2, 4, 8, ..., 4096
    sizes <- list(
      Repair = c(
        "8k", "15k", "33k", "66k", "135k", "271k", "566k",
        "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M"
      )
    )
    
    # list of tools; every column is read as character
    toolList <- read.csv("tool-list.csv", colClasses = "character")

    # load the data: all times-*.csv files found recursively under the
    # results directory
    tsvs <- list.files("../results/recent", pattern = "times-.*\\.csv", full.names = TRUE, recursive = TRUE)
    # fail early with a clear message instead of continuing with an empty table
    if (length(tsvs) == 0) {
      stop("No times-*.csv files found under ../results/recent", call. = FALSE)
    }

    # read every file and stack the resulting tables into one
    l <- lapply(tsvs, read.csv)
    times <- rbindlist(l)
    
    # preprocess the data
    # tools become a factor whose level order follows tool-list.csv
    times$Tool <- factor(times$Tool, levels = toolList$Tool)
    # NOTE(review): the return value is discarded, so this call only has an
    # effect if the helper (from util.R) modifies `times` by reference - confirm
    keep_descriptions_first_char(times)

    # keep only the digits of the model name and use them as a numeric model size
    times$Model <- as.numeric(gsub("\\D+", "", times$Model))
    # NOTE(review): presumably converts nanoseconds to milliseconds - confirm
    # against the unit used in the result files
    times$Time <- times$Time / 10^6
    # make the phases a factor with a fixed set of values to help dcasting
    # (e.g. Batch measurements do not have Transformation and Recheck attributes,
    # hence accessing the "Transformation" attribute would throw an error)
    times$Phase <- factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))

    # long to wide: one row per (Tool, Workload, Description, Model, Run) and one
    # column per phase; duplicate measurements of a cell are averaged
    times.wide <- dcast(
      data = times,
      formula = Tool + Workload + Description + Model + Run ~ Phase,
      value.var = "Time",
      drop = TRUE,
      fun.aggregate = mean
    )
    
    # add the combined phases as the sum of their two constituent phases
    times.derived <- times.wide
    times.derived$Read.and.Check <- with(times.derived, Read + Check)
    times.derived$Transformation.and.Recheck <- with(times.derived, Transformation + Recheck)

    # collapse the runs: column-wise median within every
    # (Tool, Workload, Description, Model) group
    groupColumns <- c("Tool", "Workload", "Description", "Model")
    times.aggregated.runs <- ddply(times.derived, groupColumns, colwise(median), .progress = "text")
    # the "Run" column was aggregated along with the measurements; remove it
    times.aggregated.runs <- subset(times.aggregated.runs, select = -Run)

    # wide to long: one row per group and phase, with the value in "Time"
    times.processed <- melt(
      times.aggregated.runs,
      id.vars = groupColumns,
      measure.vars = phases,
      variable.name = "Phase",
      value.name = "Time"
    )
    
    # table used for plotting:
    # - workloads become a factor ordered as in `workloads`
    # - dots in the phase names are replaced by spaces and the result is turned
    #   back into a factor ordered as in `phasesPrettified`
    times.plot <- times.processed
    times.plot$Workload <- factor(times.plot$Workload, levels = workloads)
    times.plot$Phase <- factor(
      gsub('\\.', ' ', times.plot$Phase),
      levels = phasesPrettified
    )
    
    ### line charts
    # For every phase: one chart per workload showing the execution time (log
    # scale) against the model size, one line per tool. Each chart is printed
    # and saved as a PDF.

    # make sure the output directory exists before ggsave() writes into it
    outputDir <- "../diagrams/recent"
    dir.create(outputDir, showWarnings = FALSE, recursive = TRUE)

    for (phase in phasesPrettified) {
      phase.filename <- gsub(' ', '-', phase)
      workloadSizes <- sizes[["Repair"]]

      # filter the dataframe to the current phase
      df <- times.plot[times.plot$Phase == phase, ]

      # do not visualize empty data sets
      if (nrow(df) == 0) {
        print(paste("No rows to visualize for phase", phase))
        next
      }

      # x axis labels: model size on the first line, element count on the second.
      # The breaks are sorted so that the i-th break is paired with the i-th
      # size label regardless of the row order of df.
      # NOTE(review): the pairing assumes the model sizes are 1, 2, 4, ...
      # without gaps - confirm
      xbreaks <- sort(unique(df$Model))
      currentWorkloadSizes <- head(workloadSizes, n = length(xbreaks))
      xlabels <- paste0(xbreaks, "\n", currentWorkloadSizes)

      # drop every other model size label
      maxLabel <- max(log2(max(df$Model)), 2)
      start <- if (maxLabel %% 2 != 0) 3 else 2
      # seq() fails when start lies beyond maxLabel; blank nothing in that case
      filter <- if (start <= maxLabel) seq(start, maxLabel, by = 2) else integer(0)
      # never index past the existing labels: assigning beyond the end would
      # lengthen xlabels, which would then no longer match xbreaks
      filter <- filter[filter <= length(xlabels)]
      xlabels[filter] <- ""

      # y axis breaks and labels come from the helper in util.R
      yaxis <- nice_y_axis()
      ybreaks <- yaxis$ybreaks
      yminor_breaks <- yaxis$yminor_breaks
      ylabels <- yaxis$ylabels

      # one page (= one facet panel) per workload
      # NOTE(review): the file name assumes that page `cpage` shows
      # workloads[cpage]; this presumably only holds when every workload has
      # rows for the current phase - confirm
      for (cpage in seq_along(workloads)) {
        p <- ggplot(df) +
          aes(x = as.factor(Model), y = Time) +
          labs(title = paste("Individual query execution time,", phase, "phase"), x = "Model size\n#Elements", y = "Execution times [ms]") +
          geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
          scale_shape_manual(values = c(0, 15, 1, 16, 2, 17, 4, 7)) +
          scale_colour_manual(values = c(
              "#56B4E9", "#56B4E9", # Sky blue - JastAdd NameLookup
              "#009E73", "#009E73", # Bluish green - JastAdd Optimized
              "#0072B2", "#0072B2", # Blue - JastAdd Specialized
              "#E69F00", # Orange - TinkerGraph
              "#D55E00", # Vermillion - VIATRA
              "#F0E442", # Yellow
              "#CC79A7")) +
          geom_line(aes(col = Tool, group = Tool), size = 0.5) +
          scale_x_discrete(breaks = xbreaks, labels = xlabels) +
          scale_y_log10(breaks = ybreaks, minor_breaks = yminor_breaks, labels = ylabels) +
          facet_grid_paginate(~ Workload, nrow = 1, ncol = 1, page = cpage, scales = "free") +
          guides(color = guide_legend(ncol = 4)) +
          theme_bw() +
          theme(
            plot.title = element_text(hjust = 0.5),
            text = element_text(size = 10),
            legend.key = element_blank(),
            legend.title = element_blank(),
            legend.position = "bottom",
            axis.text = element_text(size = 9)
          )
        print(p)
        ggsave(
          plot = p,
          filename = file.path(outputDir, paste0("individual-", phase.filename, "-", workloads[cpage], ".pdf")),
          width = 280, height = 210, units = "mm"
        )
      }

    }