toolList = read.csv("tool-list.csv")

# Render the line charts of individual query execution times.
#
# For each scenario the merged result files are loaded, reshaped to one row per
# (Tool, Workload, Description, Model) with the median over runs, and then one
# PDF per phase and workload page is written to
# ../diagrams/recent/<scenario>/, followed by a font-embedded copy.
#
# Relies on names defined earlier in this file or in sourced helpers:
# sizes, phases, phasesPrettified, workloads, nice_y_axis() and
# keep_descriptions_first_char().
for (scenario in c("inject", "repair")) {

  # load the data: every merged result file whose name mentions the scenario
  tsvs = list.files(
    "../results/merged/individual",
    pattern = paste0("times-.*", scenario, ".*\\.csv"),
    full.names = TRUE,
    recursive = TRUE
  )

  # without input files rbindlist() yields a table with no columns and the
  # preprocessing below would fail; skip the scenario instead
  if (length(tsvs) == 0) {
    print(paste("No result files found for scenario", scenario))
    next
  }

  l = lapply(tsvs, read.csv)
  times = rbindlist(l)

  # preprocess the data
  times$Tool = factor(times$Tool, levels = toolList$Tool)
  # NOTE(review): the return value is discarded, so this only has an effect if
  # the helper modifies `times` by reference -- confirm against its definition
  keep_descriptions_first_char(times)

  # keep only the digits of the model identifier and treat it as a number
  times$Model = gsub("\\D+", "", times$Model)
  times$Model = as.numeric(times$Model)
  # scale the raw values by 1e-6 (presumably nanoseconds to the milliseconds
  # shown on the y axis -- verify against the benchmark output)
  times$Time = times$Time / 10^6
  # make the phases a factor with a fixed set of values to help dcasting
  # (e.g. Batch measurements do not have Transformation and Recheck attributes,
  # hence accessing the "Transformation" attribute would throw an error)
  times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))

  times.wide = dcast(
    data = times,
    formula = Tool + Workload + Description + Model + Run ~ Phase,
    value.var = "Time",
    drop = TRUE,
    fun.aggregate = mean
  )

  # calculate aggregated values
  times.derived = times.wide
  times.derived$Read.and.Check = times.derived$Read + times.derived$Check
  times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck

  # calculate the median value of runs
  times.aggregated.runs = ddply(
    .data = times.derived,
    .variables = c("Tool", "Workload", "Description", "Model"),
    .fun = colwise(median),
    .progress = "text"
  )
  # drop the "Run" column
  times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))

  times.processed = melt(
    data = times.aggregated.runs,
    id.vars = c("Tool", "Workload", "Description", "Model"),
    measure.vars = phases,
    variable.name = "Phase",
    value.name = "Time"
  )

  # beautify plotted record:
  # 1. change dots to spaces
  # 2. make sure that the phases are still factors
  times.plot = times.processed
  times.plot$Phase = gsub("\\.", " ", times.plot$Phase)
  times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
  times.plot$Workload = factor(times.plot$Workload, levels = workloads)

  # the per-scenario output directory must exist before ggsave() writes into it
  outputDir = file.path("../diagrams/recent", scenario)
  dir.create(outputDir, recursive = TRUE, showWarnings = FALSE)

  ### line charts
  for (phase in phasesPrettified) {
    phase.filename = gsub(" ", "-", phase)
    # NOTE(review): the "Repair" size labels are used for both scenarios --
    # confirm whether the inject scenario needs its own entry of `sizes`
    workloadSizes = sizes[["Repair"]]

    # filter the dataframe to the current phase
    df = times.plot[times.plot$Phase == phase, ]

    # do not visualize empty data sets
    if (nrow(df) == 0) {
      print(paste("No rows to visualize for phase", phase))
      next
    }

    # x axis labels; the size labels are matched to the model sizes by
    # position, so the model sizes have to be in ascending order
    xbreaks = sort(unique(df$Model))
    currentWorkloadSizes = head(workloadSizes, n = length(xbreaks))
    xlabels = paste0(xbreaks, "\n", currentWorkloadSizes)

    # drop every other models size
    maxLabel = max(log2(max(df$Model)), 2)
    if (maxLabel %% 2 != 0) {
      start = 3
    } else {
      start = 2
    }
    # seq() raises an error when start > maxLabel (fractional log2 below 3)
    if (start <= maxLabel) {
      filter = seq(start, maxLabel, by = 2)
    } else {
      filter = integer(0)
    }
    # never index past the last label: that would lengthen xlabels and break
    # the one-to-one pairing with xbreaks
    filter = filter[filter <= length(xlabels)]

    xlabels[filter] = ""

    # y axis labels
    yaxis = nice_y_axis()
    ybreaks = yaxis$ybreaks
    yminor_breaks = yaxis$yminor_breaks
    ylabels = yaxis$ylabels

    # one page per workload; the file name assumes that page i of the
    # paginated facet shows workloads[i], i.e. that every workload has data
    # for this phase (TODO confirm)
    for (cpage in seq_along(workloads)) {
      p = ggplot(df) + # na.omit(df)) +
        aes(x = as.factor(Model), y = Time) +
        labs(
          title = paste("Individual query execution time,", phase, "phase"),
          x = "Model size\n#Elements",
          y = "Execution times [ms]"
        ) +
        geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
        # scale_shape_manual(values = seq(0, 15)) +
        # scale_shape_manual(#values = c(0, 15, 1, 16, 2, 17, 4, 7),
        #   # labels=c(
        #   #   "Name Lookup ", "Name Lookup (Incremental)", # http://www.sthda.com/english/wiki/r-plot-pch-symbols-the-different-point-shapes-available-in-r - JastAdd NameLookup
        #   #   "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized
        #   #   "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized
        #   #   "TinkerGraph", # HKS92 (grey) - TinkerGraph
        #   #   "Viatra (Incremental)", # HKS33 (violet) - VIATRA
        #   #   "(none)"
        #   # )
        # ) +
        # scale_colour_manual(
        #   values = c(
        #     "#EE7F00", "#EE7F00", # HKS07 (orange) - JastAdd NameLookup
        #     "#009EE0", "#009EE0", # Tud-cyan - JastAdd Optimized
        #     "#6AB023", "#6AB023", # HKS65 (green) - JastAdd Specialized
        #     "#727879", # HKS92 (grey) - TinkerGraph
        #     "#93107E", # HKS33 (violet) - VIATRA
        #     "#F0E442", # Yellow
        #     "#CC79A7"),
        #   labels=c(
        #     "Name Lookup ", "Name Lookup (Incremental)", # HKS07 (orange) - JastAdd NameLookup
        #     "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized
        #     "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized
        #     "TinkerGraph", # HKS92 (grey) - TinkerGraph
        #     "Viatra (Incremental)", # HKS33 (violet) - VIATRA
        #     "(none)"
        #   )) +
        geom_line(aes(col = Tool, group = Tool), size = 0.5) +
        scale_x_discrete(breaks = xbreaks, labels = xlabels) +
        scale_y_log10(
          breaks = ybreaks,
          minor_breaks = yminor_breaks,
          labels = ylabels,
          limits = c(2e-3, 8e4),
          expand = c(0, 0)
        ) +
        facet_grid_paginate(~ Workload, nrow = 1, ncol = 1, page = cpage, scales = "free") +
        guides(color = guide_legend(ncol = 5)) +
        theme_bw() +
        theme(
          plot.title = element_text(hjust = 0.5),
          # text = element_text(family="Open Sans", size = 10),
          # legend.key = element_blank(),
          legend.title = element_blank(),
          legend.position = "bottom",
          axis.text = element_text(size = 16)
          #, panel.grid.minor = element_blank()
        )
      print(p)

      # <phase>-<workload>-<scenario>.pdf plus a copy with embedded fonts
      plotBasename = paste0(phase.filename, "-", workloads[cpage], "-", scenario)
      plotFile = file.path(outputDir, paste0(plotBasename, ".pdf"))
      embeddedFile = file.path(outputDir, paste0(plotBasename, "-embed", ".pdf"))

      ggsave(
        plot = p,
        filename = plotFile,
        width = 500, height = 300, units = "mm"
      )
      embed_fonts(plotFile, outfile = embeddedFile)
    }
  }
}