Select Git revision
CMakeLists.txt
individual.R 5.20 KiB
library(data.table)
library(reshape2)
library(plyr)
library(ggplot2)
library(ggrepel)
library(arules)
library(ggforce)
source('util.R')
# constants
workloads = c(
"PosLength", "SwitchMonitored",
"RouteSensor", "SwitchSet",
"ConnectedSegments", "SemaphoreNeighbor"
)
phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck")
phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck")
sizes = list() # 1 2 4 8 16 32 64 128 256 512 1024 2048 4096
sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M")
toolList = read.csv("tool-list.csv", colClasses=c(rep("character",1)))
# load the data
tsvs = list.files("../results/recent", pattern = "times-.*\\.csv", full.names = T, recursive = T)
l = lapply(tsvs, read.csv)
times = rbindlist(l)
# preprocess the data
times$Tool = factor(times$Tool, levels = toolList$Tool)
keep_descriptions_first_char(times)
times$Model = gsub("\\D+", "", times$Model)
times$Model = as.numeric(times$Model)
times$Time = times$Time / 10^6
# make the phases a factor with a fixed set of values to help dcasting
# (e.g. Batch measurements do not have Transformation and Recheck attributes,
# hence accessing the "Transformation" attribute would throw an error)
times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
times.wide = dcast(data = times,
formula = Tool + Workload + Description + Model + Run ~ Phase,
value.var = "Time",
drop = T,
fun.aggregate = mean
)
# calculate aggregated values
times.derived = times.wide
times.derived$Read.and.Check = times.derived$Read + times.derived$Check
times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
# calculate the median value of runs
times.aggregated.runs = ddply(
.data = times.derived,
.variables = c("Tool", "Workload", "Description", "Model"),
.fun = colwise(median),
.progress = "text"
)
# drop the "Run" column
times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
times.processed = melt(
data = times.aggregated.runs,
id.vars = c("Tool", "Workload", "Description", "Model"),
measure.vars = phases,
variable.name = "Phase",
value.name = "Time"
)
# beautify plotted record:
# 1. change dots to spaces
# 2. make sure that the phases are still factors
times.plot = times.processed
times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
times.plot$Workload = factor(times.plot$Workload, levels = workloads)
### line charts
for (phase in phasesPrettified) {
phase.filename = gsub(' ', '-', phase)
workloadSizes = sizes[["Repair"]]
# filter the dataframe to the current phase
df = times.plot[times.plot$Phase == phase, ]
# do not visualize empty data sets
if (nrow(df) == 0) {
print(paste("No rows to visualize for phase", phase))
next
}
# x axis labels
xbreaks = unique(df$Model)
currentWorkloadSizes = head(workloadSizes, n=length(xbreaks))
xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
# drop every other models size
maxLabel = max(log2(max(df$Model)), 2)
if (maxLabel %% 2) {
start = 3
} else {
start = 2
}
filter = seq(start, maxLabel, by=2)
xlabels[filter] = ""
# y axis labels
yaxis = nice_y_axis()
ybreaks = yaxis$ybreaks
yminor_breaks = yaxis$yminor_breaks
ylabels = yaxis$ylabels
for (cpage in 1:6) {
p = ggplot(df) + #na.omit(df)) +
aes(x = as.factor(Model), y = Time) +
labs(title = paste("Individual query execution time,", phase, "phase"), x = "Model size\n#Elements", y = "Execution times [ms]") +
geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
# scale_shape_manual(values = seq(0, 15)) +
scale_shape_manual(values = c(0, 15, 1, 16, 2, 17, 4, 7)) +
scale_colour_manual(values = c(
"#56B4E9", "#56B4E9", # Sky blue - JastAdd NameLookup
"#009E73", "#009E73", # Bluish green - JastAdd Optimized
"#0072B2", "#0072B2", # Blue - JastAdd Specialized
"#E69F00", # Orange - TinkerGraph
"#D55E00", # Vermillion - VIATRA
"#F0E442", # Yellow
"#CC79A7")) +
geom_line(aes(col = Tool, group = Tool), size = 0.5) +
scale_x_discrete(breaks = xbreaks, labels = xlabels) +
scale_y_log10(breaks = ybreaks, minor_breaks=yminor_breaks, labels = ylabels) +
facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") +
guides(color = guide_legend(ncol = 4)) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
text = element_text(size = 10),
legend.key = element_blank(),
legend.title = element_blank(),
legend.position = "bottom",
axis.text = element_text(size = 9)
#, panel.grid.minor = element_blank()
)
print(p)
ggsave(
plot = p,
filename = paste("../diagrams/recent/individual-", phase.filename, "-",workloads[cpage], ".pdf", sep=""),
width = 280, height = 210, units = "mm"
)
}
}