Skip to content
Snippets Groups Projects
Commit 91bf83a3 authored by Johannes Mey's avatar Johannes Mey
Browse files

create inject AND repair diagrams

parent b9e044fe
No related branches found
No related tags found
No related merge requests found
...@@ -29,153 +29,156 @@ sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", ...@@ -29,153 +29,156 @@ sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M",
toolList = read.csv("tool-list.csv") toolList = read.csv("tool-list.csv")
# load the data for (scenario in c("inject", "repair")) {
tsvs = list.files("../results/merged/individual", pattern = "times-.*\\.csv", full.names = T, recursive = T)
# load the data
l = lapply(tsvs, read.csv) tsvs = list.files("../results/merged/individual", pattern = paste("times-.*", scenario, ".*\\.csv", sep=""), full.names = T, recursive = T)
times = rbindlist(l)
l = lapply(tsvs, read.csv)
# preprocess the data times = rbindlist(l)
times$Tool = factor(times$Tool, levels = toolList$Tool)
keep_descriptions_first_char(times) # preprocess the data
times$Tool = factor(times$Tool, levels = toolList$Tool)
times$Model = gsub("\\D+", "", times$Model) keep_descriptions_first_char(times)
times$Model = as.numeric(times$Model)
times$Time = times$Time / 10^6 times$Model = gsub("\\D+", "", times$Model)
# make the phases a factor with a fixed set of values to help dcasting times$Model = as.numeric(times$Model)
# (e.g. Batch measurements do not have Transformation and Recheck attributes, times$Time = times$Time / 10^6
# hence accessing the "Transformation" attribute would throw an error) # make the phases a factor with a fixed set of values to help dcasting
times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck")) # (e.g. Batch measurements do not have Transformation and Recheck attributes,
# hence accessing the "Transformation" attribute would throw an error)
times.wide = dcast(data = times, times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
formula = Tool + Workload + Description + Model + Run ~ Phase,
value.var = "Time", times.wide = dcast(data = times,
drop = T, formula = Tool + Workload + Description + Model + Run ~ Phase,
fun.aggregate = mean value.var = "Time",
) drop = T,
fun.aggregate = mean
# calculate aggregated values )
times.derived = times.wide
times.derived$Read.and.Check = times.derived$Read + times.derived$Check # calculate aggregated values
times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck times.derived = times.wide
times.derived$Read.and.Check = times.derived$Read + times.derived$Check
# calculate the median value of runs times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
times.aggregated.runs = ddply(
.data = times.derived, # calculate the median value of runs
.variables = c("Tool", "Workload", "Description", "Model"), times.aggregated.runs = ddply(
.fun = colwise(median), .data = times.derived,
.progress = "text" .variables = c("Tool", "Workload", "Description", "Model"),
) .fun = colwise(median),
# drop the "Run" column .progress = "text"
times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run)) )
# drop the "Run" column
times.processed = melt( times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
data = times.aggregated.runs,
id.vars = c("Tool", "Workload", "Description", "Model"), times.processed = melt(
measure.vars = phases, data = times.aggregated.runs,
variable.name = "Phase", id.vars = c("Tool", "Workload", "Description", "Model"),
value.name = "Time" measure.vars = phases,
) variable.name = "Phase",
value.name = "Time"
# beautify plotted record: )
# 1. change dots to spaces
# 2. make sure that the phases are still factors # beautify plotted record:
times.plot = times.processed # 1. change dots to spaces
times.plot$Phase = gsub('\\.', ' ', times.plot$Phase) # 2. make sure that the phases are still factors
times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified) times.plot = times.processed
times.plot$Workload = factor(times.plot$Workload, levels = workloads) times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
### line charts times.plot$Workload = factor(times.plot$Workload, levels = workloads)
for (phase in phasesPrettified) {
phase.filename = gsub(' ', '-', phase) ### line charts
workloadSizes = sizes[["Repair"]] for (phase in phasesPrettified) {
phase.filename = gsub(' ', '-', phase)
# filter the dataframe to the current phase workloadSizes = sizes[["Repair"]]
df = times.plot[times.plot$Phase == phase, ]
# filter the dataframe to the current phase
# do not visualize empty data sets df = times.plot[times.plot$Phase == phase, ]
if (nrow(df) == 0) {
print(paste("No rows to visualize for phase", phase)) # do not visualize empty data sets
next if (nrow(df) == 0) {
} print(paste("No rows to visualize for phase", phase))
next
# x axis labels }
xbreaks = unique(df$Model)
currentWorkloadSizes = head(workloadSizes, n=length(xbreaks)) # x axis labels
xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "") xbreaks = unique(df$Model)
currentWorkloadSizes = head(workloadSizes, n=length(xbreaks))
# drop every other models size xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
maxLabel = max(log2(max(df$Model)), 2)
if (maxLabel %% 2) { # drop every other models size
start = 3 maxLabel = max(log2(max(df$Model)), 2)
} else { if (maxLabel %% 2) {
start = 2 start = 3
} } else {
filter = seq(start, maxLabel, by=2) start = 2
}
xlabels[filter] = "" filter = seq(start, maxLabel, by=2)
# y axis labels xlabels[filter] = ""
yaxis = nice_y_axis()
ybreaks = yaxis$ybreaks # y axis labels
yminor_breaks = yaxis$yminor_breaks yaxis = nice_y_axis()
ylabels = yaxis$ylabels ybreaks = yaxis$ybreaks
yminor_breaks = yaxis$yminor_breaks
for (cpage in 1:6) { ylabels = yaxis$ylabels
p = ggplot(df) + #na.omit(df)) +
aes(x = as.factor(Model), y = Time) + for (cpage in 1:6) {
labs(title = paste("Individual query execution time,", phase, "phase"), x = "Model size\n#Elements", y = "Execution times [ms]") + p = ggplot(df) + #na.omit(df)) +
geom_point(aes(col = Tool, shape = Tool), size = 2.0) + aes(x = as.factor(Model), y = Time) +
# scale_shape_manual(values = seq(0, 15)) + labs(title = paste("Individual query execution time,", phase, "phase"), x = "Model size\n#Elements", y = "Execution times [ms]") +
# scale_shape_manual(#values = c(0, 15, 1, 16, 2, 17, 4, 7), geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
# # labels=c( # scale_shape_manual(values = seq(0, 15)) +
# # "Name Lookup ", "Name Lookup (Incremental)", # http://www.sthda.com/english/wiki/r-plot-pch-symbols-the-different-point-shapes-available-in-r - JastAdd NameLookup # scale_shape_manual(#values = c(0, 15, 1, 16, 2, 17, 4, 7),
# # "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized # # labels=c(
# # "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized # # "Name Lookup ", "Name Lookup (Incremental)", # http://www.sthda.com/english/wiki/r-plot-pch-symbols-the-different-point-shapes-available-in-r - JastAdd NameLookup
# # "TinkerGraph", # HKS92 (grey) - TinkerGraph # # "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized
# # "Viatra (Incremental)", # HKS33 (violet) - VIATRA # # "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized
# # "(none)" # # "TinkerGraph", # HKS92 (grey) - TinkerGraph
# # ) # # "Viatra (Incremental)", # HKS33 (violet) - VIATRA
# ) + # # "(none)"
# scale_colour_manual( # # )
# values = c( # ) +
# "#EE7F00", "#EE7F00", # HKS07 (orange) - JastAdd NameLookup # scale_colour_manual(
# "#009EE0", "#009EE0", # Tud-cyan - JastAdd Optimized # values = c(
# "#6AB023", "#6AB023", # HKS65 (green) - JastAdd Specialized # "#EE7F00", "#EE7F00", # HKS07 (orange) - JastAdd NameLookup
# "#727879", # HKS92 (grey) - TinkerGraph # "#009EE0", "#009EE0", # Tud-cyan - JastAdd Optimized
# "#93107E", # HKS33 (violet) - VIATRA # "#6AB023", "#6AB023", # HKS65 (green) - JastAdd Specialized
# "#F0E442", # Yellow # "#727879", # HKS92 (grey) - TinkerGraph
# "#CC79A7"), # "#93107E", # HKS33 (violet) - VIATRA
# labels=c( # "#F0E442", # Yellow
# "Name Lookup ", "Name Lookup (Incremental)", # HKS07 (orange) - JastAdd NameLookup # "#CC79A7"),
# "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized # labels=c(
# "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized # "Name Lookup ", "Name Lookup (Incremental)", # HKS07 (orange) - JastAdd NameLookup
# "TinkerGraph", # HKS92 (grey) - TinkerGraph # "Intrinsic References", "Intrinsic References (Incremental)", # Tud-cyan - JastAdd Optimized
# "Viatra (Incremental)", # HKS33 (violet) - VIATRA # "Grammar Extension", "Grammar Extension (Incremental)", # HKS65 (green) - JastAdd Specialized
# "(none)" # "TinkerGraph", # HKS92 (grey) - TinkerGraph
# )) + # "Viatra (Incremental)", # HKS33 (violet) - VIATRA
geom_line(aes(col = Tool, group = Tool), size = 0.5) + # "(none)"
scale_x_discrete(breaks = xbreaks, labels = xlabels) + # )) +
scale_y_log10(breaks = ybreaks, minor_breaks=yminor_breaks, labels = ylabels, limits = c(2e-3,8e4), expand = c(0, 0)) + geom_line(aes(col = Tool, group = Tool), size = 0.5) +
facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") + scale_x_discrete(breaks = xbreaks, labels = xlabels) +
guides(color = guide_legend(ncol = 5)) + scale_y_log10(breaks = ybreaks, minor_breaks=yminor_breaks, labels = ylabels, limits = c(2e-3,8e4), expand = c(0, 0)) +
theme_bw() + facet_grid_paginate(~ Workload, nrow=1, ncol = 1, page=cpage, scale = "free") +
theme( guides(color = guide_legend(ncol = 5)) +
plot.title = element_text(hjust = 0.5), theme_bw() +
# text = element_text(family="Open Sans", size = 10), theme(
# legend.key = element_blank(), plot.title = element_text(hjust = 0.5),
legend.title = element_blank(), # text = element_text(family="Open Sans", size = 10),
legend.position = "bottom", # legend.key = element_blank(),
axis.text = element_text(size = 16) legend.title = element_blank(),
#, panel.grid.minor = element_blank() legend.position = "bottom",
axis.text = element_text(size = 16)
#, panel.grid.minor = element_blank()
)
print(p)
ggsave(
plot = p,
filename = paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, ".pdf", sep=""),
width = 500, height = 300, units = "mm"
) )
print(p) embed_fonts(paste("../diagrams/recent/", scenario, "/", phase.filename, "-", workloads[cpage], "-", scenario, ".pdf", sep=""), outfile=paste("../diagrams/recent/", scenario, "/", phase.filename, "-",workloads[cpage], "-", scenario, "-embed", ".pdf", sep=""))
ggsave( }
plot = p,
filename = paste("../diagrams/recent/", phase.filename, "-",workloads[cpage], ".pdf", sep=""),
width = 500, height = 300, units = "mm"
)
embed_fonts(paste("../diagrams/recent/", phase.filename, "-",workloads[cpage], ".pdf", sep=""), outfile=paste("../diagrams/recent/", phase.filename, "-",workloads[cpage], "-embed", ".pdf", sep=""))
}
} }
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment