Commit dcea58ae authored by René Schöne

Alternate scripts reporting.

parent 70286d09
Branch: artifact-evaluation
.gitignore
Rplots.pdf
local-merge_results.json
build.gradle
task doMerge(type: Exec) {
group = 'Benchmark'
description = 'Merges the results'
commandLine './do-merge.sh'
}
task plot(type: Exec) {
group = 'Benchmark'
description = 'Plots the \'classic\' TrainBenchmark result'
commandLine 'Rscript', 'report.R'
dependsOn doMerge
}
task plotIndividual(type: Exec) {
group = 'Benchmark'
description = 'Plots the individual TrainBenchmark results'
commandLine 'Rscript', 'individual.R'
dependsOn doMerge
}
task plotToolwise(type: Exec) {
group = 'Benchmark'
description = 'Plots the individual TrainBenchmark results per tool'
commandLine './toolwise.sh'
dependsOn doMerge
}
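// Typical invocation (an assumption for illustration: the standard Gradle
// wrapper is used; the exact entry point may differ in this project):
//   ./gradlew plot            # merge results, then render the classic report
//   ./gradlew plotToolwise    # merge results, then render per-tool plots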
do-merge.sh
python merge_results.py --result-dir ../results/ --create-run-dirs --create-toolwise-dirs "$@"
merge_results.json
{
"tools": [
"tinkergraph",
"drools",
"mysql",
"kiama",
"jastadd-java-references",
"jastadd-java-references-incremental",
"jastadd-symbolic-references-incremental",
"jastadd-symbolic-references",
"sqlite",
"viatra",
"racr-cpp",
"racr-python",
"racr-scheme",
"neo4j",
"sesame",
"emfapi",
"rdf4j",
"epsilon",
"eclipseocl"
],
"ignored": [
],
"toolwise": [
]
}
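A minimal sketch of a local override file (hypothetical values; the "local-"
naming convention and the overridable keys come from merge_results.py below):
local-merge_results.json
{
    "result-dir": "../results/",
    "max-size": 512,
    "verbose": true
}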
merge_results.py
#!/usr/bin/env python
import argparse
import csv
import glob
import json
import logging
import os
import os.path
import re
import sys
FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
BENCHMARK_PATTERN = re.compile('.*-(BatchModel|Repair|Inject)Test.*')
logger = logging.getLogger('merge_result')
SIZE_PATTERN = re.compile('.*-railway-[^\\-]*-([^\\-]*)-.csv')
NAME_PATTERN = re.compile('(times|matches)-([^\\-]*)-.*.csv')
RUN_PATTERN = re.compile('run-(....)-(..)-(..)-(..)-(..)-(..)')
RUN_REPLACEMENT = r'\1_\2_\3 \4:\5:\6'
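# Illustrative examples for the patterns above (file names are hypothetical,
# following the structure the regexes expect):
#   NAME_PATTERN.match('times-sqlite-railway-repair-64-.csv').group(2) == 'sqlite'
#   new_run_name('run-2018-10-05-12-34-56') == '2018_10_05 12:34:56'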
def include_file_config(args):
def override_if_defined(key, convert=lambda x: x):
keyArgs = key.replace('-', '_')
value = content.get(key) or content.get(keyArgs)
if value:
setattr(args, keyArgs, convert(value))
# load config file
with open(args.file_config) as fdr:
content = json.load(fdr)
# update with local version, if existing
directory, basename = os.path.split(os.path.abspath(args.file_config))
local_config_file = os.path.join(directory, 'local-' + basename)
if os.path.exists(local_config_file):
with open(local_config_file) as fdr:
content.update(json.load(fdr))
else:
logger.debug('No local config file found.')
if not content.get('tools'):
logger.error('Key "tools" not found in config file "' + args.file_config + '". Exiting.')
sys.exit(1)
args.tools = content['tools']
override_if_defined('max-size', int)
override_if_defined('dry-run', bool)
override_if_defined('result-dir')
override_if_defined('create-run-dirs', bool)
override_if_defined('create-toolwise-dirs', bool)
override_if_defined('no-clean', bool)
override_if_defined('verbose', bool)
def create_link(fileToLink, linkName, dry_run):
    # log the action even in dry-run mode; only skip the actual linking
    (logger.info if dry_run else logger.debug)('Linking %s to %s', fileToLink, linkName)
    if dry_run:
        return
    if os.path.lexists(linkName):
        os.unlink(linkName)
    os.symlink(fileToLink, linkName)
def ensure_directory(dir_name, dry_run):
    if os.path.exists(dir_name):
        return
    logger.info('Creating %s', dir_name)
    if not dry_run:
        os.mkdir(dir_name)
def exceeds(filename, max_size):
match = SIZE_PATTERN.match(filename)
return int(match.group(1)) > max_size if match else False
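# Example (hypothetical file name, matching SIZE_PATTERN as written):
#   exceeds('times-sqlite-railway-repair-512-.csv', 256) -> True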
def remove_if_there(the_list, element_to_remove):
if element_to_remove in the_list:
the_list.remove(element_to_remove)
def new_run_name(old_run_name):
    return RUN_PATTERN.sub(RUN_REPLACEMENT, old_run_name)
def copy_replace(fileToCopy, all_runs_dir, tool_name, run, dry_run):
    """
    Copy fileToCopy to all_runs_dir, replacing tool_name with the
    human-readable run name in both the file name and its content
    (except for the header line).
    """
    run_name = new_run_name(run)
    targetFile = os.path.join(
        all_runs_dir, os.path.basename(fileToCopy).replace(tool_name, run_name))
    if dry_run:
        logger.info('Copying %s to %s', fileToCopy, targetFile)
        return
    first = True
    with open(fileToCopy) as fdr_source, open(targetFile, 'w') as fdr_target:
        for line in fdr_source:
            if first:
                first = False
            else:
                line = line.replace(tool_name, run_name)
            fdr_target.write(line)
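# Example (hypothetical run and file names): for run 'run-2018-10-05-12-34-56'
# of tool 'sqlite', copy_replace copies 'times-sqlite-railway-repair-64-.csv'
# to 'all-runs/times-2018_10_05 12:34:56-railway-repair-64-.csv' and rewrites
# every 'sqlite' occurrence in all lines after the CSV header.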
def main(args):
"""
Main process.
Used directory structure/variables:
results/ -> result_dir
tools/ -
tool1/ -> tool_dir
tool1-run1/ -> run_dir
times.csv -
matches.csv -
run-list.csv -
all-runs/ -> all_runs_dir
times-run1.csv@ -
run1/ -> global_run_dir
merged/ -> merged_dir
individual/ -> merged_dir_individual
times.csv@ -
combined/ -> merged_dir_benchmark
times.csv@ -
"""
log_action = logger.info if args.dry_run else logger.debug
# Gathering paths, creating top-level directories
result_dir = os.path.abspath(args.result_dir)
merged_dir = os.path.join(result_dir, 'merged')
merged_dir_benchmark = os.path.join(merged_dir, 'benchmark')
merged_dir_individual = os.path.join(merged_dir, 'individual')
for dir_name in (merged_dir, merged_dir_benchmark, merged_dir_individual):
ensure_directory(dir_name, args.dry_run)
# Gathering tools
    # args.tools is the plain list of tool names loaded from the JSON config
    tools = list(args.tools)
logger.debug('result_dir: %s, tools: %s', result_dir, tools)
# Clean symlinks if requested or max_size is set
if (args.clean or args.max_size) and not args.dry_run:
for dir_to_clean in [merged_dir, merged_dir_benchmark, merged_dir_individual]:
for link in os.listdir(dir_to_clean):
linkName = os.path.join(dir_to_clean, link)
if os.path.islink(linkName):
os.unlink(linkName)
if (args.clean or args.create_toolwise_dirs) and not args.dry_run:
for linkName in glob.iglob(os.path.join(result_dir, 'tools', '*', 'all-runs', '*.csv')):
os.remove(linkName)
# Merge results
for tool in tools:
if tool.startswith('#'):
logger.debug('Ignoring tool "%s"', tool[1:])
continue
already_merged = []
tool_dir = os.path.join(result_dir, 'tools', tool)
if not os.path.exists(tool_dir):
            logger.warning('Tool not found: %s', tool)
continue
all_runs_dir = os.path.join(tool_dir, 'all-runs')
ensure_directory(all_runs_dir, args.dry_run)
runs = sorted(os.listdir(tool_dir), reverse=True)
remove_if_there(runs, 'all-runs')
remove_if_there(runs, 'run-list.csv')
        if args.create_toolwise_dirs and not args.dry_run:
# write out run-list.csv
with open(os.path.join(tool_dir, 'run-list.csv'), 'w') as fdr:
fdr.write('Runs\n')
for run in runs:
fdr.write(new_run_name(run) + '\n')
for run in runs:
run_dir = os.path.join(tool_dir, run)
global_run_dir = os.path.join(result_dir, run)
if not os.path.isdir(run_dir):
continue
ensure_directory(global_run_dir, args.dry_run)
for csvFile in os.listdir(run_dir):
# link file in run directory
fileToLink = os.path.join(tool_dir, run, csvFile)
linkName = os.path.join(global_run_dir, csvFile)
create_link(fileToLink, linkName, args.dry_run)
# skip if max-size is set and size is exceeded
if args.max_size and exceeds(fileToLink, args.max_size):
continue
if args.create_toolwise_dirs:
# link in all-runs (rename file accordingly)
match = NAME_PATTERN.match(csvFile)
if match:
if not BENCHMARK_PATTERN.match(csvFile):
tool_name = match.group(2)
copy_replace(fileToLink, all_runs_dir, tool_name, run, args.dry_run)
else:
                            logger.warning('file did not match pattern: %s', csvFile)
# link file in merged directory
if csvFile not in already_merged:
linkName = os.path.join(merged_dir_benchmark if BENCHMARK_PATTERN.match(csvFile)
else merged_dir_individual, csvFile)
create_link(fileToLink, linkName, args.dry_run)
already_merged.append(csvFile)
else:
log_action('Skipping %s', csvFile)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Merge results of all benchmark runs.',
        epilog="""The config file must contain the key "tools" specifying the tools to process.
It can further contain the long form of any argument to this program as a default value;
any command-line parameter overrides such a default. Additionally, a local version of the
config file is read if it exists, overriding the defaults. Its filename is "local-"
prepended to the name of the config file.""")
parser.add_argument(
"-r", "--result-dir", help="Path to result directory to search in.", type=str)
parser.add_argument(
"-c", "--create-run-dirs", help="Whether to recreate runs directories.",
action="store_true")
parser.add_argument(
"-t", "--create-toolwise-dirs", help="Whether to recreate toolwise regression directories.",
action="store_true")
parser.add_argument(
"-d", "--dry-run", help="Only print action, don't execute them.",
action="store_true")
parser.add_argument(
"-n", "--no-clean", help="Don't remove previously existing symlinks in merged dir.",
dest='clean', action="store_false")
parser.add_argument(
"-v", "--verbose", help="Print debug messages.", action="store_true")
parser.add_argument(
"-m", "--max-size", type=int,
help="Maximum benchmark size to include. Implies cleaning existing symlinks.")
parser.add_argument(
"-f", "--file-config", default='merge_results.json', help="Config file to use.")
args = parser.parse_args()
include_file_config(args)
logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
main(args)
toolwise.R
library(data.table)
library(reshape2)
library(plyr)
library(ggplot2)
library(ggrepel)
library(arules)
library(ggforce)
source('util.R')
args = commandArgs(trailingOnly=TRUE)
if (length(args)==0) {
stop("At least one argument must be supplied (tool-name).\n", call.=FALSE)
}
toolName = args[1]
# prepare output directory
output_dir = paste("../diagrams/merged", toolName, sep="/")
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}
# constants
workloads = c(
"PosLength", "SwitchMonitored",
"RouteSensor", "SwitchSet",
"ConnectedSegments", "SemaphoreNeighbor"
)
phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck")
phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck")
sizes = list() # 1 2 4 8 16 32 64 128 256 512 1024 2048 4096
sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M")
runList = read.csv(paste("../results/tools/", toolName, "/run-list.csv", sep=""), colClasses="character")
# load the data
tsvs = list.files(paste("../results/tools/", toolName, "/all-runs/", sep=""), pattern = "times-.*\\.csv", full.names = T, recursive = T)
l = lapply(tsvs, read.csv)
times = rbindlist(l)
# preprocess the data
times$Tool = factor(times$Tool, levels = runList$Runs)
keep_descriptions_first_char(times)
times$Model = gsub("\\D+", "", times$Model)
times$Model = as.numeric(times$Model)
times$Time = times$Time / 10^6
# make the phases a factor with a fixed set of values to help dcasting
# (e.g. Batch measurements do not have Transformation and Recheck attributes,
# hence accessing the "Transformation" attribute would throw an error)
times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
times.wide = dcast(data = times,
formula = Tool + Workload + Description + Model + Run ~ Phase,
value.var = "Time",
drop = T,
fun.aggregate = mean
)
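# times.wide now holds one row per (Tool, Workload, Description, Model, Run)
# combination and one column per phase; phases absent from a measurement
# (e.g. Transformation for Batch runs) end up as NaN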
# calculate aggregated values
times.derived = times.wide
times.derived$Read.and.Check = times.derived$Read + times.derived$Check
times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
# calculate the median value of runs
times.aggregated.runs = ddply(
.data = times.derived,
.variables = c("Tool", "Workload", "Description", "Model"),
.fun = colwise(median),
.progress = "text"
)
# drop the "Run" column
times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
times.processed = melt(
data = times.aggregated.runs,
id.vars = c("Tool", "Workload", "Description", "Model"),
measure.vars = phases,
variable.name = "Phase",
value.name = "Time"
)
# beautify plotted record:
# 1. change dots to spaces
# 2. make sure that the phases are still factors
times.plot = times.processed
times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
times.plot$Workload = factor(times.plot$Workload, levels = workloads)
### line charts
for (phase in phasesPrettified) {
phase.filename = gsub(' ', '-', phase)
workloadSizes = sizes[["Repair"]]
# filter the dataframe to the current phase
df = times.plot[times.plot$Phase == phase, ]
# do not visualize empty data sets
if (nrow(df) == 0) {
print(paste("No rows to visualize for phase", phase))
next
}
# x axis labels
xbreaks = unique(df$Model)
currentWorkloadSizes = head(workloadSizes, n=length(xbreaks))
xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
# drop every other model-size label
maxLabel = max(log2(max(df$Model)), 2)
if (maxLabel %% 2) {
start = 3
} else {
start = 2
}
filter = seq(start, maxLabel, by=2)
xlabels[filter] = ""
# y axis labels
yaxis = nice_y_axis()
ybreaks = yaxis$ybreaks
ylabels = yaxis$ylabels
p = ggplot(df) +  # alternative: ggplot(na.omit(df)) +
aes(x = as.factor(Model), y = Time) +
labs(title = paste("Individual query execution time,", phase, "phase,", toolName), x = "Model size\n#Elements", y = "Execution times [ms]") +
geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
scale_shape_manual(values = seq(0, 15)) +
geom_line(aes(col = Tool, group = Tool), size = 0.5) +
scale_x_discrete(breaks = xbreaks, labels = xlabels) +
scale_y_log10(breaks = ybreaks, labels = ylabels) +
guides(color = guide_legend(ncol = 4)) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5),
text = element_text(size = 10),
legend.key = element_blank(),
legend.title = element_blank(),
legend.position = "bottom",
axis.text = element_text(size = 9)
)
print(p)
for (cpage in seq_along(workloads)) {
  ggsave(
    plot = p + facet_grid_paginate(~ Workload, nrow = 1, ncol = 1, page = cpage, scale = "free"),
    filename = paste(output_dir, "/", toolName, "-", phase.filename, "-", workloads[cpage], ".pdf", sep = ""),
    width = 250, height = 150, units = "mm"
  )
}
}
toolwise.py
import argparse
import json
import logging
import os.path
import subprocess
FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
logger = logging.getLogger('toolwise')
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Plot results per tool.')
parser.add_argument(
"-v", "--verbose", help="Print debug messages.", action="store_true")
parser.add_argument(
"-f", "--file-config", default='merge_results.json', help="Config file to use.")
args = parser.parse_args()
logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
    # load the config file specified on the command line
    with open(args.file_config) as fdr:
        content = json.load(fdr)
    # update with the local version, if it exists
    directory, basename = os.path.split(os.path.abspath(args.file_config))
    local_config_file = os.path.join(directory, 'local-' + basename)
    if os.path.exists(local_config_file):
        with open(local_config_file) as fdr:
            content.update(json.load(fdr))
    else:
        logger.debug('No local config file found.')
for tool in content.get('toolwise', []):
        logger.info('Processing %s now.', tool)
subprocess.call(["Rscript", "toolwise.R", tool])
toolwise.sh
#!/bin/bash
python toolwise.py "$@"
# --file-config <file>
# --verbose