Commit dcea58ae authored by René Schöne

Alternate scripts reporting.

parent 70286d09
.gitignore:

Rplots.pdf
local-merge_results.json

build.gradle:
task doMerge(type: Exec) {
    group = 'Benchmark'
    description = 'Merges the results'
    commandLine './do-merge.sh'
}

task plot(type: Exec) {
    group = 'Benchmark'
    description = 'Plots the \'classic\' TrainBenchmark result'
    commandLine 'Rscript', 'report.R'
    dependsOn doMerge
}

task plotIndividual(type: Exec) {
    group = 'Benchmark'
    description = 'Plots the individual TrainBenchmark results'
    commandLine 'Rscript', 'individual.R'
    dependsOn doMerge
}

task plotToolwise(type: Exec) {
    group = 'Benchmark'
    description = 'Plots the individual TrainBenchmark results per tool'
    commandLine './toolwise.sh'
    dependsOn doMerge
}
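
Usage sketch (assuming the project's usual Gradle wrapper; task names as defined above):

./gradlew plotToolwise   # dependsOn ensures doMerge runs first, then ./toolwise.sh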

do-merge.sh:

python merge_results.py --result-dir ../results/ --create-run-dirs --create-toolwise-dirs "$@"

merge_results.json:

{
  "tools": [
    "tinkergraph",
    "drools",
    "mysql",
    "kiama",
    "jastadd-java-references",
    "jastadd-java-references-incremental",
    "jastadd-symbolic-references-incremental",
    "jastadd-symbolic-references",
    "sqlite",
    "viatra",
    "racr-cpp",
    "racr-python",
    "racr-scheme",
    "neo4j",
    "sesame",
    "emfapi",
    "rdf4j",
    "epsilon",
    "eclipseocl"
  ],
  "ignored": [],
  "toolwise": []
}
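
The scripts also read a local override file, local-merge_results.json, placed next to the config (and ignored by Git, see .gitignore above). A minimal sketch with purely illustrative values; each key is either one of the keys above or the long form of a merge_results.py option:

{
  "result-dir": "../results/",
  "max-size": 512,
  "verbose": true,
  "toolwise": [
    "jastadd-java-references"
  ]
}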

merge_results.py:

#!/usr/bin/env python
import argparse
import glob
import json
import logging
import os
import os.path
import re
import sys

FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
BENCHMARK_PATTERN = re.compile('.*-(BatchModel|Repair|Inject)Test.*')
logger = logging.getLogger('merge_result')
SIZE_PATTERN = re.compile('.*-railway-[^\\-]*-([^\\-]*)-.csv')
NAME_PATTERN = re.compile('(times|matches)-([^\\-]*)-.*.csv')
RUN_PATTERN = re.compile('run-(....)-(..)-(..)-(..)-(..)-(..)')
RUN_REPLACEMENT = r'\1_\2_\3 \4:\5:\6'
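# Example (illustrative): via new_run_name() below, RUN_PATTERN rewrites a run
# directory name such as 'run-2017-01-31-13-45-59' into '2017_01_31 13:45:59'.
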
def include_file_config(args):
    def override_if_defined(key, convert=lambda x: x):
        keyArgs = key.replace('-', '_')
        value = content.get(key) or content.get(keyArgs)
        if value:
            setattr(args, keyArgs, convert(value))
    # load config file
    with open(args.file_config) as fdr:
        content = json.load(fdr)
    # update with local version, if existing
    directory, basename = os.path.split(os.path.abspath(args.file_config))
    local_config_file = os.path.join(directory, 'local-' + basename)
    if os.path.exists(local_config_file):
        with open(local_config_file) as fdr:
            content.update(json.load(fdr))
    else:
        logger.debug('No local config file found.')
    if not content.get('tools'):
        logger.error('Key "tools" not found in config file "' + args.file_config + '". Exiting.')
        sys.exit(1)
    args.tools = content['tools']
    override_if_defined('max-size', int)
    override_if_defined('dry-run', bool)
    override_if_defined('result-dir')
    override_if_defined('create-run-dirs', bool)
    override_if_defined('create-toolwise-dirs', bool)
    override_if_defined('no-clean', bool)
    override_if_defined('verbose', bool)

def create_link(fileToLink, linkName, dry_run):
    # in dry-run mode, report the planned action instead of performing it
    (logger.info if dry_run else logger.debug)('Linking %s to %s', fileToLink, linkName)
    if dry_run:
        return
    if os.path.lexists(linkName):
        os.unlink(linkName)
    os.symlink(fileToLink, linkName)

def ensure_directory(dir_name, dry_run):
    if dry_run:
        return
    if not os.path.exists(dir_name):
        logger.info('Creating %s', dir_name)
        os.mkdir(dir_name)

def exceeds(filename, max_size):
    match = SIZE_PATTERN.match(filename)
    return int(match.group(1)) > max_size if match else False

def remove_if_there(the_list, element_to_remove):
    if element_to_remove in the_list:
        the_list.remove(element_to_remove)

def new_run_name(old_run_name):
    return RUN_PATTERN.sub(RUN_REPLACEMENT, old_run_name)

def copy_replace(fileTocopy, all_runs_dir, tool_name, run, dry_run):
    """
    Copy fileTocopy to all_runs_dir, replacing tool_name with the
    reformatted run name in both its file name and its content.
    """
    run_name = new_run_name(run)
    targetFile = os.path.join(
        all_runs_dir, os.path.basename(fileTocopy).replace(tool_name, run_name))
    (logger.info if dry_run else logger.debug)('Copying %s to %s', fileTocopy, targetFile)
    if dry_run:
        return
    first = True
    with open(fileTocopy) as fdr_source, open(targetFile, 'w') as fdr_target:
        for line in fdr_source:
            if first:
                first = False  # leave the CSV header line unchanged
            else:
                line = line.replace(tool_name, run_name)
            fdr_target.write(line)

def main(args):
    """
    Main process.
    Used directory structure/variables:
    results/                  -> result_dir
      tools/
        tool1/                -> tool_dir
          tool1-run1/         -> run_dir
            times.csv
            matches.csv
          run-list.csv
          all-runs/           -> all_runs_dir
            times-run1.csv@
      run1/                   -> global_run_dir
      merged/                 -> merged_dir
        individual/           -> merged_dir_individual
          times.csv@
        benchmark/            -> merged_dir_benchmark
          times.csv@
    """
    log_action = logger.info if args.dry_run else logger.debug
    # Gathering paths, creating top-level directories
    result_dir = os.path.abspath(args.result_dir)
    merged_dir = os.path.join(result_dir, 'merged')
    merged_dir_benchmark = os.path.join(merged_dir, 'benchmark')
    merged_dir_individual = os.path.join(merged_dir, 'individual')
    for dir_name in (merged_dir, merged_dir_benchmark, merged_dir_individual):
        ensure_directory(dir_name, args.dry_run)
    # Gathering tools (args.tools is the plain list of names from the JSON config)
    tools = list(args.tools)
    logger.debug('result_dir: %s, tools: %s', result_dir, tools)
    # Clean symlinks if requested or max_size is set
    if (args.clean or args.max_size) and not args.dry_run:
        for dir_to_clean in [merged_dir, merged_dir_benchmark, merged_dir_individual]:
            for link in os.listdir(dir_to_clean):
                linkName = os.path.join(dir_to_clean, link)
                if os.path.islink(linkName):
                    os.unlink(linkName)
    if (args.clean or args.create_toolwise_dirs) and not args.dry_run:
        for linkName in glob.iglob(os.path.join(result_dir, 'tools', '*', 'all-runs', '*.csv')):
            os.remove(linkName)
    # Merge results
    for tool in tools:
        if tool.startswith('#'):
            logger.debug('Ignoring tool "%s"', tool[1:])
            continue
        already_merged = []
        tool_dir = os.path.join(result_dir, 'tools', tool)
        if not os.path.exists(tool_dir):
            logger.warning('Tool not found: %s', tool)
            continue
        all_runs_dir = os.path.join(tool_dir, 'all-runs')
        ensure_directory(all_runs_dir, args.dry_run)
        runs = sorted(os.listdir(tool_dir), reverse=True)
        remove_if_there(runs, 'all-runs')
        remove_if_there(runs, 'run-list.csv')
        if args.create_toolwise_dirs:
            # write out run-list.csv
            with open(os.path.join(tool_dir, 'run-list.csv'), 'w') as fdr:
                fdr.write('Runs\n')
                for run in runs:
                    fdr.write(new_run_name(run) + '\n')
        for run in runs:
            run_dir = os.path.join(tool_dir, run)
            global_run_dir = os.path.join(result_dir, run)
            if not os.path.isdir(run_dir):
                continue
            ensure_directory(global_run_dir, args.dry_run)
            for csvFile in os.listdir(run_dir):
                # link file in run directory
                fileToLink = os.path.join(tool_dir, run, csvFile)
                linkName = os.path.join(global_run_dir, csvFile)
                create_link(fileToLink, linkName, args.dry_run)
                # skip if max-size is set and size is exceeded
                if args.max_size and exceeds(fileToLink, args.max_size):
                    continue
                if args.create_toolwise_dirs:
                    # copy into all-runs (renaming the file accordingly)
                    match = NAME_PATTERN.match(csvFile)
                    if match:
                        if not BENCHMARK_PATTERN.match(csvFile):
                            tool_name = match.group(2)
                            copy_replace(fileToLink, all_runs_dir, tool_name, run, args.dry_run)
                    else:
                        logger.warning('file did not match pattern: %s', csvFile)
                # link file in merged directory
                if csvFile not in already_merged:
                    linkName = os.path.join(merged_dir_benchmark if BENCHMARK_PATTERN.match(csvFile)
                                            else merged_dir_individual, csvFile)
                    create_link(fileToLink, linkName, args.dry_run)
                    already_merged.append(csvFile)
                else:
                    log_action('Skipping %s', csvFile)

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Merge results of all benchmark runs.',
        epilog="""The config file must contain the key "tools" specifying the tools to process.
It can further contain the long form of any argument to this program as a default value.
Any command line parameter will override such a default value.
Additionally, a local version of the config file is read if present, overriding the default
values; its filename is "local-" prepended to the name of the config file.""")
    parser.add_argument(
        "-r", "--result-dir", help="Path to result directory to search in.", type=str)
    parser.add_argument(
        "-c", "--create-run-dirs", help="Whether to recreate run directories.",
        action="store_true")
    parser.add_argument(
        "-t", "--create-toolwise-dirs", help="Whether to recreate toolwise regression directories.",
        action="store_true")
    parser.add_argument(
        "-d", "--dry-run", help="Only print actions, don't execute them.",
        action="store_true")
    parser.add_argument(
        "-n", "--no-clean", help="Don't remove previously existing symlinks in merged dir.",
        dest='clean', action="store_false")
    parser.add_argument(
        "-v", "--verbose", help="Print debug messages.", action="store_true")
    parser.add_argument(
        "-m", "--max-size", type=int,
        help="Maximum benchmark size to include. Implies cleaning existing symlinks.")
    parser.add_argument(
        "-f", "--file-config", default='merge_results.json', help="Config file to use.")
    args = parser.parse_args()
    include_file_config(args)
    logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
    main(args)
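
A typical invocation of the merge step, sketched from the argparse options above (the result path is illustrative):

python merge_results.py --result-dir ../results/ --create-run-dirs --dry-run --verbose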

toolwise.R:

library(data.table)
library(reshape2)
library(plyr)
library(ggplot2)
library(ggrepel)
library(arules)
library(ggforce)
source('util.R')
args = commandArgs(trailingOnly = TRUE)
if (length(args) == 0) {
  stop("At least one argument must be supplied (tool-name).\n", call. = FALSE)
}
toolName = args[1]
# prepare output directory
output_dir = paste("../diagrams/merged", toolName, sep = "/")
if (!dir.exists(output_dir)) {
  dir.create(output_dir)
}
# constants
workloads = c(
"PosLength", "SwitchMonitored",
"RouteSensor", "SwitchSet",
"ConnectedSegments", "SemaphoreNeighbor"
)
phases = c("Read", "Check", "Read.and.Check", "Transformation", "Recheck", "Transformation.and.Recheck")
phasesPrettified = c("Read", "Check", "Read and Check", "Transformation", "Recheck", "Transformation and Recheck")
# element counts per model size (1 2 4 8 16 32 64 128 256 512 1024 2048 4096)
sizes = list()
sizes[["Repair"]] = c("8k", "15k", "33k", "66k", "135k", "271k", "566k", "1.1M", "2.2M", "4.6M", "9.3M", "18M", "37M")
runList = read.csv(paste("../results/tools/", toolName, "/run-list.csv", sep=""), colClasses=c(rep("character",1)))
# load the data
tsvs = list.files(paste("../results/tools/", toolName, "/all-runs/", sep=""), pattern = "times-.*\\.csv", full.names = T, recursive = T)
l = lapply(tsvs, read.csv)
times = rbindlist(l)
# preprocess the data
times$Tool = factor(times$Tool, levels = runList$Runs)
keep_descriptions_first_char(times)
times$Model = gsub("\\D+", "", times$Model)
times$Model = as.numeric(times$Model)
times$Time = times$Time / 10^6
# make the phases a factor with a fixed set of values to help dcasting
# (e.g. Batch measurements do not have Transformation and Recheck attributes,
# hence accessing the "Transformation" attribute would throw an error)
times$Phase = factor(times$Phase, levels = c("Read", "Check", "Transformation", "Recheck"))
times.wide = dcast(
  data = times,
  formula = Tool + Workload + Description + Model + Run ~ Phase,
  value.var = "Time",
  drop = T,
  fun.aggregate = mean
)
# calculate aggregated values
times.derived = times.wide
times.derived$Read.and.Check = times.derived$Read + times.derived$Check
times.derived$Transformation.and.Recheck = times.derived$Transformation + times.derived$Recheck
# calculate the median value of runs
times.aggregated.runs = ddply(
  .data = times.derived,
  .variables = c("Tool", "Workload", "Description", "Model"),
  .fun = colwise(median),
  .progress = "text"
)
# drop the "Run" column
times.aggregated.runs = subset(times.aggregated.runs, select = -c(Run))
times.processed = melt(
  data = times.aggregated.runs,
  id.vars = c("Tool", "Workload", "Description", "Model"),
  measure.vars = phases,
  variable.name = "Phase",
  value.name = "Time"
)
# beautify plotted record:
# 1. change dots to spaces
# 2. make sure that the phases are still factors
times.plot = times.processed
times.plot$Phase = gsub('\\.', ' ', times.plot$Phase)
times.plot$Phase = factor(times.plot$Phase, levels = phasesPrettified)
times.plot$Workload = factor(times.plot$Workload, levels = workloads)
### line charts
for (phase in phasesPrettified) {
  phase.filename = gsub(' ', '-', phase)
  workloadSizes = sizes[["Repair"]]
  # filter the dataframe to the current phase
  df = times.plot[times.plot$Phase == phase, ]
  # do not visualize empty data sets
  if (nrow(df) == 0) {
    print(paste("No rows to visualize for phase", phase))
    next
  }
  # x axis labels
  xbreaks = unique(df$Model)
  currentWorkloadSizes = head(workloadSizes, n = length(xbreaks))
  xlabels = paste(xbreaks, "\n", currentWorkloadSizes, sep = "")
  # blank out every other model size label to avoid overplotting
  maxLabel = max(log2(max(df$Model)), 2)
  if (maxLabel %% 2) {
    start = 3
  } else {
    start = 2
  }
  filter = seq(start, maxLabel, by = 2)
  xlabels[filter] = ""
  # y axis labels
  yaxis = nice_y_axis()
  ybreaks = yaxis$ybreaks
  ylabels = yaxis$ylabels
  p = ggplot(df) +
    aes(x = as.factor(Model), y = Time) +
    labs(title = paste("Individual query execution time,", phase, "phase,", toolName),
         x = "Model size\n#Elements", y = "Execution times [ms]") +
    geom_point(aes(col = Tool, shape = Tool), size = 2.0) +
    scale_shape_manual(values = seq(0, 15)) +
    geom_line(aes(col = Tool, group = Tool), size = 0.5) +
    scale_x_discrete(breaks = xbreaks, labels = xlabels) +
    scale_y_log10(breaks = ybreaks, labels = ylabels) +
    guides(color = guide_legend(ncol = 4)) +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5),
      text = element_text(size = 10),
      legend.key = element_blank(),
      legend.title = element_blank(),
      legend.position = "bottom",
      axis.text = element_text(size = 9)
    )
  print(p)
  # save one page (i.e. one workload facet) per PDF
  for (cpage in 1:6) {
    ggsave(
      plot = p + facet_grid_paginate(~ Workload, nrow = 1, ncol = 1, page = cpage, scale = "free"),
      filename = paste(output_dir, "/", toolName, "-", phase.filename, "-", workloads[cpage], ".pdf", sep = ""),
      width = 250, height = 150, units = "mm"
    )
  }
}
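
toolwise.R is invoked with a single tool name, normally via toolwise.py below; a direct call would look like this (tool name illustrative, taken from the config above):

Rscript toolwise.R jastadd-java-references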

toolwise.py:

import argparse
import json
import logging
import os.path
import subprocess

FORMAT = '%(asctime)s %(levelname)-8s %(threadName)-10s (%(filename)s:%(lineno)d): %(message)s'
logger = logging.getLogger('toolwise')

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Plot results per tool.')
    parser.add_argument(
        "-v", "--verbose", help="Print debug messages.", action="store_true")
    parser.add_argument(
        "-f", "--file-config", default='merge_results.json', help="Config file to use.")
    args = parser.parse_args()
    logging.basicConfig(format=FORMAT, level=logging.DEBUG if args.verbose else logging.INFO)
    # load config file
    with open(args.file_config) as fdr:
        content = json.load(fdr)
    # update with local version, if existing
    directory, basename = os.path.split(os.path.abspath(args.file_config))
    local_config_file = os.path.join(directory, 'local-' + basename)
    if os.path.exists(local_config_file):
        with open(local_config_file) as fdr:
            content.update(json.load(fdr))
    else:
        logger.debug('No local config file found.')
    for tool in content.get('toolwise', []):
        logger.info('Processing %s now.', tool)
        subprocess.call(["Rscript", "toolwise.R", tool])

toolwise.sh:

#!/bin/bash
# forwards its options to toolwise.py:
#   --file-config <file>
#   --verbose
python toolwise.py "$@"
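
Taken together, the new toolwise reporting chain is: the Gradle task plotToolwise runs toolwise.sh, which calls toolwise.py, which in turn invokes Rscript toolwise.R once per tool listed under "toolwise" in merge_results.json (or its local override).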