93 lines
3.5 KiB
R
93 lines
3.5 KiB
R
library(ggplot2)
|
|
|
|
################################################################################
|
|
### General Helpers ###
|
|
################################################################################
|
|
|
|
# Read/Combine simulation data logs
#
# Reads every log file below the `results/` folder whose name matches
# `pattern` and row-binds them into a single data frame. The folder is looked
# up in the working directory first and in `./simulations/` otherwise.
#
# Each log is a CSV file; lines starting with `#` are treated as header lines
# of the form `# <name> <value>` (split at the first space). Every such
# meta-parameter is added to the log's data frame as a constant column.
#
# Args:
#   pattern: regular expression handed to `list.files()` to select log files.
#
# Returns:
#   A data frame with the rows of all matching logs, one extra column per
#   meta-parameter, and `method` converted to a factor with display labels
#   (methods not in the known level list become NA).
#
# Raises:
#   An error if no file matches `pattern`.
read.logs <- function(pattern) {
    # Folder containing log files. `dir.exists` without a trailing slash is
    # used since trailing separators are not portable (Windows rejects them).
    path <- if (dir.exists('results')) './' else './simulations/'
    path <- paste0(path, 'results/')

    file.names <- list.files(path, pattern, full.names = TRUE)
    if (length(file.names) == 0) {
        stop("no log files matching '", pattern, "' found in '", path, "'",
             call. = FALSE)
    }

    # Convert one meta-parameter from string into int/num/bool/... and fall
    # back to the raw string when that is not possible.
    parse.value <- function(text) {
        # A header line without a value (no space) yields NA as before.
        if (is.na(text)) return(NA)
        # NOTE(review): this still evaluates text read from the log file.
        # The evaluation is confined to a scratch environment on top of
        # `baseenv()` so a value can not resolve to (or overwrite) variables
        # of this function or the global workspace, but it is not a security
        # boundary; only read trusted log files.
        val <- tryCatch(
            eval(parse(text = text), envir = new.env(parent = baseenv())),
            error = function(err) text
        )
        # Functions, NULL and empty results can not be stored as a column;
        # keep the original text instead (e.g. a value spelled `c` or `exp`).
        if (is.null(val) || !is.atomic(val) || length(val) == 0) return(text)
        # Vector valued parameters are collapsed into a single string.
        if (length(val) > 1) val <- paste(val, collapse = ',')
        val
    }

    # Read all log files and augment with meta-parameters
    sim <- do.call(rbind, lapply(file.names, function(file) {
        # Read simulation log (one file), header lines are skipped as comments
        sim <- read.csv(file, comment.char = '#')

        # Extract simulation arguments from the header lines and split each
        # into `c(name, value)` at the first space.
        args <- grep('^#', readLines(file), value = TRUE)
        args <- sub('^# ?', '', args)
        args <- regmatches(args, regexpr(' ', args), invert = TRUE)

        # Add simulation arguments as columns
        for (arg in args) {
            name <- arg[1]
            # A bare `#` line carries no parameter name, nothing to add.
            if (is.na(name) || !nzchar(name)) next
            sim[[name]] <- parse.value(arg[2])
        }
        sim
    }))

    # Convert methods to factors
    sim$method <- factor(sim$method,
        levels = c("opg", "mave", "cve", "sir", "save", "phdy", "nn.opg", "nn.ref"),
        labels = c("OPG", "MAVE", "CVE", "SIR", "SAVE", "PHD", "NN-OPG", "NN-Ref")
    )
    sim
}
|
|
|
|
|
|
################################################################################
|
|
###                               Big Data                                   ###
|
|
################################################################################
|
|
|
|
# Load and stack all big data simulation logs
sim <- read.logs('sim_big_.*csv')

# Per (dataset, n, p, method) cell: mean and standard deviation of the
# subspace distance, joined side by side as `dist.subspace.mean` and
# `dist.subspace.sd`. The outer parentheses print the merged table.
aggr.formula <- dist.subspace ~ dataset + n + p + method
(aggr <- merge(
    aggregate(aggr.formula, sim, mean),
    aggregate(aggr.formula, sim, sd),
    by = c("dataset", "n", "p", "method"),
    suffixes = c(".mean", ".sd")
))

# Mean subspace distance against sample size (log2 axis), one line per
# dataset/method combination, with +/- one standard deviation error bars
ggplot(
    aggr,
    aes(
        x = n,
        y = dist.subspace.mean,
        group = interaction(dataset, method),
        color = method,
        linetype = dataset
    )
) +
    geom_line() +
    geom_errorbar(
        aes(
            ymin = dist.subspace.mean - dist.subspace.sd,
            ymax = dist.subspace.mean + dist.subspace.sd
        ),
        width = 0.2
    ) +
    scale_x_continuous(trans = 'log2')
|
|
|
|
################################################################################
|
|
### Binary Response ###
|
|
################################################################################
|
|
|
|
# Load and stack all binary response simulation logs
sim <- read.logs('sim_binary_[0-9_]*\\.csv')

# Summary table: mean and standard deviation of subspace distance and
# accuracy for every dataset/method pair. `na.pass` keeps rows with missing
# values in the aggregation instead of dropping them beforehand.
aggr.formula <- cbind(dist.subspace, accuracy) ~ dataset + method
aggr <- merge(
    aggregate(aggr.formula, sim, mean, na.action = na.pass),
    aggregate(aggr.formula, sim, sd, na.action = na.pass),
    by = attr(terms(aggr.formula), "term.labels"),
    suffixes = c(".mean", ".sd")
)

# Print the table sorted by dataset, then by mean subspace distance
print(aggr[order(aggr$dataset, aggr$dist.subspace.mean), ], digits = 3)

# Box-plot of the subspace distances per method, colored by dataset
ggplot(
    sim,
    aes(
        x = method,
        y = dist.subspace,
        group = interaction(dataset, method),
        color = dataset
    )
) +
    geom_boxplot() +
    labs(
        title = "Sim. Binary",
        x = "Methods",
        y = "Subspace Dist.",
        color = "Datasets"
    )
|