From a673b50c7a6ec85c571c3a0dfc8c790895edb056 Mon Sep 17 00:00:00 2001
From: daniel
Date: Fri, 10 Sep 2021 14:58:47 +0200
Subject: [PATCH] add: custom metrics support and metric.subspace, wip: new
 binary response datasets, add: nn$evaluate, add: simulation_binary

---
 NNSDR/NAMESPACE                    |   1 +
 NNSDR/R/datasets.R                 |  48 +++++++++
 NNSDR/R/metric_subspace.R          |  58 ++++++++++
 NNSDR/R/nnsdr_class.R              |  26 ++++-
 real_data/MNIST.R                  | 163 ++++++++++++++++++++++++++++
 simulations/edit_sim_data.R        |  92 ++++++++++++++++
 simulations/simulations.R          |   5 +-
 simulations/simulations_bigdata.R  |   4 +-
 simulations/simulations_bigdata.sh |  36 +++----
 simulations/simulations_binary.R   | 167 ++++++++++-------------------
 simulations/simulations_binary.sh  |  24 +++++
 11 files changed, 493 insertions(+), 131 deletions(-)
 create mode 100644 NNSDR/R/metric_subspace.R
 create mode 100644 real_data/MNIST.R
 create mode 100644 simulations/edit_sim_data.R
 create mode 100644 simulations/simulations_binary.sh

diff --git a/NNSDR/NAMESPACE b/NNSDR/NAMESPACE
index 0dd3b71..815c72f 100644
--- a/NNSDR/NAMESPACE
+++ b/NNSDR/NAMESPACE
@@ -7,6 +7,7 @@ export(dist.grassmann)
 export(dist.mave)
 export(dist.subspace)
 export(get.script)
+export(metric.subspace)
 export(nnsdr)
 export(parse.args)
 export(reinitialize_weights)
diff --git a/NNSDR/R/datasets.R b/NNSDR/R/datasets.R
index 8f8bfc0..88d8da7 100644
--- a/NNSDR/R/datasets.R
+++ b/NNSDR/R/datasets.R
@@ -255,6 +255,54 @@ dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
         XB <- X %*% B
         Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 + 5 * XB[, 4] + rnorm(n, sd = sd)
         Y <- as.matrix(Y)
+    } else if (name == "B1") {
+        if (missing(n)) { n <- 400 }
+
+        B <- diag(1, p, 1)
+        X <- matrix(runif(n * p, -2, 2), n, p)
+        Y <- (X %*% B + rnorm(n, 0, sd)) > 0
+    } else if (name == "B2") {
+        if (missing(n)) { n <- 400 }
+
+        B <- diag(1, p, 2)
+        Y <- as.matrix(sample(as.logical(0:1), n, replace = TRUE))
+        X <- matrix(rnorm(n * p, sd = sd), n, p)
+        x1 <- sample(c(-1, 1), n, replace = TRUE)
+        x2 <- x1 * c(-1, 1)[Y + 1]
+        X[, 1] <- X[, 1] + x1
+        X[, 2] <- X[, 2] + x2
+    } else if (name == "B3") {
+        if (missing(n)) { n <- 1600 }
+        if (missing(sd)) { sd <- 0.1 }
+
+        B <- diag(1, p, 3)
+        Y <- sample(as.logical(0:1), n, replace = TRUE)
+        x <- seq(0, 6 * pi, length.out = n)
+        X <- matrix(rnorm(n * p, sd = sd), n, p)
+        X[, 1] <- X[, 1] + 0.5 * sin(x + pi * Y)
+        X[, 2] <- X[, 2] + 0.5 * cos(x + pi * Y)
+        X[, 3] <- X[, 3] + (x - 3 * pi) / (3 * pi)
+    } else if (name == "B4") {
+        # Let
+        #   X ~ N_p(0, I_p)
+        # and Z = B' X, then
+        #   Y | X ~ Binom(1, sin(atan2(Z_1, Z_2) + Z_3)^2)
+        # or in other words
+        #   E(Y | X) = sin(atan2(Z_1, Z_2) + Z_3)^2
+
+        if (missing(n)) { n <- 1600 }
+
+        B <- diag(1, p, 3)
+        X <- matrix(rnorm(n * p, sd = sd), n, p)
+
+        # Angle and height for each X with respect to the spiral manifold
+        XB <- X %*% B
+        phi <- atan2(XB[, 1], XB[, 2])
+        h <- XB[, 3]
+
+        # Draw Y with P(Y = TRUE | X) = prob; note that the first column of
+        # the probability matrix belongs to FALSE, the second to TRUE
+        prob <- sin(phi + h)^2
+        Y <- apply(cbind(1 - prob, prob), 1, sample, x = as.logical(0:1),
+                   size = 1, replace = FALSE)
     } else {
         stop("Got unknown dataset name.")
     }
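
The `B4` branch states its conditional response model explicitly, so the sampling can be sanity-checked empirically: within bins of the stated success probability, the observed response rate should track the bin centers. A minimal sketch, not part of the patch, assuming the patched `NNSDR` package is installed:

    library(NNSDR)

    set.seed(42)
    ds <- dataset("B4", n = 100000L)

    # Recompute the success probability from the reduced predictors Z = B' X
    Z <- ds$X %*% ds$B
    prob <- sin(atan2(Z[, 1], Z[, 2]) + Z[, 3])^2

    # Empirical P(Y = TRUE) per probability bin should match the bin centers
    bins <- cut(prob, breaks = seq(0, 1, by = 0.1))
    print(tapply(as.vector(ds$Y), bins, mean))
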
diff --git a/NNSDR/R/metric_subspace.R b/NNSDR/R/metric_subspace.R
new file mode 100644
index 0000000..4eaa46f
--- /dev/null
+++ b/NNSDR/R/metric_subspace.R
@@ -0,0 +1,58 @@
+#' @export
+metric.subspace <- function(B_true,
+    X = NULL, Y = NULL,
+    type = c("Refinement", "OPG"),
+    name = "metric.subspace",
+    normalize = FALSE
+) {
+    type <- match.arg(type)
+
+    if (!is.matrix(B_true))
+        B_true <- as.matrix(B_true)
+    P_true <- B_true %*% solve(crossprod(B_true), t(B_true))
+    P_true <- tf$constant(P_true, dtype = 'float32')
+
+    if (normalize) {
+        rankSum <- 2 * ncol(B_true)
+        c <- 1 / sqrt(min(rankSum, 2 * nrow(B_true) - rankSum))
+    } else {
+        c <- sqrt(2)
+    }
+    c <- tf$constant(c, dtype = 'float32')
+
+    if (type == "Refinement") {
+        structure(function(model) {
+            B <- model$get_layer('reduction')$weights
+            function(y_true, y_pred) {
+                P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
+                diff <- P_true - P
+                c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
+            }
+        },
+            class = c("nnsdr.metric", "Refinement"),
+            name = name
+        )
+    } else {
+        X <- tf$cast(X, dtype = 'float32')
+        # 0-based start/extent selecting the last `ncol(B_true)` eigenvector
+        # columns (`eigh` returns eigenvalues in ascending order)
+        begin <- tf$cast(c(0L, nrow(B_true) - ncol(B_true)), dtype = 'int32')
+        size <- tf$cast(c(nrow(B_true), ncol(B_true)), dtype = 'int32')
+        structure(function(model) {
+            function(y_true, y_pred) {
+                with(tf$GradientTape() %as% tape, {
+                    tape$watch(X)
+                    out <- model(X)
+                })
+                G <- tape$gradient(out, X)
+                B <- tf$linalg$eigh(tf$linalg$matmul(G, G, transpose_a = TRUE))
+                B <- tf$linalg$qr(tf$slice(B[[2]], begin, size))$q
+
+                P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
+                diff <- P_true - P
+                c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
+            }
+        },
+            class = c("nnsdr.metric", "OPG"),
+            name = name
+        )
+    }
+}
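
`metric.subspace` comes in two flavors selected via `type`: the "Refinement" variant reads the current `reduction` layer weights, while the "OPG" variant recomputes the gradient-based estimate on the fly, which is why it additionally takes the data. Both are meant to be listed side by side in the `metrics` argument and are dispatched to the matching sub-network by their class attribute (see the `build.MLP` hunk in `nnsdr_class.R` below). A sketch, with `B_true`, `X` and `Y` as placeholders for the true projection matrix and the training data:

    metrics <- list(
        'accuracy',
        # tracked while training the OPG network
        metric.subspace(B_true, X, Y, type = "OPG", normalize = TRUE),
        # tracked while training the refinement network
        metric.subspace(B_true, type = "Refinement", normalize = TRUE)
    )
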
diff --git a/NNSDR/R/nnsdr_class.R b/NNSDR/R/nnsdr_class.R
index fabf7f1..782b042 100644
--- a/NNSDR/R/nnsdr_class.R
+++ b/NNSDR/R/nnsdr_class.R
@@ -72,7 +72,7 @@ build.MLP <- function(input_shapes, d, name, add_reduction,
 
     if (!is.null(metrics)) {
         metrics <- as.list(metrics)
-        for (i in seq_along(metrics)) {
+        for (i in rev(seq_along(metrics))) {
             metric <- metrics[[i]]
             if (all(c("nnsdr.metric", name) %in% class(metric))) {
                 metric_fn <- reticulate::py_func(metric(mlp))
@@ -275,6 +275,30 @@ nnsdr <- setRefClass('nnsdr',
             as.array(output)
         }
     },
+    evaluate = function(inputs, output) {
+        if (is.list(inputs)) {
+            inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
+        } else {
+            inputs <- list(tf$cast(inputs, dtype = 'float32'))
+        }
+        eval.opg <- .self$nn.opg$evaluate(inputs, output,
+            return_dict = TRUE, verbose = 0L)
+
+        if (is.null(.self$history.ref))
+            return(data.frame(eval.opg, row.names = "OPG"))
+
+        eval.ref <- .self$nn.ref$evaluate(inputs, output,
+            return_dict = TRUE, verbose = 0L)
+
+        # Convert to data.frame
+        eval.opg <- data.frame(eval.opg, row.names = "OPG")
+        eval.ref <- data.frame(eval.ref, row.names = "Refinement")
+        # Augment mutually exclusive columns
+        eval.opg[setdiff(names(eval.ref), names(eval.opg))] <- NA
+        eval.ref[setdiff(names(eval.opg), names(eval.ref))] <- NA
+        # Combine/Bind
+        rbind(eval.opg, eval.ref)
+    },
    coef = function(type = c('Refinement', 'OPG')) {
        type <- match.arg(type)
        if (type == 'Refinement') {
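
The new `evaluate` method wraps the keras `evaluate` of both sub-networks and returns a `data.frame` with one row per network ("OPG" alone until refinement training has run); metrics tracked by only one of the two models are padded with `NA` in the other row. A hypothetical session, with placeholder numbers:

    eval <- nn$evaluate(ds.test$X, ds.test$Y)
    eval
    #>             loss accuracy metric.subspace
    #> OPG        0.512    0.781              NA
    #> Refinement 0.334    0.862           0.241

    # Indexing works as for any data.frame
    eval[["Refinement", "accuracy"]]
    #> [1] 0.862
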
diff --git a/real_data/MNIST.R b/real_data/MNIST.R
new file mode 100644
index 0000000..88d37cf
--- /dev/null
+++ b/real_data/MNIST.R
@@ -0,0 +1,163 @@
+library(keras)
+
+num_classes <- 10L
+epochs <- 20L
+batch_size <- 128L
+
+################################################################################
+###                    Loading & Preparing MNIST dataset                     ###
+################################################################################
+
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+
+x_train <- array_reshape(x_train, c(nrow(x_train), prod(dim(x_train)[-1])))
+x_test  <- array_reshape(x_test , c(nrow(x_test ), prod(dim(x_test )[-1])))
+
+# x_train <- x_train / 255
+# x_test <- x_test / 255
+center  <- apply(x_train, 2, mean)
+x_train <- (x_train - center) / 128
+x_test  <- (x_test - center) / 128
+
+y_train <- to_categorical(y_train, num_classes)
+y_test  <- to_categorical(y_test , num_classes)
+
+################################################################################
+###                              Model Creation                              ###
+################################################################################
+
+model <- keras_model_sequential(name = 'base_model')
+model %>%
+    layer_dense(units = 256L, activation = 'relu',
+                input_shape = ncol(x_train)) %>%
+    layer_dropout(rate = 0.4) %>%
+    layer_dense(units = 128L, activation = 'relu') %>%
+    layer_dropout(rate = 0.3) %>%
+    layer_dense(units = num_classes, activation = 'softmax')
+
+summary(model)
+
+model %>% compile(
+    loss = 'categorical_crossentropy',
+    optimizer = 'RMSProp',
+    metrics = c('accuracy')
+)
+
+################################################################################
+###                           Base Model Training                            ###
+################################################################################
+
+history.base <- model %>% fit(
+    x_train, y_train,
+    batch_size = batch_size,
+    epochs = epochs,
+    verbose = 1L,
+    validation_split = 0.1
+)
+
+plot(history.base)
+
+score <- model %>% evaluate(x_test, y_test, verbose = 0L)
+
+cat('Test loss:     ', score[['loss']], '\n',
+    'Test accuracy: ', score[['accuracy']], '\n', sep = '')
+
+################################################################################
+###                            OPG Data Reduction                            ###
+################################################################################
+library(tensorflow)
+library(ggplot2)
+
+G <- local({
+    X <- tf$cast(x_train, 'float32')
+    with(tf$GradientTape() %as% tape, {
+        tape$watch(X)
+        Y <- model(X)
+    })
+    as.matrix(tape$gradient(Y, X))
+})
+eig <- eigen(var(G), symmetric = TRUE)
+B.opg <- eig$vectors[, 1:2]
+
+# ggplot(data.frame(values = eig$values[1:25]), aes(x = seq_along(values), y = values)) +
+#     geom_line()
+
+ggplot(data.frame(x_test %*% B.opg, y = factor(apply(y_test, 1, which.max))),
+       aes(x = X1, y = X2, group = y, color = y)) +
+    geom_point()
+
+################################################################################
+###                             Refinement Model                             ###
+################################################################################
+weights <- model$get_weights()
+
+model.ref <- keras_model_sequential(name = 'Refinement')
+model.ref %>%
+    layer_dense(units = ncol(B.opg), activation = 'relu',
+                input_shape = ncol(x_train), use_bias = FALSE,
+                weights = list(B.opg)) %>%
+    layer_dense(units = 256L, activation = 'relu',
+                weights = list(crossprod(B.opg, weights[[1]]), weights[[2]])) %>%
+    layer_dropout(rate = 0.4) %>%
+    layer_dense(units = 128L, activation = 'relu',
+                weights = weights[3:4]) %>%
+    layer_dropout(rate = 0.3) %>%
+    layer_dense(units = num_classes, activation = 'softmax',
+                weights = weights[5:6])
+
+summary(model.ref)
+
+model.ref %>% compile(
+    loss = 'categorical_crossentropy',
+    optimizer = 'RMSProp',
+    metrics = c('accuracy')
+)
+
+################################################################################
+###                        Refinement Model Training                         ###
+################################################################################
+
+history.ref <- model.ref %>% fit(
+    x_train, y_train,
+    batch_size = batch_size,
+    epochs = epochs,
+    verbose = 1L,
+    validation_split = 0.1
+)
+
+plot(history.ref)
+
+score <- model.ref %>% evaluate(x_test, y_test, verbose = 0L)
+
+cat('Test loss:     ', score[['loss']], '\n',
+    'Test accuracy: ', score[['accuracy']], '\n', sep = '')
+
+### Combine Histories
+hist <- structure(list(
+    params = list(
+        verbose = min(history.base$params$verbose, history.ref$params$verbose),
+        epochs = history.base$params$epochs + history.ref$params$epochs,
+        steps = max(history.base$params$steps, history.ref$params$steps)
+    ),
+    metrics = lapply(structure(names(history.base$metrics), names = names(history.base$metrics)),
+        function(name) c(history.base$metrics[[name]], history.ref$metrics[[name]]))
+), class = "keras_training_history")
+
+plot(hist, smooth = FALSE)
+
+
+B.ref <- model.ref$get_weights()[[1]]
+
+ggplot(data.frame(x_test %*% B.ref, y = factor(apply(y_test, 1, which.max))),
+       aes(x = X1, y = X2, group = y, color = y)) +
+    geom_point()
+
+
+B.pca <- eigen(var(x_train), symmetric = TRUE)$vectors[, 1:2]
+ggplot(data.frame(x_test %*% B.pca, y = factor(apply(y_test, 1, which.max))),
+       aes(x = X1, y = X2, group = y, color = y)) +
+    geom_point()
+
+image.ref <- matrix(((B.ref - min(B.ref)) / abs(diff(range(B.ref))))[, 2], 28, 28)
+plot(c(0, 28), c(0, 28), type = "n", xlab = "", ylab = "")
+rasterImage(image.ref, 0, 0, 28, 28, interpolate = TRUE)
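
`MNIST.R` differentiates the base model at all 60000 training images in a single `GradientTape` pass, which can exhaust GPU memory. A batched variant of the same OPG gradient computation, a sketch that is not part of the patch (the chunk size of 10000 is an arbitrary choice), accumulates the gradient matrix chunk-wise:

    G <- local({
        idx <- split(seq_len(nrow(x_train)), (seq_len(nrow(x_train)) - 1) %/% 10000L)
        do.call(rbind, lapply(idx, function(i) {
            # Gradients of the model outputs w.r.t. one chunk of inputs
            X <- tf$cast(x_train[i, , drop = FALSE], 'float32')
            with(tf$GradientTape() %as% tape, {
                tape$watch(X)
                Y <- model(X)
            })
            as.matrix(tape$gradient(Y, X))
        }))
    })
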
diff --git a/simulations/edit_sim_data.R b/simulations/edit_sim_data.R
new file mode 100644
index 0000000..e6c69e2
--- /dev/null
+++ b/simulations/edit_sim_data.R
@@ -0,0 +1,92 @@
+library(ggplot2)
+
+################################################################################
+###                             General Helpers                              ###
+################################################################################
+
+# Read/Combine simulation data logs
+read.logs <- function(pattern) {
+    # Folder containing log files
+    path <- if (file.exists('results/')) './' else './simulations/'
+    path <- paste0(path, 'results/')
+    # Read all log files and augment with meta-parameters
+    file.names <- list.files(path, pattern, full.names = TRUE)
+    sim <- do.call(rbind, lapply(file.names, function(path) {
+        # Read simulation log (one file)
+        sim <- read.csv(path, comment.char = '#')
+        # Add simulation arguments as columns
+        args <- Filter(function(line) startsWith(line, '#'), readLines(path))
+        args <- sub('# ?', '', args)
+        args <- regmatches(args, regexpr(' ', args), invert = TRUE)
+        # Try to convert meta-parameters from string into int/num/bool/...
+        for (arg in args) {
+            val <- tryCatch(
+                eval(parse(text = arg[2])),
+                error = function(err) arg[2]
+            )
+            if (length(val) > 1) val <- paste(val, collapse = ',')
+            sim[[arg[1]]] <- val
+        }
+        sim
+    }))
+    # Convert methods to factors
+    sim$method <- factor(sim$method,
+        levels = c("opg", "mave", "cve", "sir", "save", "phdy", "nn.opg", "nn.ref"),
+        labels = c("OPG", "MAVE", "CVE", "SIR", "SAVE", "PHD", "NN-OPG", "NN-Ref")
+    )
+    sim
+}
+
+
+################################################################################
+###                                 Big Data                                 ###
+################################################################################
+
+# Read/Combine big data simulation logs
+sim <- read.logs('sim_big_.*csv')
+
+# Compute mean and standard deviation over replications
+(aggr <- merge(
+    aggregate(dist.subspace ~ dataset + n + p + method, sim, mean),
+    aggregate(dist.subspace ~ dataset + n + p + method, sim, sd),
+    by = c("dataset", "n", "p", "method"),
+    suffixes = c(".mean", ".sd")
+))
+
+# plots and tables
+ggplot(aggr, aes(x = n, y = dist.subspace.mean,
+                 group = interaction(dataset, method),
+                 color = method, linetype = dataset)) +
+    geom_line() +
+    geom_errorbar(aes(
+        ymin = dist.subspace.mean - dist.subspace.sd,
+        ymax = dist.subspace.mean + dist.subspace.sd
+    ), width = 0.2) +
+    scale_x_continuous(trans = 'log2')
+
+################################################################################
+###                             Binary Response                              ###
+################################################################################
+
+# Read/Combine binary data simulation logs
+sim <- read.logs('sim_binary_[0-9_]*\\.csv')
+
+# Aggregated Tables
+aggr.formula <- cbind(dist.subspace, accuracy) ~ dataset + method
+aggr <- merge(
+    aggregate(aggr.formula, sim, mean,
+              na.action = na.pass),
+    aggregate(aggr.formula, sim, sd,
+              na.action = na.pass),
+    by = attr(terms(aggr.formula), "term.labels"),
+    suffixes = c(".mean", ".sd")
+)
+print(aggr[with(aggr, order(dataset, dist.subspace.mean)), ], digits = 3)
+
+
+# box-plot subspace comparison
+ggplot(sim, aes(x = method, y = dist.subspace,
+                group = interaction(dataset, method),
+                color = dataset)) +
+    geom_boxplot() +
+    labs(title = "Sim. Binary", x = "Methods", y = "Subspace Dist.", color = "Datasets")
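
`read.logs` relies on the convention, used by the simulation scripts (see `simulations_binary.R` below), that each log file carries its configuration as `# key value` comment lines ahead of the CSV header. Schematically, with all values being placeholders:

    # reps 100
    # dataset B1
    # epochs c(5, 10)
    method,replication,dist.subspace,dist.grassmann,accuracy
    "sir",1,0.421,1.25,NA
    "nn.opg",1,0.387,1.13,0.84

Each `# key value` pair becomes a column of the combined data.frame; values that parse as R expressions, like the `epochs` vector, are evaluated and collapsed into a comma-separated string.
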
diff --git a/simulations/simulations.R b/simulations/simulations.R
index 4c595bf..4cc1477 100644
--- a/simulations/simulations.R
+++ b/simulations/simulations.R
@@ -11,7 +11,7 @@ suppressPackageStartupMessages({
 args <- parse.args(defaults = list(
     # Simulation configuration
     reps = 100,             # Number of replications
-    dataset = '1',          # Name (number) of the data set
+    dataset = 'M1',         # Name (number) of the data set
     # Neuronal Net. structure/definitions
     hidden_units = 512L,
     activation = 'relu',
@@ -24,7 +24,8 @@ args <- parse.args(defaults = list(
 ))
 
 ## Generate reference data (gets re-sampled for each replication)
-ds <- dataset(args$dataset)     # Generates a list with `X`, `Y`, `B` and `name`
+# Generates a list with `X`, `Y`, `B` and `name`
+ds <- dataset(args$dataset, n = 100)
 
 ## Build Dimension Reduction Neuronal Network model (matching the data)
 nn <- nnsdr$new(
diff --git a/simulations/simulations_bigdata.R b/simulations/simulations_bigdata.R
index 757c1ce..9a6070c 100644
--- a/simulations/simulations_bigdata.R
+++ b/simulations/simulations_bigdata.R
@@ -11,7 +11,7 @@ suppressPackageStartupMessages({
 args <- parse.args(defaults = list(
     # Simulation configuration
     reps = 10L,             # Number of replications
-    dataset = '6',          # Name (number) of the data set
+    dataset = 'M6',         # Name (number) of the data set
     # Sets if reference methods shall be evaluated
     run_mave = TRUE,
     run_cve = TRUE,
@@ -34,6 +34,8 @@ args <- parse.args(defaults = list(
 # Number of observations are irrelevant for the reference to generate a matching
 # `NNSDR` estimator
 ds <- dataset(args$dataset, n = 100L, p = args$p)   # Generates a list with `X`, `Y`, `B` and `name`
+# Normalize the dataset name (before it is written to the log/results file)
+args$dataset <- ds$name
 
 ## Build Dimension Reduction Neuronal Network model (matching the data)
 nn <- nnsdr$new(
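
For reference, the `(n, p)` pairs hard-coded in the (now commented-out) loop of `simulations_bigdata.sh` below follow `p = round(sqrt(n))`, with the epoch budget roughly halved each time `n` quadruples. In R:

    n <- 1000 * 4^(0:4)
    data.frame(n = n, p = round(sqrt(n)))
    #>        n   p
    #> 1   1000  32
    #> 2   4000  63
    #> 3  16000 126
    #> 4  64000 253
    #> 5 256000 506
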
diff --git a/simulations/simulations_bigdata.sh b/simulations/simulations_bigdata.sh
index d0ffe07..0107f2d 100644
--- a/simulations/simulations_bigdata.sh
+++ b/simulations/simulations_bigdata.sh
@@ -10,24 +10,24 @@ user_interupt() {
     exit
 }
 
-# Simulation for big data with `p` proportional to `sqrt(n)`
-for ds in 6 8; do
-    command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=1000 --p=32 --epochs=200,400"
-    echo -e "\n$command"
-    time eval "$command"
-    command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=4000 --p=63 --epochs=100,200"
-    echo -e "\n$command"
-    time eval "$command"
-    command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=16000 --p=126 --epochs=50,100"
-    echo -e "\n$command"
-    time eval "$command"
-    command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=64000 --p=253 --epochs=25,50"
-    echo -e "\n$command"
-    time eval "$command"
-    command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=256000 --p=506 --epochs=12,25"
-    echo -e "\n$command"
-    time eval "$command"
-done
+# # Simulation for big data with `p` proportional to `sqrt(n)`
+# for ds in 6 8; do
+#     command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=1000 --p=32 --epochs=200,400"
+#     echo -e "\n$command"
+#     time eval "$command"
+#     command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=4000 --p=63 --epochs=100,200"
+#     echo -e "\n$command"
+#     time eval "$command"
+#     command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=16000 --p=126 --epochs=50,100"
+#     echo -e "\n$command"
+#     time eval "$command"
+#     command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=64000 --p=253 --epochs=25,50"
+#     echo -e "\n$command"
+#     time eval "$command"
+#     command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=256000 --p=506 --epochs=12,25"
+#     echo -e "\n$command"
+#     time eval "$command"
+# done
 
 # Simulation for big data with `p` proportional to `n` (note: for the base case
 # of `n = 1000`, `p = 32` see above)
diff --git a/simulations/simulations_binary.R b/simulations/simulations_binary.R
index 4a27c3f..a90f536 100644
--- a/simulations/simulations_binary.R
+++ b/simulations/simulations_binary.R
@@ -1,9 +1,8 @@
 #!/usr/bin/env Rscript
-library(MAVE)
-library(CVarE)
 Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3")  # Suppress `tensorflow` notes/warnings
 suppressPackageStartupMessages({
+    library(dr)
     library(NNSDR)
 })
 
@@ -11,97 +10,21 @@ suppressPackageStartupMessages({
 args <- parse.args(defaults = list(
     # Simulation configuration
     reps = 100,                 # Number of replications
-    dataset = 'B2',             # Name ('B' for Binary) of the data set
+    dataset = 'B1',             # Name ('B' for Binary) of the data set
     # Neuronal Net. structure/definitions
     hidden_units = 512L,
     activation = 'relu',
     trainable_reduction = TRUE,
     # Neuronal Net. training
-    epochs = c(100L, 200L),     # Number of training epochs for (`OPG`, Refinement)
+    epochs = c(3L, 5L),         # Number of training epochs for (`OPG`, Refinement)
     batch_size = 32L,
     initializer = 'fromOPG',
     seed = 956294L
 ))
 
-################################################################################
-
-dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
-    name <- toupper(name)
-    if (nchar(name) == 1) { name <- paste0("M", name) }
-
-    if (name == "B1") {
-        if (missing(n)) { n <- 400 }
-        if (missing(sd)) { sd <- 1 }
-        eps <- sqrt(.Machine$double.eps)
-
-        B <- diag(1, p, 2)
-        Z <- matrix(rnorm(2 * n, 0, sd), n, 2)
-        Y <- sample(c(FALSE, TRUE), n, replace = TRUE)
-        theta <- rnorm(n, Y * pi, 0.25 * pi)
-        X <- cbind(
-            10 * (cos(theta) + 0.75 * (2 * Y - 1)) + Z[, 1],
-            10 * sin(theta) + Z[, 2],
-            matrix(rnorm(n * (p - 2), 0, sd), n)
-        )
-    } else if (name == "B2") {
-        if (missing(n)) { n <- 400 }
-        if (missing(sd)) { sd <- 0.2 }
-        eps <- sqrt(.Machine$double.eps)
-
-        B <- diag(1, p, 2)
-        X <- matrix(runif(n * p, -2, 2), n, p)
-        XB <- X %*% B
-        Y <- (sin(XB[, 1]) / (XB[, 2]^2 + eps) + rnorm(n, 0, sd)) > 0
-    # } else if (name == "B2") {
-    #     if (missing(n)) { n <- 400 }
-    #     if (missing(sd)) { sd <- 0.2 }
-    #     eps <- sqrt(.Machine$double.eps)
-
-    #     B <- diag(1, p, 2)
-    #     X <- matrix(runif(n * p, -2, 2), n, p)
-    #     XB <- X %*% B
-    #     Y <- (sin(XB[, 1]) / (XB[, 2]^2 + eps) + rnorm(n, 0, sd)) > 0
-    } else {
-        stop("Got unknown dataset name.")
-    }
-
-    return(list(X = X, Y = as.matrix(Y), B = B, name = name))
-}
-
-## Generate reference data (gets re-sampled for each replication)
-ds <- dataset(args$dataset, n = 100)    # Generates a list with `X`, `Y`, `B` and `name`
-# plot(ds$X %*% ds$B, col = 2 * (ds$Y + 1))
-
-################################################################################
-
-metric.subspace <- function(B_true, name = "metric.subspace", normalize = FALSE) {
-    if (!is.matrix(B_true))
-        B_true <- as.matrix(B_true)
-    P_true <- B_true %*% solve(crossprod(B_true), t(B_true))
-    P_true <- tf$constant(P_true, dtype = 'float32')
-
-    if (normalize) {
-        rankSum <- 2 * ncol(B_true)
-        c <- 1 / sqrt(min(rankSum, 2 * nrow(B_true) - rankSum))
-    } else {
-        c <- sqrt(2)
-    }
-    c <- tf$constant(c, dtype = 'float32')
-
-    structure(function(model) {
-        B <- model$get_layer('reduction')$weights
-        function(y_true, y_pred) {
-            P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
-            diff <- P_true - P
-            c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
-        }
-    },
-        class = c("nnsdr.metric", "Refinement"),
-        name = name
-    )
-}
-
-ds <- dataset(args$dataset)
+## Generate reference data (gets re-sampled for each replication)
+# Generates a list with `X`, `Y`, `B` and `name`
+ds <- dataset(args$dataset, n = 1000)
 
 ## Build Dimension Reduction Neuronal Network model (matching the data)
 nn <- nnsdr$new(
@@ -112,36 +35,62 @@ nn <- nnsdr$new(
     trainable_reduction = args$trainable_reduction,
     output_activation = 'sigmoid',
     loss = 'binary_crossentropy',
-    metrics = list('accuracy', metric.subspace(ds$B, normalize = TRUE))
-    # metrics = list('accuracy')
+    metrics = list(
+        'accuracy',
+        metric.subspace(ds$B, ds$X, ds$Y, type = "OPG", normalize = TRUE),
+        metric.subspace(ds$B, type = "Refinement", normalize = TRUE)
+    )
 )
 
-with(ds, {
-    ## Sample test dataset
-    ds.test <- dataset(ds$name, n = 1000)
+## Open simulation log file, write simulation configuration and header
+log <- file(format(Sys.time(), "results/sim_binary_%Y%m%d_%H%M.csv"), "w", blocking = FALSE)
+cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n',
+    'method,replication,dist.subspace,dist.grassmann,accuracy\n',
+    sep = '', file = log, append = TRUE)
+
+## Set seed for sampling simulation data (NOT affecting the `NN` initialization)
+set.seed(args$seed)
+
+## Repeated simulation runs
+for (rep in seq_len(args$reps)) {
+    ## Re-sample seeded data for each simulation replication
+    with(dataset(ds$name), {
+        ## Sample test dataset
+        ds.test <- dataset(ds$name, n = 1000)
+
+        ## Starting with the reference methods `SIR`, `SAVE` and `PHD`
+        for (method in c("sir", "save", "phdy")) {
+            fit <- dr(Y ~ X, method = method)
+            d.sub <- dist.subspace(B, dr.basis(fit, ncol(B)), normalize = TRUE)
+            d.gra <- dist.grassmann(B, dr.basis(fit, ncol(B)))
+            accuracy <- NA
+            cat('"', method, '",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
+                '\n', sep = '', file = log, append = TRUE)
+        }
+
+        ## Fit `NNSDR` model
+        nn$fit(X, Y, epochs = args$epochs,
+            batch_size = args$batch_size, initializer = args$initializer)
+        # Model evaluation (with metrics)
+        eval <- nn$evaluate(ds.test$X, ds.test$Y)
+        # `OPG` estimate
+        d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE)
+        d.gra <- dist.grassmann(B, coef(nn, 'OPG'))
+        accuracy <- eval[["OPG", "accuracy"]]
+        cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
+            '\n', sep = '', file = log, append = TRUE)
+        # Refinement estimate
+        d.sub <- dist.subspace(B, coef(nn), normalize = TRUE)
+        d.gra <- dist.grassmann(B, coef(nn))
+        accuracy <- eval[["Refinement", "accuracy"]]
+        cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
+            '\n', sep = '', file = log, append = TRUE)
+    })
+
+    ## Reset model
     nn$reset()
-    nn$fit(X, Y, epochs = args$epochs,
-        batch_size = args$batch_size, initializer = args$initializer,
-        verbose = 2L)
+}
 
-    # `OPG` estimate
-    cat("OPG subspace: ", dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE), '\n')
-    cat("OPG grassmann:", dist.grassmann(B, coef(nn, 'OPG')), '\n')
-    # Refinement estimate
-    cat("Ref subspace: ", dist.subspace(B, coef(nn), normalize = TRUE), '\n')
-    cat("Ref grassmann:", dist.grassmann(B, coef(nn)), '\n')
-    # MSE
-    cat("MSE: ", mean((nn$predict(ds.test$X) - ds.test$Y)^2), '\n')
-})
-
-library(ggplot2)
-
-ggplot(nn$history, aes(x = epoch)) +
-    geom_line(aes(y = loss), col = 'red') +
-    geom_line(aes(y = accuracy), col = 'blue')
-
-with(dataset('B2', n = 400), {
-    ggplot(data.frame(XB1 = (X %*% B)[, 1], XB2 = (X %*% B)[, 2], Y = Y)) +
-        geom_point(aes(x = XB1, y = XB2, col = Y))
-})
+## Finished, close simulation log file
+close(log)
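
For the two network rows the logged `accuracy` is keras' binary accuracy, i.e. the rate at which the sigmoid outputs, thresholded at 0.5, match the test responses. The same number can be recomputed from the fitted model directly; a sketch:

    pred <- nn$predict(ds.test$X) > 0.5
    mean(pred == ds.test$Y)
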
diff --git a/simulations/simulations_binary.sh b/simulations/simulations_binary.sh
new file mode 100644
index 0000000..494e6e9
--- /dev/null
+++ b/simulations/simulations_binary.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Catch the termination signal `SIGINT` and invoke `user_interupt`
+trap user_interupt SIGINT
+
+# Report a user interrupt and exit the simulation script (instead of continuing
+# with the next statement; this allows exiting the bash loop with `^C`)
+user_interupt() {
+    echo -e '\nUser Interrupt -> stopping simulation\n'
+    exit
+}
+
+command="Rscript simulations_binary.R --reps=100 --dataset=B1 --hidden_units=1024 --epochs=5,10"
+echo -e "\n$command"
+time eval "$command"
+command="Rscript simulations_binary.R --reps=100 --dataset=B2"
+echo -e "\n$command"
+time eval "$command"
+command="Rscript simulations_binary.R --reps=100 --dataset=B3 --hidden_units=1024,128,32 --epochs=10,200"
+echo -e "\n$command"
+time eval "$command"
+command="Rscript simulations_binary.R --reps=100 --dataset=B4 --hidden_units=1024,128,32 --epochs=10,200"
+echo -e "\n$command"
+time eval "$command"