Browse Source

add: custom metrics support and metric.subspace,

wip: new binary response datasets,
add: nn$evaluate,
add: simulation_binary
master
Daniel Kapla 8 months ago
parent
commit
a673b50c7a
11 changed files with 494 additions and 132 deletions
  1. +1
    -0
      NNSDR/NAMESPACE
  2. +48
    -0
      NNSDR/R/datasets.R
  3. +58
    -0
      NNSDR/R/metric_subspace.R
  4. +25
    -1
      NNSDR/R/nnsdr_class.R
  5. +163
    -0
      real_data/MNIST.R
  6. +92
    -0
      simulations/edit_sim_data.R
  7. +3
    -2
      simulations/simulations.R
  8. +3
    -1
      simulations/simulations_bigdata.R
  9. +18
    -18
      simulations/simulations_bigdata.sh
  10. +59
    -110
      simulations/simulations_binary.R
  11. +24
    -0
      simulations/simulations_binary.sh

+ 1
- 0
NNSDR/NAMESPACE View File

@@ -7,6 +7,7 @@ export(dist.grassmann)
export(dist.mave)
export(dist.subspace)
export(get.script)
export(metric.subspace)
export(nnsdr)
export(parse.args)
export(reinitialize_weights)

+ 48
- 0
NNSDR/R/datasets.R View File

@@ -255,6 +255,54 @@ dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
XB <- X %*% B
Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 + 5 * XB[, 4] + rnorm(n, sd = sd)
Y <- as.matrix(Y)
} else if (name == "B1") {
if (missing(n)) { n <- 400 }

B <- diag(1, p, 1)
X <- matrix(runif(n * p, -2, 2), n, p)
Y <- (X %*% B + rnorm(n, 0, sd)) > 0
} else if (name == "B2") {
if (missing(n)) { n <- 400 }

B <- diag(1, p, 2)
Y <- as.matrix(sample(as.logical(0:1), n, replace = TRUE))
X <- matrix(rnorm(n * p, sd = sd), n, p)
x1 <- sample(c(-1, 1), n, replace = TRUE)
x2 <- x1 * c(-1, 1)[Y + 1]
X[, 1] <- X[, 1] + x1
X[, 2] <- X[, 2] + x2
} else if (name == "B3") {
if (missing(n)) { n <- 1600 }
if (missing(sd)) { sd <- 0.1 }

B <- diag(1, p, 3)
Y <- sample(as.logical(0:1), n, replace = TRUE)
x <- seq(0, 6 * pi, length.out = n)
X <- matrix(rnorm(n * p, sd = sd), n, p)
X[, 1] <- X[, 1] + 0.5 * sin(x + pi * Y)
X[, 2] <- X[, 2] + 0.5 * cos(x + pi * Y)
X[, 3] <- X[, 3] + (x - 3 * pi) / (3 * pi)
} else if (name == "B4") {
# Let
# X ~ N_p(0, I_p)
# then,
# Y | X ~ Binom(sin(atan2(Z_1, Z_2) + Z_3)^2)
# or in other words
# E(Y | X) = sin(atan2(Z_1, Z_2) + Z_3)^2

if (missing(n)) { n <- 1600 }

B <- diag(1, p, 3)
X <- matrix(rnorm(n * p, sd = sd), n, p)

# Angle and Height for each X with respect to the spiral manifold
XB <- X %*% B
phi <- atan2(XB[, 1], XB[, 2])
h <- XB[, 3]

prob <- sin(phi + h)^2
Y <- apply(cbind(prob, 1 - prob), 1, sample, x = as.logical(0:1),
size = 1, replace = FALSE)
} else {
stop("Got unknown dataset name.")
}

+ 58
- 0
NNSDR/R/metric_subspace.R View File

@@ -0,0 +1,58 @@
#' @export
metric.subspace <- function(B_true,
X = NULL, Y = NULL,
type = c("Refinement", "OPG"),
name = "metric.subspace",
normalize = FALSE
) {
type <- match.arg(type)

if (!is.matrix(B_true))
B_true <- as.matrix(B_true)
P_true <- B_true %*% solve(crossprod(B_true), t(B_true))
P_true <- tf$constant(P_true, dtype = 'float32')

if (normalize) {
rankSum <- 2 * ncol(B_true)
c <- 1 / sqrt(min(rankSum, 2 * nrow(B_true) - rankSum))
} else {
c <- sqrt(2)
}
c <- tf$constant(c, dtype = 'float32')

if (type == "Refinement") {
structure(function(model) {
B <- model$get_layer('reduction')$weights
function(y_true, y_pred) {
P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
diff <- P_true - P
c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
}
},
class = c("nnsdr.metric", "Refinement"),
name = name
)
} else {
X <- tf$cast(X, dtype = 'float32')
begin <- tf$cast(c(0, nrow(B_true) - ncol(B_true) - 1), dtype = 'int32')
size <- tf$cast(c(nrow(B_true), ncol(B_true)), dtype = 'int32')
structure(function(model) {
function(y_true, y_pred) {
with(tf$GradientTape() %as% tape, {
tape$watch(X)
out <- model(X)
})
G <- tape$gradient(out, X)
B <- tf$linalg$eigh(tf$linalg$matmul(G, G, transpose_a = TRUE))
B <- tf$linalg$qr(tf$slice(B[[2]], begin, size))$q

P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
diff <- P_true - P
c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
}
},
class = c("nnsdr.metric", "OPG"),
name = name
)
}
}

+ 25
- 1
NNSDR/R/nnsdr_class.R View File

@@ -72,7 +72,7 @@ build.MLP <- function(input_shapes, d, name, add_reduction,

if (!is.null(metrics)) {
metrics <- as.list(metrics)
for (i in seq_along(metrics)) {
for (i in rev(seq_along(metrics))) {
metric <- metrics[[i]]
if (all(c("nnsdr.metric", name) %in% class(metric))) {
metric_fn <- reticulate::py_func(metric(mlp))
@@ -275,6 +275,30 @@ nnsdr <- setRefClass('nnsdr',
as.array(output)
}
},
evaluate = function(inputs, output) {
if (is.list(inputs)) {
inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
} else {
inputs <- list(tf$cast(inputs, dtype = 'float32'))
}
eval.opg <- .self$nn.opg$evaluate(inputs, output,
return_dict = TRUE, verbose = 0L)

if (is.null(.self$history.ref))
return(data.frame(eval.opg, row.names = "OPG"))

eval.ref <- .self$nn.ref$evaluate(inputs, output,
return_dict = TRUE, verbose = 0L)

# Convert to data.frame
eval.opg <- data.frame(eval.opg, row.names = "OPG")
eval.ref <- data.frame(eval.ref, row.names = "Refinement")
# Augment mutualy exclusive columns
eval.opg[setdiff(names(eval.ref), names(eval.opg))] <- NA
eval.ref[setdiff(names(eval.opg), names(eval.ref))] <- NA
# Combine/Bind
rbind(eval.opg, eval.ref)
},
coef = function(type = c('Refinement', 'OPG')) {
type <- match.arg(type)
if (type == 'Refinement') {

+ 163
- 0
real_data/MNIST.R View File

@@ -0,0 +1,163 @@
library(keras)

num_classes <- 10L
epochs <- 20L
batch_size <- 128L

################################################################################
### Loading & Prepair MNIST dataset ###
################################################################################

c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()

x_train <- array_reshape(x_train, c(nrow(x_train), prod(dim(x_train)[-1])))
x_test <- array_reshape(x_test , c(nrow(x_test ), prod(dim(x_test )[-1])))

# x_train <- x_train / 255
# x_test <- x_test / 255
center <- apply(x_train, 2, mean)
x_train <- (x_train - center) / 128
x_test <- (x_test - center) / 128

y_train <- to_categorical(y_train, num_classes)
y_test <- to_categorical(y_test , num_classes)

################################################################################
### Model Creation ###
################################################################################

model <- keras_model_sequential(name = 'base_model')
model %>%
layer_dense(units = 256L, activation = 'relu',
input_shape = ncol(x_train)) %>%
layer_dropout(rate = 0.4) %>%
layer_dense(units = 128L, activation = 'relu') %>%
layer_dropout(rate = 0.3) %>%
layer_dense(units = num_classes, activation = 'softmax')

summary(model)

model %>% compile(
loss = 'categorical_crossentropy',
optimizer = 'RMSProp',
metrics = c('accuracy')
)

################################################################################
### Base Model Training ###
################################################################################

history.base <- model %>% fit(
x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = 1L,
validation_split = 0.1
)

plot(history.base)

score <- model %>% evaluate(x_test, y_test, verbose = 0L)

cat('Test loss: ', score[['loss']], '\n',
'Test accuracy: ', score[['accuracy']], '\n', sep = '')

################################################################################
### OPG Data Reduction ###
################################################################################
library(tensorflow)
library(ggplot2)

G <- local({
X = tf$cast(x_train, 'float32')
with(tf$GradientTape() %as% tape, {
tape$watch(X)
Y <- model(X)
})
as.matrix(tape$gradient(Y, X))
})
eig <- eigen(var(G), symmetric = TRUE)
B.opg <- eig$vectors[, 1:2]

# ggplot(data.frame(values = eig$values[1:25]), aes(x = seq_along(values), y = values)) +
# geom_line()

ggplot(data.frame(x_test %*% B.opg, y = factor(apply(y_test, 1, which.max))),
aes(x = X1, y = X2, group = y, color = y)) +
geom_point()

################################################################################
### Refinement Model ###
################################################################################
weights <- model$get_weights()

model.ref <- keras_model_sequential(name = 'Refinement')
model.ref %>%
layer_dense(units = ncol(B.opg), activation = 'relu',
input_shape = ncol(x_train), use_bias = FALSE,
weights = list(B.opg)) %>%
layer_dense(units = 256L, activation = 'relu',
weights = list(crossprod(B.opg, weights[[1]]), weights[[2]])) %>%
layer_dropout(rate = 0.4) %>%
layer_dense(units = 128L, activation = 'relu',
weights = weights[3:4]) %>%
layer_dropout(rate = 0.3) %>%
layer_dense(units = num_classes, activation = 'softmax',
weights = weights[5:6])

summary(model.ref)

model.ref %>% compile(
loss = 'categorical_crossentropy',
optimizer = 'RMSProp',
metrics = c('accuracy')
)

################################################################################
### Refinement Model Training ###
################################################################################

history.ref <- model.ref %>% fit(
x_train, y_train,
batch_size = batch_size,
epochs = epochs,
verbose = 1L,
validation_split = 0.1
)

plot(history.ref)

score <- model.ref %>% evaluate(x_test, y_test, verbose = 0L)

cat('Test loss: ', score[['loss']], '\n',
'Test accuracy: ', score[['accuracy']], '\n', sep = '')

### Combine Histories
hist <- structure(list(
params = list(
verbose = min(history.base$params$verbose, history.ref$params$verbose),
epochs = history.base$params$epochs + history.ref$params$epochs,
steps = max(history.base$params$steps, history.ref$params$steps)
),
metrics = lapply(structure(names(history.base$metrics), names = names(history.base$metrics)),
function(name) c(history.base$metrics[[name]], history.ref$metrics[[name]]))
), class = "keras_training_history")

plot(hist, smooth = FALSE)


B.ref <- model.ref$get_weights()[[1]]

ggplot(data.frame(x_test %*% B.ref, y = factor(apply(y_test, 1, which.max))),
aes(x = X1, y = X2, group = y, color = y)) +
geom_point()


B.pca <- eigen(var(x_train), symmetric = TRUE)$vectors[, 1:2]
ggplot(data.frame(x_test %*% B.pca, y = factor(apply(y_test, 1, which.max))),
aes(x = X1, y = X2, group = y, color = y)) +
geom_point()

image.ref <- matrix(((B.ref - min(B.ref)) / abs(diff(range(B.ref))))[, 2], 28, 28)
plot(c(0, 28), c(0, 28), type = "n", xlab = "", ylab = "")
rasterImage(image.ref, 0, 0, 28, 28, interpolate = TRUE)

+ 92
- 0
simulations/edit_sim_data.R View File

@@ -0,0 +1,92 @@
library(ggplot2)

################################################################################
### General Helpers ###
################################################################################

# Read/Combine simulation data logs
read.logs <- function(pattern) {
# Folder containing log files
path <- if (file.exists('results/')) './' else './simulations/'
path <- paste0(path, 'results/')
# Read all log files and augment with meta-parameters
file.names <- list.files(path, pattern, full.names = TRUE)
sim <- do.call(rbind, lapply(file.names, function(path) {
# Read simulation log (one file)
sim <- read.csv(path, comment.char = '#')
# Add simulation arguments as columns
args <- Filter(function(line) startsWith(line, '#'), readLines(path))
args <- sub('# ?', '', args)
args <- regmatches(args, regexpr(' ', args), invert = TRUE)
# Try to convert meta-parameters from string into int/num/bool/...
for (arg in args) {
val <- tryCatch(
eval(parse(text = arg[2])),
error = function(err) arg[2]
)
if (length(val) > 1) val <- paste(val, collapse = ',')
sim[[arg[1]]] <- val
}
sim
}))
# Convert methods to factors
sim$method <- factor(sim$method,
levels = c("opg", "mave", "cve", "sir", "save", "phdy", "nn.opg", "nn.ref"),
labels = c("OPG", "MAVE", "CVE", "SIR", "SAVE", "PHD", "NN-OPG", "NN-Ref")
)
sim
}


################################################################################
### Bit Data ###
################################################################################

# Read/Combine big data simulation logs
sim <- read.logs('sim_big_.*csv')

# Compute repetition mean and standard deviation over replications
(aggr <- merge(
aggregate(dist.subspace ~ dataset + n + p + method, sim, mean),
aggregate(dist.subspace ~ dataset + n + p + method, sim, sd),
by = c("dataset", "n", "p", "method"),
suffixes = c(".mean", ".sd")
))

# plots and tables
ggplot(aggr, aes(x = n, y = dist.subspace.mean,
group = interaction(dataset, method),
color = method, linetype = dataset)) +
geom_line() +
geom_errorbar(aes(
ymin = dist.subspace.mean - dist.subspace.sd,
ymax = dist.subspace.mean + dist.subspace.sd
), width = 0.2) +
scale_x_continuous(trans = 'log2')

################################################################################
### Binary Response ###
################################################################################

# Read/Combine binary data simulation logs
sim <- read.logs('sim_binary_[0-9_]*\\.csv')

# Aggregated Tables
aggr.formula <- cbind(dist.subspace, accuracy) ~ dataset + method
aggr <- merge(
aggregate(aggr.formula, sim, mean,
na.action = na.pass),
aggregate(aggr.formula, sim, sd,
na.action = na.pass),
by = attr(terms(aggr.formula), "term.labels"),
suffixes = c(".mean", ".sd")
)
print(aggr[with(aggr, order(dataset, dist.subspace.mean)), ], digits = 3)


# box-plot subspace comparison
ggplot(sim, aes(x = method, y = dist.subspace,
group = interaction(dataset, method),
color = dataset)) +
geom_boxplot() +
labs(title = "Sim. Binary", x = "Methods", y = "Subspace Dist.", color = "Datasets")

+ 3
- 2
simulations/simulations.R View File

@@ -11,7 +11,7 @@ suppressPackageStartupMessages({
args <- parse.args(defaults = list(
# Simulation configuration
reps = 100, # Number of replications
dataset = '1', # Name (number) of the data set
dataset = 'M1', # Name (number) of the data set
# Neuronal Net. structure/definitions
hidden_units = 512L,
activation = 'relu',
@@ -24,7 +24,8 @@ args <- parse.args(defaults = list(
))

## Generate reference data (gets re-sampled for each replication)
ds <- dataset(args$dataset) # Generates a list with `X`, `Y`, `B` and `name`
# Generates a list with `X`, `Y`, `B` and `name`
ds <- dataset(args$dataset, n = 100)

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(

+ 3
- 1
simulations/simulations_bigdata.R View File

@@ -11,7 +11,7 @@ suppressPackageStartupMessages({
args <- parse.args(defaults = list(
# Simulation configuration
reps = 10L, # Number of replications
dataset = '6', # Name (number) of the data set
dataset = 'M6', # Name (number) of the data set
# Sets if reference methods shall be evaluated
run_mave = TRUE,
run_cve = TRUE,
@@ -34,6 +34,8 @@ args <- parse.args(defaults = list(
# Number of observations are irrelevant for the reference to generate a matching
# `NNSDR` estimator
ds <- dataset(args$dataset, n = 100L, p = args$p) # Generates a list with `X`, `Y`, `B` and `name`
# normalize dataset name (before written to the log/results file)
args$dataset <- ds$name

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(

+ 18
- 18
simulations/simulations_bigdata.sh View File

@@ -10,24 +10,24 @@ user_interupt() {
exit
}

# Simulation for big data with `p` proportional to `sqrt(n)`
for ds in 6 8; do
command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=1000 --p=32 --epochs=200,400"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=4000 --p=63 --epochs=100,200"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=16000 --p=126 --epochs=50,100"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=64000 --p=253 --epochs=25,50"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=256000 --p=506 --epochs=12,25"
echo -e "\n$command"
time eval "$command"
done
# # Simulation for big data with `p` proportional to `sqrt(n)`
# for ds in 6 8; do
# command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=1000 --p=32 --epochs=200,400"
# echo -e "\n$command"
# time eval "$command"
# command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=4000 --p=63 --epochs=100,200"
# echo -e "\n$command"
# time eval "$command"
# command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=16000 --p=126 --epochs=50,100"
# echo -e "\n$command"
# time eval "$command"
# command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=64000 --p=253 --epochs=25,50"
# echo -e "\n$command"
# time eval "$command"
# command="Rscript simulations_bigdata.R --reps=10 --run_mave=FALSE --run_cve=FALSE --dataset=$ds --n=256000 --p=506 --epochs=12,25"
# echo -e "\n$command"
# time eval "$command"
# done

# Simulation for big data with `p` proportional to `n` (note: for the base case
# of `n = 1000`, `p = 32` see above)

+ 59
- 110
simulations/simulations_binary.R View File

@@ -1,9 +1,8 @@
#!/usr/bin/env Rscript

library(MAVE)
library(CVarE)
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
library(dr)
library(NNSDR)
})

@@ -11,97 +10,21 @@ suppressPackageStartupMessages({
args <- parse.args(defaults = list(
# Simulation configuration
reps = 100, # Number of replications
dataset = 'B2', # Name ('B' for Binary) of the data set
dataset = 'B1', # Name ('B' for Binary) of the data set
# Neuronal Net. structure/definitions
hidden_units = 512L,
activation = 'relu',
trainable_reduction = TRUE,
# Neuronal Net. training
epochs = c(100L, 200L), # Number of training epochs for (`OPG`, Refinement)
epochs = c(3L, 5L), # Number of training epochs for (`OPG`, Refinement)
batch_size = 32L,
initializer = 'fromOPG',
seed = 956294L
))

################################################################################

dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
name <- toupper(name)
if (nchar(name) == 1) { name <- paste0("M", name) }

if (name == "B1") {
if (missing(n)) { n <- 400 }
if (missing(sd)) { sd <- 1 }
eps <- sqrt(.Machine$double.eps)

B <- diag(1, p, 2)
Z <- matrix(rnorm(2 * n, 0, sd), n, 2)
Y <- sample(c(FALSE, TRUE), n, replace = TRUE)
theta <- rnorm(n, Y * pi, 0.25 * pi)
X <- cbind(
10 * (cos(theta) + 0.75 * (2 * Y - 1)) + Z[, 1],
10 * sin(theta) + Z[, 2],
matrix(rnorm(n * (p - 2), 0, sd), n)
)
} else if (name == "B2") {
if (missing(n)) { n <- 400 }
if (missing(sd)) { sd <- 0.2 }
eps <- sqrt(.Machine$double.eps)

B <- diag(1, p, 2)
X <- matrix(runif(n * p, -2, 2), n, p)
XB <- X %*% B
Y <- (sin(XB[, 1]) / (XB[, 2]^2 + eps) + rnorm(n, 0, sd)) > 0
# } else if (name == "B2") {
# if (missing(n)) { n <- 400 }
# if (missing(sd)) { sd <- 0.2 }
# eps <- sqrt(.Machine$double.eps)

# B <- diag(1, p, 2)
# X <- matrix(runif(n * p, -2, 2), n, p)
# XB <- X %*% B
# Y <- (sin(XB[, 1]) / (XB[, 2]^2 + eps) + rnorm(n, 0, sd)) > 0
} else {
stop("Got unknown dataset name.")
}

return(list(X = X, Y = as.matrix(Y), B = B, name = name))
}

## Generate reference data (gets re-sampled for each replication)
ds <- dataset(args$dataset, n = 100) # Generates a list with `X`, `Y`, `B` and `name`
# plot(ds$X %*% ds$B, col = 2 * (ds$Y + 1))

################################################################################

metric.subspace <- function(B_true, name = "metric.subspace", normalize = FALSE) {
if (!is.matrix(B_true))
B_true <- as.matrix(B_true)
P_true <- B_true %*% solve(crossprod(B_true), t(B_true))
P_true <- tf$constant(P_true, dtype = 'float32')

if (normalize) {
rankSum <- 2 * ncol(B_true)
c <- 1 / sqrt(min(rankSum, 2 * nrow(B_true) - rankSum))
} else {
c <- sqrt(2)
}
c <- tf$constant(c, dtype = 'float32')

structure(function(model) {
B <- model$get_layer('reduction')$weights
function(y_true, y_pred) {
P <- tf$linalg$matmul(B, B, transpose_b = TRUE)
diff <- P_true - P
c * tf$sqrt(tf$reduce_sum(tf$math$multiply(diff, diff)))
}
},
class = c("nnsdr.metric", "Refinement"),
name = name
)
}

ds <- dataset(args$dataset)
# Generates a list with `X`, `Y`, `B` and `name`
ds <- dataset(args$dataset, n = 1000)

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
@@ -112,36 +35,62 @@ nn <- nnsdr$new(
trainable_reduction = args$trainable_reduction,
output_activation = 'sigmoid',
loss = 'binary_crossentropy',
metrics = list('accuracy', metric.subspace(ds$B, normalize = TRUE))
# metrics = list('accuracy')
metrics = list(
'accuracy',
metric.subspace(ds$B, ds$X, ds$Y, type = "OPG", normalize = TRUE),
metric.subspace(ds$B, type = "Refinement", normalize = TRUE)
)
)

with(ds, {
## Sample test dataset
ds.test <- dataset(ds$name, n = 1000)

## Open simulation log file, write simulation configuration and header
log <- file(format(Sys.time(), "results/sim_binary_%Y%m%d_%H%M.csv"), "w", blocking = FALSE)
cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n',
'method,replication,dist.subspace,dist.grassmann,accuracy\n',
sep = '', file = log, append = TRUE)

## Set seed for sampling simulation data (NOT effecting the `NN` initialization)
set.seed(args$seed)

## Repeated simulation runs
for (rep in seq_len(args$reps)) {
## Re-sample seeded data for each simulation replication
with(dataset(ds$name), {
## Sample test dataset
ds.test <- dataset(ds$name, n = 1000)

## Starting with the reference methods `SIR`, `SAVE` and `PHD`
for (method in c("sir", "save", "phdy")) {
fit <- dr(Y ~ X, method = method)
d.sub <- dist.subspace(B, dr.basis(fit, ncol(B)), normalize = TRUE)
d.gra <- dist.grassmann(B, dr.basis(fit, ncol(B)))
accuracy <- NA
cat('"', method, '",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
'\n', sep = '', file = log, append = TRUE)
}

## Fit `NNSDR` model
nn$fit(X, Y, epochs = args$epochs,
batch_size = args$batch_size, initializer = args$initializer)
# Model evaluation (with metrics)
eval <- nn$evaluate(ds.test$X, ds.test$Y)
# `OPG` estimate
d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE)
d.gra <- dist.grassmann(B, coef(nn, 'OPG'))
accuracy <- eval[["OPG", "accuracy"]]
cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
'\n', sep = '', file = log, append = TRUE)
# Refinement estimate
d.sub <- dist.subspace(B, coef(nn), normalize = TRUE)
d.gra <- dist.grassmann(B, coef(nn))
accuracy <- eval[["Refinement", "accuracy"]]
cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', accuracy,
'\n', sep = '', file = log, append = TRUE)
})

## Reset model
nn$reset()
nn$fit(X, Y, epochs = args$epochs,
batch_size = args$batch_size, initializer = args$initializer,
verbose = 2L)

# `OPG` estimate
cat("OPG subspace: ", dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE), '\n')
cat("OPG grassmann:", dist.grassmann(B, coef(nn, 'OPG')), '\n')
# Refinement estimate
cat("Ref subspace: ", dist.subspace(B, coef(nn), normalize = TRUE), '\n')
cat("Ref grassmann:", dist.grassmann(B, coef(nn)), '\n')
# MSE
cat("MSE: ", mean((nn$predict(ds.test$X) - ds.test$Y)^2), '\n')
})

library(ggplot2)

ggplot(nn$history, aes(x = epoch)) +
geom_line(aes(y = loss), col = 'red') +
geom_line(aes(y = accuracy), col = 'blue')
}

with(dataset('B2', n = 400), {
ggplot(data.frame(XB1 = (X %*% B)[, 1], XB2 = (X %*% B)[, 2], Y = Y)) +
geom_point(aes(x = XB1, y = XB2, col = Y))
})
## Finished, close simulation log file
close(log)

+ 24
- 0
simulations/simulations_binary.sh View File

@@ -0,0 +1,24 @@
#!/bin/bash

# Catch termination signal `SIGINT` and invoke `user_interupt`
trap user_interupt SIGINT

# Reports an user interupt and exits the simulation script (do not continue next
# statement, allows to exit bash loop with `^C`)
user_interupt() {
echo -e '\nUser Interrupt -> stopping simulation\n'
exit
}

command="Rscript simulations_binary.R --reps=100 --dataset=B1 --hidden_units=1024 --epochs=5,10"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_binary.R --reps=100 --dataset=B2"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_binary.R --reps=100 --dataset=B3 --hidden_units=1024,128,32 --epochs=10,200"
echo -e "\n$command"
time eval "$command"
command="Rscript simulations_binary.R --reps=100 --dataset=B4 --hidden_units=1024,128,32 --epochs=10,200"
echo -e "\n$command"
time eval "$command"

Loading…
Cancel
Save