parent
2c5a87ccfe
commit
0214823794
@ -0,0 +1,48 @@ |
||||
library(MAVE) |
||||
library(CVarE) |
||||
library(NNSDR) |
||||
|
||||
set.seed(797) |
||||
|
||||
dataset <- function(n = 100, p = 10, sd = 0.5) { |
||||
X <- matrix(rnorm(n * (p - 1)), n, p - 1) |
||||
X <- cbind(-0.5 * (X[, 1] + X[, 2]) + 0.001 * rnorm(n), X) |
||||
B <- diag(p)[, 4, drop = FALSE] |
||||
Y <- as.matrix(X[, 4]^2 + rnorm(n, 0, sd)) |
||||
return(list(X = X, Y = Y, B = B)) |
||||
} |
||||
|
||||
nn <- nnsdr$new( |
||||
input_shapes = list(x = 10L), |
||||
d = 1L, |
||||
hidden_units = 512L, |
||||
activation = 'relu', |
||||
trainable_reduction = TRUE |
||||
) |
||||
|
||||
sim <- data.frame(mave = rep(NA, 10), cve = NA, nn.opg = NA, nn.ref = NA) |
||||
for (i in 1:10) { |
||||
cat(i, "/ 10\n") |
||||
with(dataset(), { |
||||
dr <- mave.compute(X, Y, method = 'meanMAVE', max.dim = 1) |
||||
sim[i, 'mave'] <<- dist.subspace(B, coef(dr, 1), normalize = TRUE) |
||||
|
||||
dr <- cve.call(X, Y, k = 1) |
||||
sim[i, 'cve'] <<- dist.subspace(B, coef(dr, 1), normalize = TRUE) |
||||
|
||||
nn$fit(X, Y, epochs = c(200L, 400L), batch_size = 32L, initializer = 'fromOPG') |
||||
sim[i, 'nn.opg'] <<- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE) |
||||
sim[i, 'nn.ref'] <<- dist.subspace(B, coef(nn), normalize = TRUE) |
||||
}) |
||||
nn$reset() |
||||
} |
||||
round(t(data.frame( |
||||
mean = colMeans(sim), |
||||
median = apply(sim, 2, median), |
||||
sd = apply(sim, 2, sd) |
||||
)), 3) |
||||
|
||||
# &$\mave$& $\cve$&$\nn_{512}$ \\ |
||||
# mean & 0.917 & 0.164 & 0.101 \\ |
||||
# median & 0.999 & 0.162 & 0.096 \\ |
||||
# sd & 0.256 & 0.057 & 0.032 \\ |
@ -0,0 +1,98 @@ |
||||
#!/usr/bin/env Rscript |
||||
|
||||
library(MAVE) |
||||
library(CVarE) |
||||
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings |
||||
suppressPackageStartupMessages({ |
||||
library(NNSDR) |
||||
}) |
||||
|
||||
## Parse script parameters |
||||
args <- parse.args(defaults = list( |
||||
# Simulation configuration |
||||
reps = 100, # Number of replications |
||||
dataset = '1', # Name (number) of the data set |
||||
# Neuronal Net. structure/definitions |
||||
hidden_units = 512L, |
||||
activation = 'relu', # or `relu` |
||||
trainable_reduction = TRUE, |
||||
# Neuronal Net. training |
||||
epochs = c(200L, 400L), # Number of training epochs for (`OPG`, Refinement) |
||||
batch_size = 32L, |
||||
initializer = 'fromOPG', |
||||
seed = 1390L |
||||
)) |
||||
|
||||
## Generate reference data (gets re-sampled for each replication) |
||||
ds <- dataset(args$dataset) # Generates a list with `X`, `Y`, `B` and `name` |
||||
|
||||
## Build Dimension Reduction Neuronal Network model (matching the data) |
||||
nn <- nnsdr$new( |
||||
input_shapes = list(x = ncol(ds$X)), |
||||
d = ncol(ds$B), |
||||
hidden_units = args$hidden_units, |
||||
activation = args$activation, |
||||
trainable_reduction = args$trainable_reduction |
||||
) |
||||
|
||||
## Open simulation log file, write simulation configuration and header |
||||
log <- file(format(Sys.time(), "results/sim_%Y%m%d_%H%M.csv"), "w", blocking = FALSE) |
||||
cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n', |
||||
'method,replication,dist.subspace,dist.grassmann,mse\n', |
||||
sep = '', file = log, append = TRUE) |
||||
|
||||
## Set seed for sampling simulation data (NOT effecting the `NN` initialization) |
||||
set.seed(args$seed) |
||||
|
||||
## Repeated simulation runs |
||||
for (rep in seq_len(args$reps)) { |
||||
## Re-sample seeded data for each simulation replication |
||||
with(dataset(ds$name), { |
||||
## Sample test dataset |
||||
ds.test <- dataset(ds$name, n = 1000) |
||||
|
||||
## First the reference method `MAVE` |
||||
dr <- mave(Y ~ X, method = "meanMAVE") |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2) |
||||
cat('"mave",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '', |
||||
file = log, append = TRUE) |
||||
## and the `OPG` method |
||||
dr <- mave(Y ~ X, method = "meanOPG") |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2) |
||||
cat('"opg",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '', |
||||
file = log, append = TRUE) |
||||
|
||||
## Next the `CVE` method |
||||
dr <- cve(Y ~ X, k = ncol(B)) |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, k = ncol(B)) - ds.test$Y)^2) |
||||
cat('"cve",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '', |
||||
file = log, append = TRUE) |
||||
|
||||
## Fit `NNSDR` model |
||||
nn$fit(X, Y, epochs = args$epochs, |
||||
batch_size = args$batch_size, initializer = args$initializer) |
||||
# `OPG` estimate |
||||
d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(nn, 'OPG')) |
||||
cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',', NA, '\n', sep = '', |
||||
file = log, append = TRUE) |
||||
# Refinement estimate |
||||
d.sub <- dist.subspace(B, coef(nn), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(nn)) |
||||
mse <- mean((nn$predict(ds.test$X) - ds.test$Y)^2) |
||||
cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '', |
||||
file = log, append = TRUE) |
||||
}) |
||||
|
||||
## Reset model |
||||
nn$reset() |
||||
} |
||||
|
||||
## Finished, close simulation log file |
||||
close(log) |
@ -0,0 +1,111 @@ |
||||
#!/usr/bin/env Rscript |
||||
|
||||
library(MAVE) |
||||
library(CVarE) |
||||
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings |
||||
suppressPackageStartupMessages({ |
||||
library(NNSDR) |
||||
}) |
||||
|
||||
## Parse script parameters |
||||
args <- parse.args(defaults = list( |
||||
# Simulation configuration |
||||
reps = 10L, # Number of replications |
||||
dataset = '6', # Name (number) of the data set |
||||
# Neuronal Net. structure/definitions |
||||
hidden_units = 512L, |
||||
activation = 'relu', |
||||
trainable_reduction = TRUE, |
||||
# Neuronal Net. training |
||||
epochs = c(200L, 400L), # Number of training epochs for (`OPG`, Refinement) |
||||
batch_size = 32L, |
||||
initializer = 'fromOPG', |
||||
# Simulation data generation configuration |
||||
seed = 1390L, |
||||
n = 100L, |
||||
p = 20L |
||||
)) |
||||
|
||||
## Generate reference data (gets re-sampled for each replication) |
||||
# Number of observations are irrelevant for the reference to generate a matching |
||||
# `NNSDR` estimator |
||||
ds <- dataset(args$dataset, n = 100L, p = args$p) # Generates a list with `X`, `Y`, `B` and `name` |
||||
|
||||
## Build Dimension Reduction Neuronal Network model (matching the data) |
||||
nn <- nnsdr$new( |
||||
input_shapes = list(x = ncol(ds$X)), |
||||
d = ncol(ds$B), |
||||
hidden_units = args$hidden_units, |
||||
activation = args$activation, |
||||
trainable_reduction = args$trainable_reduction |
||||
) |
||||
|
||||
## Open simulation log file, write simulation configuration and header |
||||
log <- file(format(Sys.time(), "results/sim_big_%Y%m%d_%H%M.csv"), "w", blocking = FALSE) |
||||
cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n', |
||||
'method,replication,dist.subspace,dist.grassmann,mse,time.user,time.system,time.elapsed\n', |
||||
sep = '', file = log, append = TRUE) |
||||
|
||||
## Repeated simulation runs |
||||
for (rep in seq_len(args$reps)) { |
||||
## Re-sample seeded data for each simulation replication |
||||
with(dataset(ds$name, n = args$n, p = args$p), { |
||||
## Sample test dataset |
||||
ds.test <- dataset(ds$name, n = 1000L, p = args$p) |
||||
|
||||
## First the reference method `MAVE` |
||||
# To be fair for measuring the time, set `max.dim` to true reduction dimension |
||||
# and with `screen = ncol(X)` screening is turned "off". |
||||
time <- system.time(dr <- mave.compute(X, Y, max.dim = ncol(B), |
||||
method = "meanMAVE", screen = ncol(X))) |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2) |
||||
cat('"mave",', rep, ',', d.sub, ',', d.gra, ',', mse, ',', |
||||
time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n', |
||||
sep = '', file = log, append = TRUE) |
||||
## and the `OPG` method |
||||
time <- system.time(dr <- mave.compute(X, Y, max.dim = ncol(B), |
||||
method = "meanOPG", screen = ncol(X))) |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2) |
||||
cat('"opg",', rep, ',', d.sub, ',', d.gra, ',', mse, ',', |
||||
time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n', |
||||
sep = '', file = log, append = TRUE) |
||||
|
||||
## Next the CVE method |
||||
time <- system.time(dr <- cve.call(X, Y, k = ncol(B))) |
||||
d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(dr, ncol(B))) |
||||
mse <- mean((predict(dr, ds.test$X, k = ncol(B)) - ds.test$Y)^2) |
||||
cat('"cve",', rep, ',', d.sub, ',', d.gra, ',', mse, ',', |
||||
time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n', |
||||
sep = '', file = log, append = TRUE) |
||||
|
||||
## Fit `DR` Neuronal Network model |
||||
time <- system.time(nn$fit(X, Y, epochs = args$epochs, |
||||
batch_size = args$batch_size, initializer = args$initializer)) |
||||
# OPG estimate |
||||
d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(nn, 'OPG')) |
||||
cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',NA,NA,NA,NA\n', |
||||
sep = '', file = log, append = TRUE) |
||||
# Refinement estimate |
||||
d.sub <- dist.subspace(B, coef(nn), normalize = TRUE) |
||||
d.gra <- dist.grassmann(B, coef(nn)) |
||||
mse <- mean((nn$predict(ds.test$X) - ds.test$Y)^2) |
||||
cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', mse, ',', |
||||
time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n', |
||||
sep = '', file = log, append = TRUE) |
||||
}) |
||||
|
||||
## Invoke the garbage collector |
||||
gc() |
||||
|
||||
## Reset model |
||||
nn$reset() |
||||
} |
||||
|
||||
## Finished, close simulation log file |
||||
close(log) |
Loading…
Reference in new issue