Merge branch 'master' of https://git.art-ist.cc/daniel/tensor_predictors
This commit is contained in:
commit
780a5b7eba
@ -2416,7 +2416,7 @@
|
||||
|
||||
|
||||
@misc{lichess-database,
|
||||
author = {Duplessis, Thibault},
|
||||
author = {Thibault Duplessis},
|
||||
year = {2013},
|
||||
title = {lichess.org open database},
|
||||
url = {https://database.lichess.org},
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
library(tensorPredictors)
|
||||
library(parallel)
|
||||
library(pROC)
|
||||
suppressPackageStartupMessages({
|
||||
library(parallel)
|
||||
library(pROC)
|
||||
})
|
||||
|
||||
#' Mode-Wise PCA preprocessing (generalized (2D)^2 PCA)
|
||||
#'
|
||||
@ -75,6 +77,12 @@ loo.predict <- function(method, X, y, ...) {
|
||||
}, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
|
||||
}
|
||||
|
||||
# "Projects" a sequence to its first `nr.freq` frequency components
|
||||
proj.fft <- function(sequence, nr.freq = 5L) {
|
||||
F <- fft(sequence)
|
||||
Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F)
|
||||
}
|
||||
|
||||
|
||||
# Load full EEG dataset (3D tensor for each subject)
|
||||
c(X, y) %<-% readRDS("eeg_data_3d.rds")
|
||||
@ -93,21 +101,23 @@ y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y)
|
||||
y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y)
|
||||
y.hat <- loo.predict(gmlm_tensor_normal, X, y)
|
||||
y.hat.fft <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL))
|
||||
y.hat.fft <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL))
|
||||
|
||||
# classification performance measures table by leave-one-out cross-validation
|
||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) {
|
||||
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) {
|
||||
sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"),
|
||||
function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) })
|
||||
}))
|
||||
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat
|
||||
#> acc 0.83606557 0.80327869 0.80327869 0.79508197
|
||||
#> err 0.16393443 0.19672131 0.19672131 0.20491803
|
||||
#> fpr 0.31111111 0.33333333 0.33333333 0.35555556
|
||||
#> tpr 0.92207792 0.88311688 0.88311688 0.88311688
|
||||
#> fnr 0.07792208 0.11688312 0.11688312 0.11688312
|
||||
#> tnr 0.68888889 0.66666667 0.66666667 0.64444444
|
||||
#> auc 0.88051948 0.86984127 0.86926407 0.86810967
|
||||
#> auc.sd 0.03118211 0.03254642 0.03259186 0.03295883
|
||||
#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat y.hat.fft
|
||||
#> acc 0.83606557 0.80327869 0.80327869 0.79508197 0.79508197
|
||||
#> err 0.16393443 0.19672131 0.19672131 0.20491803 0.20491803
|
||||
#> fpr 0.31111111 0.33333333 0.33333333 0.35555556 0.33333333
|
||||
#> tpr 0.92207792 0.88311688 0.88311688 0.88311688 0.87012987
|
||||
#> fnr 0.07792208 0.11688312 0.11688312 0.11688312 0.12987013
|
||||
#> tnr 0.68888889 0.66666667 0.66666667 0.64444444 0.66666667
|
||||
#> auc 0.88051948 0.86984127 0.86926407 0.86810967 0.86810967
|
||||
#> auc.sd 0.03118211 0.03254642 0.03259186 0.03295883 0.03354029
|
||||
|
||||
|
||||
################################## Tensor SIR ##################################
|
||||
|
||||
135
sim/sim_efficiency.R
Normal file
135
sim/sim_efficiency.R
Normal file
@ -0,0 +1,135 @@
|
||||
library(tensorPredictors)
|
||||
|
||||
|
||||
# Set PRNG seed to the first 4 digits of the golden ratio for reproducability
|
||||
set.seed(1618L, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### Simulation configuration
|
||||
reps <- 100 # number of simulation replications
|
||||
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||
|
||||
# Parameterize the "true" reductions on the dimension
|
||||
gen.beta <- function(pj) {
|
||||
as.matrix((-1)^seq_len(pj))
|
||||
}
|
||||
# the precision matrices are simply diag(pj)
|
||||
|
||||
|
||||
# sampling from the conditional matrix normal `X | Y = y ~ N(mu(y), I_{p1 p2})`
|
||||
sample.data <- function(sample.size, betas, Omegas, eta1 = 0) {
|
||||
# responce is a standard normal variable
|
||||
y <- rnorm(sample.size)
|
||||
# F(y) is identical to y, except its a tensor (last axis is sample axis)
|
||||
F <- array(y, dim = c(mapply(ncol, betas), sample.size))
|
||||
|
||||
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||||
sample.axis <- length(betas) + 1L
|
||||
Deltas <- Map(solve, Omegas) # normal covariances
|
||||
mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas) # conditional mean
|
||||
X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis) # responses X
|
||||
|
||||
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
|
||||
# Open simulation CSV log file
|
||||
log.name <- format(Sys.time(), "sim_efficiency-%Y%m%dT%H%M.csv")
|
||||
log.file <- file(log.name, "w")
|
||||
# Counts new number of writes purely here to write the CSV header the first time
|
||||
log.writes <- 0L
|
||||
|
||||
# Setting p1 = p2 = pj (note, in the paper `p = p1 p2`)
|
||||
mode.dims <- round(1.2^unique(round(logb(2:32, 1.2))))
|
||||
for (pj in mode.dims) {
|
||||
|
||||
betas.true <- list(gen.beta(pj), gen.beta(pj))
|
||||
B.true <- kronecker(betas.true[[2]], betas.true[[1]])
|
||||
Omegas.true <- list(diag(pj), diag(pj))
|
||||
|
||||
for (sample.size in sample.sizes) {
|
||||
|
||||
sim <- sapply(seq_len(reps), function(.) {
|
||||
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas.true, Omegas.true)
|
||||
|
||||
ds.lm <- tryCatch({
|
||||
B.lm <- unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
|
||||
dist.subspace(B.true, B.lm, normalize = TRUE)
|
||||
}, error = function(.) NA)
|
||||
|
||||
# c(., betas.vec, Omegas.vec) %<-% gmlm_tensor_normal(`dim<-`(X, c(pj^2, sample.size)), drop(F))
|
||||
|
||||
c(., betas.gmlm, Omegas.gmlm) %<-% gmlm_tensor_normal(X, F)
|
||||
|
||||
c(., betas.mani, Omegas.mani) %<-% gmlm_tensor_normal(X, F,
|
||||
proj.Omegas = rep(list(function(O) { diag(mean(diag(O)), nrow(O)) }), 2)
|
||||
)
|
||||
|
||||
# ds.vec <- dist.subspace(B.true, betas.vec[[1]], normalize = TRUE)
|
||||
ds.vec <- NA
|
||||
ds.gmlm <- dist.subspace(betas.true, betas.gmlm, normalize = TRUE) # equiv to R> dist.subspace(B.true, B.gmlm)
|
||||
ds.mani <- dist.subspace(betas.true, betas.mani, normalize = TRUE)
|
||||
|
||||
c(lm = ds.lm, vec = ds.vec, gmlm = ds.gmlm, mani = ds.mani)
|
||||
})
|
||||
|
||||
sim <- as.data.frame(t(sim))
|
||||
sim$sample.size <- sample.size
|
||||
sim$pj <- pj
|
||||
|
||||
# Append current simulation results to log-file
|
||||
write.table(sim, file = log.file, sep = ",",
|
||||
row.names = FALSE, col.names = (log.writes <- log.writes + 1L) < 2L
|
||||
)
|
||||
|
||||
# print progress
|
||||
cat(sprintf("mode dim (%d): %d/%d - sample size (%d): %d/%d\n",
|
||||
pj, which(pj == mode.dims), length(mode.dims),
|
||||
sample.size, which(sample.size == sample.sizes), length(sample.sizes)
|
||||
))
|
||||
}
|
||||
}
|
||||
close(log.file)
|
||||
|
||||
|
||||
# Read simulation data back in
|
||||
sim <- read.csv(log.name)
|
||||
|
||||
with(merge(
|
||||
aggregate(sim, . ~ sample.size + pj, mean, na.rm = TRUE, na.action = na.pass),
|
||||
aggregate(sim, . ~ sample.size + pj, sd, na.rm = TRUE, na.action = na.pass),
|
||||
by = c("sample.size", "pj"),
|
||||
suffixes = c("", ".sd"),
|
||||
all = FALSE
|
||||
), {
|
||||
plot(range(pj), 0:1, type = "n",
|
||||
main = "Simulation -- Efficiency Gain",
|
||||
xlab = expression(tilde(p)),
|
||||
ylab = expression(d(B, hat(B)))
|
||||
)
|
||||
for (sz in sort(unique(sample.size))) {
|
||||
i <- order(pj)[(sample.size == sz)[order(pj)]]
|
||||
i <- i[!(is.na(lm[i]) | is.na(lm.sd[i]))]
|
||||
polygon(c(pj[i], rev(pj[i])), c(lm[i] + lm.sd[i], rev(lm[i] - lm.sd[i])),
|
||||
col = paste0("#cf7d03", "50"), border = NA
|
||||
)
|
||||
i <- order(pj)[(sample.size == sz)[order(pj)]]
|
||||
polygon(c(pj[i], rev(pj[i])), c(vec[i] + vec.sd[i], rev(vec[i] - vec.sd[i])),
|
||||
col = paste0("#b30303", "50"), border = NA
|
||||
)
|
||||
polygon(c(pj[i], rev(pj[i])), c(gmlm[i] + gmlm.sd[i], rev(gmlm[i] - gmlm.sd[i])),
|
||||
col = paste0("#002d8d", "50"), border = NA
|
||||
)
|
||||
polygon(c(pj[i], rev(pj[i])), c(mani[i] + mani.sd[i], rev(mani[i] - mani.sd[i])),
|
||||
col = paste0("#006e18", "50"), border = NA
|
||||
)
|
||||
}
|
||||
lty.idx <- 1L
|
||||
for (sz in sort(unique(sample.size))) {
|
||||
i <- order(pj)[(sample.size == sz)[order(pj)]]
|
||||
lines(pj[i], lm[i], type = "b", pch = 16, col = "#cf7d03", lty = lty.idx, lwd = 2)
|
||||
lines(pj[i], vec[i], type = "b", pch = 16, col = "#b30303", lty = lty.idx, lwd = 2)
|
||||
lines(pj[i], gmlm[i], type = "b", pch = 16, col = "#002d8d", lty = lty.idx, lwd = 2)
|
||||
lines(pj[i], mani[i], type = "b", pch = 16, col = "#006e18", lty = lty.idx, lwd = 2)
|
||||
lty.idx <- lty.idx + 1L
|
||||
}
|
||||
})
|
||||
@ -116,9 +116,9 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
||||
diag(Sigmas[[j]]) <- diag(Sigmas[[j]]) + reg.factor * min_max[2]
|
||||
}
|
||||
}
|
||||
# Compute (unconstraint but regularized) Omega_j as covariance inverse
|
||||
# Compute (unconstraint) Omega_j's as covariance inverse
|
||||
Omegas[[j]] <- solve(Sigmas[[j]])
|
||||
# Project Omega_j to the Omega_j's manifold
|
||||
# Project Omega_j's to their manifolds
|
||||
if (is.function(proj_j <- proj.Omegas[[j]])) {
|
||||
Omegas[[j]] <- proj_j(Omegas[[j]])
|
||||
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
|
||||
|
||||
@ -44,14 +44,14 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L,
|
||||
}, modes, dim(Fy)[modes])
|
||||
|
||||
|
||||
### Step 2: iterate per mode (axis) least squares estimates
|
||||
### Step 2: iterate per mode (axis) least squares estimates
|
||||
for (iter in seq_len(max.iter)) {
|
||||
|
||||
|
||||
# Invoke logger for previous iterate
|
||||
if (is.function(logger)) {
|
||||
logger("ls", iter - 1L, alphas)
|
||||
}
|
||||
|
||||
|
||||
# cyclic iterate over modes
|
||||
for (j in seq_along(modes)) {
|
||||
# least squares solution for `alpha_j | alpha_i, i != j`
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user