add (initial): source
parent 4810cf0cf0
commit b96b330c58
@@ -58,7 +58,12 @@ dkms.conf
.Rapp.history

# Session Data files
.RData
*.RData
*.Rdata

# R Data Object files
*.Rds
*.rds

# Example code in package build process
*-Ex.R

@@ -90,3 +95,5 @@ vignettes/*.pdf
# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
rsconnect/

# General Work In Progress files
wip/

@@ -0,0 +1,66 @@
# Source Code.                                  # Loaded functions.
source('../tensor_predictors/poi.R')            # POI

# Load C implementation of the 'FastPOI-C' subroutine.
# Required for using 'use.C = TRUE' in the POI method.
# Compiled via:
# $ cd ../tensor_predictors/
# $ R CMD SHLIB poi.c
dyn.load('../tensor_predictors/poi.so')
# dyn.load('../tensor_predictors/poi.dll') # On Windows
# In this case 'use.C = TRUE' is required because the R implementation is not
# sufficient due to memory exhaustion (and runtime).

# Load Dataset.
# > dataset <- read.table(file = 'egg.extracted.means.txt', header = TRUE,
# >                       stringsAsFactors = FALSE, check.names = FALSE)
# Save as Rdata file for faster loading.
# > saveRDS(dataset, file = 'eeg_data.rds')
dataset <- readRDS('../data_analysis/eeg_data.rds')

# Positive and negative case index.
set.seed(42)
zero <- sample(which(dataset$Case_Control == 0))
one <- sample(which(dataset$Case_Control == 1))

# 10-fold test groups.
zero <- list(zero[ 1: 4], zero[ 5: 8], zero[ 9:12], zero[13:16],
             zero[17:20], zero[21:25], zero[26:30],
             zero[31:35], zero[36:40], zero[41:45])

one <- list(one[ 1: 8], one[ 9:16], one[17:24], one[25:32],
            one[33:40], one[41:48], one[49:56],
            one[57:63], one[64:70], one[71:77])

# Iterate data folds.
folds <- vector('list', 10)
for (i in seq_along(folds)) {
    cat(sprintf('\r%d/%d ', i, length(folds)))

    # Call garbage collector.
    gc()

    # Formulate PFC-GEP for EEG data.
    index <- c(zero[[i]], one[[i]])
    X <- scale(dataset[-index, -(1:2)], scale = FALSE, center = TRUE)
    Fy <- scale(dataset$Case_Control[-index], scale = FALSE, center = TRUE)
    B <- crossprod(X) / nrow(X) # Sigma
    P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
    A <- crossprod(X, P_Fy %*% X) / nrow(X) # Sigma_fit

    # Before starting POI on the (very big) GEP, call the garbage collector.
    gc()
    # NOTE: 'lambda' must be defined beforehand; omitting the argument falls
    # back to the POI default of 0.75 * lambda_max.
    poi <- POI(A, B, 1L, lambda = lambda, use.C = TRUE)
    rm(A, B)
    gc()

    # Set fold index.
    poi$index <- index

    folds[[i]] <- poi
}
cat('\n')

# Save complete 10-fold results.
saveRDS(folds, file = 'eeg_analysis_poi.rds')

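For reference, a minimal sketch (not part of the commit) of inspecting the saved fold results; the fields 'Q', 'lambda' and 'index' follow the POI return value defined in poi.R further below:

# Sketch: load the 10-fold POI results and report the per-fold sparsity of Q.
folds <- readRDS('eeg_analysis_poi.rds')
for (i in seq_along(folds)) {
    nnz <- sum(rowSums(abs(folds[[i]]$Q)) > 0) # active (non-zero) rows
    cat(sprintf('fold %2d: lambda = %.4f, non-zero rows of Q: %d\n',
                i, folds[[i]]$lambda, nnz))
}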
@@ -0,0 +1,140 @@
suppressPackageStartupMessages({
    library(pROC)
    library(ggplot2) # required by the plots below (missing in the original)
})

source('../tensor_predictors/approx_kronecker.R')
source('../tensor_predictors/multi_assign.R')
# Load EEG dataset.
dataset <- readRDS('eeg_data.rds')
# Load EEG k-fold simulation results.
folds <- readRDS('eeg_analysis_poi.rds')
# Set dimensional parameters.
p <- 64L  # nr. of predictors (count of sensors)
t <- 256L # nr. of time points (measurements)

labels <- vector('list', length(folds))
predictions <- vector('list', length(folds))
alphas <- matrix(0, length(folds), t)
betas <- matrix(0, length(folds), p)
# For each fold.
for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Factorize POI result in alpha, beta.
    c(alpha, beta) %<-% approx.kronecker(fold$Q, c(t, 1), c(p, 1))
    # Drop small values of alpha, beta.
    alpha[abs(alpha) < 1e-6] <- 0
    beta[abs(beta) < 1e-6] <- 0
    # Reconstruct B from factorization.
    B <- kronecker(alpha, beta)
    # Select the fold's train/test sets.
    X_train <- as.matrix(dataset[-fold$index, -(1:2)])
    y_train <- as.factor(dataset[-fold$index, 'Case_Control'])
    X_test <- as.matrix(dataset[fold$index, -(1:2)])
    y_test <- as.factor(dataset[fold$index, 'Case_Control'])
    # Predict via a logit model built on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X_train %*% B, y = y_train))
    y_hat <- predict(model, data.frame(x = X_test %*% B), type = "response")
    # Set target and prediction values for the ROC curve.
    # Store labels as 0/1 integers (unlist() over factors would yield 1/2 codes).
    labels[[i]] <- as.integer(as.character(y_test))
    predictions[[i]] <- y_hat
    alphas[i, ] <- as.vector(alpha)
    betas[i, ] <- as.vector(beta)
}

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate. P(Yhat = - | Y = -).
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])

# Combined accuracy, error, ...
cat("acc: ", acc(unlist(labels), unlist(predictions)), "\n",
    "err: ", err(unlist(labels), unlist(predictions)), "\n",
    "fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
    "tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
    "fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
    "tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
    "auc: ", roc(unlist(labels), unlist(predictions), quiet = TRUE)$auc, "\n",
    sep = '')
# Confidence interval for AUC.
ci(roc(unlist(labels), unlist(predictions), quiet = TRUE))

# Means of per-fold accuracy, error, ...
cat("acc: ", mean(mapply(acc, labels, predictions)), "\n",
    "err: ", mean(mapply(err, labels, predictions)), "\n",
    "fpr: ", mean(mapply(fpr, labels, predictions)), "\n",
    "tpr: ", mean(mapply(tpr, labels, predictions)), "\n",
    "fnr: ", mean(mapply(fnr, labels, predictions)), "\n",
    "tnr: ", mean(mapply(tnr, labels, predictions)), "\n",
    "auc: ", mean(mapply(function(...) roc(...)$auc, labels, predictions,
                         MoreArgs = list(direction = '<', quiet = TRUE))), "\n",
    sep = '')
# Means of per-fold CI.
rowMeans(mapply(function(...) ci(roc(...)), labels, predictions,
                MoreArgs = list(direction = '<', quiet = TRUE)))
sd(mapply(function(...) roc(...)$auc, labels, predictions,
          MoreArgs = list(direction = '<', quiet = TRUE)))

################################################################################
###                                  plot                                    ###
################################################################################
multiplot <- function(..., plotlist = NULL, cols) {
    library(grid)
    # Make a list from the ... arguments and plotlist.
    plots <- c(list(...), plotlist)
    numPlots <- length(plots)
    # Make the panel.
    plotCols <- cols
    # Number of rows needed, calculated from cols.
    plotRows <- ceiling(numPlots / plotCols)
    # Set up the page.
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
    vplayout <- function(x, y) {
        viewport(layout.pos.row = x, layout.pos.col = y)
    }
    # Make each plot, in the correct location.
    for (i in 1:numPlots) {
        curRow <- ceiling(i / plotCols)
        curCol <- (i - 1) %% plotCols + 1
        print(plots[[i]], vp = vplayout(curRow, curCol))
    }
}

pa <- ggplot(data.frame(time = rep(1:ncol(alphas), 2),
                        means = c(colMeans(abs(alphas)), .5 * colMeans(!alphas)),
                        type = factor(rep(c(0, 1), each = ncol(alphas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = time, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of alpha', x = 'time', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
                                           name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top',
          legend.title = element_blank())

pb <- ggplot(data.frame(sensor = rep(1:ncol(betas), 2),
                        means = c(colMeans(abs(betas)), .5 * colMeans(!betas)),
                        type = factor(rep(c(0, 1), each = ncol(betas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = sensor, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of beta', x = 'sensors', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
                                           name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top',
          legend.title = element_blank())

multiplot(pa, pb, cols = 1)

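As a quick sanity check of the confusion-matrix rates defined above, a standalone toy example with hand-computable values (not part of the commit; assumes acc, fpr, tpr from the file above are defined):

# Toy check: 4 samples, one false positive after thresholding at 0.5.
y_true <- c(0, 0, 1, 1)
y_pred <- c(0.9, 0.1, 0.8, 0.7) # round() yields 1, 0, 1, 1
stopifnot(acc(y_true, y_pred) == 0.75, # 3 of 4 correct
          fpr(y_true, y_pred) == 0.5,  # 1 FP among 2 negatives
          tpr(y_true, y_pred) == 1.0)  # both positives recovered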
@@ -0,0 +1,142 @@
suppressPackageStartupMessages({
    library(pROC)
})

source('../tensor_predictors/approx_kronecker.R')
source('../tensor_predictors/multi_assign.R')
source('../tensor_predictors/tensor_predictors.R')
source('../tensor_predictors/lsir.R')
source('../tensor_predictors/pca2d.R')

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate. P(Yhat = - | Y = -).
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])

# Load EEG dataset.
dataset <- readRDS('eeg_data.rds')

#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
#' @param tpc Number of "t"ime "p"rincipal "c"omponents.
egg_analysis_reduced <- function(methods, ppc, tpc) {
    # Set dimensional parameters.
    n <- nrow(dataset) # sample size (nr. of people)
    p <- 64L           # nr. of predictors (count of sensors)
    t <- 256L          # nr. of time points (measurements)

    # Extract dimension names from X.
    nNames <- dataset$PersonID
    tNames <- as.character(seq(t))
    pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]

    # Split into X and y.
    X <- as.matrix(dataset[, -(1:2)])
    y <- dataset$Case_Control
    # Reshape X as a 3D tensor of shape (n, t, p), aka samples, timesteps, predictors.
    # (Each of the n rows in X iterates over the time before switching sensors.)
    X <- array(X, dim = c(n, t, p),
               dimnames = list(nNames, tNames, pNames))
    # Reorder axes to (p, t, n) = (predictors, timesteps, samples).
    X <- aperm(X, c(3, 2, 1))

    # Compute mean of X.
    X_mean <- apply(X, c(1, 2), mean)
    X_center <- X - as.vector(X_mean)

    # Compute "left" and "right" cov-matrices.
    Sigma_t <- matrix(apply(apply(X_center, 3, crossprod), 1, mean), t, t)
    Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p)
    # Get "left", "right" principal components.
    V_p <- svd(Sigma_p, ppc, 0L)$u
    V_t <- svd(Sigma_t, tpc, 0L)$u

    # Reduce dimension.
    X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t))
    dim(X_reduced) <- c(ppc, tpc, n)

    # Vectorize to shape (predictors * timesteps, samples) and transpose to
    # (samples, predictors * timesteps).
    X_vec <- t(matrix(X_reduced, ppc * tpc, n))

    loo.cv <- expand.grid(method = names(methods), fold = 1:n)
    loo.cv$y_true <- y[loo.cv$fold]
    loo.cv$y_pred <- NA

    # Perform LOO cross-validation for each method.
    for (i in 1L:n) {
        # Print progress.
        cat(sprintf("\rCross-Validation (p-PC: %d, t-PC: %d): %4d/%d",
                    ppc, tpc, i, n))
        # Leave out the i-th element.
        X_train <- X_vec[-i, ]
        X_test <- X_vec[i, ]
        y_train <- y[-i]
        # Center y.
        y_train <- scale(y_train, center = TRUE, scale = FALSE)

        # For each method.
        for (method.name in names(methods)) {
            method <- methods[[method.name]]
            # Compute reduction using the current method under the common API.
            sdr <- method(X_train, y_train, ppc, tpc)
            B <- kronecker(sdr$alpha, sdr$beta)
            # Fit a linear model (which ensures a common sdr direction if possible).
            model <- glm(y ~ x, family = binomial(link = "logit"),
                         data = data.frame(y = y[-i], x = X_train %*% B))
            # Predict out of sample and store in the LOO CV data.frame.
            y_pred <- predict(model, data.frame(x = X_test %*% B), type = "response")
            loo.cv[loo.cv$method == method.name & loo.cv$fold == i, 'y_pred'] <- y_pred
        }
    }

    for (method.name in names(methods)) {
        labels <- loo.cv[loo.cv$method == method.name, 'y_true']
        predictions <- loo.cv[loo.cv$method == method.name, 'y_pred']
        ROC <- roc(unlist(labels), unlist(predictions), quiet = TRUE)
        # Combined accuracy, error, ...
        cat("\nMethod: ", method.name, "\n",
            "acc:    ", acc(unlist(labels), unlist(predictions)), "\n",
            "err:    ", err(unlist(labels), unlist(predictions)), "\n",
            "fpr:    ", fpr(unlist(labels), unlist(predictions)), "\n",
            "tpr:    ", tpr(unlist(labels), unlist(predictions)), "\n",
            "fnr:    ", fnr(unlist(labels), unlist(predictions)), "\n",
            "tnr:    ", tnr(unlist(labels), unlist(predictions)), "\n",
            "auc:    ", ROC$auc, "\n",
            "auc sd: ", sqrt(var(ROC)), "\n",
            sep = '')
    }

    loo.cv
}

methods <- list(
    KPIR_LS  = function(...) tensor_predictor(..., method = "KPIR_LS"),
    KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
    KPFC1    = function(...) tensor_predictor(..., method = "KPFC1"),
    KPFC2    = function(...) tensor_predictor(..., method = "KPFC2"),
    LSIR     = LSIR
)

#               ppc, tpc
#              ----------
params <- list( c(  4,  3)
              , c( 15, 15)
              , c( 30, 20)
)

for (param in params) {
    c(ppc, tpc) %<-% param
    sim <- egg_analysis_reduced(methods, ppc, tpc)

    attr(sim, 'param') <- c(ppc = ppc, tpc = tpc)

    saveRDS(sim, file = sprintf('eeg_analysis_reduced_%d_%d.rds', ppc, tpc))
}

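A minimal sketch (assuming the loop above has written its .rds files) of summarizing the saved LOO results per method:

# Sketch: compare methods by LOO accuracy for each saved parameter setting.
for (file in Sys.glob('eeg_analysis_reduced_*.rds')) {
    sim <- readRDS(file)
    sim$correct <- round(sim$y_pred) == sim$y_true
    cat('\n', file, '\n')
    print(aggregate(correct ~ method, data = sim, FUN = mean))
}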
@@ -0,0 +1,53 @@
# # Generate Sample Data.
# n <- 250
# # see: simulation_binary.R
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
# X <- data$X
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
# Y <- 2 * data$Y
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)

suppressPackageStartupMessages({
    library(pROC)
})

source('../tensor_predictors/tensor_predictors.R')

# Read sample data from file and split into predictors and responses.
data <- read.csv('example_data.csv')
X <- as.matrix(data[, names(data) != 'Y'])
Y <- as.matrix(data[, 'Y'])

# Set parameters (and check).
n <- nrow(X)
p <- 10
t <- 5
stopifnot(p * t == ncol(X))

# Set up 10-fold CV (folds contains the indices of the test set).
folds <- split(sample.int(n), (seq(0, n - 1) * 10) %/% n)
labels <- vector('list', 10)      # True test values (per fold).
predictions <- vector('list', 10) # Predictions on the test set.

for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Split data into train and test sets.
    X.train <- X[-fold, ]
    Y.train <- Y[-fold, , drop = FALSE]
    X.test <- X[fold, ]
    Y.test <- Y[fold, , drop = FALSE]

    # Compute reduction (method = c('KPIR_LS', 'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
    # or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
    dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
    B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
    # Predict via a logit model built on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X.train %*% B, y = as.integer(Y.train > 0)))

    labels[[i]] <- as.integer(Y.test > 0)
    predictions[[i]] <- predict(model, data.frame(x = X.test %*% B), type = "response")
}

(meanAUC <- mean(mapply(function(...) roc(...)$auc, labels, predictions,
                        MoreArgs = list(direction = '<', quiet = TRUE))))

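The fold construction above deserves a note: split(sample.int(n), (seq(0, n - 1) * 10) %/% n) shuffles the indices and yields 10 folds whose sizes differ by at most one. A standalone check:

n <- 250
folds <- split(sample.int(n), (seq(0, n - 1) * 10) %/% n)
stopifnot(length(folds) == 10,
          sum(lengths(folds)) == n,         # a partition of 1:n
          diff(range(lengths(folds))) <= 1) # near-equal fold sizes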
@@ -0,0 +1,56 @@
# # Generate Sample Data.
# n <- 250
# # see: simulation_binary.R
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
# X <- data$X
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
# Y <- 2 * data$Y
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)

suppressPackageStartupMessages({
    library(pROC)
})

source('../tensor_predictors/tensor_predictors.R')

# Read sample data from file and split into predictors and responses.
data <- read.csv('example_data.csv')
X <- as.matrix(data[, names(data) != 'Y'])
Y <- as.matrix(data[, 'Y'])

# Set parameters (and check).
n <- nrow(X)
p <- 10
t <- 5
stopifnot(p * t == ncol(X))

# Set up folds (folds contains the indices of the test set).
nr.folds <- n # leave-one-out when the number of folds equals the sample size `n`.
folds <- split(sample.int(n), (seq(0, n - 1) * nr.folds) %/% n)
labels <- vector('list', nr.folds)      # True test values (per fold).
predictions <- vector('list', nr.folds) # Predictions on the test set.

for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Split data into train and test sets.
    X.train <- X[-fold, ]
    Y.train <- Y[-fold, , drop = FALSE]
    X.test <- X[fold, ]
    Y.test <- Y[fold, , drop = FALSE]

    # Compute reduction (method = c('KPIR_LS', 'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
    # or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
    dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
    B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
    # Predict via a logit model built on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X.train %*% B, y = as.integer(Y.train > 0)))

    labels[[i]] <- as.integer(Y.test > 0)
    predictions[[i]] <- predict(model, data.frame(x = X.test %*% B), type = "response")
}

# Compute classic ROC for predicted samples (mean AUC makes no sense for leave-one-out).
y.true <- unlist(labels)
y.pred <- unlist(predictions)
roc(y.true, y.pred)

@@ -0,0 +1,37 @@
# The POI implementation contains a fallback if the package is not available,
# but for this use case it is required!
library(RSpectra)

# Load POI function and compiled C subroutine.
source('../tensor_predictors/poi.R')
dyn.load('../tensor_predictors/poi.so') # "Shared Object" of the POI subroutine.

# Load data from sent data file (last Email).
dataset <- readRDS('../eeg_analysis/eeg_data.rds')

maxit <- 400L # Upper bound for number of optimization iterations.

for (i in 1:nrow(dataset)) {
    gc() # To be on the safe side, call the garbage collector (free memory).

    # Formulate PFC-GEP (Principal Fitted Components - Generalized Eigenvalue
    # Problem) for EEG data.
    X <- scale(dataset[-i, -(1:2)], scale = FALSE, center = TRUE)
    Fy <- scale(dataset$Case_Control[-i], scale = FALSE, center = TRUE)
    B <- crossprod(X) / nrow(X) # Sigma
    P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
    A <- crossprod(X, P_Fy %*% X) / nrow(X) # Sigma_fit

    # Call POI using the C subroutine (requires "dyn.load" of the subroutine).
    poi_res <- POI(A, B, 1L, maxit = maxit, use.C = TRUE)
    # Again, be nice to memory and delete, with an explicit call to gc.
    rm(A, B)
    gc()

    # Store results, do analysis, ... (adapt to needs).
    poi_res$maxit <- maxit
    poi_res$loo_index <- i # Keep track of LOO position.

    # Save i-th LOO result to file for analysis/validation/visualization/...
    saveRDS(poi_res, file = sprintf('eeg_poi_loo_%d.rds', i))
}

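A minimal sketch (not part of the commit) of gathering the per-observation files written above back into a single ordered list:

# Sketch: collect all LOO results (assumes the files exist in the working dir).
loo <- lapply(Sys.glob('eeg_poi_loo_*.rds'), readRDS)
loo <- loo[order(sapply(loo, `[[`, 'loo_index'))] # restore LOO order
length(loo) # one entry per left-out observation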
@@ -0,0 +1,166 @@
source('../tensor_predictors/random.R')
source('../tensor_predictors/multi_assign.R')
source('../tensor_predictors/tensor_predictors.R')
source('../tensor_predictors/lsir.R')
source('../tensor_predictors/pca2d.R')

#' @param n0 number of controls
#' @param n1 number of cases
simulateData.binary <- function(n0, n1, p, t, rho.p, rho.t) {
    # Response vector.
    Y <- c(rep(1, n1), rep(0, n0))

    # Section 7.1.2 of Tensor_Predictors-4.pdf
    alpha0 <- as.matrix(rep(0, t))
    alpha1 <- as.matrix(1 / ((t + 1) - 1:t))
    beta <- as.matrix(rep(1 / sqrt(p), p))
    mu0 <- kronecker(alpha0, beta)
    mu1 <- kronecker(alpha1, beta)

    sigma1 <- rho.t^abs(outer(1:t, 1:t, FUN = `-`))
    sigma2 <- rho.p^abs(outer(1:p, 1:p, FUN = `-`))
    sigma <- kronecker(sigma1, sigma2)

    # Compute Delta:
    # Delta = Sigma + E[vec(X)]E[vec(X)^t] - E{E[vec(X)|Y]E[vec(X)^t|Y]}
    n <- n0 + n1
    muAvg <- (n0 * mu0 + n1 * mu1) / n
    mat0 <- mu0 %*% t(mu0)
    mat1 <- mu1 %*% t(mu1)
    matAvg <- (n0 * mat0 + n1 * mat1) / n
    Delta <- sigma + (muAvg %*% t(muAvg)) - matAvg

    X1 <- rmvnorm(n1, mu1, Delta)
    X0 <- rmvnorm(n0, mu0, Delta)
    X <- rbind(X1, X0)

    # Center data.
    Y <- scale(Y, center = TRUE, scale = FALSE)
    X <- scale(X, center = TRUE, scale = FALSE)

    alpha <- alpha0 - alpha1
    Gamma_1 <- alpha / norm(alpha, 'F')
    Gamma_2 <- beta / norm(beta, 'F')
    list(Y = Y, X = X,
         Gamma_1 = Gamma_1, Gamma_2 = Gamma_2,
         Gamma = kronecker(Gamma_1, Gamma_2),
         alpha = alpha, beta = beta,
         Delta = Delta
    )
}


simulation.binary <- function(methods, reps, n0, n1, p, t, rho.p, rho.t) {
    nsim <- length(methods) * reps
    results <- vector('list', nsim)
    E1 <- vector('list', nsim)
    E2 <- vector('list', nsim)
    vec1 <- vector('list', nsim)
    vec2 <- vector('list', nsim)
    Phi <- vector('list', nsim)
    phi1 <- vector('list', nsim)
    phi2 <- vector('list', nsim)

    i <- 1
    for (rep in 1:reps) {
        set.seed(rep)
        ds <- simulateData.binary(n0, n1, p, t, rho.p, rho.t)
        for (method.name in names(methods)) {
            cat(sprintf('\r%4d/%d in %s', rep, reps, method.name))

            method <- methods[[method.name]]
            sdr <- method(ds$X, ds$Y, p, t)
            # Store which simulation is at index i.
            results[[i]] <- c(method = method.name, rep = rep)
            # Compute simulation validation metrics.
            E1[[i]] <-
                norm(kronecker(ds$alpha, ds$beta) - kronecker(sdr$alpha, sdr$beta), 'F') /
                    norm(kronecker(ds$alpha, ds$beta), 'F')
            E2[[i]] <- norm(ds$Delta - sdr$Delta, 'F') / norm(ds$Delta, 'F')
            vec1[[i]] <- as.double(kronecker(sdr$alpha, sdr$beta))
            vec2[[i]] <- as.double(sdr$Delta)
            # Subspace distances.
            if (!('Gamma' %in% names(sdr))) {
                # Assuming r = k = 1.
                sdr$Gamma_1 <- sdr$alpha / norm(sdr$alpha, 'F')
                sdr$Gamma_2 <- sdr$beta / norm(sdr$beta, 'F')
                sdr$Gamma <- kronecker(sdr$Gamma_1, sdr$Gamma_2)
            }
            Phi[[i]] <- norm(tcrossprod(ds$Gamma) - tcrossprod(sdr$Gamma), 'F')
            phi1[[i]] <- norm(tcrossprod(ds$Gamma_1) - tcrossprod(sdr$Gamma_1), 'F')
            phi2[[i]] <- norm(tcrossprod(ds$Gamma_2) - tcrossprod(sdr$Gamma_2), 'F')
            i <- i + 1
        }
    }
    cat('\n')

    # Aggregate per-method statistics.
    statistics <- list()
    for (method.name in names(methods)) {
        m <- which(unlist(lapply(results, `[`, 1)) == method.name)

        # Convert list of vec(alpha %x% beta) to a matrix with vec(alpha %x% beta)
        # in its columns.
        tmp <- matrix(unlist(vec1[m]), ncol = length(m))
        V1 <- sum(apply(tmp, 1, var))

        # Convert list of vec(Delta) to a matrix with vec(Delta) in its columns.
        tmp <- matrix(unlist(vec2[m]), ncol = length(m))
        V2 <- sum(apply(tmp, 1, var))

        statistics[[method.name]] <- list(
            mean.E1 = mean(unlist(E1[m])),
            sd.E1 = sd(unlist(E1[m])),
            mean.E2 = mean(unlist(E2[m])),
            sd.E2 = sd(unlist(E2[m])),
            V1 = V1,
            V2 = V2,
            Phi = mean(unlist(Phi[m])),
            phi1 = mean(unlist(phi1[m])),
            phi2 = mean(unlist(phi2[m]))
        )
    }
    # Transform the statistics list into a data.frame with row and col names.
    stat <- t(matrix(unlist(statistics), ncol = length(statistics)))
    rownames(stat) <- names(statistics)
    colnames(stat) <- names(statistics[[1]])
    stat <- as.data.frame(stat)
    attr(stat, "params") <- c(reps = reps, n0 = n0, n1 = n1, p = p, t = t,
                              rho.p = rho.p, rho.t = rho.t)
    return(stat)
}

methods <- list(
    KPIR_LS  = function(...) tensor_predictor(..., method = "KPIR_LS"),
    KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
    KPFC1    = function(...) tensor_predictor(..., method = "KPFC1"),
    KPFC2    = function(...) tensor_predictor(..., method = "KPFC2"),
    KPFC3    = function(...) tensor_predictor(..., method = "KPFC3"),
    LSIR     = function(X, Fy, p, t) LSIR(X, Fy, p, t, k = 1, r = 1),
    PCA2d    = function(X, y = NULL, p, t, k = 1, r = 1, d1 = 1, d2 = 1) {
        pca <- PCA2d(X, p, t, k, r)
        pca$Gamma_1 <- pca$alpha[, 1:d1, drop = FALSE]
        pca$Gamma_2 <- pca$beta[, 1:d2, drop = FALSE]
        pca$Gamma <- kronecker(pca$Gamma_1, pca$Gamma_2)
        pca$Delta <- kronecker(pca$Sigma_t, pca$Sigma_p)
        return(pca)
    }
)

#                  n0,   n1,  p, t, rho.p, rho.t
#               --------------------------------
params <- list( c( 250,  250, 10, 5,  0.3,  0.3)
              , c( 500,  500, 10, 5,  0.3,  0.3)
              , c(1000, 1000, 10, 5,  0.3,  0.3)
)

for (param in params) {
    c(n0, n1, p, t, rho.p, rho.t) %<-% param
    sim <- simulation.binary(methods, 500, n0, n1, p, t, rho.p, rho.t)

    print(attr(sim, "params"))
    print(round(sim, 2))

    saveRDS(sim, file = sprintf("simulation_3_desc_%d_%d_%d_%d_%f_%f.rds",
                                n0, n1, p, t, rho.p, rho.t))
}

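One property of the Delta construction above is worth recording (a standalone Monte-Carlo sketch, not part of the file): by the law of total covariance, Delta is built exactly so that the marginal covariance of vec(X) recovers 'sigma':

# Check: Var(vec X) = E[Var(vec X | Y)] + Var(E[vec X | Y]) = sigma.
set.seed(1)
p <- 5; t <- 4; rho <- 0.3
ds <- simulateData.binary(5000, 5000, p, t, rho, rho)
sigma <- kronecker(rho^abs(outer(1:t, 1:t, `-`)),
                   rho^abs(outer(1:p, 1:p, `-`)))
max(abs(crossprod(ds$X) / nrow(ds$X) - sigma)) # small (shrinks as n grows)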
@@ -0,0 +1,153 @@
source('../tensor_predictors/random.R')
source('../tensor_predictors/multi_assign.R')
source('../tensor_predictors/tensor_predictors.R')
source('../tensor_predictors/lsir.R')
source('../tensor_predictors/pca2d.R')

simulateData.cont <- function(n, p, t, k, r, d1, d2, delta.identity = FALSE) {

    stopifnot(d1 <= r, d2 <= k)

    y <- rnorm(n)
    ns <- r * k / 2
    Fy <- do.call(cbind, lapply(1:ns, function(s, z) {
        cbind(cos(s * z), sin(s * z))
    }, z = 2 * pi * y))
    Fy <- scale(Fy, scale = FALSE)

    Gamma_1 <- diag(1, t, d1)
    gamma_1 <- diag(1, d1, r)
    alpha <- Gamma_1 %*% gamma_1
    Gamma_2 <- diag(1, p, d2)
    gamma_2 <- diag(1, d2, k)
    beta <- Gamma_2 %*% gamma_2

    if (delta.identity) {
        Delta <- diag(1, p * t, p * t)
    } else {
        Delta <- crossprod(matrix(rnorm((p * t)^2), p * t))
        DM_Delta <- diag(sqrt(1 / diag(Delta)))
        Delta <- DM_Delta %*% Delta %*% DM_Delta
    }

    X <- tcrossprod(Fy, kronecker(alpha, beta)) + rmvnorm(n, sigma = Delta)
    X <- scale(X, scale = FALSE)

    return(list(X = X, y = y, Fy = Fy,
                Gamma = kronecker(Gamma_1, Gamma_2),
                Gamma_1 = Gamma_1, gamma_1 = gamma_1, alpha = alpha,
                Gamma_2 = Gamma_2, gamma_2 = gamma_2, beta = beta,
                Delta = Delta))
}

simulation.cont <- function(methods, reps, n, p, t, k, r, d1, d2) {
    nsim <- length(methods) * reps
    results <- vector('list', nsim)
    E1 <- vector('list', nsim)
    E2 <- vector('list', nsim)
    vec1 <- vector('list', nsim)
    vec2 <- vector('list', nsim)
    Phi <- vector('list', nsim)
    phi1 <- vector('list', nsim)
    phi2 <- vector('list', nsim)

    i <- 1
    for (rep in 1:reps) {
        set.seed(rep)
        ds <- simulateData.cont(n, p, t, k, r, d1, d2)
        for (method.name in names(methods)) {
            cat(sprintf('\r%4d/%d in %s', rep, reps, method.name))

            method <- methods[[method.name]]
            sdr <- method(ds$X, ds$Fy, p, t, k, r, d1, d2)
            # Store which simulation is at index i.
            results[[i]] <- c(method = method.name, rep = rep)
            # Compute simulation validation metrics.
            E1[[i]] <-
                norm(kronecker(ds$alpha, ds$beta) - kronecker(sdr$alpha, sdr$beta), 'F') /
                    norm(kronecker(ds$alpha, ds$beta), 'F')
            E2[[i]] <- norm(ds$Delta - sdr$Delta, 'F') / norm(ds$Delta, 'F')
            vec1[[i]] <- as.double(kronecker(sdr$alpha, sdr$beta))
            vec2[[i]] <- as.double(sdr$Delta)
            # Subspace distances.
            Phi[[i]] <- norm(tcrossprod(ds$Gamma) - tcrossprod(sdr$Gamma), 'F')
            phi1[[i]] <- norm(tcrossprod(ds$Gamma_1) - tcrossprod(sdr$Gamma_1), 'F')
            phi2[[i]] <- norm(tcrossprod(ds$Gamma_2) - tcrossprod(sdr$Gamma_2), 'F')
            i <- i + 1
        }
    }
    cat('\n')

    # Aggregate per-method statistics.
    statistics <- list()
    for (method.name in names(methods)) {
        m <- which(unlist(lapply(results, `[`, 1)) == method.name)

        # Convert list of vec(alpha %x% beta) to a matrix with vec(alpha %x% beta)
        # in its columns.
        tmp <- matrix(unlist(vec1[m]), ncol = length(m))
        V1 <- sum(apply(tmp, 1, var))

        # Convert list of vec(Delta) to a matrix with vec(Delta) in its columns.
        tmp <- matrix(unlist(vec2[m]), ncol = length(m))
        V2 <- sum(apply(tmp, 1, var))

        statistics[[method.name]] <- list(
            mean.E1 = mean(unlist(E1[m])),
            sd.E1 = sd(unlist(E1[m])),
            mean.E2 = mean(unlist(E2[m])),
            sd.E2 = sd(unlist(E2[m])),
            V1 = V1,
            V2 = V2,
            Phi = mean(unlist(Phi[m])),
            phi1 = mean(unlist(phi1[m])),
            phi2 = mean(unlist(phi2[m]))
        )
    }
    # Transform the statistics list into a data.frame with row and col names.
    stat <- t(matrix(unlist(statistics), ncol = length(statistics)))
    rownames(stat) <- names(statistics)
    colnames(stat) <- names(statistics[[1]])
    stat <- as.data.frame(stat)
    attr(stat, "params") <- c(reps = reps, n = n, p = p, t = t, k = k, r = r,
                              d1 = d1, d2 = d2)
    return(stat)
}

methods <- list(
    KPIR_LS  = function(...) tensor_predictor(..., method = "KPIR_LS"),
    KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
    KPFC1    = function(...) tensor_predictor(..., method = "KPFC1"),
    KPFC2    = function(...) tensor_predictor(..., method = "KPFC2"),
    KPFC3    = function(...) tensor_predictor(..., method = "KPFC3"),
    PCA2d    = function(X, y = NULL, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L) {
        pca <- PCA2d(X, p, t, k, r)
        # Note: alpha, beta are not really meaningful for (d1, d2) != (r, k).
        pca$Gamma_1 <- pca$alpha[, 1:d1, drop = FALSE]
        pca$Gamma_2 <- pca$beta[, 1:d2, drop = FALSE]
        pca$Gamma <- kronecker(pca$Gamma_1, pca$Gamma_2)
        pca$Delta <- kronecker(pca$Sigma_t, pca$Sigma_p)
        return(pca)
    }
)

#                  n,  p, t, k, r, d1, d2
#               --------------------------
params <- list( c( 500, 10, 8, 6, 6, 6, 6)
              , c( 500, 10, 8, 6, 6, 4, 4)
              , c( 500, 10, 8, 6, 6, 2, 2)
              , c(5000, 10, 8, 6, 6, 6, 6)
              , c(5000, 10, 8, 6, 6, 4, 4)
              , c(5000, 10, 8, 6, 6, 2, 2)
)

for (param in params) {
    c(n, p, t, k, r, d1, d2) %<-% param
    sim <- simulation.cont(methods, 500, n, p, t, k, r, d1, d2)

    print(attr(sim, "params"))
    print(round(sim, 2))

    saveRDS(sim, file = sprintf("simulation_cont_%d_%d_%d_%d_%d_%d_%d.rds",
                                n, p, t, k, r, d1, d2))
}

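A quick note on the response basis used in simulateData.cont: for r * k pairs of cos/sin columns the result is an n x (r k) matrix. A standalone check:

y <- rnorm(10)
ns <- 36 / 2 # r * k = 36 as in the (k, r) = (6, 6) settings above
Fy <- do.call(cbind, lapply(1:ns, function(s, z) cbind(cos(s * z), sin(s * z)),
                            z = 2 * pi * y))
stopifnot(dim(Fy) == c(10, 36))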
@@ -0,0 +1,146 @@
# Source Code.                                      # Loaded functions.
source('../tensor_predictors/multi_assign.R')       # %<-%
source('../tensor_predictors/approx_kronecker.R')   # approx_kronecker
source('../tensor_predictors/poi.R')                # POI
source('../tensor_predictors/subspace.R')           # subspace
source('../tensor_predictors/random.R')             # rmvnorm

# Load C implementation of the 'FastPOI-C' subroutine.
# Required for using 'use.C = TRUE' in the POI method.
dyn.load('../tensor_predictors/poi.so')
# When 'use.C = FALSE' the POI method uses a base R implementation.
use.C <- TRUE

simulateData.sparse <- function(n, p, t, k, r, scale, degree = 2) {
    # Define true reduction matrices alpha, beta.
    alpha <- diag(1, t, r)
    beta <- diag(1, p, k)

    # Create true "random" covariance of the inverse model.
    R <- matrix(rnorm((p * t)^2), p * t)        # random square matrix.
    sigma <- tcrossprod(R / sqrt(rowSums(R^2))) # sym. pos. def. with diag = 1.

    # Sample responses.
    y <- rnorm(n, 0, 1)
    # Equivalent to cbind(y^1, y^2, ..., y^degree).
    Fy <- t(vapply(y, `^`, double(degree), seq(degree)))

    # Calc X according to the inverse regression model.
    X <- tcrossprod(scale(Fy, scale = FALSE, center = TRUE), kronecker(alpha, beta))
    X <- X + (scale * rmvnorm(n, sigma = sigma))

    return(list(X = X, y = y, Fy = Fy, alpha = alpha, beta = beta))
}

# # True Positives Rate
# tpr <- function(Y, Y_hat) {
#     sum(as.logical(Y_hat) & as.logical(Y)) / sum(as.logical(Y)) # TP / P
# }
# False Positives Rate
fpr <- function(Y, Y_hat) {
    sum(as.logical(Y_hat) & !Y) / sum(!Y) # FP / N
}
# False Negative Rate
fnr <- function(Y, Y_hat) {
    sum(!Y_hat & as.logical(Y)) / sum(as.logical(Y)) # FN / P
}
# False Rate (rate of false positives and negatives)
fr <- function(Y, Y_hat) {
    sum(as.logical(Y) != as.logical(Y_hat)) / length(Y)
}

simulation.sparse <- function(scales, reps, n, p, t, k, r,
                              eps = 100 * .Machine$double.eps) {
    results <- vector('list', length(scales) * reps)

    i <- 0
    for (scale in scales) {
        for (rep in 1:reps) {
            cat(sprintf('\r%4d/%d for scale = %.2f', rep, reps, scale))

            ds <- simulateData.sparse(n, p, t, k, r, scale)
            # Formulate PFC-GEP for the given dataset.
            X <- scale(ds$X, scale = FALSE, center = TRUE)
            Fy <- scale(ds$Fy, scale = FALSE, center = TRUE)
            Sigma <- crossprod(X) / nrow(X)
            P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
            Sigma_fit <- crossprod(X, P_Fy %*% X) / nrow(X)

            poi <- POI(Sigma_fit, Sigma, k * r, use.C = use.C)
            # Calc approx. alpha, beta and drop (numerical) "zeros" from the
            # kronecker factorization approximation.
            c(alpha, beta) %<-% approx.kronecker(poi$Q, dim(ds$alpha), dim(ds$beta))
            alpha[abs(alpha) < eps] <- 0
            beta[abs(beta) < eps] <- 0

            # Compare estimates against true alpha, beta.
            result <- list(
                scale = scale,
                lambda = poi$lambda,
                # alpha_tpr = tpr(ds$alpha, alpha),
                alpha_fpr = fpr(ds$alpha, alpha),
                alpha_fnr = fnr(ds$alpha, alpha),
                alpha_fr = fr(ds$alpha, alpha),
                # beta_tpr = tpr(ds$beta, beta),
                beta_fpr = fpr(ds$beta, beta),
                beta_fnr = fnr(ds$beta, beta),
                beta_fr = fr(ds$beta, beta)
            )
            # Component-wise validation (_c_ stands for component).
            if (ncol(alpha) > 1) {
                ds_c_alpha <- apply(!!ds$alpha, 1, any)
                c_alpha <- apply(!!alpha, 1, any)
                # result$alpha_c_tpr <- tpr(ds_c_alpha, c_alpha)
                result$alpha_c_fpr <- fpr(ds_c_alpha, c_alpha)
                result$alpha_c_fnr <- fnr(ds_c_alpha, c_alpha)
                result$alpha_c_fr <- fr(ds_c_alpha, c_alpha)
            }
            if (ncol(beta) > 1) {
                ds_c_beta <- apply(!!ds$beta, 1, any)
                c_beta <- apply(!!beta, 1, any)
                # result$beta_c_tpr <- tpr(ds_c_beta, c_beta)
                result$beta_c_fpr <- fpr(ds_c_beta, c_beta)
                result$beta_c_fnr <- fnr(ds_c_beta, c_beta)
                result$beta_c_fr <- fr(ds_c_beta, c_beta)
            }
            results[[i <- i + 1]] <- result
        }
        cat('\n')
    }

    # Restructure results list of lists as data.frame.
    results <- as.data.frame(t(sapply(results, function(res, cols) {
        unlist(res[cols])
    }, names(results[[1]]))))
    results$scale <- as.factor(results$scale)
    attr(results, 'params') <- list(
        reps = reps, n = n, p = p, t = t, k = k, r = r, eps = eps)

    results
}

reps <- 500
#                 n,  p, t, k, r
#               -----------------
params <- list( c(100, 10, 5, 1, 2)
              , c(100,  7, 5, 1, 2)
              , c(100,  5, 3, 1, 2)
              , c(500, 10, 5, 1, 2)
              , c(500,  7, 5, 1, 2)
              , c(500,  5, 3, 1, 2)
)
scales <- seq(0.5, 6, 0.25)

for (param in params) {
    c(n, p, t, k, r) %<-% param
    results <- simulation.sparse(scales, reps, n, p, t, k, r)
    sim <- aggregate(results[, 'scale' != names(results)],
                     by = list(scale = results$scale), mean)
    attr(sim, 'params') <- attr(results, 'params')

    file.name <- sprintf("simulation_sparse_%d_%d_%d_%d_%d.rds", n, p, t, k, r)
    saveRDS(sim, file = file.name)

    cat(file.name, '\n')
    print(sim, digits = 2)
}

@@ -0,0 +1,40 @@
#' Approximates kronecker product decomposition.
#'
#' Approximates the matrices `A` and `B` such that
#'      C = A %x% B
#' with `%x%` the kronecker product of the matrices `A` and `B`
#' of dimensions `dimA` and `dimB` respectively.
#'
#' @param C desired kronecker product result.
#' @param dimA length 2 vector of dimensions of \code{A}.
#' @param dimB length 2 vector of dimensions of \code{B}.
#'
#' @return list with attributes `A` and `B`.
#'
#' @examples
#' A <- matrix(seq(14), 7, 2)
#' B <- matrix(c(T, F), 3, 4)
#' C <- kronecker(A, B) # the same as 'C <- A %x% B'
#' approx.kronecker(C, dim(A), dim(B))
#'
#' @seealso C.F. Van Loan / Journal of Computational and Applied Mathematics
#'      123 (2000) 85-100 (pp. 93-95)
#'
#' @import RSpectra
#'
approx.kronecker <- function(C, dimA, dimB) {

    dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
    R <- aperm(C, c(2L, 4L, 1L, 3L))
    dim(R) <- c(prod(dimA), prod(dimB))

    svdR <- try(RSpectra::svds(R, 1L), silent = TRUE)
    if (is(svdR, 'try-error')) {
        svdR <- svd(R, 1L, 1L)
    }

    return(list(
        A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
        B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
    ))
}

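One property worth noting (a standalone check, not part of the file): when C is an exact Kronecker product, the rank-1 SVD of the rearranged matrix recovers it exactly, with the overall scale split between the two factors:

# (assumes approx.kronecker from the file above has been sourced)
set.seed(1)
A <- matrix(rnorm(6), 3, 2)
B <- matrix(rnorm(8), 2, 4)
fit <- approx.kronecker(kronecker(A, B), dim(A), dim(B))
all.equal(kronecker(fit$A, fit$B), kronecker(A, B)) # TRUE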
@@ -0,0 +1,69 @@
source('../tensor_predictors/matpow.R')

#' Longitudinal Sliced Inverse Regression
#'
#' @param X matrix of dim \eqn{n \times p t} with each row representing a
#'  vectorized \eqn{p \times t} observation.
#' @param y vector of \eqn{n} elements as factors. (can be coerced to factors)
#' @param p,t,k,r dimensions.
#'
#' @returns a list with components
#'  alpha: matrix of \eqn{t \times r}
#'  beta: matrix of \eqn{p \times k}
#'
#' TODO: finish
#'
LSIR <- function(X, y, p, t, k = 1L, r = 1L) {
    # The code assumes:
    # alpha: T x r, beta: p x k, X_i: p x T, for the i-th observation.

    # Check and transform parameters.
    if (!is.matrix(X)) X <- as.matrix(X)
    n <- nrow(X)
    stopifnot(
        ncol(X) == p * t,
        n == length(y)
    )
    if (!is.factor(y)) y <- factor(y)

    # Restructure X into a 3D tensor with axes (observations, predictors, time).
    dim(X) <- c(n, p, t)

    # Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2.
    sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
    sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)

    # Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X))).
    dim(X) <- c(n, p * t)
    sigma_p_isqrt <- matpow(sigma_p, -0.5)
    sigma_t_isqrt <- matpow(sigma_t, -0.5)
    Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
    # Both as 3D tensors.
    dim(X) <- dim(Z) <- c(n, p, t)

    # Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
    omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
        rowMeans(apply(Z[y == l, , ], 3, function(z) {
            (nrow(z) / n) * tcrossprod(colMeans(z))
        }))
    })), p, p)
    omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
        rowMeans(apply(Z[y == l, , ], 2, function(z) {
            (nrow(z) / n) * tcrossprod(colMeans(z))
        }))
    })), t, t)
    omega <- kronecker(omega_t, omega_p)

    # Compute separate SVDs of the estimated omegas and use them for an
    # estimate of a central subspace basis.
    svd_p <- La.svd(omega_p)
    svd_t <- La.svd(omega_t)
    # Take the leading k (resp. r) singular vectors. (The original indexed only
    # the k-th / r-th column, which agrees with this solely for k = r = 1.)
    beta <- sigma_p_isqrt %*% svd_p$u[, 1:k, drop = FALSE]
    alpha <- sigma_t_isqrt %*% svd_t$u[, 1:r, drop = FALSE]

    return(list(sigma_p = sigma_p, sigma_t = sigma_t,
                sigma = kronecker(sigma_t, sigma_p),
                alpha = alpha, beta = beta,
                Delta = omega,
                B = kronecker(alpha, beta)))
}

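A hedged usage sketch on toy data only (input shapes follow the roxygen block above; assumes LSIR and matpow are sourced):

# Usage sketch: n vectorized (p x t) observations with a binary response.
set.seed(1)
n <- 100; p <- 4; t <- 3
X <- matrix(rnorm(n * p * t), n, p * t)
y <- rep(0:1, length.out = n)
fit <- LSIR(X, y, p, t, k = 1, r = 1)
dim(fit$beta)  # p x k
dim(fit$alpha) # t x r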
@@ -0,0 +1,89 @@
#' Generalized matrix power function for symmetric matrices.
#'
#' Using the SVD of the matrix \eqn{A = U D V'} where \eqn{D} is the
#' diagonal matrix with the singular values of \eqn{A}, the powers are then
#' computed as \deqn{A^p = U D^p V'} using the symmetry of \eqn{A}.
#'
#' @details
#' It is assumed that the argument \code{A} is symmetric; symmetry is not
#' checked and the result will simply be wrong for non-symmetric input. The
#' actual formula for negative powers is \deqn{A^{-p} = V D^{-p} U'}. For
#' symmetric matrices \eqn{U = V}, which gives the formula used by this
#' function.
#' The reason is speed: using the symmetry property as described avoids two
#' transpositions in the algorithm (one in \code{svd}, using \code{La.svd}).
#'
#' @param A Matrix.
#' @param pow numeric power.
#' @param tol relative tolerance to detect zero singular values as well as the
#'  \code{qr} factorization tolerance.
#'
#' @return a matrix.
#'
#' @seealso \code{\link{solve}}, \code{\link{qr}}, \code{\link{svd}}.
#'
#' @examples
#' # General full rank square matrices.
#' A <- matrix(rnorm(121), 11, 11)
#' all.equal(matpow(A, 1), A)
#' all.equal(matpow(A, 0), diag(nrow(A)))
#' all.equal(matpow(A, -1), solve(A))
#'
#' # Roots of full rank symmetric matrices.
#' A <- crossprod(A)
#' B <- matpow(A, 0.5)
#' all.equal(B %*% B, A)
#' all.equal(matpow(A, -0.5), solve(B))
#' C <- matpow(A, -0.5)
#' all.equal(C %*% C %*% A, diag(nrow(A)))
#' all.equal(A %*% C %*% C, diag(nrow(A)))
#'
#' # General singular matrices.
#' A <- matrix(rnorm(72), 12, 12) # rank(A) = 6
#' B <- matpow(A, -1) # B = A^+
#' # Check generalized inverse properties.
#' all.equal(A %*% B %*% A, A)
#' all.equal(B %*% A %*% B, B)
#' all.equal(B %*% A, t(B %*% A))
#' all.equal(A %*% B, t(A %*% B))
#'
#' # Roots of singular symmetric matrices.
#' A <- crossprod(matrix(rnorm(72), 12, 12)) # rank(A) = 6
#' B <- matpow(A, -0.5) # B = (A^+)^1/2
#' # Check generalized inverse properties.
#' all.equal(A %*% B %*% B %*% A, A)
#' all.equal(B %*% B %*% A %*% B %*% B, B %*% B)
#' all.equal(B %*% A, t(B %*% A))
#' all.equal(A %*% B, t(A %*% B))
#'
matpow <- function(A, pow, tol = 1e-7) {
    if (nrow(A) != ncol(A)) {
        stop("Expected a square matrix, but 'A' is ", nrow(A), " by ", ncol(A))
    }
    # Case study for negative, zero or positive power.
    if (pow > 0) {
        if (pow == 1) { return(A) }
        # Perform SVD and return power as A^pow = U diag(d^pow) V'.
        svdA <- La.svd(A)
        return(svdA$u %*% ((svdA$d^pow) * svdA$vt))
    } else if (pow == 0) {
        return(diag(nrow(A)))
    } else {
        # Make QR decomposition.
        qrA <- qr(A, tol = tol)
        # Check rank of A.
        if (qrA$rank == nrow(A)) {
            # Full rank: calc the inverse the classic way using A's QR decomposition.
            return(matpow(solve(qrA), abs(pow), tol = tol))
        } else {
            # For singular matrices use the SVD decomposition for the power.
            svdA <- svd(A)
            # Get (numerically) positive singular values.
            positives <- svdA$d > tol * svdA$d[1]
            # Apply the negative power to positive singular values and augment
            # the rest with zero.
            d <- c(svdA$d[positives]^pow, rep(0, sum(!positives)))
            # The pseudo inverse as A^pow = V diag(d^pow) U' for pow < 0.
            return(svdA$v %*% (d * t(svdA$u)))
        }
    }
}

@@ -0,0 +1,32 @@
#' Multi-value assignment operator.
#'
#' @param lhs vector of variables (or variable names) to assign values.
#' @param rhs object that can be coerced to a list of the same length as
#'  \code{lhs} storing the values of the variables defined in \code{lhs}.
#'
#' @details The parameter names \code{lhs} and \code{rhs} stand for "Left Hand
#'  Side" and "Right Hand Side", respectively.
#'
#' @examples
#' c(a, b) %<-% list(1, 2)
#' # is equivalent to
#' ## a <- 1
#' ## b <- 2
#'
#' # Switching the values of a, b could be done by:
#' c(a, b) %<-% list(b, a)
#' # Note the usage of 'list' on the right side; otherwise an extraction of
#' # the first two values of the concatenated object is performed. See next:
#'
#' # Extract values.
#' c(d1, d2, d3) %<-% 1:10
#' # extracting the first three values from the vector of length 10.
#'
"%<-%" <- function(lhs, rhs) {
    var.names <- make.names(as.list(substitute(lhs))[-1])
    values <- as.list(rhs)
    env <- parent.frame()
    for (i in seq_along(var.names)) {
        assign(var.names[i], values[[i]], envir = env)
    }
}

@@ -0,0 +1,29 @@

#' @param X Matrix of dim (n, p * t) with each row the vectorized (p, t) observation.
#' @param p nr. predictors
#' @param t nr. timepoints
#' @param ppc reduced nr. predictors (p-principal components)
#' @param tpc reduced nr. timepoints (t-principal components)
#'
#' @details The `i`th observation is stored in a row such that its matrix
#'  equivalent is given by `matrix(X[i, ], p, t)`.
#'
PCA2d <- function(X, p, t, ppc, tpc, scale = FALSE) {
    stopifnot(ncol(X) == p * t, ppc <= p, tpc <= t)

    X <- scale(X, center = TRUE, scale = scale)

    # Left/right aka predictor/time covariance matrices.
    dim(X) <- c(nrow(X), p, t)
    Sigma_p <- matrix(apply(apply(X, 1, tcrossprod), 1, mean), p, p) # Sigma_beta
    Sigma_t <- matrix(apply(apply(X, 1, crossprod), 1, mean), t, t)  # Sigma_alpha
    dim(X) <- c(nrow(X), p * t)

    V_p <- La.svd(Sigma_p, ppc, 0)$u
    V_t <- La.svd(Sigma_t, tpc, 0)$u

    X <- X %*% kronecker(V_t, V_p)

    return(list(reduced = X, alpha = V_t, beta = V_p,
                Sigma_t = Sigma_t, Sigma_p = Sigma_p))
}

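A usage sketch on random data (shapes only, not part of the commit; assumes PCA2d is sourced):

# Usage sketch: reduce (p, t) = (10, 6) observations to (ppc, tpc) = (3, 2).
set.seed(1)
n <- 50; p <- 10; t <- 6
X <- matrix(rnorm(n * p * t), n, p * t)
pca <- PCA2d(X, p, t, ppc = 3, tpc = 2)
dim(pca$reduced) # n x (ppc * tpc)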
@@ -0,0 +1,79 @@
#' Penalized Orthogonal Iteration.
#'
#' @param lambda Default: 0.75 * lambda_max for the FastPOI-C method.
#'
#' @note use.C requires 'poi.so' to be dynamically loaded:
#'  dyn.load('../tensor_predictors/poi.so')
POI <- function(A, B, d,
                lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
                update.tol = 1e-3,
                tol = 100 * .Machine$double.eps,
                maxit = 400L,
                maxit.outer = maxit,
                maxit.inner = maxit,
                use.C = FALSE,
                method = 'FastPOI-C') {

    # TODO: only 'FastPOI-C' is implemented.
    stopifnot(method == 'FastPOI-C')

    if (nrow(A) < 100) {
        Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
    } else {
        Delta <- try(RSpectra::eigs_sym(A, d)$vectors, silent = TRUE)
        if (is(Delta, 'try-error')) {
            # (Fixed: take the leading d eigenvector columns, as above.)
            Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
        }
    }

    # Set initial value.
    Z <- Delta
    p <- nrow(Z) # (Moved up: also needed in the QR step when use.C = TRUE.)

    # Step 1: Optimization.
    # The "inner" optimization loop, aka repeated coordinate optimization.
    if (use.C) {
        Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
                   PACKAGE = 'poi')
    } else {
        for (iter.inner in 1:maxit.inner) {
            Zold <- Z
            for (g in 1:p) {
                a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
                a_norm <- sqrt(sum(a^2))
                if (a_norm > lambda) {
                    Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
                } else {
                    Z[g, ] <- 0
                }
            }
            if (norm(Z - Zold, 'F') < update.tol) {
                break
            }
        }
    }

    # Step 2: QR decomposition.
    if (d == 1L) {
        Z_norm <- sqrt(sum(Z^2))
        if (Z_norm < tol) {
            Q <- matrix(0, p, d)
        } else {
            Q <- Z / Z_norm
        }
    } else {
        # Detect zero columns.
        zeroColumn <- colSums(abs(Z)) < tol
        if (all(zeroColumn)) {
            Q <- matrix(0, p, d)
        } else if (any(zeroColumn)) {
            Q <- matrix(0, p, d)
            Q[, !zeroColumn] <- qr.Q(qr(Z))
        } else {
            Q <- qr.Q(qr(Z))
        }
    }

    return(list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
                lambda = lambda))
}

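The inner loop above is block coordinate descent with a group soft-thresholding update: a row of Z is zeroed whenever its residual norm falls below lambda. A small pure-R usage sketch (standalone, use.C = FALSE; assumes POI is sourced):

# Usage sketch: a small sparse GEP with identity B.
set.seed(1)
A <- tcrossprod(matrix(rnorm(100), 20, 5)) / 20 # low-rank "Sigma_fit"
B <- diag(1, 20)                                # "Sigma"
fit <- POI(A, B, 1L, use.C = FALSE)
sum(abs(fit$Q) > 0) # number of active (non-zero) coordinates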
@@ -0,0 +1,74 @@
#include <math.h>

#include <R.h>
#include <Rinternals.h>

SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, SEXP in_maxit) {
    int i, j, k, g;

    int p = nrows(in_Delta);
    int d = ncols(in_Delta);
    int maxit = asInteger(in_maxit);

    SEXP out_Z = PROTECT(allocMatrix(REALSXP, p, d));
    double* Z = REAL(out_Z);
    double* Zold = (double*)R_alloc(p * d, sizeof(double));
    double* Delta = REAL(in_Delta);
    double* a = (double*)R_alloc(d, sizeof(double));
    double* A = REAL(in_A);
    double* B = REAL(in_B);
    double a_norm;
    double lambda = asReal(in_lambda);
    double scale;
    double res;

    // Set initial value.
    for (j = 0; j < p * d; ++j) {
        Zold[j] = Z[j] = Delta[j];
    }

    for (i = 0; i < maxit; ++i) {
        // Cyclic update of the rows of Z (coordinate descent).
        for (g = 0; g < p; ++g) {
            // Residual a = Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ].
            for (j = 0; j < d; ++j) {
                a[j] = Delta[j * p + g];
                for (k = 0; k < p; ++k) {
                    if (k != g) {
                        a[j] -= B[k * p + g] * Z[j * p + k];
                    }
                }
            }

            a_norm = a[0] * a[0];
            for (j = 1; j < d; ++j) {
                a_norm += a[j] * a[j];
            }
            a_norm = sqrt(a_norm);

            // Group soft-thresholding of the g-th row.
            if (a_norm > lambda) {
                scale = (1.0 - (lambda / a_norm)) / B[g * p + g];
            } else {
                scale = 0.0;
            }
            for (j = 0; j < d; ++j) {
                Z[j * p + g] = scale * a[j];
            }

        }

        // Copy Z to Zold and check the break condition.
        res = 0;
        for (j = 0; j < p * d; ++j) {
            res += (Z[j] - Zold[j]) * (Z[j] - Zold[j]);
            Zold[j] = Z[j];
        }
        if (res < 1e-6) {
            break;
        }

    }

    UNPROTECT(1);
    return out_Z;
}

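Assuming the file compiles (R CMD SHLIB poi.c) and the resulting poi.so has been loaded, the C and pure-R code paths of POI can be cross-checked (a sketch; names follow poi.R above, paths are assumptions):

# Cross-check the C and pure-R implementations on the same small problem.
dyn.load('poi.so') # path assumed; adjust as needed
set.seed(1)
A <- tcrossprod(matrix(rnorm(100), 20, 5)) / 20
B <- diag(1, 20)
fit_R <- POI(A, B, 1L, use.C = FALSE)
fit_C <- POI(A, B, 1L, use.C = TRUE)
all.equal(fit_R$Q, fit_C$Q, tolerance = 1e-4) # both paths should agree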
@@ -0,0 +1,30 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(mu, Sigma)}
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
#' }
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}

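A quick Monte-Carlo check of the sampler (standalone; assumes rmvnorm is sourced):

# Sample covariance and mean should approach 'sigma' and 0 as n grows.
set.seed(1)
sigma <- matrix(c(2, 1, 1, 2), 2)
X <- rmvnorm(10000, sigma = sigma)
round(cov(X), 1)      # approximately sigma
round(colMeans(X), 1) # approximately (0, 0)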
@@ -0,0 +1,35 @@
#' Angle between two subspaces
#'
#' Computes the principal angle between two subspaces spanned by the columns
#' of the matrices \code{A} and \code{B}.
#'
#' @param A,B Numeric matrices with columns considered as the subspace
#'  spanning vectors. Both must have the same number of rows (a.k.a. must
#'  live in the same space).
#' @param is.orth boolean determining if the passed matrices A, B are already
#'  orthogonalized. If set to TRUE, A and B are assumed to have orthogonal
#'  columns (which is not checked).
#'
#' @returns angle in radians.
#'
subspace <- function(A, B, is.orth = FALSE) {
    if (!is.numeric(A) || !is.numeric(B)) {
        stop("Arguments 'A' and 'B' must be numeric.")
    }
    if (is.vector(A)) A <- as.matrix(A)
    if (is.vector(B)) B <- as.matrix(B)
    if (nrow(A) != nrow(B)) {
        stop("Matrices 'A' and 'B' must have the same number of rows.")
    }
    if (!is.orth) {
        A <- qr.Q(qr(A))
        B <- qr.Q(qr(B))
    }
    if (ncol(A) < ncol(B)) {
        tmp <- A; A <- B; B <- tmp
    }
    for (k in 1:ncol(A)) {
        B <- B - tcrossprod(A[, k]) %*% B
    }
    # The largest singular value of the residual equals the sine of the
    # principal angle (La.svd sorts singular values decreasingly; clamp to 1
    # for numerical safety). The original took the minimum over all singular
    # values, which is equivalent only in the single-column case.
    asin(min(1, La.svd(B, 0L, 0L)$d[1]))
}

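Two boundary cases as a standalone sketch (assumes subspace is sourced): identical spans give an angle of 0, orthogonal spans give pi / 2:

set.seed(1)
A <- qr.Q(qr(matrix(rnorm(20), 10, 2)))
subspace(A, A)                   # ~ 0
subspace(c(1, 0, 0), c(0, 0, 1)) # pi / 2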
@@ -0,0 +1,170 @@
source('../tensor_predictors/matpow.R')
source('../tensor_predictors/multi_assign.R')
source('../tensor_predictors/approx_kronecker.R')

log.likelihood <- function(par, X, Fy, Delta.inv, da, db) {
    alpha <- matrix(par[1:prod(da)], da[1L])
    beta <- matrix(par[(prod(da) + 1):length(par)], db[1L])
    error <- X - tcrossprod(Fy, kronecker(alpha, beta))
    sum(error * (error %*% Delta.inv))
}

tensor_predictor <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
                             method = "KPIR_LS",
                             eps1 = 1e-2, eps2 = 1e-2, maxit = 10L) {
    # Validate method using inexact matching.
    methods <- list(KPIR_LS = "KPIR_LS", KPIR_MLE = "KPIR_MLE",
                    KPFC1 = "KPFC1", KPFC2 = "KPFC2", KPFC3 = "KPFC3")
    method <- methods[[toupper(method), exact = FALSE]]
    if (is.null(method)) {
        stop("Unable to determine method.")
    }

    if (method %in% c("KPIR_LS", "KPIR_MLE")) {
        ## Step 1:
        # OLS estimate of the model `X = F_y B + epsilon`.
        B <- t(solve(crossprod(Fy), crossprod(Fy, X)))

        # Estimate alpha, beta as nearest kronecker approximation.
        c(alpha, beta) %<-% approx.kronecker(B, c(t, r), c(p, k))

        if (method == "KPIR_LS") {
            # Estimate Delta.
            B <- kronecker(alpha, beta)
            rank <- if (ncol(Fy) == 1) 1L else qr(Fy)$rank
            Delta <- crossprod(X - tcrossprod(Fy, B)) / (nrow(X) - rank)

        } else { # KPIR_MLE
            # Estimate initial Delta.
            B <- kronecker(alpha, beta)
            Delta <- crossprod(X - tcrossprod(Fy, B)) / nrow(X)

            for (. in 1:maxit) {
                # Optimize log-likelihood for alpha, beta with fixed Delta.
                opt <- optim(c(alpha, beta), log.likelihood, gr = NULL,
                             X, Fy, matpow(Delta, -1), c(t, r), c(p, k))
                # Store previous alpha, beta and Delta (for the break condition).
                Delta.last <- Delta
                B.last <- B
                # Extract optimized alpha, beta.
                alpha <- matrix(opt$par[1:(t * r)], t, r)
                beta <- matrix(opt$par[(t * r + 1):length(opt$par)], p, k)
                # Calc new Delta with likelihood-optimized alpha, beta.
                B <- kronecker(alpha, beta)
                Delta <- crossprod(X - tcrossprod(Fy, B)) / nrow(X)
                # Check break condition 1.
                if (norm(Delta - Delta.last, 'F') < eps1 * norm(Delta, 'F')) {
                    # Check break condition 2.
                    if (norm(B - B.last, 'F') < eps2 * norm(B, 'F')) {
                        break
                    }
                }
            }
        }

        # Construct basis from alpha and beta.
        Gamma_1 <- if (d1 > 1L) La.svd(alpha, d1, 0L)$u
                   else alpha / norm(alpha, 'F')
        Gamma_2 <- if (d2 > 1L) La.svd(beta, d2, 0L)$u
                   else beta / norm(beta, 'F')
        Gamma <- kronecker(Gamma_1, Gamma_2)
    } else if (method %in% c("KPFC1", "KPFC2", "KPFC3")) {
        ## Step 1:
        # OLS estimate of the model `X = F_y B + epsilon`.
        B <- t(solve(crossprod(Fy), crossprod(Fy, X)))

        ## Step 2:
        # Estimate Delta_mle.
        P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
        Q_Fy <- diag(nrow(P_Fy)) - P_Fy
        Delta_fit <- crossprod(X, P_Fy %*% X) / nrow(X)
        Delta_res <- crossprod(X, Q_Fy %*% X) / nrow(X)
        # Compute Delta_mle using equation (7).
        D <- matpow(Delta_res, -0.5)
        Delta <- with(La.svd(D %*% Delta_fit %*% D), {
            K <- diag(c(rep(0, d1 * d2), d[-(1:(d1 * d2))]))
            D <- matpow(Delta_res, 0.5)
            Delta_res + (D %*% u %*% tcrossprod(K, u) %*% D)
        })

        ## Step 3:
        # Set Gamma to be the first `d = d1 * d2` eigenvectors of (25).
        D <- matpow(Delta, -0.5)
        Gamma <- with(La.svd(D %*% Delta_fit %*% D, d1 * d2, 0L), {
            La.svd(matpow(Delta, 0.5) %*% u[, 1:(d1 * d2)])$u
        })

        if (method == "KPFC1") {
            # Compute lower_gamma using (26).
            D <- crossprod(Gamma, matpow(Delta, -1))
            lower_gamma <- solve(D %*% Gamma, D %*% B)

            ## Step 4a:
            # Calc MLE estimate of B.
            B <- Gamma %*% lower_gamma
            # Use the VLP approx. for a kronecker product factorization.
            c(alpha, beta) %<-% approx.kronecker(B, c(t, r), c(p, k))

            # Construct basis from alpha and beta.
            Gamma_1 <- if (d1 > 1L) La.svd(alpha, d1, 0L)$u
                       else alpha / norm(alpha, 'F')
            Gamma_2 <- if (d2 > 1L) La.svd(beta, d2, 0L)$u
                       else beta / norm(beta, 'F')
            Gamma <- kronecker(Gamma_1, Gamma_2)

        } else { # KPFC2, KPFC3
            ## Step 4b:
            # Estimate Gamma's as nearest kronecker approximation of Gamma.
            c(Gamma_1, Gamma_2) %<-% approx.kronecker(Gamma, c(t, d1), c(p, d2))
            Gamma <- kronecker(Gamma_1, Gamma_2)
            # Compute lower_gamma using (26).
            D <- crossprod(Gamma, matpow(Delta, -1))
            lower_gamma <- solve(D %*% Gamma, D %*% B)

            if (prod(dim(lower_gamma)) == 1) {
                # If lower_gamma is a scalar, then alpha, beta are only scaled.
                # (shortcut)
                lg1 <- lg2 <- sqrt(abs(as.vector(lower_gamma)))
                alpha <- lg1 * Gamma_1
                beta <- lg2 * Gamma_2
            } else if (method == "KPFC2") {
                ## Step 5c:
                c(alpha, beta) %<-% approx.kronecker(Gamma %*% lower_gamma,
                                                     c(t, r), c(p, k))
            } else { # KPFC3
                ## Step 5d:
                c(lg1, lg2) %<-% approx.kronecker(lower_gamma,
                                                  c(d1, r), c(d2, k))
                alpha <- Gamma_1 %*% lg1
                beta <- Gamma_2 %*% lg2
            }
        }
    }

    return(structure(
        list(alpha = alpha,
             beta = beta,
             Gamma = Gamma,
             Gamma_1 = Gamma_1, Gamma_2 = Gamma_2,
             Delta = Delta),
        class = c("tensor_predictor", method)
    ))
}

#' TODO: Write this properly!
reduce <- function(object, data, use = 'Gamma') {
    if (use == 'Gamma') {
        projection <- object$Gamma
    } else if (use == 'alpha_beta') {
        projection <- kronecker(object$alpha, object$beta)
    } else {
        stop("Unknown 'use' parameter value.")
    }

    # Ensure alignment of multiple calls.
    if (projection[1] < 0) {
        projection <- -projection
    }

    return(data %*% projection)
}

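A closing usage sketch for the common API above (simulated toy data with k = r = 1; not part of the commit; assumes tensor_predictors.R and its dependencies are sourced):

# Fit on simulated vectorized (p x t) data and project onto the estimate.
set.seed(1)
n <- 200; p <- 5; t <- 4
Fy <- matrix(rnorm(n), n, 1)
X <- tcrossprod(Fy, kronecker(rep(1, t), rep(1, p))) +
    matrix(rnorm(n * p * t), n)
fit <- tensor_predictor(X, Fy, p, t, method = 'KPIR_LS')
X_red <- reduce(fit, X) # n x 1 reduced predictor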