tensor_predictors/eeg_analysis/eeg_analysis_poi_step2.R

141 lines
6.2 KiB
R
Raw Normal View History

2020-06-10 14:35:27 +00:00
suppressPackageStartupMessages({
library(pROC)
})
source('../tensor_predictors/approx_kronecker.R')
source('../tensor_predictors/multi_assign.R')
# Load EEG dataset
dataset <- readRDS('eeg_data.rds')
# Load EEG k-fold simulation results.
folds <- readRDS('eeg_analysis_poi.rds')
# Set dimenional parameters.
p <- 64L # nr. of predictors (count of sensorce)
t <- 256L # nr. of time points (measurements)
labels <- vector('list', length(folds))
predictions <- vector('list', length(folds))
alphas <- matrix(0, length(folds), t)
betas <- matrix(0, length(folds), p)
# For each fold.
for (i in seq_along(folds)) {
fold <- folds[[i]]
# Factorize POI result in alpha, beta.
c(alpha, beta) %<-% approx.kronecker(fold$Q, c(t, 1), c(p, 1))
# Drop small values of alpha, beta.
alpha[abs(alpha) < 1e-6] <- 0
beta[abs(beta) < 1e-6] <- 0
# Reconstruct B from factorization.
B <- kronecker(alpha, beta)
# Select folds train/test sets.
X_train <- as.matrix(dataset[-fold$index, -(1:2)])
y_train <- as.factor(dataset[-fold$index, 'Case_Control'])
X_test <- as.matrix(dataset[fold$index, -(1:2)])
y_test <- as.factor(dataset[fold$index, 'Case_Control'])
# Predict via a logit model building on the reduced data.
model <- glm(y ~ x, family = binomial(link = "logit"),
data = data.frame(x = X_train %*% B, y = y_train))
y_hat <- predict(model, data.frame(x = X_test %*% B), type = "response")
# Set target and prediction values for the ROC curve.
labels[[i]] <- y_test
predictions[[i]] <- y_hat
alphas[i, ] <- as.vector(alpha)
betas[i, ] <- as.vector(beta)
}
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate. P(Yhat = - | Y = -).
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])
# Combined accuracy, error, ...
cat("acc: ", acc(unlist(labels), unlist(predictions)), "\n",
"err: ", err(unlist(labels), unlist(predictions)), "\n",
"fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
"tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
"fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
"tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
"auc: ", roc(unlist(labels), unlist(predictions), quiet = TRUE)$auc, "\n",
sep = '')
# Confidence interval for AUC.
ci(roc(unlist(labels), unlist(predictions), quiet = TRUE))
# Means of per fold accuracy, error, ...
cat("acc: ", mean(mapply(acc, labels, predictions)), "\n",
"err: ", mean(mapply(err, labels, predictions)), "\n",
"fpr: ", mean(mapply(fpr, labels, predictions)), "\n",
"tpr: ", mean(mapply(tpr, labels, predictions)), "\n",
"fnr: ", mean(mapply(fnr, labels, predictions)), "\n",
"tnr: ", mean(mapply(tnr, labels, predictions)), "\n",
"auc: ", mean(mapply(function(...) roc(...)$auc, labels, predictions,
MoreArgs = list(direction = '<', quiet = TRUE))), "\n",
sep = '')
# Means of per fold CI.
rowMeans(mapply(function(...) ci(roc(...)), labels, predictions,
MoreArgs = list(direction = '<', quiet = TRUE)))
sd(mapply(function(...) roc(...)$auc, labels, predictions,
MoreArgs = list(direction = '<', quiet = TRUE)))
################################################################################
### plot ###
################################################################################
multiplot <- function(..., plotlist = NULL, cols) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# Make the panel
plotCols = cols
# Number of rows needed, calculated from cols
plotRows = ceiling(numPlots / plotCols)
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
vplayout <- function(x, y) {
viewport(layout.pos.row = x, layout.pos.col = y)
}
# Make each plot, in the correct location
for (i in 1:numPlots) {
curRow = ceiling(i / plotCols)
curCol = (i - 1) %% plotCols + 1
print(plots[[i]], vp = vplayout(curRow, curCol))
}
}
pa <- ggplot(data.frame(time = rep(1:ncol(alphas), 2),
means = c(colMeans(abs(alphas)), .5 * colMeans(!alphas)),
type = factor(rep(c(0, 1), each = ncol(alphas)),
labels = c('mean', 'dropped'))),
aes(x = time, y = means, fill = type)) +
geom_col(position = 'dodge') +
labs(title = 'Components of alpha', x = 'time', y = 'means') +
coord_cartesian(ylim = c(0, 0.5)) +
scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
name = 'dropped',
labels = scales::percent)) +
theme(legend.position = 'top',
legend.title = element_blank())
pb <- ggplot(data.frame(time = rep(1:ncol(betas), 2),
means = c(colMeans(abs(betas)), .5 * colMeans(!betas)),
type = factor(rep(c(0, 1), each = ncol(betas)),
labels = c('mean', 'dropped'))),
aes(x = time, y = means, fill = type)) +
geom_col(position = 'dodge') +
labs(title = 'Components of beta', x = 'sensors', y = 'means') +
coord_cartesian(ylim = c(0, 0.5)) +
scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
name = 'dropped',
labels = scales::percent)) +
theme(legend.position = 'top',
legend.title = element_blank())
multiplot(pa, pb, cols = 1)