suppressPackageStartupMessages({
    library(pROC)
    library(ggplot2)    # needed for the plotting section below
})

source('../tensor_predictors/approx_kronecker.R')
source('../tensor_predictors/multi_assign.R')

# Load EEG dataset.
dataset <- readRDS('eeg_data.rds')
# Load EEG k-fold simulation results.
folds <- readRDS('eeg_analysis_poi.rds')

# Set dimensional parameters.
p <- 64L    # nr. of predictors (count of sensors)
t <- 256L   # nr. of time points (measurements)

# Per-fold containers for ROC targets/predictions and the factor estimates.
labels <- vector('list', length(folds))
predictions <- vector('list', length(folds))
alphas <- matrix(0, length(folds), t)
betas  <- matrix(0, length(folds), p)

# For each fold.
for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Factorize POI result into alpha, beta.
    c(alpha, beta) %<-% approx.kronecker(fold$Q, c(t, 1), c(p, 1))
    # Drop small values of alpha, beta.
    alpha[abs(alpha) < 1e-6] <- 0
    beta[abs(beta) < 1e-6] <- 0
    # Reconstruct B from the factorization.
    B <- kronecker(alpha, beta)
    # Select the fold's train/test sets.
    X_train <- as.matrix(dataset[-fold$index, -(1:2)])
    y_train <- as.factor(dataset[-fold$index, 'Case_Control'])
    X_test <- as.matrix(dataset[fold$index, -(1:2)])
    y_test <- as.factor(dataset[fold$index, 'Case_Control'])
    # Predict via a logit model built on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X_train %*% B, y = y_train))
    y_hat <- predict(model, data.frame(x = X_test %*% B), type = "response")
    # Store target and prediction values for the ROC curve.
    labels[[i]] <- y_test
    predictions[[i]] <- y_hat
    alphas[i, ] <- as.vector(alpha)
    betas[i, ] <- as.vector(beta)
}

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss rate.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate. P(Yhat = - | Y = -). aliases: Specificity.
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])

# Combined (pooled over all folds) accuracy, error, ...
cat("acc: ", acc(unlist(labels), unlist(predictions)), "\n",
    "err: ", err(unlist(labels), unlist(predictions)), "\n",
    "fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
    "tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
    "fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
    "tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
    "auc: ", roc(unlist(labels), unlist(predictions), quiet = TRUE)$auc, "\n",
    sep = '')
# Confidence interval for the pooled AUC.
ci(roc(unlist(labels), unlist(predictions), quiet = TRUE))

# Means of per-fold accuracy, error, ...
cat("acc: ", mean(mapply(acc, labels, predictions)), "\n",
    "err: ", mean(mapply(err, labels, predictions)), "\n",
    "fpr: ", mean(mapply(fpr, labels, predictions)), "\n",
    "tpr: ", mean(mapply(tpr, labels, predictions)), "\n",
    "fnr: ", mean(mapply(fnr, labels, predictions)), "\n",
    "tnr: ", mean(mapply(tnr, labels, predictions)), "\n",
    "auc: ", mean(mapply(function(...) roc(...)$auc, labels, predictions,
                         MoreArgs = list(direction = '<', quiet = TRUE))), "\n",
    sep = '')

# Means of the per-fold AUC confidence intervals.
rowMeans(mapply(function(...) ci(roc(...)), labels, predictions,
                MoreArgs = list(direction = '<', quiet = TRUE)))
# Standard deviation of the per-fold AUCs.
sd(mapply(function(...) roc(...)$auc, labels, predictions,
          MoreArgs = list(direction = '<', quiet = TRUE)))

################################################################################
###                                   plot                                   ###
################################################################################

# Arrange multiple ggplot objects on one page in a `cols`-column grid.
multiplot <- function(..., plotlist = NULL, cols) {
    library(grid)

    # Make a list from the ... arguments and plotlist.
    plots <- c(list(...), plotlist)
    numPlots <- length(plots)

    # Make the panel.
    plotCols <- cols
    # Number of rows needed, calculated from cols.
    plotRows <- ceiling(numPlots / plotCols)

    # Set up the page.
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
    vplayout <- function(x, y) {
        viewport(layout.pos.row = x, layout.pos.col = y)
    }

    # Make each plot, in the correct location.
    for (i in 1:numPlots) {
        curRow <- ceiling(i / plotCols)
        curCol <- (i - 1) %% plotCols + 1
        print(plots[[i]], vp = vplayout(curRow, curCol))
    }
}

# Mean absolute value of each alpha component over folds (left axis) and the
# share of folds in which that component was dropped, i.e. set to zero
# (secondary axis, in percent).
pa <- ggplot(data.frame(time = rep(1:ncol(alphas), 2),
                        means = c(colMeans(abs(alphas)), .5 * colMeans(!alphas)),
                        type = factor(rep(c(0, 1), each = ncol(alphas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = time, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of alpha', x = 'time', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2, name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top', legend.title = element_blank())

# Same for the beta components (one per sensor).
pb <- ggplot(data.frame(sensor = rep(1:ncol(betas), 2),
                        means = c(colMeans(abs(betas)), .5 * colMeans(!betas)),
                        type = factor(rep(c(0, 1), each = ncol(betas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = sensor, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of beta', x = 'sensors', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2, name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top', legend.title = element_blank())

multiplot(pa, pb, cols = 1)
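
# Optional sketch (not part of the original analysis): one way to visualize the
# pooled cross-validation ROC curve with pROC, reusing the `labels` and
# `predictions` lists filled in the fold loop above.
pooled_roc <- roc(unlist(labels), unlist(predictions),
                  direction = '<', quiet = TRUE)
plot(pooled_roc, print.auc = TRUE, legacy.axes = TRUE)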