tensor_predictors/eeg_analysis/eeg_analysis_poi_step2.R

suppressPackageStartupMessages({
    library(pROC)
})

source('../tensor_predictors/approx_kronecker.R')
source('../tensor_predictors/multi_assign.R')
# Load EEG dataset
dataset <- readRDS('eeg_data.rds')
# Load EEG k-fold simulation results.
folds <- readRDS('eeg_analysis_poi.rds')
# Set dimenional parameters.
p <- 64L    # nr. of predictors (count of sensorce)
t <- 256L   # nr. of time points (measurements)

labels      <- vector('list', length(folds))
predictions <- vector('list', length(folds))
alphas <- matrix(0, length(folds), t)
betas  <- matrix(0, length(folds), p)
# For each fold.
for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Factorize POI result in alpha, beta.
    c(alpha, beta) %<-% approx.kronecker(fold$Q, c(t, 1), c(p, 1))
    # Drop small values of alpha, beta.
    alpha[abs(alpha) < 1e-6] <- 0
    beta[abs(beta) < 1e-6] <- 0
    # Reconstruct B from factorization.
    B <- kronecker(alpha, beta)
    # Select folds train/test sets.
    X_train <- as.matrix(dataset[-fold$index, -(1:2)])
    y_train <- as.factor(dataset[-fold$index, 'Case_Control'])
    X_test <- as.matrix(dataset[fold$index, -(1:2)])
    y_test <- as.factor(dataset[fold$index, 'Case_Control'])
    # Predict via a logit model building on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X_train %*% B, y = y_train))
    y_hat <- predict(model, data.frame(x = X_test %*% B), type = "response")
    # Set target and prediction values for the ROC curve.
    labels[[i]]      <- y_test
    predictions[[i]] <- y_hat
    alphas[i, ] <- as.vector(alpha)
    betas[i, ]  <- as.vector(beta)
}

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate.  P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate.  P(Yhat = - | Y = -).
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])

# Combined accuracy, error, ...
cat("acc: ", acc(unlist(labels), unlist(predictions)), "\n",
    "err: ", err(unlist(labels), unlist(predictions)), "\n",
    "fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
    "tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
    "fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
    "tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
    "auc: ", roc(unlist(labels), unlist(predictions), quiet = TRUE)$auc, "\n",
    sep = '')
# Confidence interval for AUC.
ci(roc(unlist(labels), unlist(predictions), quiet = TRUE))

# Means of per fold accuracy, error, ...
cat("acc: ", mean(mapply(acc, labels, predictions)), "\n",
    "err: ", mean(mapply(err, labels, predictions)), "\n",
    "fpr: ", mean(mapply(fpr, labels, predictions)), "\n",
    "tpr: ", mean(mapply(tpr, labels, predictions)), "\n",
    "fnr: ", mean(mapply(fnr, labels, predictions)), "\n",
    "tnr: ", mean(mapply(tnr, labels, predictions)), "\n",
    "auc: ", mean(mapply(function(...) roc(...)$auc, labels, predictions,
        MoreArgs = list(direction = '<', quiet = TRUE))), "\n",
    sep = '')
# Means of per fold CI.
rowMeans(mapply(function(...) ci(roc(...)), labels, predictions,
    MoreArgs = list(direction = '<', quiet = TRUE)))
sd(mapply(function(...) roc(...)$auc, labels, predictions,
        MoreArgs = list(direction = '<', quiet = TRUE)))

################################################################################
###                                   plot                                   ###
################################################################################
multiplot <- function(..., plotlist = NULL, cols) {
    library(grid)
    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)
    numPlots = length(plots)
    # Make the panel
    plotCols = cols
    # Number of rows needed, calculated from cols
    plotRows = ceiling(numPlots / plotCols)
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))
    vplayout <- function(x, y) {
        viewport(layout.pos.row = x, layout.pos.col = y)
    }
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
        curRow = ceiling(i / plotCols)
        curCol = (i - 1) %% plotCols + 1
        print(plots[[i]], vp = vplayout(curRow, curCol))
    }
}

pa <- ggplot(data.frame(time = rep(1:ncol(alphas), 2),
                        means = c(colMeans(abs(alphas)), .5 * colMeans(!alphas)),
                        type = factor(rep(c(0, 1), each = ncol(alphas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = time, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of alpha', x = 'time', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
                                           name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top',
          legend.title = element_blank())

pb <- ggplot(data.frame(time = rep(1:ncol(betas), 2),
                        means = c(colMeans(abs(betas)), .5 * colMeans(!betas)),
                        type = factor(rep(c(0, 1), each = ncol(betas)),
                                      labels = c('mean', 'dropped'))),
             aes(x = time, y = means, fill = type)) +
    geom_col(position = 'dodge') +
    labs(title = 'Components of beta', x = 'sensors', y = 'means') +
    coord_cartesian(ylim = c(0, 0.5)) +
    scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,
                                           name = 'dropped',
                                           labels = scales::percent)) +
    theme(legend.position = 'top',
          legend.title = element_blank())

multiplot(pa, pb, cols = 1)
add (initial): source 2020-06-10 14:35:27 +00:00			`suppressPackageStartupMessages({`
			`library(pROC)`
			`})`

			`source('../tensor_predictors/approx_kronecker.R')`
			`source('../tensor_predictors/multi_assign.R')`
			`# Load EEG dataset`
			`dataset <- readRDS('eeg_data.rds')`
			`# Load EEG k-fold simulation results.`
			`folds <- readRDS('eeg_analysis_poi.rds')`
			`# Set dimenional parameters.`
			`p <- 64L # nr. of predictors (count of sensorce)`
			`t <- 256L # nr. of time points (measurements)`

			`labels <- vector('list', length(folds))`
			`predictions <- vector('list', length(folds))`
			`alphas <- matrix(0, length(folds), t)`
			`betas <- matrix(0, length(folds), p)`
			`# For each fold.`
			`for (i in seq_along(folds)) {`
			`fold <- folds[[i]]`
			`# Factorize POI result in alpha, beta.`
			`c(alpha, beta) %<-% approx.kronecker(fold$Q, c(t, 1), c(p, 1))`
			`# Drop small values of alpha, beta.`
			`alpha[abs(alpha) < 1e-6] <- 0`
			`beta[abs(beta) < 1e-6] <- 0`
			`# Reconstruct B from factorization.`
			`B <- kronecker(alpha, beta)`
			`# Select folds train/test sets.`
			`X_train <- as.matrix(dataset[-fold$index, -(1:2)])`
			`y_train <- as.factor(dataset[-fold$index, 'Case_Control'])`
			`X_test <- as.matrix(dataset[fold$index, -(1:2)])`
			`y_test <- as.factor(dataset[fold$index, 'Case_Control'])`
			`# Predict via a logit model building on the reduced data.`
			`model <- glm(y ~ x, family = binomial(link = "logit"),`
			`data = data.frame(x = X_train %*% B, y = y_train))`
			`y_hat <- predict(model, data.frame(x = X_test %*% B), type = "response")`
			`# Set target and prediction values for the ROC curve.`
			`labels[[i]] <- y_test`
			`predictions[[i]] <- y_hat`
			`alphas[i, ] <- as.vector(alpha)`
			`betas[i, ] <- as.vector(beta)`
			`}`

			`# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).`
			`acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)`
			`# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).`
			`err <- function(y_true, y_pred) mean(round(y_pred) != y_true)`
			`# fpr: False positive rate. P(Yhat = + \| Y = -). aliases: Fallout.`
			`fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])`
			`# tpr: True positive rate. P(Yhat = + \| Y = +). aliases: Sensitivity, Recall.`
			`tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])`
			`# fnr: False negative rate. P(Yhat = - \| Y = +). aliases: Miss.`
			`fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])`
			`# tnr: True negative rate. P(Yhat = - \| Y = -).`
			`tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])`

			`# Combined accuracy, error, ...`
			`cat("acc: ", acc(unlist(labels), unlist(predictions)), "\n",`
			`"err: ", err(unlist(labels), unlist(predictions)), "\n",`
			`"fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",`
			`"tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",`
			`"fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",`
			`"tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",`
			`"auc: ", roc(unlist(labels), unlist(predictions), quiet = TRUE)$auc, "\n",`
			`sep = '')`
			`# Confidence interval for AUC.`
			`ci(roc(unlist(labels), unlist(predictions), quiet = TRUE))`

			`# Means of per fold accuracy, error, ...`
			`cat("acc: ", mean(mapply(acc, labels, predictions)), "\n",`
			`"err: ", mean(mapply(err, labels, predictions)), "\n",`
			`"fpr: ", mean(mapply(fpr, labels, predictions)), "\n",`
			`"tpr: ", mean(mapply(tpr, labels, predictions)), "\n",`
			`"fnr: ", mean(mapply(fnr, labels, predictions)), "\n",`
			`"tnr: ", mean(mapply(tnr, labels, predictions)), "\n",`
			`"auc: ", mean(mapply(function(...) roc(...)$auc, labels, predictions,`
			`MoreArgs = list(direction = '<', quiet = TRUE))), "\n",`
			`sep = '')`
			`# Means of per fold CI.`
			`rowMeans(mapply(function(...) ci(roc(...)), labels, predictions,`
			`MoreArgs = list(direction = '<', quiet = TRUE)))`
			`sd(mapply(function(...) roc(...)$auc, labels, predictions,`
			`MoreArgs = list(direction = '<', quiet = TRUE)))`

			`################################################################################`
			`### plot ###`
			`################################################################################`
			`multiplot <- function(..., plotlist = NULL, cols) {`
			`library(grid)`
			`# Make a list from the ... arguments and plotlist`
			`plots <- c(list(...), plotlist)`
			`numPlots = length(plots)`
			`# Make the panel`
			`plotCols = cols`
			`# Number of rows needed, calculated from cols`
			`plotRows = ceiling(numPlots / plotCols)`
			`# Set up the page`
			`grid.newpage()`
			`pushViewport(viewport(layout = grid.layout(plotRows, plotCols)))`
			`vplayout <- function(x, y) {`
			`viewport(layout.pos.row = x, layout.pos.col = y)`
			`}`
			`# Make each plot, in the correct location`
			`for (i in 1:numPlots) {`
			`curRow = ceiling(i / plotCols)`
			`curCol = (i - 1) %% plotCols + 1`
			`print(plots[[i]], vp = vplayout(curRow, curCol))`
			`}`
			`}`

			`pa <- ggplot(data.frame(time = rep(1:ncol(alphas), 2),`
			`means = c(colMeans(abs(alphas)), .5 * colMeans(!alphas)),`
			`type = factor(rep(c(0, 1), each = ncol(alphas)),`
			`labels = c('mean', 'dropped'))),`
			`aes(x = time, y = means, fill = type)) +`
			`geom_col(position = 'dodge') +`
			`labs(title = 'Components of alpha', x = 'time', y = 'means') +`
			`coord_cartesian(ylim = c(0, 0.5)) +`
			`scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,`
			`name = 'dropped',`
			`labels = scales::percent)) +`
			`theme(legend.position = 'top',`
			`legend.title = element_blank())`

			`pb <- ggplot(data.frame(time = rep(1:ncol(betas), 2),`
			`means = c(colMeans(abs(betas)), .5 * colMeans(!betas)),`
			`type = factor(rep(c(0, 1), each = ncol(betas)),`
			`labels = c('mean', 'dropped'))),`
			`aes(x = time, y = means, fill = type)) +`
			`geom_col(position = 'dodge') +`
			`labs(title = 'Components of beta', x = 'sensors', y = 'means') +`
			`coord_cartesian(ylim = c(0, 0.5)) +`
			`scale_y_continuous(sec.axis = sec_axis(trans = ~ . * 2,`
			`name = 'dropped',`
			`labels = scales::percent)) +`
			`theme(legend.position = 'top',`
			`legend.title = element_blank())`

			`multiplot(pa, pb, cols = 1)`