library(tensorPredictors)
library(parallel)
library(pROC)

#' Mode-Wise PCA preprocessing
#'
#' @param X 4D EEG data tensor (time x sensor x condition x subject)
#' @param npc_time Number of Principal Components for time axis
#' @param npc_sensor Number of Principal Components for sensor axis
#' @param npc_condition Number of Principal Components for stimulus condition axis
preprocess <- function(X, npc_time, npc_sensor, npc_condition) {
    # Mode covariances (time, sensor and stimulus condition modes)
    c(Sigma_t, Sigma_s, Sigma_c) %<-% mcov(X)

    # Mode-wise principal components (time, sensor, condition)
    V_t <- svd(Sigma_t, npc_time, 0L)$u
    V_s <- svd(Sigma_s, npc_sensor, 0L)$u
    V_c <- svd(Sigma_c, npc_condition, 0L)$u

    # reduce with mode-wise PCs
    mlm(X, list(V_t, V_s, V_c), modes = 1:3, transposed = TRUE)
}

### Classification performance measures
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y.true, y.pred) mean(round(y.pred) == y.true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y.true, y.pred) mean(round(y.pred) != y.true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y.true, y.pred) mean((round(y.pred) == 1)[y.true == 0])
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y.true, y.pred) mean((round(y.pred) == 1)[y.true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss rate.
fnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 1])
# tnr: True negative rate. P(Yhat = - | Y = -). aliases: Specificity.
tnr <- function(y.true, y.pred) mean((round(y.pred) == 0)[y.true == 0])
# auc: Area Under the ROC Curve, and its standard deviation
auc <- function(y.true, y.pred) as.numeric(pROC::roc(y.true, y.pred, quiet = TRUE)$auc)
auc.sd <- function(y.true, y.pred) sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE)))
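# ---- Illustrative sanity checks (not part of the analysis) -------------------
# A minimal sketch, assuming, as the bare `mcov(X)` call above does, that the
# sample axis defaults to the last mode: `preprocess` should shrink the first
# three modes of a toy 4D array to the requested numbers of PCs.
X.toy <- array(rnorm(16 * 8 * 4 * 30), dim = c(16, 8, 4, 30))
dim(preprocess(X.toy, 3, 4, 3))  # expected: 3 4 3 30

# Likewise, a toy check of the classification measures; the commented values
# are worked out by hand from the definitions above.
y.toy      <- c(0, 0, 1, 1, 1)
y.toy.pred <- c(0.2, 0.7, 0.4, 0.8, 0.9)
acc(y.toy, y.toy.pred)  # 0.6: 3 of 5 rounded predictions match
fpr(y.toy, y.toy.pred)  # 0.5: 1 of 2 negatives predicted positive
tpr(y.toy, y.toy.pred)  # 2/3: 2 of 3 positives predicted positive
auc(y.toy, y.toy.pred)  # 5/6: 5 of 6 positive/negative pairs ranked correctly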
# Load full EEG dataset of all subjects
c(X, y) %<-% readRDS("eeg_data_3d.rds")

##################################### GMLM #####################################

#' Leave-one-out prediction using GMLM
#'
#' @param X 3D EEG data (preprocessed or not), subjects stacked along the 4th axis
#' @param y binary response `y` as a 3D tensor, where every observation is a
#'   1 x 1 matrix
loo.predict.gmlm <- function(X, y) {
    unlist(parallel::mclapply(seq_along(y), function(i) {
        # Fit with i'th observation removed
        fit <- gmlm_tensor_normal(X[ , , , -i], as.integer(y[-i]), sample.axis = 4L)

        # Reduce the entire data set
        r <- as.vector(mlm(X, fit$betas, modes = 1:3, transposed = TRUE))

        # Fit a logit model on the reduced data with i'th observation removed
        logit <- glm(y ~ r, family = binomial(link = "logit"),
            data = data.frame(y = y[-i], r = r[-i])
        )

        # predict i'th response given i'th reduced observation
        y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")

        # report progress
        cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n",
            dim(X)[1], dim(X)[2], dim(X)[3], i, length(y)
        ))

        y.hat
    }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
}

# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4   <- loo.predict.gmlm(preprocess(X, 3, 4, 3), y)
y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15, 3), y)
y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30, 3), y)
y.hat       <- loo.predict.gmlm(X, y)

# classification performance measures table by leave-one-out cross-validation
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
    sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) {
        match.fun(FUN)(as.integer(y) - 1L, y.pred)
    })
}))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
#> acc    0.83606557  0.80327869  0.80327869 0.80327869
#> err    0.16393443  0.19672131  0.19672131 0.19672131
#> fpr    0.31111111  0.33333333  0.33333333 0.33333333
#> tpr    0.92207792  0.88311688  0.88311688 0.88311688
#> fnr    0.07792208  0.11688312  0.11688312 0.11688312
#> tnr    0.68888889  0.66666667  0.66666667 0.66666667
#> auc    0.88023088  0.87070707  0.87041847 0.86810967
#> auc.sd 0.03124875  0.03244623  0.03248653 0.03295883
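# Both leave-one-out drivers read their worker count from getOption("mc.cores")
# and fall back to all but one core.  A minimal sketch of how one could cap
# the parallelism for a whole session; the value 4L is an arbitrary example,
# not taken from the original analysis, and the line is left commented out so
# the script's default behaviour is unchanged:
# options(mc.cores = 4L)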
################################## Tensor SIR ##################################

#' Leave-one-out prediction using TSIR
#'
#' @param X 3D EEG data (preprocessed or not), subjects stacked along the 4th axis
#' @param y binary response vector
loo.predict.tsir <- function(X, y) {
    unlist(parallel::mclapply(seq_along(y), function(i) {
        # Fit with i'th observation removed
        fit <- TSIR(X[ , , , -i], y[-i], c(1L, 1L, 1L), sample.axis = 4L)

        # Reduce the entire data set
        r <- as.vector(mlm(X, fit, modes = 1:3, transposed = TRUE))

        # Fit a logit model on the reduced data with i'th observation removed
        logit <- glm(y ~ r, family = binomial(link = "logit"),
            data = data.frame(y = y[-i], r = r[-i])
        )

        # predict i'th response given i'th reduced observation
        y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")

        # report progress
        cat(sprintf("dim: (%d, %d, %d) - %3d/%d\n",
            dim(X)[1], dim(X)[2], dim(X)[3], i, length(y)
        ))

        y.hat
    }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L))))
}

# perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction
y.hat.3.4   <- loo.predict.tsir(preprocess(X, 3, 4, 3), y)
y.hat.15.15 <- loo.predict.tsir(preprocess(X, 15, 15, 3), y)
y.hat.20.30 <- loo.predict.tsir(preprocess(X, 20, 30, 3), y)
y.hat       <- loo.predict.tsir(X, y)

# classification performance measures table by leave-one-out cross-validation
(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) {
    sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) {
        match.fun(FUN)(as.integer(y) - 1L, y.pred)
    })
}))
#>         y.hat.3.4 y.hat.15.15 y.hat.20.30      y.hat
#> acc    0.81967213  0.84426230  0.81147541 0.80327869
#> err    0.18032787  0.15573770  0.18852459 0.19672131
#> fpr    0.33333333  0.24444444  0.33333333 0.33333333
#> tpr    0.90909091  0.89610390  0.89610390 0.88311688
#> fnr    0.09090909  0.10389610  0.10389610 0.11688312
#> tnr    0.66666667  0.75555556  0.66666667 0.66666667
#> auc    0.86522367  0.89379509  0.88196248 0.85974026
#> auc.sd 0.03357539  0.03055047  0.02986038 0.03367847
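# ---- Possible follow-up (illustrative only, not part of the analysis) --------
# Because both sections reuse the names `y.hat.*`, the GMLM predictions are
# overwritten by the TSIR run.  Keeping copies, say `y.hat.gmlm` and
# `y.hat.tsir` (hypothetical names), the two methods could be compared with a
# paired DeLong test on their ROC curves via pROC:
# roc.gmlm <- pROC::roc(as.integer(y) - 1L, y.hat.gmlm, quiet = TRUE)
# roc.tsir <- pROC::roc(as.integer(y) - 1L, y.hat.tsir, quiet = TRUE)
# pROC::roc.test(roc.gmlm, roc.tsir, paired = TRUE)  # DeLong's test by default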