options(keep.source = TRUE, keep.source.pkgs = TRUE)

library(tensorPredictors)

# Load as 3D predictors `X` and flat response `y`
c(X, F, y) %<-% local({
    # Load from file
    ds <- readRDS("eeg_data.rds")

    # Dimension values
    n <- nrow(ds)       # sample size (nr. of people)
    p <- 64L            # nr. of predictors (count of sensorce)
    t <- 256L           # nr. of time points (measurements)

    # Extract dimension names
    nNames <- ds$PersonID
    tNames <- as.character(seq(t))
    pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)]

    # Split into predictors (with proper dims and names) and response
    X <- array(as.matrix(ds[, -(1:2)]),
        dim = c(person = n, time = t, sensor = p),
        dimnames = list(person = nNames, time = tNames, sensor = pNames)
    )
    y <- ds$Case_Control

    list(X, array(y, c(n, 1L, 1L)), y)
})

# fit a tensor normal model to the data sample axis 1 indexes persons)
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L)

# Performa a LOO prediction
y.hat <- sapply(seq_len(dim(X)[1L]), function(i) {
    # Fit with i'th observation removes
    fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L)

    # Reduce the entire data set
    r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE))
    # Fit a logit model on reduced data with i'th observation removed
    logit <- glm(y ~ r, family = binomial(link = "logit"),
        data = data.frame(y = y[-i], r = r[-i])
    )
    # predict i'th response given i'th reduced observation
    y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response")
    # report progress
    cat(sprintf("%3d/%d\n", i, dim(X)[1L]))

    y.hat
})

### Classification performance measures
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
(acc <- mean(round(y.hat) == y))                                                # 0.7868852
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
(err <- mean(round(y.hat) != y))                                                # 0.2131148
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
(fpr <- mean((round(y.hat) == 1)[y == 0]))                                      # 0.4
# tpr: True positive rate.  P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
(tpr <- mean((round(y.hat) == 1)[y == 1]))                                      # 0.8961039
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
(fnr <- mean((round(y.hat) == 0)[y == 1]))                                      # 0.1038961
# tnr: True negative rate.  P(Yhat = - | Y = -).
(tnr <- mean((round(y.hat) == 0)[y == 0]))                                      # 0.6
# auc: Area Under the Curve
(auc <- pROC::roc(y, y.hat, quiet = TRUE)$auc)                                  # 0.838961


with(fit.gmlm, {
    par.reset <- par(mfrow = c(2, 1))
    plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1]))
    plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2]))
    par(par.reset)
})


# dimX <- c(4, 3, 5)
# Omegas <- Map(function(p) {
#     O <- matrix(rnorm(p^2), p)
#     O %*% t(O)
# }, dimX)

#         # Numerically more stable version of `sum(log(mapply(det, Omegas)) / dimX)`
#         # which is itself equivalent to `log(det(Omega)) / prod(nrow(Omega))` where
#         # `Omega <- Reduce(kronecker, rev(Omegas))`.

# Omega <- Reduce(kronecker, rev(Omegas))
# log(det(Omega)) / prod(nrow(Omega))

# (det.Omega <- sum(log(mapply(det, Omegas)) / dimX))
# sum(mapply(function(Omega) {
#     sum(log(eigen(Omega, TRUE, TRUE)$values))
# }, Omegas) / dimX)