options(keep.source = TRUE, keep.source.pkgs = TRUE) library(tensorPredictors) # Load as 3D predictors `X` and flat response `y` c(X, F, y) %<-% local({ # Load from file ds <- readRDS("eeg_data.rds") # Dimension values n <- nrow(ds) # sample size (nr. of people) p <- 64L # nr. of predictors (count of sensorce) t <- 256L # nr. of time points (measurements) # Extract dimension names nNames <- ds$PersonID tNames <- as.character(seq(t)) pNames <- unlist(strsplit(colnames(ds)[2 + t * seq(p)], "_"))[c(TRUE, FALSE)] # Split into predictors (with proper dims and names) and response X <- array(as.matrix(ds[, -(1:2)]), dim = c(person = n, time = t, sensor = p), dimnames = list(person = nNames, time = tNames, sensor = pNames) ) y <- ds$Case_Control list(X, array(y, c(n, 1L, 1L)), y) }) # fit a tensor normal model to the data sample axis 1 indexes persons) fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = 1L) # Performa a LOO prediction y.hat <- sapply(seq_len(dim(X)[1L]), function(i) { # Fit with i'th observation removes fit <- gmlm_tensor_normal(X[-i, , ], F[-i, , , drop = FALSE], sample.axis = 1L) # Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 2:3, transpose = TRUE)) # Fit a logit model on reduced data with i'th observation removed logit <- glm(y ~ r, family = binomial(link = "logit"), data = data.frame(y = y[-i], r = r[-i]) ) # predict i'th response given i'th reduced observation y.hat <- predict(logit, newdata = data.frame(r = r[i]), type = "response") # report progress cat(sprintf("%3d/%d\n", i, dim(X)[1L])) y.hat }) ### Classification performance measures # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). (acc <- mean(round(y.hat) == y)) # 0.7868852 # err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). (err <- mean(round(y.hat) != y)) # 0.2131148 # fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout. (fpr <- mean((round(y.hat) == 1)[y == 0])) # 0.4 # tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall. (tpr <- mean((round(y.hat) == 1)[y == 1])) # 0.8961039 # fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss. (fnr <- mean((round(y.hat) == 0)[y == 1])) # 0.1038961 # tnr: True negative rate. P(Yhat = - | Y = -). (tnr <- mean((round(y.hat) == 0)[y == 0])) # 0.6 # auc: Area Under the Curve (auc <- pROC::roc(y, y.hat, quiet = TRUE)$auc) # 0.838961 with(fit.gmlm, { par.reset <- par(mfrow = c(2, 1)) plot(seq(0, 1, len = 256), betas[[1]], main = "Time", xlab = "Time [s]", ylab = expression(beta[1])) plot(betas[[2]], main = "Sensors", xlab = "Sensor Index", ylab = expression(beta[2])) par(par.reset) }) # dimX <- c(4, 3, 5) # Omegas <- Map(function(p) { # O <- matrix(rnorm(p^2), p) # O %*% t(O) # }, dimX) # # Numerically more stable version of `sum(log(mapply(det, Omegas)) / dimX)` # # which is itself equivalent to `log(det(Omega)) / prod(nrow(Omega))` where # # `Omega <- Reduce(kronecker, rev(Omegas))`. # Omega <- Reduce(kronecker, rev(Omegas)) # log(det(Omega)) / prod(nrow(Omega)) # (det.Omega <- sum(log(mapply(det, Omegas)) / dimX)) # sum(mapply(function(Omega) { # sum(log(eigen(Omega, TRUE, TRUE)$values)) # }, Omegas) / dimX)