tensor_predictors/examples/example_loo.R

# # Generate Sample Data.
# n <- 250
# # see: simulation_binary.R
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
# X <- data$X
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
# Y <- 2 * data$Y
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)

suppressPackageStartupMessages({
    library(pROC)
})

source('../tensor_predictors/tensor_predictors.R')

# Read sample data from file and split into predictors and responces.
data <- read.csv('example_data.csv')
X <- as.matrix(data[, names(data) != 'Y'])
Y <- as.matrix(data[, 'Y'])

# Set parameters (and check)
n <- nrow(X)
p <- 10
t <- 5
stopifnot(p * t == ncol(X))

# Setup folds (folds contains indices of the test set).
nr.folds <- n # leave-one-out when number of folds equals the sample size `n`.
folds <- split(sample.int(n), (seq(0, n - 1) * nr.folds) %/% n)
labels      <- vector('list', nr.folds) # True test values (per fold)
predictions <- vector('list', nr.folds) # Predictions on test set.

for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Split data into train and test sets.
    X.train <- X[-fold, ]
    Y.train <- Y[-fold, , drop = FALSE]
    X.test <- X[fold, ]
    Y.test <- Y[fold, , drop = FALSE]

    # Compute reduction (method = c('KPIR_LS' ,'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
    # or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
    dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
    B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
    # Predict via a logit model building on the reduced data.
    model <- glm(y ~ x, family = binomial(link = "logit"),
                 data = data.frame(x = X.train %*% B, y = as.integer(Y.train > 0)))

    labels[[i]] <- as.integer(Y.test > 0)
    predictions[[i]] <- predict(model, data.frame(x = X.test %*% B), type = "response")
}

# Compute classic ROC for predicted samples (mean AUC makes no sense for leave-one-out)
y.true <- unlist(labels)
y.pred <- unlist(predictions)
roc(y.true, y.pred)