# tensor_predictors/examples/example_loo.R
#
# Leave-one-out cross-validated classification example (approx. 57 lines).

# # Generate Sample Data.
# n <- 250
# # see: simulation_binary.R
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
# X <- data$X
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
# Y <- 2 * data$Y
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)
suppressPackageStartupMessages({
library(pROC)
})
source('../tensor_predictors/tensor_predictors.R')
# Read the sample data from file and split it into predictors and responses.
data <- read.csv('example_data.csv')
is.response <- names(data) == 'Y'
X <- as.matrix(data[, !is.response])   # n x (p * t) predictor matrix
Y <- as.matrix(data[, is.response])    # n x 1 response matrix
# Problem dimensions (and a consistency check against the data).
n <- nrow(X)  # sample size
p <- 10       # first tensor mode dimension
t <- 5        # second tensor mode dimension
stopifnot(p * t == ncol(X))
# Build cross-validation folds (each fold holds the indices of its test set).
nr.folds <- n # leave-one-out when number of folds equals the sample size `n`.
# Randomly permute the observation indices, then partition them into
# `nr.folds` groups whose sizes differ by at most one. Note: the outer
# parentheses are required since `%/%` binds tighter than `*` in R.
permuted <- sample.int(n)
group.id <- ((seq_len(n) - 1) * nr.folds) %/% n
folds <- split(permuted, group.id)
# Preallocate per-fold containers for the true test labels and the
# corresponding model predictions.
labels <- vector('list', nr.folds)
predictions <- vector('list', nr.folds)
for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Split data into train and test sets. `drop = FALSE` keeps a single-row
    # test set (the leave-one-out case) as a 1 x (p * t) matrix instead of
    # letting `[` collapse it to a plain vector.
    X.train <- X[-fold, , drop = FALSE]
    Y.train <- Y[-fold, , drop = FALSE]
    X.test <- X[fold, , drop = FALSE]
    Y.test <- Y[fold, , drop = FALSE]
    # Compute reduction (method = c('KPIR_LS' ,'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
    # or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
    dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
    B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
    # Predict via a logit model built on the reduced data. The formula `y ~ .`
    # regresses on every reduction component: `data.frame(x = <matrix>)` splits
    # a k-column matrix into columns x.1, ..., x.k, which a literal `y ~ x`
    # would not resolve; `y ~ .` is identical for k = 1 and also covers k > 1.
    model <- glm(y ~ ., family = binomial(link = "logit"),
                 data = data.frame(x = X.train %*% B, y = as.integer(Y.train > 0)))
    # Record the binarized test labels and the predicted class probabilities.
    labels[[i]] <- as.integer(Y.test > 0)
    predictions[[i]] <- predict(model, data.frame(x = X.test %*% B), type = "response")
}
# Pool all folds and report the ROC curve / AUC of the combined predictions.
# (A per-fold mean AUC makes no sense for leave-one-out, since every fold
# contains exactly one observation.)
y.true <- do.call(c, labels)
y.pred <- do.call(c, predictions)
roc(y.true, y.pred)