tensor_predictors/examples/example_cross_validation.R

54 lines
1.9 KiB
R

# # Generate Sample Data.
# n <- 250
# # see: simulation_binary.R
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
# X <- data$X
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
# Y <- 2 * data$Y
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)
suppressPackageStartupMessages({
    library(pROC)
})
source('../tensor_predictors/tensor_predictors.R')

# Read sample data from file and split into predictors and responses.
data <- read.csv('example_data.csv')
X <- as.matrix(data[, names(data) != 'Y'])
Y <- as.matrix(data[, 'Y'])

# Set parameters (and check): X is required to have exactly p * t columns.
n <- nrow(X)
p <- 10
t <- 5
stopifnot(p * t == ncol(X))

# Setup k-fold cross validation (folds contains indices of the test sets).
# The observations are shuffled and then cut into `nr.folds` groups of (almost)
# equal size. The shuffle is random; call `set.seed()` before running this
# script if reproducible folds are required.
nr.folds <- 10
folds <- split(sample.int(n), ((seq_len(n) - 1) * nr.folds) %/% n)
# Size the result containers by the number of folds actually created; for
# n < nr.folds `split` yields fewer groups and no NULL entries must remain.
labels <- vector('list', length(folds))      # True test values (per fold)
predictions <- vector('list', length(folds)) # Predictions on test set.

for (i in seq_along(folds)) {
    fold <- folds[[i]]
    # Split data into train and test sets (`drop = FALSE` keeps matrices even
    # if a set consists of a single observation).
    X.train <- X[-fold, , drop = FALSE]
    Y.train <- Y[-fold, , drop = FALSE]
    X.test <- X[fold, , drop = FALSE]
    Y.test <- Y[fold, , drop = FALSE]
    # Compute reduction (method = c('KPIR_LS' ,'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
    # or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
    dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
    B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
    # Predict via a logit model building on the reduced data.
    # The formula `y ~ .` regresses on every reduced predictor column: a one
    # column reduction gives the single column `x` (same model as `y ~ x`),
    # a multi column reduction is expanded by `data.frame` to `x.1`, `x.2`, ...
    # which a literal `y ~ x` would fail to find.
    train <- data.frame(x = X.train %*% B, y = as.integer(Y.train > 0))
    test <- data.frame(x = X.test %*% B)
    model <- glm(y ~ ., family = binomial(link = "logit"), data = train)
    labels[[i]] <- as.integer(Y.test > 0)
    predictions[[i]] <- predict(model, newdata = test, type = "response")
}

# Mean of the per fold AUC values (the enclosing parentheses print the result).
# `direction = '<'` fixes the comparison direction between the two response
# groups instead of letting `roc` choose it separately for every fold.
(meanAUC <- mean(mapply(function(...) roc(...)$auc, labels, predictions,
                        MoreArgs = list(direction = '<', quiet = TRUE))))