57 lines
2.0 KiB
R
57 lines
2.0 KiB
R
# # Generate Sample Data.
|
|
# n <- 250
|
|
# # see: simulation_binary.R
|
|
# data <- simulateData.binary(n / 2, n / 2, (p <- 10), (t <- 5), 0.3, 0.3)
|
|
# X <- data$X
|
|
# colnames(X) <- paste('X[', outer(1:p, 1:t, paste, sep = ','), ']', sep = '')
|
|
# Y <- 2 * data$Y
|
|
# write.csv(data.frame(X, Y), file = 'example_data.csv', row.names = FALSE)
|
|
|
|
suppressPackageStartupMessages({
|
|
library(pROC)
|
|
})
|
|
|
|
source('../tensor_predictors/tensor_predictors.R')
|
|
|
|
# Read sample data from file and split into predictors and responces.
|
|
data <- read.csv('example_data.csv')
|
|
X <- as.matrix(data[, names(data) != 'Y'])
|
|
Y <- as.matrix(data[, 'Y'])
|
|
|
|
# Set parameters (and check)
|
|
n <- nrow(X)
|
|
p <- 10
|
|
t <- 5
|
|
stopifnot(p * t == ncol(X))
|
|
|
|
# Setup folds (folds contains indices of the test set).
|
|
nr.folds <- n # leave-one-out when number of folds equals the sample size `n`.
|
|
folds <- split(sample.int(n), (seq(0, n - 1) * nr.folds) %/% n)
|
|
labels <- vector('list', nr.folds) # True test values (per fold)
|
|
predictions <- vector('list', nr.folds) # Predictions on test set.
|
|
|
|
for (i in seq_along(folds)) {
|
|
fold <- folds[[i]]
|
|
# Split data into train and test sets.
|
|
X.train <- X[-fold, ]
|
|
Y.train <- Y[-fold, , drop = FALSE]
|
|
X.test <- X[fold, ]
|
|
Y.test <- Y[fold, , drop = FALSE]
|
|
|
|
# Compute reduction (method = c('KPIR_LS' ,'KPIR_MLE', 'KPFC1', 'KPFC2', 'KPFC3'))
|
|
# or LSIR(X.train, Y.train, p, t) in 'lsir.R'.
|
|
dr <- tensor_predictor(X.train, Y.train, p, t, method = 'KPIR_LS')
|
|
B <- kronecker(dr$alpha, dr$beta) # Also available: Gamma_1, Gamma_2, Gamma, B.
|
|
# Predict via a logit model building on the reduced data.
|
|
model <- glm(y ~ x, family = binomial(link = "logit"),
|
|
data = data.frame(x = X.train %*% B, y = as.integer(Y.train > 0)))
|
|
|
|
labels[[i]] <- as.integer(Y.test > 0)
|
|
predictions[[i]] <- predict(model, data.frame(x = X.test %*% B), type = "response")
|
|
}
|
|
|
|
# Compute classic ROC for predicted samples (mean AUC makes no sense for leave-one-out)
|
|
y.true <- unlist(labels)
|
|
y.pred <- unlist(predictions)
|
|
roc(y.true, y.pred)
|