# tensor_predictors/simulations/simulation_sparse.R

# Loaded functions.
source('../tensor_predictors/multi_assign.R') # %<-%
source('../tensor_predictors/approx_kronecker.R') # approx.kronecker
source('../tensor_predictors/poi.R') # POI
source('../tensor_predictors/subspace.R') # subspace
source('../tensor_predictors/random.R') # rmvnorm
# Load C implementation of the 'FastPOI-C' subroutine.
# Required for using 'use.C = TRUE' in the POI method.
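# NOTE: 'poi.so' is assumed to be pre-built from the corresponding C source
# (e.g. via 'R CMD SHLIB' in '../tensor_predictors/').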
dyn.load('../tensor_predictors/poi.so')
# When 'use.C = FALSE' the POI method uses a base R implementation.
use.C = TRUE
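
# simulateData.sparse: draw a dataset from the sparse inverse regression model
#     X = F_y (alpha %x% beta)' + scale * E,   E ~ N(0, sigma),
# where F_y = (y, y^2, ..., y^degree) is centered (see the code below).
#   n      ... sample size
#   p, t   ... predictor dimensions, X has p * t columns
#   k, r   ... numbers of columns of the true reductions beta (p x k), alpha (t x r)
#   scale  ... noise scaling factor
#   degree ... polynomial degree of the response mapping F_y
# Note: the dimensions only match when degree == k * r (here degree = 2, k = 1, r = 2).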
simulateData.sparse <- function(n, p, t, k, r, scale, degree = 2) {
    # Define true reduction matrices alpha, beta.
    alpha <- diag(1, t, r)
    beta <- diag(1, p, k)
    # Create true "random" covariance of inverse model.
    R <- matrix(rnorm((p * t)^2), p * t)        # random square matrix.
    sigma <- tcrossprod(R / sqrt(rowSums(R^2))) # sym. pos. def. with diag = 1.
    # Sample responses.
    y <- rnorm(n, 0, 1)
    # equiv to cbind(y^1, y^2, ..., y^degree)
    Fy <- t(vapply(y, `^`, double(degree), seq(degree)))
    # Calc X according to the inverse regression model.
    X <- tcrossprod(scale(Fy, scale = FALSE, center = TRUE), kronecker(alpha, beta))
    X <- X + (scale * rmvnorm(n, sigma = sigma))
    return(list(X = X, y = y, Fy = Fy, alpha = alpha, beta = beta))
}
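
# Example (not run): quick sanity check of the generator under one small setting.
# ds <- simulateData.sparse(n = 100, p = 5, t = 3, k = 1, r = 2, scale = 1)
# dim(ds$X)      # 100 x 15  (n x (p * t))
# dim(ds$Fy)     # 100 x 2   (n x degree)
# dim(ds$alpha)  # 3 x 2     (t x r)
# dim(ds$beta)   # 5 x 1     (p x k)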

# # True Positives Rate
# tpr <- function(Y, Y_hat) {
#     sum(as.logical(Y_hat) & as.logical(Y)) / sum(as.logical(Y)) # TP / P
# }

# False Positives Rate
fpr <- function(Y, Y_hat) {
    sum(as.logical(Y_hat) & !Y) / sum(!Y) # FP / N
}

# False Negative Rate
fnr <- function(Y, Y_hat) {
    sum(!Y_hat & as.logical(Y)) / sum(as.logical(Y)) # FN / P
}

# False Rate (rate of false positives and negatives)
fr <- function(Y, Y_hat) {
    sum(as.logical(Y) != as.logical(Y_hat)) / length(Y)
}
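
# Example (not run): behaviour of the rates on a toy support pattern.
# Y     <- c(1, 0, 1, 0)    # true non-zero pattern
# Y_hat <- c(1, 1, 0, 0)    # estimated non-zero pattern
# fpr(Y, Y_hat)             # 0.5 = 1 false positive / 2 true zeros
# fnr(Y, Y_hat)             # 0.5 = 1 false negative / 2 true non-zeros
# fr(Y, Y_hat)              # 0.5 = 2 mismatches / 4 entries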

simulation.sparse <- function(scales, reps, n, p, t, k, r,
                              eps = 100 * .Machine$double.eps) {
    results <- vector('list', length(scales) * reps)
    i <- 0
    for (scale in scales) {
        for (rep in 1:reps) {
            cat(sprintf('\r%4d/%d for scale = %.2f', rep, reps, scale))
            ds <- simulateData.sparse(n, p, t, k, r, scale)
            # Formulate PFC-GEP for given dataset.
            X <- scale(ds$X, scale = FALSE, center = TRUE)
            Fy <- scale(ds$Fy, scale = FALSE, center = TRUE)
            Sigma <- crossprod(X) / nrow(X)
            P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
            Sigma_fit <- crossprod(X, P_Fy %*% X) / nrow(X)
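            # POI estimates the k * r leading (sparse) generalized eigenvectors
            # of the GEP  Sigma_fit v = lambda * Sigma v  set up above; the
            # estimate poi$Q is factorized into alpha, beta further below.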
            poi <- POI(Sigma_fit, Sigma, k * r, use.C = use.C)
            # Calc approx. alpha, beta and drop (near) "zero" entries from the
            # Kronecker factorization approximation.
            c(alpha, beta) %<-% approx.kronecker(poi$Q, dim(ds$alpha), dim(ds$beta))
            alpha[abs(alpha) < eps] <- 0
            beta[abs(beta) < eps] <- 0
            # Compare estimates against true alpha, beta.
            result <- list(
                scale = scale,
                lambda = poi$lambda,
                # alpha_tpr = tpr(ds$alpha, alpha),
                alpha_fpr = fpr(ds$alpha, alpha),
                alpha_fnr = fnr(ds$alpha, alpha),
                alpha_fr = fr(ds$alpha, alpha),
                # beta_tpr = tpr(ds$beta, beta),
                beta_fpr = fpr(ds$beta, beta),
                beta_fnr = fnr(ds$beta, beta),
                beta_fr = fr(ds$beta, beta)
            )
            # Component-wise validation (_c_ stands for component): a row of
            # alpha / beta counts as active if any of its entries is non-zero.
            if (ncol(alpha) > 1) {
                ds_c_alpha <- apply(!!ds$alpha, 1, any)
                c_alpha <- apply(!!alpha, 1, any)
                # result$alpha_c_tpr <- tpr(ds_c_alpha, c_alpha)
                result$alpha_c_fpr <- fpr(ds_c_alpha, c_alpha)
                result$alpha_c_fnr <- fnr(ds_c_alpha, c_alpha)
                result$alpha_c_fr <- fr(ds_c_alpha, c_alpha)
            }
            if (ncol(beta) > 1) {
                ds_c_beta <- apply(!!ds$beta, 1, any)
                c_beta <- apply(!!beta, 1, any)
                # result$beta_c_tpr <- tpr(ds_c_beta, c_beta)
                result$beta_c_fpr <- fpr(ds_c_beta, c_beta)
                result$beta_c_fnr <- fnr(ds_c_beta, c_beta)
                result$beta_c_fr <- fr(ds_c_beta, c_beta)
            }
            results[[i <- i + 1]] <- result
        }
        cat('\n')
    }
    # Restructure results list of lists as data.frame.
    results <- as.data.frame(t(sapply(results, function(res, cols) {
        unlist(res[cols])
    }, names(results[[1]]))))
    results$scale <- as.factor(results$scale)
    attr(results, 'params') <- list(
        reps = reps, n = n, p = p, t = t, k = k, r = r, eps = eps)
    results
}

reps <- 500
#                  n,  p, t, k, r
#                 ------------------
params <- list( c(100, 10, 5, 1, 2)
              , c(100,  7, 5, 1, 2)
              , c(100,  5, 3, 1, 2)
              , c(500, 10, 5, 1, 2)
              , c(500,  7, 5, 1, 2)
              , c(500,  5, 3, 1, 2)
)
scales <- seq(0.5, 6, 0.25)
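
# Run the simulation for each parameter setting: error rates are averaged over
# the replications within each noise scale and the aggregated results are saved
# to an RDS file named after the setting.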
for (param in params) {
    c(n, p, t, k, r) %<-% param
    results <- simulation.sparse(scales, reps, n, p, t, k, r)
    sim <- aggregate(results[, 'scale' != names(results)],
                     by = list(scale = results$scale), mean)
    attr(sim, 'params') <- attr(results, 'params')
    file.name <- sprintf("simulation_sparse_%d_%d_%d_%d_%d.rds", n, p, t, k, r)
    saveRDS(sim, file = file.name)
    cat(file.name, '\n')
    print(sim, digits = 2)
}