# Source Code.                                    # Loaded functions.
source('../tensor_predictors/multi_assign.R')     # %<-%
source('../tensor_predictors/approx_kronecker.R') # approx_kronecker
source('../tensor_predictors/poi.R')              # POI
source('../tensor_predictors/subspace.R')         # subspace
source('../tensor_predictors/random.R')           # rmvnorm

# Load C implementation of the 'FastPOI-C' subroutine.
# Required for using 'use.C = TRUE' in the POI method.
dyn.load('../tensor_predictors/poi.so')
# When 'use.C = FALSE' the POI method uses a base R implementation.
use.C = TRUE
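
# Simulates a dataset from the sparse inverse regression model used below:
# X = Fy_centered %*% t(kronecker(alpha, beta)) + scale * noise, where the true
# reduction matrices alpha (t x r) and beta (p x k) are sparse by construction
# (ones on the diagonal, zeros elsewhere).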
simulateData.sparse <- function(n, p, t, k, r, scale, degree = 2) {
    # Define true reduction matrices alpha, beta.
    alpha <- diag(1, t, r)
    beta <- diag(1, p, k)

    # Create true "random" covariance of inverse model.
    R <- matrix(rnorm((p * t)^2), p * t)        # random square matrix.
    sigma <- tcrossprod(R / sqrt(rowSums(R^2))) # sym. pos. def. with diag = 1.

    # Sample responses.
    y <- rnorm(n, 0, 1)
    # Equivalent to cbind(y^1, y^2, ..., y^degree).
    Fy <- t(vapply(y, `^`, double(degree), seq(degree)))

    # Compute X according to the inverse regression model.
    X <- tcrossprod(scale(Fy, scale = FALSE, center = TRUE), kronecker(alpha, beta))
    X <- X + (scale * rmvnorm(n, sigma = sigma))

    return(list(X = X, y = y, Fy = Fy, alpha = alpha, beta = beta))
}
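
# Example call (illustrative values only; they match the first entry of
# 'params' below, with an arbitrary noise scale):
#   ds <- simulateData.sparse(n = 100, p = 10, t = 5, k = 1, r = 2, scale = 1)
#   dim(ds$X)   # n x (p * t)
#   dim(ds$Fy)  # n x degree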

# Sparsity validation rates: an entry counts as "positive" when it is nonzero.
# # True Positives Rate
# tpr <- function(Y, Y_hat) {
#     sum(as.logical(Y_hat) & as.logical(Y)) / sum(as.logical(Y)) # TP / P
# }
# False Positives Rate
fpr <- function(Y, Y_hat) {
    sum(as.logical(Y_hat) & !Y) / sum(!Y) # FP / N
}
# False Negative Rate
fnr <- function(Y, Y_hat) {
    sum(!Y_hat & as.logical(Y)) / sum(as.logical(Y)) # FN / P
}
# False Rate (rate of false positives and negatives)
fr <- function(Y, Y_hat) {
    sum(as.logical(Y) != as.logical(Y_hat)) / length(Y)
}
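
# Runs the sparse simulation: for each noise scale and replication, draw a
# dataset, form the PFC-GEP, estimate the reduction with POI, factorize the
# estimate into alpha and beta via approx.kronecker, and record the false
# positive/negative rates of the recovered sparsity patterns.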

simulation.sparse <- function(scales, reps, n, p, t, k, r,
                              eps = 100 * .Machine$double.eps) {
    results <- vector('list', length(scales) * reps)

    i <- 0
    for (scale in scales) {
        for (rep in 1:reps) {
            cat(sprintf('\r%4d/%d for scale = %.2f', rep, reps, scale))

            ds <- simulateData.sparse(n, p, t, k, r, scale)
            # Formulate PFC-GEP for given dataset.
            X <- scale(ds$X, scale = FALSE, center = TRUE)
            Fy <- scale(ds$Fy, scale = FALSE, center = TRUE)
            Sigma <- crossprod(X) / nrow(X)
            P_Fy <- Fy %*% solve(crossprod(Fy), t(Fy))
            Sigma_fit <- crossprod(X, P_Fy %*% X) / nrow(X)

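            # (Sigma_fit, Sigma) define the PFC generalized eigenproblem,
            # presumably Sigma_fit v = lambda Sigma v; POI estimates its
            # leading k * r directions (see poi.R).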
            poi <- POI(Sigma_fit, Sigma, k * r, use.C = use.C)
            # Calc approx. alpha, beta and drop "zero" values from the
            # Kronecker factorization approximation.
            c(alpha, beta) %<-% approx.kronecker(poi$Q, dim(ds$alpha), dim(ds$beta))
            alpha[abs(alpha) < eps] <- 0
            beta[abs(beta) < eps] <- 0

            # Compare estimates against true alpha, beta.
            result <- list(
                scale = scale,
                lambda = poi$lambda,
                # alpha_tpr = tpr(ds$alpha, alpha),
                alpha_fpr = fpr(ds$alpha, alpha),
                alpha_fnr = fnr(ds$alpha, alpha),
                alpha_fr = fr(ds$alpha, alpha),
                # beta_tpr = tpr(ds$beta, beta),
                beta_fpr = fpr(ds$beta, beta),
                beta_fnr = fnr(ds$beta, beta),
                beta_fr = fr(ds$beta, beta)
            )
            # Component-wise validation (_c_ stands for component).
            if (ncol(alpha) > 1) {
                ds_c_alpha <- apply(!!ds$alpha, 1, any)
                c_alpha <- apply(!!alpha, 1, any)
                # result$alpha_c_tpr <- tpr(ds_c_alpha, c_alpha)
                result$alpha_c_fpr <- fpr(ds_c_alpha, c_alpha)
                result$alpha_c_fnr <- fnr(ds_c_alpha, c_alpha)
                result$alpha_c_fr <- fr(ds_c_alpha, c_alpha)
            }
            if (ncol(beta) > 1) {
                ds_c_beta <- apply(!!ds$beta, 1, any)
                c_beta <- apply(!!beta, 1, any)
                # result$beta_c_tpr <- tpr(ds_c_beta, c_beta)
                result$beta_c_fpr <- fpr(ds_c_beta, c_beta)
                result$beta_c_fnr <- fnr(ds_c_beta, c_beta)
                result$beta_c_fr <- fr(ds_c_beta, c_beta)
            }
            results[[i <- i + 1]] <- result
        }
        cat('\n')
    }

    # Restructure results list of lists as data.frame.
    results <- as.data.frame(t(sapply(results, function(res, cols) {
        unlist(res[cols])
    }, names(results[[1]]))))
    results$scale <- as.factor(results$scale)
    attr(results, 'params') <- list(
        reps = reps, n = n, p = p, t = t, k = k, r = r, eps = eps)

    results
}
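
# For every parameter configuration below, run the simulation over all noise
# scales, average the recorded rates over replications within each scale, and
# write the aggregated results to an .rds file named after the configuration.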
reps <- 500
#                  n,  p, t, k, r
#               -------------------
params <- list( c(100, 10, 5, 1, 2)
              , c(100,  7, 5, 1, 2)
              , c(100,  5, 3, 1, 2)
              , c(500, 10, 5, 1, 2)
              , c(500,  7, 5, 1, 2)
              , c(500,  5, 3, 1, 2)
)
scales <- seq(0.5, 6, 0.25)

for (param in params) {
    c(n, p, t, k, r) %<-% param
    results <- simulation.sparse(scales, reps, n, p, t, k, r)
    sim <- aggregate(results[, 'scale' != names(results)],
                     by = list(scale = results$scale), mean)
    attr(sim, 'params') <- attr(results, 'params')

    file.name <- sprintf("simulation_sparse_%d_%d_%d_%d_%d.rds", n, p, t, k, r)
    saveRDS(sim, file = file.name)

    cat(file.name, '\n')
    print(sim, digits = 2)
}