tensor_predictors/simulations/simulation_poi.R

112 lines
3.4 KiB
R

################################################################################
### LDA (sparse Linear Discrimina Analysis) ###
################################################################################
devtools::load_all('tensorPredictors/')
C <- function(rho, p) {
res <- matrix(rho, p, p)
diag(res) <- 1
res
}
R <- function(rho, p) {
rho^abs(outer(1:p, 1:p, `-`))
}
dataset <- function(nr) {
K <- 3 # Nr. Groups
n.i <- 30 # Sample group size for each of the K groups
n <- K * n.i # Sample size
p <- 200 # Nr. of predictors
# Generate test data
V <- cbind(matrix(c(
2, 1, 2, 1, 2,
1,-1, 1,-1, 1,
0, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
W <- cbind(matrix(c(
-1, 1, 1, 1, 1,
1,-1, 1,-1, 1,
1, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
if (nr == 1) { # Model 1
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, mu = rep(0, p)) + V[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 2) { # Model 2
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 3) { # Model 3
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 4) { # Model 4
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else if (nr == 5) { # Model 5
K <- 4
n <- K * n.i
W.tilde <- 2 * rbind(W, colMeans(W))
mu.tilde <- W.tilde %*% C(0.5, p)
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else {
stop("Unknown model nr.")
}
list(X = X, y = y, B = qr.Q(qr(B)))
}
# # Model 1
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C'))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C'))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE))
# })
# head(fit$vectors, 10)
count <- 0
nr.reps <- 100
sim <- replicate(nr.reps, {
res <- double(0)
for (model.nr in 1:5) {
for (method in c('POI-C', 'FastPOI-C')) {
for (use.C in c(FALSE, TRUE)) {
dist <- with(dataset(model.nr), {
fit <- with(GEP(X, y, 'lda'), {
POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = use.C)
})
# dist.subspace(B, fit$vectors, is.ortho = FALSE, normalize = TRUE)
dist.projection(B, fit$vectors)
})
names(dist) <- paste('M', model.nr, '-', method, '-', use.C)
res <- c(res, dist)
}
}
}
cat("Counter", (count <<- count + 1), "/", nr.reps, "\n")
res
})
(stats <- as.matrix(rowMeans(sim)))