tensor_predictors/simulations/simulation_poi.R

140 lines
4.2 KiB
R

################################################################################
### Sparce SIR against SIR ###
################################################################################
devtools::load_all('tensorPredictors/')
library(dr)
n <- 100
p <- 10
X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, `-`)))
y <- rowSums(X[, 1:3]) + rnorm(n, 0.5)
B <- as.matrix(1:p <= 3) / sqrt(3)
dr.sir <- dr(y ~ X, method = 'sir', numdir = ncol(B))
B.sir <- dr.sir$evectors[, seq_len(ncol(B)), drop = FALSE]
dist.projection(B, B.sir)
B.poi <- with(GEP(X, y, 'sir'), {
POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE)$vectors
})
dist.projection(B, B.poi)
################################################################################
### LDA (sparse Linear Discrimina Analysis) ###
################################################################################
devtools::load_all('tensorPredictors/')
C <- function(rho, p) {
res <- matrix(rho, p, p)
diag(res) <- 1
res
}
R <- function(rho, p) {
rho^abs(outer(1:p, 1:p, `-`))
}
dataset <- function(nr) {
K <- 3 # Nr. Groups
n.i <- 30 # Sample group size for each of the K groups
n <- K * n.i # Sample size
p <- 200 # Nr. of predictors
# Generate test data
V <- cbind(matrix(c(
2, 1, 2, 1, 2,
1,-1, 1,-1, 1,
0, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
W <- cbind(matrix(c(
-1, 1, 1, 1, 1,
1,-1, 1,-1, 1,
1, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
if (nr == 1) { # Model 1
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, mu = rep(0, p)) + V[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 2) { # Model 2
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 3) { # Model 3
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 4) { # Model 4
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else if (nr == 5) { # Model 5
K <- 4
n <- K * n.i
W.tilde <- 2 * rbind(W, colMeans(W))
mu.tilde <- W.tilde %*% C(0.5, p)
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else {
stop("Unknown model nr.")
}
list(X = X, y = y, B = qr.Q(qr(B)))
}
# Model 1
fit <- with(dataset(1), {
with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C'))
})
fit <- with(dataset(1), {
with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C'))
})
fit <- with(dataset(1), {
with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE))
})
fit <- with(dataset(1), {
with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE))
})
# head(fit$vectors, 10)
count <- 0
nr.reps <- 100
sim <- replicate(nr.reps, {
res <- double(0)
for (model.nr in 1:5) {
with(dataset(model.nr), {
for (method in c('POI-C', 'FastPOI-C')) {
for (use.C in c(FALSE, TRUE)) {
fit <- with(GEP(X, y, 'lda'), {
POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = use.C)
})
dist <- dist.projection(B, fit$vectors)
names(dist) <- paste('M', model.nr, '-', method, '-', use.C)
res <<- c(res, dist)
}
}
fit <- with(GEP(X, y, 'lda'), {
solve.gep(lhs, rhs, ncol(B))
})
dist <- dist.projection(B, fit$vectors)
names(dist) <- paste('M', model.nr, '- solve -', use.C)
res <<- c(res, dist)
})
}
cat("Counter", (count <<- count + 1), "/", nr.reps, "\n")
res
})
(stats <- as.matrix(rowMeans(sim)))