library(tensorPredictors)


# Set PRNG seed to the first 4 digits of the golden ratio for reproducibility
set.seed(1618L, "Mersenne-Twister", "Inversion", "Rejection")

### Simulation configuration
reps <- 100                     # number of simulation replications
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`

# Parameterize the "true" reductions for a given mode dimension `pj`
gen.beta <- function(pj) {
    as.matrix((-1)^seq_len(pj))
}
# the true precision matrices are simply diag(pj)
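
# For illustration (not part of the original script): gen.beta(3) is the 3 x 1
# matrix with alternating entries -1, 1, -1, so the "true" reduction direction
# follows the same sign pattern for every mode dimension pj.
# stopifnot(all(gen.beta(3) == c(-1, 1, -1)))
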
# Sampling from the conditional matrix normal `X | Y = y ~ N(mu(y), I_{p1 p2})`
sample.data <- function(sample.size, betas, Omegas, eta1 = 0) {
    # response is a standard normal variable
    y <- rnorm(sample.size)
    # F(y) is identical to y, except that it is a tensor (last axis is the sample axis)
    F <- array(y, dim = c(mapply(ncol, betas), sample.size))

    # sample predictors from the tensor normal X | Y = y (last axis is the sample axis)
    sample.axis <- length(betas) + 1L
    Deltas <- Map(solve, Omegas)                            # normal covariances
    mu_y <- mlm(mlm(F, betas) + as.vector(eta1), Deltas)    # conditional mean
    X <- mu_y + rtensornorm(sample.size, 0, Deltas, sample.axis)           # predictors X

    list(X = X, F = F, y = y, sample.axis = sample.axis)
}
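
# Quick sanity check of the sampler (illustration only, commented out):
# str(sample.data(10, list(gen.beta(3), gen.beta(3)), list(diag(3), diag(3))))
# # X is a 3 x 3 x 10 array, F a 1 x 1 x 10 array, y a numeric vector of length 10.
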
# Open simulation CSV log file
log.name <- format(Sys.time(), "sim_efficiency-%Y%m%dT%H%M.csv")
log.file <- file(log.name, "w")
# Counts the number of writes; used only to emit the CSV header on the first write
log.writes <- 0L

# Setting p1 = p2 = pj  (note that in the paper `p = p1 p2`)
mode.dims <- round(1.2^unique(round(logb(2:200, 1.2))))  # roughly log-spaced mode dimensions
for (pj in mode.dims) {

    betas.true <- list(gen.beta(pj), gen.beta(pj))
    B.true <- kronecker(betas.true[[2]], betas.true[[1]])
    Omegas.true <- list(diag(pj), diag(pj))

    for (sample.size in sample.sizes) {

        sim <- sapply(seq_len(reps), function(.) {
            c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas.true, Omegas.true)

            # OLS regression of y on vec(X) as a baseline (errors are recorded as NA)
            ds.lm <- tryCatch({
                B.lm <- unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
                dist.subspace(B.true, B.lm, normalize = TRUE)
            }, error = function(.) NA)

            # GMLM fit to the vectorized predictors (a single mode of dimension pj^2)
            c(., betas.vec, Omegas.vec) %<-% gmlm_tensor_normal(`dim<-`(X, c(pj^2, sample.size)), drop(F))

            # GMLM fit to the matrix-valued predictors
            c(., betas.gmlm, Omegas.gmlm) %<-% gmlm_tensor_normal(X, F)

            # GMLM fit with the precision matrices projected onto scaled identities
            c(., betas.mani, Omegas.mani) %<-% gmlm_tensor_normal(X, F,
                proj.Omegas = rep(list(function(O) { diag(mean(diag(O)), nrow(O)) }), 2)
            )

            # subspace distances between the true and the estimated reductions
            ds.vec  <- dist.subspace(B.true, betas.vec[[1]], normalize = TRUE)
            ds.gmlm <- dist.subspace(betas.true, betas.gmlm, normalize = TRUE)   # equivalent to dist.subspace(B.true, B.gmlm)
            ds.mani <- dist.subspace(betas.true, betas.mani, normalize = TRUE)

            c(lm = ds.lm, vec = ds.vec, gmlm = ds.gmlm, mani = ds.mani)
        })

        sim <- as.data.frame(t(sim))
        sim$sample.size <- sample.size
        sim$pj <- pj

        # boxplot(t(sim))
        # summary(t(sim))

        # Append the current simulation results to the log file
        write.table(sim, file = log.file, sep = ",",
            row.names = FALSE, col.names = (log.writes <- log.writes + 1L) < 2L
        )

        # print progress
        cat(sprintf("mode dim (%d): %d/%d - sample size (%d): %d/%d\n",
            pj, which(pj == mode.dims), length(mode.dims),
            sample.size, which(sample.size == sample.sizes), length(sample.sizes)
        ))
    }
}
close(log.file)

# Read simulation data back in
sim <- read.csv(log.name)

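# Optional quick look at the results (illustration only, not part of the
# original analysis): mean subspace distance per (pj, sample.size) cell.
# aggregate(cbind(vec, gmlm, mani) ~ pj + sample.size, data = sim, FUN = mean)
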
# Earlier draft of the plot below, kept for reference:
# with(aggregate(sim, . ~ sample.size + pj, mean), {
#     plot(range(pj), range(c(vec, gmlm, mani)), type = "n",
#         main = "Simulation -- Efficiency Gain",
#         xlab = expression(tilde(p)),
#         ylab = expression(d(B, hat(B)))
#     )
#     for (sz in sort(unique(sample.size))) {
#         i <- order(pj)[sample.size == sz]
#         lines(pj[i], vec[i], type = "b", pch = 16, col = sz %/% 100, lty = 1)
#         lines(pj[i], gmlm[i], type = "b", pch = 16, col = sz %/% 100, lty = 2)
#         lines(pj[i], mani[i], type = "b", pch = 16, col = sz %/% 100, lty = 3)
#     }
#     sd <- aggregate(sim, . ~ sample.size + pj, sd)
# })

with(merge(
    aggregate(. ~ sample.size + pj, data = sim[names(sim) != "lm"], FUN = mean),
    aggregate(. ~ sample.size + pj, data = sim[names(sim) != "lm"], FUN = sd),
    by = c("sample.size", "pj"),
    suffixes = c("", ".sd"),
    all = FALSE
), {
    plot(range(pj), range(c(vec, gmlm, mani)), type = "n",
        main = "Simulation -- Efficiency Gain",
        xlab = expression(tilde(p)),
        ylab = expression(d(B, hat(B)))
    )
    # colors <- c("#cf7d03ff", "#002d8d", "#006e18")
    # col.idx <- 0L
    lty.idx <- 0L
    # First pass: shaded +/- one standard deviation bands per method
    for (sz in sort(unique(sample.size))) {
        # row indices with sample size `sz`, ordered by increasing `pj`
        i <- order(pj)[(sample.size == sz)[order(pj)]]
        # polygon(c(pj[i], rev(pj[i])), c(lm[i] + lm.sd[i], rev(lm[i] - lm.sd[i])),
        #     col = paste0("#cf7d03", "50"), border = NA
        # )
        polygon(c(pj[i], rev(pj[i])), c(vec[i] + vec.sd[i], rev(vec[i] - vec.sd[i])),
            col = paste0("#b30303", "50"), border = NA
        )
        polygon(c(pj[i], rev(pj[i])), c(gmlm[i] + gmlm.sd[i], rev(gmlm[i] - gmlm.sd[i])),
            col = paste0("#002d8d", "50"), border = NA
        )
        polygon(c(pj[i], rev(pj[i])), c(mani[i] + mani.sd[i], rev(mani[i] - mani.sd[i])),
            col = paste0("#006e18", "50"), border = NA
        )
    }
    # Second pass: mean curves, one line type per sample size
    lty.idx <- 1L
    for (sz in sort(unique(sample.size))) {
        i <- order(pj)[(sample.size == sz)[order(pj)]]
        # lines(pj[i], lm[i],   type = "b", pch = 16, col = "#cf7d03", lty = lty.idx, lwd = 2)
        lines(pj[i], vec[i],  type = "b", pch = 16, col = "#b30303", lty = lty.idx, lwd = 2)
        lines(pj[i], gmlm[i], type = "b", pch = 16, col = "#002d8d", lty = lty.idx, lwd = 2)
        lines(pj[i], mani[i], type = "b", pch = 16, col = "#006e18", lty = lty.idx, lwd = 2)
        lty.idx <- lty.idx + 1L
    }
})

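# A legend is not drawn above; a minimal sketch (colors taken from the plotting
# block, labels are my reading of the three estimators, not from the source):
# legend("topright", bty = "n", lwd = 2, col = c("#b30303", "#002d8d", "#006e18"),
#     legend = c("vectorized GMLM", "GMLM", "GMLM, isotropic Omegas"))
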
# Scratch: alternative ways to compute the OLS baseline coefficients
# unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
# unname(lm(drop(F) ~ t(`dim<-`(X, c(pj^2, sample.size))) - 1)$coefficients)