add: Efficiency simulation requested by Annals reviewer

2025-11-03 11:28:39 +01:00 · 2025-11-03 11:28:39 +01:00 · 29711908a2
commit 29711908a2
parent 3724166e8d
1 changed files with 14 additions and 61 deletions
--- a/sim/sim_efficiency.R
+++ b/sim/sim_efficiency.R
@ -39,7 +39,7 @@ log.file <- file(log.name, "w")
 log.writes <- 0L

 # Setting p1 = p2 = pj  (note, in the paper `p = p1 p2`)
-mode.dims <- round(1.2^unique(round(logb(2:200, 1.2))))
+mode.dims <- round(1.2^unique(round(logb(2:32, 1.2))))
 for (pj in mode.dims) {

    betas.true <- list(gen.beta(pj), gen.beta(pj))
@ -52,11 +52,11 @@ for (pj in mode.dims) {
            c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas.true, Omegas.true)

            ds.lm <- tryCatch({
-                unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
+                B.lm <- unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
                dist.subspace(B.true, B.lm, normalize = TRUE)
            }, error = function(.) NA)

-            c(., betas.vec, Omegas.vec) %<-% gmlm_tensor_normal(`dim<-`(X, c(pj^2, sample.size)), drop(F))
+            # c(., betas.vec, Omegas.vec) %<-% gmlm_tensor_normal(`dim<-`(X, c(pj^2, sample.size)), drop(F))

            c(., betas.gmlm, Omegas.gmlm) %<-% gmlm_tensor_normal(X, F)

@ -64,8 +64,8 @@ for (pj in mode.dims) {
                proj.Omegas = rep(list(function(O) { diag(mean(diag(O)), nrow(O)) }), 2)
            )

-            
-            ds.vec  <- dist.subspace(B.true, betas.vec[[1]], normalize = TRUE)
+            # ds.vec  <- dist.subspace(B.true, betas.vec[[1]], normalize = TRUE)
+            ds.vec <- NA
            ds.gmlm <- dist.subspace(betas.true, betas.gmlm, normalize = TRUE)   # equiv to R> dist.subspace(B.true, B.gmlm)
            ds.mani <- dist.subspace(betas.true, betas.mani, normalize = TRUE)

@ -76,9 +76,6 @@ for (pj in mode.dims) {
        sim$sample.size <- sample.size
        sim$pj <- pj

-        # boxplot(t(sim))
-        # summary(t(sim))
-
        # Append current simulation results to log-file
        write.table(sim, file = log.file, sep = ",",
            row.names = FALSE, col.names = (log.writes <- log.writes + 1L) < 2L
@ -97,41 +94,25 @@ close(log.file)
 # Read simulation data back in
 sim <- read.csv(log.name)

-# with(aggregate(sim, . ~ sample.size + pj, mean), {
-#     plot(range(pj), range(c(vec, gmlm, mani)), type = "n",
-#         main = "Simulation -- Efficiency Gain",
-#         xlab = expression(tilde(p)),
-#         ylab = expression(d(B, hat(B)))
-#     )
-#     for (sz in sort(unique(sample.size))) {
-#         i <- order(pj)[sample.size == sz]
-#         lines(pj[i], vec[i], type = "b", pch = 16, col = sz %/% 100, lty = 1)
-#         lines(pj[i], gmlm[i], type = "b", pch = 16, col = sz %/% 100, lty = 2)
-#         lines(pj[i], mani[i], type = "b", pch = 16, col = sz %/% 100, lty = 3)
-#     }
-#     sd <- aggregate(sim, . ~ sample.size + pj, sd)
-# })
-
 with(merge(
-    aggregate(sim[names(sim) != "lm"], . ~ sample.size + pj, mean),
-    aggregate(sim[names(sim) != "lm"], . ~ sample.size + pj, sd),
+    aggregate(sim, . ~ sample.size + pj, mean, na.rm = TRUE, na.action = na.pass),
+    aggregate(sim, . ~ sample.size + pj, sd, na.rm = TRUE, na.action = na.pass),
    by = c("sample.size", "pj"),
    suffixes = c("", ".sd"),
    all = FALSE
 ), {
-    plot(range(pj), range(c(vec, gmlm, mani)), type = "n",
+    plot(range(pj), 0:1, type = "n",
        main = "Simulation -- Efficiency Gain",
        xlab = expression(tilde(p)),
        ylab = expression(d(B, hat(B)))
    )
-    # colors <- c("#cf7d03ff", "#002d8d", "#006e18")
-    # col.idx <- 0L
-    lty.idx <- 0L
    for (sz in sort(unique(sample.size))) {
        i <- order(pj)[(sample.size == sz)[order(pj)]]
-        # polygon(c(pj[i], rev(pj[i])), c(lm[i] + lm.sd[i], rev(lm[i] - lm.sd[i])),
-        #     col = paste0("#cf7d03", "50"), border = NA
-        # )
+        i <- i[!(is.na(lm[i]) | is.na(lm.sd[i]))]
+        polygon(c(pj[i], rev(pj[i])), c(lm[i] + lm.sd[i], rev(lm[i] - lm.sd[i])),
+            col = paste0("#cf7d03", "50"), border = NA
+        )
+        i <- order(pj)[(sample.size == sz)[order(pj)]]
        polygon(c(pj[i], rev(pj[i])), c(vec[i] + vec.sd[i], rev(vec[i] - vec.sd[i])),
            col = paste0("#b30303", "50"), border = NA
        )
@ -145,38 +126,10 @@ with(merge(
    lty.idx <- 1L
    for (sz in sort(unique(sample.size))) {
        i <- order(pj)[(sample.size == sz)[order(pj)]]
-        # lines(pj[i], lm[i],   type = "b", pch = 16, col = "#cf7d03", lty = lty.idx, lwd = 2)
+        lines(pj[i], lm[i],   type = "b", pch = 16, col = "#cf7d03", lty = lty.idx, lwd = 2)
        lines(pj[i], vec[i],  type = "b", pch = 16, col = "#b30303", lty = lty.idx, lwd = 2)
        lines(pj[i], gmlm[i], type = "b", pch = 16, col = "#002d8d", lty = lty.idx, lwd = 2)
        lines(pj[i], mani[i], type = "b", pch = 16, col = "#006e18", lty = lty.idx, lwd = 2)
        lty.idx <- lty.idx + 1L
    }
 })
-
-
-
-# unname(lm.fit(t(`dim<-`(X, c(pj^2, sample.size))), drop(F))$coefficients)
-# unname(lm(drop(F) ~ t(`dim<-`(X, c(pj^2, sample.size))) - 1)$coefficients)
-
-
-
-
-# require(utils)
-
-# set.seed(129)
-
-# n <- 7 ; p <- 2
-# X <- matrix(rnorm(n * p), n, p) # no intercept!
-# y <- rnorm(n)
-# w <- rnorm(n)^2
-
-# str(lmw <- lm.wfit(x = X, y = y, w = w))
-
-# str(lm. <- lm.fit (x = X, y = y))
-
-
-# if(require("microbenchmark")) {
-#     mb <- microbenchmark(lm(y~X), lm.fit(X,y), .lm.fit(X,y))
-#     print(mb)
-#     boxplot(mb, notch=TRUE)
-# }