library(tensorPredictors) # Data set sample size in every simulation sample.size <- 500L # Nr. of per simulation replications reps <- 100L # number of observation/response axes (order of the tensors) orders <- c(2L, 3L, 4L) # auto correlation coefficient for the mode-wise auto scatter matrices `Omegas` rhos <- seq(0, 0.8, by = 0.1) setwd("~/Work/tensorPredictors/sim/") base.name <- format(Sys.time(), "failure_of_tsir-%Y%m%dT%H%M") # data sampling routine sample.data <- function(sample.size, betas, Omegas) { dimF <- mapply(ncol, betas) # responce is a standard normal variable y <- rnorm(sample.size) y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF)) F <- t(outer(y, as.vector(y.pow), `^`)) dim(F) <- c(dimF, sample.size) # sample predictors from tensor normal X | Y = y (last axis is sample axis) sample.axis <- length(betas) + 1L Sigmas <- Map(solve, Omegas) mu_y <- mlm(F, Map(`%*%`, Sigmas, betas)) X <- mu_y + rtensornorm(sample.size, 0, Sigmas, sample.axis) list(X = X, F = F, y = y, sample.axis = sample.axis) } # Create a CSV logger to write simulation results to log.file <- paste(base.name, "csv", sep = ".") logger <- CSV.logger( file.name = log.file, header = c("rho", "order", "sample.size", "rep", "beta.version", outer( "dist.subspace", c("gmlm", "tsir", "sir"), paste, sep = "." )) ) for (order in orders) { # setup problem dimensions given the tensor order dimX <- rep(2L, order) dimF <- rep(2L, order) # as well as the projections onto rank 1 matrices proj.betas <- Map(.projRank, rep(1L, order)) for (rho in rhos) { # Scatter matrices (Omegas) given as autoregression structure `AR(rho)` Omegas <- Map(function(p) rho^abs(outer(1:p, 1:p, `-`)), dimX) # Version 1 betas (reductions) beta.version <- 1L betas <- Map(function(nr, nc) { `[<-`(matrix(0, nr, nc), 1, , 1) }, dimX, dimF) B.true <- Reduce(kronecker, rev(betas)) # `(B.min %*% vec(X)) | Y = y` proportional to `(1 + y)^order`) B.min.true <- B.true[, 1L, drop = FALSE] # Version 1: repeated simulations for (rep in seq_len(reps)) { # Sample training data c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas) # Fit models to provided data fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas) fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis) fit.sir <- SIR(mat(X, sample.axis), y, d = 1L) # Extract minimal reduction matrices from fitted models B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE] B.tsir <- Reduce(kronecker, rev(fit.tsir)) B.sir <- fit.sir # Compute estimation to true minimal `B` distance dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE) dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE) dist.subspace.sir <- dist.subspace(B.min.true, B.sir, normalize = TRUE) # Write to simulation log file (CSV file) logger() # and print progress cat(sprintf("order %d, rho %.2f, version %d: rep: %d/%d\n", order, rho, beta.version, rep, reps)) } # Version 2 betas (reductions) beta.version <- 2L betas <- Map(function(nr, nc) { tcrossprod((-1)^seq_len(nr), (-1)^seq_len(nc)) }, dimX, dimF) B.true <- Reduce(kronecker, rev(betas)) # `(B.min %*% vec(X)) | Y = y` proportional to `(1 - y)^order`) B.min.true <- B.true[, 1L, drop = FALSE] # Version 2: repeated simulations (identical to Version 1) for (rep in seq_len(reps)) { # Sample training data c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas) # Fit models to provided data fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas) fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis) fit.sir <- SIR(mat(X, sample.axis), y, d = 1L) # Extract minimal reduction matrices from fitted models B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE] B.tsir <- Reduce(kronecker, rev(fit.tsir)) B.sir <- fit.sir # Compute estimation to true minimal `B` distance dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE) dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE) dist.subspace.sir <- dist.subspace(B.min.true, B.sir, normalize = TRUE) # Write to simulation log file (CSV file) logger() # and print progress cat(sprintf("order %d, rho %.2f, version %d: rep: %d/%d\n", order, rho, beta.version, rep, reps)) } } } ### read simulation results and generate plots if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) } # Read simulation results back from log file sim <- read.csv(log.file) # Source utility function used in most simulations (extracted for convenience) source("./sim_utils.R") # aggregate results grouping <- sim[c("rho", "order", "beta.version")] sim.dist <- sim[startsWith(names(sim), "dist")] aggr <- aggregate(sim.dist, grouping, mean) # reset (possible lost) config orders <- sort(unique(aggr$order)) rhos <- sort(unique(aggr$rho)) # new grouping for the aggregates layout(rbind( matrix(seq_len(2 * length(orders)), ncol = 2), 2 * length(orders) + 1 ), heights = c(rep(6L, length(orders)), 1L)) for (group in split(aggr, aggr[c("order", "beta.version")])) { order <- group$order[[1]] beta.version <- group$beta.version[[1]] rho <- group$rho plot(range(rho), 0:1, main = sprintf("V%d, order %d", beta.version, order), type = "n", bty = "n", axes = FALSE, xlab = expression(rho), ylab = "Subspace Distance") axis(1, at = rho) axis(2, at = seq(0, 1, by = 0.2)) with(group, { lines(rho, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16) lines(rho, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16) lines(rho, dist.subspace.sir, col = col.methods["sir"], lwd = 2, type = "b", pch = 16) }) if (order == 3L && beta.version == 2L) { abline(v = 0.5, lty = "dotted", col = "black") abline(h = group$dist.subspace.tsir[which(rho == 0.5)], lty = "dotted", col = "black") } } methods <- c("GMLM", "TSIR", "SIR") restor.par <- par( fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(1, 0, 0, 0), new = TRUE ) plot(0, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "") legend("bottom", col = col.methods[tolower(methods)], legend = methods, horiz = TRUE, lty = 1, bty = "n", lwd = c(3, 2, 2), pch = 16) par(restor.par) # new grouping for the aggregates layout(rbind( matrix(seq_len(2 * 3), ncol = 2), 2 * 3 + 1 ), heights = c(rep(6L, 3), 1L)) for (group in split(aggr, aggr[c("rho", "beta.version")])) { rho <- group$rho[[1]] beta.version <- group$beta.version[[1]] if (!(rho %in% c(0, .5, .8))) { next } order <- group$order plot(range(order), 0:1, main = sprintf("V%d, rho %.1f", beta.version, rho), type = "n", bty = "n", axes = FALSE, xlab = expression(order), ylab = "Subspace Distance") axis(1, at = order) axis(2, at = seq(0, 1, by = 0.2)) with(group, { lines(order, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16) lines(order, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16) lines(order, dist.subspace.sir, col = col.methods["sir"], lwd = 2, type = "b", pch = 16) }) if (rho == 0.5 && beta.version == 2L) { abline(v = 0.5, lty = "dotted", col = "black") abline(h = group$dist.subspace.tsir[which(order == 3L)], lty = "dotted", col = "black") } } methods <- c("GMLM", "TSIR", "SIR") restor.par <- par( fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(1, 0, 0, 0), new = TRUE ) plot(0, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "") legend("bottom", col = col.methods[tolower(methods)], legend = methods, horiz = TRUE, lty = 1, bty = "n", lwd = c(3, 2, 2), pch = 16) par(restor.par) ### Version 1 # 2'nd order # 1 + 2 y + y^2 # (1 + y)^2 # 3'rd order # 1 + 3 y + 3 y^2 + y^4 # (1 + y)^3 # 4'th order # 1 + 4 y + 6 y^2 + 4 y^3 + y^4 # (1 + y)^4 ### Version 2 # 2'nd order # 1 - 2 y + y^2 # (1 - y)^2 = 1 - 2 y + y^2 # 3'rd order # 1 - 3 y + 3 y^2 - y^3 # -(y - 1)^3 # 4'th order # 1 - 4 y + 6 y^2 - 4 y^3 + y^4 # (y - 1)^4