261 lines
8.8 KiB
R
261 lines
8.8 KiB
R
|
library(tensorPredictors)
|
||
|
|
||
|
# Data set sample size in every simulation
|
||
|
sample.size <- 500L
|
||
|
# Nr. of per simulation replications
|
||
|
reps <- 100L
|
||
|
# number of observation/response axes (order of the tensors)
|
||
|
orders <- c(2L, 3L, 4L)
|
||
|
# auto correlation coefficient for the mode-wise auto scatter matrices `Omegas`
|
||
|
rhos <- seq(0, 0.8, by = 0.1)
|
||
|
|
||
|
|
||
|
setwd("~/Work/tensorPredictors/sim/")
|
||
|
base.name <- format(Sys.time(), "failure_of_tsir-%Y%m%dT%H%M")
|
||
|
|
||
|
# data sampling routine
|
||
|
sample.data <- function(sample.size, betas, Omegas) {
|
||
|
dimF <- mapply(ncol, betas)
|
||
|
|
||
|
# responce is a standard normal variable
|
||
|
y <- rnorm(sample.size)
|
||
|
y.pow <- Reduce(function(a, b) outer(a, b, `+`), Map(seq, 0L, len = dimF))
|
||
|
F <- t(outer(y, as.vector(y.pow), `^`))
|
||
|
dim(F) <- c(dimF, sample.size)
|
||
|
|
||
|
# sample predictors from tensor normal X | Y = y (last axis is sample axis)
|
||
|
sample.axis <- length(betas) + 1L
|
||
|
Sigmas <- Map(solve, Omegas)
|
||
|
mu_y <- mlm(F, Map(`%*%`, Sigmas, betas))
|
||
|
X <- mu_y + rtensornorm(sample.size, 0, Sigmas, sample.axis)
|
||
|
|
||
|
list(X = X, F = F, y = y, sample.axis = sample.axis)
|
||
|
}
|
||
|
|
||
|
# Create a CSV logger to write simulation results to
|
||
|
log.file <- paste(base.name, "csv", sep = ".")
|
||
|
logger <- CSV.logger(
|
||
|
file.name = log.file,
|
||
|
header = c("rho", "order", "sample.size", "rep", "beta.version", outer(
|
||
|
"dist.subspace", c("gmlm", "tsir", "sir"),
|
||
|
paste, sep = "."
|
||
|
))
|
||
|
)
|
||
|
|
||
|
for (order in orders) {
|
||
|
# setup problem dimensions given the tensor order
|
||
|
dimX <- rep(2L, order)
|
||
|
dimF <- rep(2L, order)
|
||
|
|
||
|
# as well as the projections onto rank 1 matrices
|
||
|
proj.betas <- Map(.projRank, rep(1L, order))
|
||
|
|
||
|
for (rho in rhos) {
|
||
|
# Scatter matrices (Omegas) given as autoregression structure `AR(rho)`
|
||
|
Omegas <- Map(function(p) rho^abs(outer(1:p, 1:p, `-`)), dimX)
|
||
|
|
||
|
# Version 1 betas (reductions)
|
||
|
beta.version <- 1L
|
||
|
betas <- Map(function(nr, nc) {
|
||
|
`[<-`(matrix(0, nr, nc), 1, , 1)
|
||
|
}, dimX, dimF)
|
||
|
B.true <- Reduce(kronecker, rev(betas))
|
||
|
# `(B.min %*% vec(X)) | Y = y` proportional to `(1 + y)^order`)
|
||
|
B.min.true <- B.true[, 1L, drop = FALSE]
|
||
|
|
||
|
# Version 1: repeated simulations
|
||
|
for (rep in seq_len(reps)) {
|
||
|
# Sample training data
|
||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
|
||
|
|
||
|
# Fit models to provided data
|
||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
|
||
|
fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis)
|
||
|
fit.sir <- SIR(mat(X, sample.axis), y, d = 1L)
|
||
|
|
||
|
# Extract minimal reduction matrices from fitted models
|
||
|
B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
|
||
|
B.tsir <- Reduce(kronecker, rev(fit.tsir))
|
||
|
B.sir <- fit.sir
|
||
|
|
||
|
# Compute estimation to true minimal `B` distance
|
||
|
dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE)
|
||
|
dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE)
|
||
|
dist.subspace.sir <- dist.subspace(B.min.true, B.sir, normalize = TRUE)
|
||
|
|
||
|
# Write to simulation log file (CSV file)
|
||
|
logger()
|
||
|
|
||
|
# and print progress
|
||
|
cat(sprintf("order %d, rho %.2f, version %d: rep: %d/%d\n",
|
||
|
order, rho, beta.version, rep, reps))
|
||
|
}
|
||
|
|
||
|
|
||
|
# Version 2 betas (reductions)
|
||
|
beta.version <- 2L
|
||
|
betas <- Map(function(nr, nc) {
|
||
|
tcrossprod((-1)^seq_len(nr), (-1)^seq_len(nc))
|
||
|
}, dimX, dimF)
|
||
|
B.true <- Reduce(kronecker, rev(betas))
|
||
|
# `(B.min %*% vec(X)) | Y = y` proportional to `(1 - y)^order`)
|
||
|
B.min.true <- B.true[, 1L, drop = FALSE]
|
||
|
|
||
|
# Version 2: repeated simulations (identical to Version 1)
|
||
|
for (rep in seq_len(reps)) {
|
||
|
# Sample training data
|
||
|
c(X, F, y, sample.axis) %<-% sample.data(sample.size, betas, Omegas)
|
||
|
|
||
|
# Fit models to provided data
|
||
|
fit.gmlm <- gmlm_tensor_normal(X, F, sample.axis = sample.axis, proj.betas = proj.betas)
|
||
|
fit.tsir <- TSIR(X, y, d = rep(1L, order), sample.axis = sample.axis)
|
||
|
fit.sir <- SIR(mat(X, sample.axis), y, d = 1L)
|
||
|
|
||
|
# Extract minimal reduction matrices from fitted models
|
||
|
B.gmlm <- qr.Q(qr(Reduce(kronecker, rev(fit.gmlm$betas))))[, 1L, drop = FALSE]
|
||
|
B.tsir <- Reduce(kronecker, rev(fit.tsir))
|
||
|
B.sir <- fit.sir
|
||
|
|
||
|
# Compute estimation to true minimal `B` distance
|
||
|
dist.subspace.gmlm <- dist.subspace(B.min.true, B.gmlm, normalize = TRUE)
|
||
|
dist.subspace.tsir <- dist.subspace(B.min.true, B.tsir, normalize = TRUE)
|
||
|
dist.subspace.sir <- dist.subspace(B.min.true, B.sir, normalize = TRUE)
|
||
|
|
||
|
# Write to simulation log file (CSV file)
|
||
|
logger()
|
||
|
|
||
|
# and print progress
|
||
|
cat(sprintf("order %d, rho %.2f, version %d: rep: %d/%d\n",
|
||
|
order, rho, beta.version, rep, reps))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
### read simulation results and generate plots
|
||
|
if (!interactive()) { pdf(file = paste(base.name, "pdf", sep = ".")) }
|
||
|
|
||
|
# Read simulation results back from log file
|
||
|
sim <- read.csv(log.file)
|
||
|
|
||
|
# Source utility function used in most simulations (extracted for convenience)
|
||
|
source("./sim_utils.R")
|
||
|
|
||
|
# aggregate results
|
||
|
grouping <- sim[c("rho", "order", "beta.version")]
|
||
|
sim.dist <- sim[startsWith(names(sim), "dist")]
|
||
|
aggr <- aggregate(sim.dist, grouping, mean)
|
||
|
|
||
|
# reset (possible lost) config
|
||
|
orders <- sort(unique(aggr$order))
|
||
|
rhos <- sort(unique(aggr$rho))
|
||
|
|
||
|
# new grouping for the aggregates
|
||
|
layout(rbind(
|
||
|
matrix(seq_len(2 * length(orders)), ncol = 2),
|
||
|
2 * length(orders) + 1
|
||
|
), heights = c(rep(6L, length(orders)), 1L))
|
||
|
|
||
|
for (group in split(aggr, aggr[c("order", "beta.version")])) {
|
||
|
order <- group$order[[1]]
|
||
|
beta.version <- group$beta.version[[1]]
|
||
|
|
||
|
rho <- group$rho
|
||
|
|
||
|
plot(range(rho), 0:1, main = sprintf("V%d, order %d", beta.version, order),
|
||
|
type = "n", bty = "n", axes = FALSE, xlab = expression(rho), ylab = "Subspace Distance")
|
||
|
axis(1, at = rho)
|
||
|
axis(2, at = seq(0, 1, by = 0.2))
|
||
|
with(group, {
|
||
|
lines(rho, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16)
|
||
|
lines(rho, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16)
|
||
|
lines(rho, dist.subspace.sir, col = col.methods["sir"], lwd = 2, type = "b", pch = 16)
|
||
|
})
|
||
|
if (order == 3L && beta.version == 2L) {
|
||
|
abline(v = 0.5, lty = "dotted", col = "black")
|
||
|
abline(h = group$dist.subspace.tsir[which(rho == 0.5)],
|
||
|
lty = "dotted", col = "black")
|
||
|
}
|
||
|
}
|
||
|
methods <- c("GMLM", "TSIR", "SIR")
|
||
|
restor.par <- par(
|
||
|
fig = c(0, 1, 0, 1),
|
||
|
oma = c(0, 0, 0, 0),
|
||
|
mar = c(1, 0, 0, 0),
|
||
|
new = TRUE
|
||
|
)
|
||
|
plot(0, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
|
||
|
legend("bottom", col = col.methods[tolower(methods)], legend = methods,
|
||
|
horiz = TRUE, lty = 1, bty = "n", lwd = c(3, 2, 2), pch = 16)
|
||
|
par(restor.par)
|
||
|
|
||
|
|
||
|
# new grouping for the aggregates
|
||
|
layout(rbind(
|
||
|
matrix(seq_len(2 * 3), ncol = 2),
|
||
|
2 * 3 + 1
|
||
|
), heights = c(rep(6L, 3), 1L))
|
||
|
|
||
|
for (group in split(aggr, aggr[c("rho", "beta.version")])) {
|
||
|
rho <- group$rho[[1]]
|
||
|
beta.version <- group$beta.version[[1]]
|
||
|
|
||
|
if (!(rho %in% c(0, .5, .8))) { next }
|
||
|
|
||
|
order <- group$order
|
||
|
|
||
|
plot(range(order), 0:1, main = sprintf("V%d, rho %.1f", beta.version, rho),
|
||
|
type = "n", bty = "n", axes = FALSE, xlab = expression(order), ylab = "Subspace Distance")
|
||
|
axis(1, at = order)
|
||
|
axis(2, at = seq(0, 1, by = 0.2))
|
||
|
with(group, {
|
||
|
lines(order, dist.subspace.gmlm, col = col.methods["gmlm"], lwd = 3, type = "b", pch = 16)
|
||
|
lines(order, dist.subspace.tsir, col = col.methods["tsir"], lwd = 2, type = "b", pch = 16)
|
||
|
lines(order, dist.subspace.sir, col = col.methods["sir"], lwd = 2, type = "b", pch = 16)
|
||
|
})
|
||
|
if (rho == 0.5 && beta.version == 2L) {
|
||
|
abline(v = 0.5, lty = "dotted", col = "black")
|
||
|
abline(h = group$dist.subspace.tsir[which(order == 3L)],
|
||
|
lty = "dotted", col = "black")
|
||
|
}
|
||
|
}
|
||
|
methods <- c("GMLM", "TSIR", "SIR")
|
||
|
restor.par <- par(
|
||
|
fig = c(0, 1, 0, 1),
|
||
|
oma = c(0, 0, 0, 0),
|
||
|
mar = c(1, 0, 0, 0),
|
||
|
new = TRUE
|
||
|
)
|
||
|
plot(0, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
|
||
|
legend("bottom", col = col.methods[tolower(methods)], legend = methods,
|
||
|
horiz = TRUE, lty = 1, bty = "n", lwd = c(3, 2, 2), pch = 16)
|
||
|
par(restor.par)
|
||
|
|
||
|
|
||
|
|
||
|
### Version 1
|
||
|
# 2'nd order
|
||
|
# 1 + 2 y + y^2
|
||
|
# (1 + y)^2
|
||
|
|
||
|
# 3'rd order
|
||
|
# 1 + 3 y + 3 y^2 + y^4
|
||
|
# (1 + y)^3
|
||
|
|
||
|
# 4'th order
|
||
|
# 1 + 4 y + 6 y^2 + 4 y^3 + y^4
|
||
|
# (1 + y)^4
|
||
|
|
||
|
|
||
|
### Version 2
|
||
|
# 2'nd order
|
||
|
# 1 - 2 y + y^2
|
||
|
# (1 - y)^2 = 1 - 2 y + y^2
|
||
|
|
||
|
# 3'rd order
|
||
|
# 1 - 3 y + 3 y^2 - y^3
|
||
|
# -(y - 1)^3
|
||
|
|
||
|
# 4'th order
|
||
|
# 1 - 4 y + 6 y^2 - 4 y^3 + y^4
|
||
|
# (y - 1)^4
|