library(tensorPredictors)

# seed the RNG, explicitly selecting the uniform, normal and sampling generators
set.seed(
    314159265,
    kind = "Mersenne-Twister",
    normal.kind = "Inversion",
    sample.kind = "Rejection"
)

### simulation configuration
# number of simulation replications
reps <- 100
# maximum number of iterations for GMLM
max.iter <- 10000
# sample sizes `n`
sample.sizes <- c(100, 200, 300, 500, 750)
# validation set size
N <- 2000
# predictor dimensions
p <- c(2, 3, 5)
# functions of y dimensions (response dimensions)
q <- c(1, 2, 3)
# number of modes (order of the predictor tensor)
r <- length(p)

# initial consistency checks: one dimension per mode for both predictors and
# response, and every predictor dimension is smaller than every sample size
stopifnot(
    r == length(p),
    r == length(q),
    all(outer(p, sample.sizes, `<`))
)

# Projection matrix `P_A = A (A' A)^-1 A'` onto the span of the columns of `A`.
#
# @param A matrix; `crossprod(A, A)` must be invertible (passed to `solve`)
# @returns square matrix with `nrow(A)` rows and columns
proj <- function(A) {
    gram.inv <- solve(crossprod(A, A))
    tcrossprod(A, A %*% gram.inv)
}

# setup model parameters
# reduction matrices: `alphas[[j]]` is a `p[j] x q[j]` matrix with standard
# normal entries (drawn mode by mode)
alphas <- Map(function(pj, qj) matrix(rnorm(pj * qj), pj), p, q)
# mode scatter: `Omegas[[j]]` is the `p[j] x p[j]` matrix with entries `0.5^|i - k|`
Omegas <- lapply(p, function(pj) {
    idx <- seq_len(pj)
    0.5^abs(outer(idx, idx, `-`))
})
# intercept
eta1 <- 0

# data sampling routine
#
# Draws `n` observations: a class label `y`, its centered indicator encoding
# `Fy` and predictors `X` sampled conditional on `Fy`. Relies on the global
# configuration `q` (response dimensions) and `r` (number of modes).
#
# @param n number of observations to draw
# @param eta1 intercept added to the `alphas` part of the conditional mean
# @param alphas list of reduction matrices (one per mode)
# @param Omegas list of mode scatter matrices; their inverses are passed to
#   `rtensornorm` as the mode covariances
# @param sample.axis position of the sample axis in the returned arrays
# @returns list with elements `X`, `Fy`, `y` and `sample.axis`
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is last axis)
    n.classes <- prod(q)
    y <- sample.int(n.classes, n, replace = TRUE)      # uniform class labels
    # indicator encoding of `y` as array of dimension `c(q, n)` ...
    indicators <- outer(seq_len(n.classes), y, `==`)
    Fy <- array(indicators, dim = c(q, n))
    # ... centered by subtracting the mean over the sample (last) axis
    Fy.mean <- rowMeans(Fy, dims = r)
    Fy <- Fy - c(Fy.mean)

    # sample predictors as X | Y = y (sample axis is last axis)
    Deltas <- Map(solve, Omegas)                        # normal covariances
    linear.part <- mlm(Fy, alphas) + c(eta1)
    mu_y <- mlm(linear.part, Deltas)                    # conditional mean
    noise <- rtensornorm(n, 0, Deltas, r + 1L)
    X <- mu_y + noise                                   # predictors X

    # move the sample axis from the last to the requested position
    if (sample.axis != r + 1L) {
        # place the last axis (`r + 1`) at index `sample.axis`, keeping the
        # order of the remaining axes
        perm <- append(seq_len(r), r + 1L, after = sample.axis - 1L)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }

    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}

### Logging Errors and Warnings
# Calling handler for logging messages/warnings/errors together with the
# current simulation repetition session information, allowing to reproduce
# problems.
#
# Reads the list `rep.info` (elements `file`, `n`, `rep` and `.Random.seed`)
# from the global environment and appends to the log file `<rep.info$file>.log`
#   - assignments recreating `file`, `n`, `rep` and `.Random.seed`,
#   - the condition as formatted by `as.character.error()`,
#   - the call stack which is active while the condition is handled.
#
# @param ex condition object (message, warning or error) to be logged
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information
    rep.info <- get("rep.info", envir = .GlobalEnv)
    # setup an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")
    # Write (append) condition message with reproduction info to the log
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character.error(ex)
    ), sep = "", file = log, append = TRUE)
    # add Traceback (see: `traceback()` which the following is adapted from)
    # Note: `.traceback(NULL)` reports `.Traceback`, the stack of the last
    # _uncaught_ error. That is not the condition currently handled (it is set
    # only after the calling handlers have run and never for warnings or
    # messages). An integer argument requests the current call stack instead;
    # `2L` skips the innermost frames belonging to the logger itself.
    n <- length(x <- .traceback(2L, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        # `x` is ordered from the most recent call to the outermost call
        for (i in seq_len(n)) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)
            # source location " at <file>#<line>" if source references exist
            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
                srcfile <- attr(srcref, "srcfile")
                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
            }
            if (isTRUE(attr(xi, "truncated"))) {
                xi <- c(xi, " ...")
                m <- length(xi)
            }
            if (!is.null(srcloc)) {
                xi[m] <- paste0(xi[m], srcloc)
            }
            # indent continuation lines of multi line calls by the label width
            if (m > 1) {
                label <- c(label, rep(substr("          ", 1L,
                  nchar(label, type = "w")), m - 1L))
            }
            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
        }
    }
}
# install the logger as global calling handler for messages, warnings and errors
globalCallingHandlers(
    message = exceptionLogger,
    warning = exceptionLogger,
    error = exceptionLogger
)


### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")

# CSV header (`<measure>.<method>`), used to ensure correct value/column mapping
# when writing to file. Identical for all sample sizes, hence built only once.
header <- outer(
    c("dist.subspace", "dist.projection", "error.pred"),  # measures
    c("gmlm", "pca", "hopca", "tsir"),                    # methods
    paste, sep = ".")

# The true reduction `B.true` and its projection `P.true` depend only on the
# model parameters `alphas`, `Omegas` which are fixed throughout the simulation;
# compute them once instead of in every replication.
B.true <- Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas)))
P.true <- proj(B.true)

for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste("sim-normal", start, n, sep = "-"), ".csv")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)

    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows to log state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings from the logs
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)

        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)

        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)

        ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
        # (`alphas`, `Omegas` inside `with()` are the estimates stored in the fit)
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas))))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- Reduce(`%x%`, rev(fit.tsir))

        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
        #   `P_A = A (A' A)^-1 A'` and the normalization means that with
        #   respect to the dimensions of `A, B` the subspace distance is in the
        #   range `[0, 1]`.
        dist.subspace.gmlm  <- dist.subspace(B.true, B.gmlm,  normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca   <- dist.subspace(B.true, B.pca,   normalize = TRUE)
        dist.subspace.tsir  <- dist.subspace(B.true, B.tsir,  normalize = TRUE)

        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm  <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca   <- dist.projection(B.true, B.pca)
        dist.projection.tsir  <- dist.projection(B.true, B.tsir)

        ### Prediction Errors: (using new independent sample of size `N`)
        # NOTE(review): the `error.pred.*` values below are computed from the
        #   projection matrices only; neither the validation sample nor `vecX`
        #   enters the computation, so they appear to repeat the spectral norm
        #   projection distance - confirm the intended measure. The validation
        #   draw is kept as it advances the RNG state seen by the following
        #   replications.
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of vectorized `X`s
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        error.pred.gmlm  <- norm(P.true - proj(B.gmlm),  "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca   <- norm(P.true - proj(B.pca),   "2")
        error.pred.tsir  <- norm(P.true - proj(B.tsir),  "2")

        # format estimation/prediction errors and write to file and console
        # (values are looked up by name in the order of the CSV header)
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)
        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}