tensor_predictors/simulations/kpir_sim.R

library(tensorPredictors)
library(dplyr)
library(ggplot2)

###          Exec all methods for a given data set and collect logs          ###
sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {

    # Logger creator
    logger <- function(name) {
        eval(substitute(function(iter, loss, alpha, beta, ...) {
            hist[iter + 1L, ] <<- c(
                iter = iter,
                loss = loss,
                dist = (dist <- dist.subspace(c(kronecker(alpha.true, beta.true)),
                                              c(kronecker(alpha, beta)))),
                dist.alpha = (dist.alpha <- dist.subspace(c(alpha.true), c(alpha))),
                dist.beta  = (dist.beta  <- dist.subspace(c( beta.true), c(beta ))),
                norm.alpha = norm(alpha, "F"),
                norm.beta  = norm(beta, "F")
            )

           cat(sprintf(
                "%s(%3d) | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
                name, iter, loss, dist,
                nrow(alpha), ncol(alpha), dist.alpha,
                nrow(beta),  ncol(beta),  dist.beta
            ))
        }, list(hist = as.symbol(paste0("hist.", name)))))
    }

    # Initialize logger history targets
    hist.base <- hist.new <- hist.momentum <- hist.approx <- # hist.kron <-
        data.frame(iter = seq(0L, max.iter),
            loss = NA, dist = NA, dist.alpha = NA, dist.beta = NA,
            norm.alpha = NA, norm.beta = NA
        )

    # Base (old)
    kpir.base(X, Fy, shape, max.iter = max.iter, logger = logger("base"))

    # New (simple Gradient Descent)
    kpir.new(X, Fy, shape, max.iter = max.iter, logger = logger("new"))

    # Gradient Descent with Nesterov Momentum
    kpir.momentum(X, Fy, shape, max.iter = max.iter, logger = logger("momentum"))

    # # Residual Covariance Kronecker product assumpton version
    # kpir.kron(X, Fy, shape, max.iter = max.iter, logger = logger("kron"))

    # Approximated MLE with Nesterov Momentum
    kpir.approx(X, Fy, shape, max.iter = max.iter, logger = logger("approx"))

    # Add method tags
    hist.base$method     <- factor("base")
    hist.new$method      <- factor("new")
    hist.momentum$method <- factor("momentum")
    # hist.kron$method     <- factor("kron")
    hist.approx$method   <- factor("approx")

    # Combine results and return
    rbind(hist.base, hist.new, hist.momentum, hist.approx) #, hist.kron
}

## Plot helper functions
plot.hist <- function(hist, response, ...) {
    ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
        geom_line(aes_(y = as.name(response)), na.rm = TRUE) +
        geom_point(data = with(sub <- subset(hist, !is.na(as.symbol(response))),
            aggregate(sub, list(method, repetition), tail, 1)
        ), aes_(y = as.name(response))) +
        labs(...) +
        theme(legend.position = "bottom")
}
plot.stats <- function(hist, response, ..., title = "Stats") {
    ggplot(hist, aes_(x = quote(iter), y = as.name(response),
                      color = quote(method), group = quote(method))) +
        geom_ribbon(aes(color = NULL, fill = method), alpha = 0.2,
                    stat = "summary", fun.min = "min", fun.max = "max", na.rm = TRUE) +
        geom_ribbon(aes(color = NULL, fill = method), alpha = 0.4,
                    stat = "summary", na.rm = TRUE,
                    fun.min = function(y) quantile(y, 0.25),
                    fun.max = function(y) quantile(y, 0.75)) +
        geom_line(stat = "summary", fun = "mean", na.rm = TRUE) +
        labs(title = title, ...) +
        theme(legend.position = "bottom")
}
plot.mean <- function(hist, response, ..., title = "Mean") {
    ggplot(hist, aes_(x = quote(iter), y = as.name(response),
                      color = quote(method), group = quote(method))) +
        geom_line(stat = "summary", fun = "mean", na.rm = TRUE) +
        labs(title = title, ...) +
        theme(legend.position = "bottom")
}
plot.median <- function(hist, response, ..., title = "Median") {
    ggplot(hist, aes_(x = quote(iter), y = as.name(response),
                      color = quote(method), group = quote(method))) +
        geom_line(stat = "summary", fun = "median", na.rm = TRUE) +
        labs(title = title, ...) +
        theme(legend.position = "bottom")
}

## Generate some test data / DEBUG
n <- 200    # Sample Size
p <- sample(1:15, 1) # 11
q <- sample(1:15, 1) # 3
k <- sample(1:15, 1) # 7
r <- sample(1:15, 1) # 5
print(c(n, p, q, k, r))

hist <- NULL
reps <- 20
for (rep in 1:reps) {
    cat(sprintf("%4d / %d simulation rep. started\n", rep, reps))

    alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
    beta.true <- beta <- matrix(rnorm(p * k), p, k)
    y <- rnorm(n)
    Fy <- do.call(cbind, Map(function(slope, offset) {
            sin(slope * y + offset)
        },
        head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
        head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
    ))
    Delta <- 0.5^abs(outer(seq_len(p * q), seq_len(p * q), `-`))
    X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)

    hist.sim <- sim(X, Fy, shape = c(p, q, k, r), alpha.true, beta.true)
    hist.sim$repetition <- rep

    hist <- rbind(hist, hist.sim)
}

# Save simulation results
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
saveRDS(hist, file = sprintf("AR_%s.rds", datetime))

# for GGPlot2, as factors for grouping
hist$repetition <- factor(hist$repetition)

plot.hist(hist, "loss")
dev.print(png, file = sprintf("sim01_loss_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "loss")
dev.print(png, file = sprintf("sim01_loss_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "dist")
dev.print(png, file = sprintf("sim01_dist_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist")
dev.print(png, file = sprintf("sim01_dist_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "dist.alpha")
dev.print(png, file = sprintf("sim01_dist_alpha_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist.alpha")
dev.print(png, file = sprintf("sim01_dist_alpha_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "dist.beta")
dev.print(png, file = sprintf("sim01_dist_beta_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist.beta")
dev.print(png, file = sprintf("sim01_dist_beta_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "norm.alpha")
dev.print(png, file = sprintf("sim01_norm_alpha_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "norm.alpha")
dev.print(png, file = sprintf("sim01_norm_alpha_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "norm.beta")
dev.print(png, file = sprintf("sim01_norm_beta_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "norm.beta")
dev.print(png, file = sprintf("sim01_norm_beta_stats_%s.png", datetime), width = 768, height = 768, res = 125)


n <- 200    # Sample Size
p <- 11 # sample(1:15, 1)
q <- 3  # sample(1:15, 1)
k <- 7  # sample(1:15, 1)
r <- 5  # sample(1:15, 1)
print(c(n, p, q, k, r))

hist <- NULL
reps <- 20

Delta.1 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`))
Delta.2 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`))
Delta <- kronecker(Delta.1, Delta.2)
for (rep in 1:reps) {
    cat(sprintf("%4d / %d simulation rep. started\n", rep, reps))

    alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
    beta.true <- beta <- matrix(rnorm(p * k), p, k)
    y <- rnorm(n)
    Fy <- do.call(cbind, Map(function(slope, offset) {
            sin(slope * y + offset)
        },
        head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
        head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
    ))
    X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)

    hist.sim <- sim(X, Fy, shape = c(p, q, k, r), alpha.true, beta.true)
    hist.sim$repetition <- rep

    hist <- rbind(hist, hist.sim)
}


# Save simulation results
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
saveRDS(hist, file = sprintf("sim02_%s.rds", datetime))

# for GGPlot2, as factors for grouping
hist$repetition <- factor(hist$repetition)

plot.hist(hist, "loss")
dev.print(png, file = sprintf("sim02_loss_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "loss")
dev.print(png, file = sprintf("sim02_loss_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "dist")
dev.print(png, file = sprintf("sim02_dist_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist")
dev.print(png, file = sprintf("sim02_dist_stats_%s.png", datetime), width = 768, height = 768, res = 125)
plot.mean(hist, "dist")
plot.median(hist, "dist")

plot.hist(hist, "dist.alpha")
dev.print(png, file = sprintf("sim02_dist_alpha_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist.alpha")
dev.print(png, file = sprintf("sim02_dist_alpha_stats_%s.png", datetime), width = 768, height = 768, res = 125)
plot.mean(hist, "dist.alpha")
plot.median(hist, "dist.alpha")

plot.hist(hist, "dist.beta")
dev.print(png, file = sprintf("sim02_dist_beta_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "dist.beta")
dev.print(png, file = sprintf("sim02_dist_beta_stats_%s.png", datetime), width = 768, height = 768, res = 125)
plot.mean(hist, "dist.beta")
plot.median(hist, "dist.beta")

plot.hist(hist, "norm.alpha")
dev.print(png, file = sprintf("sim02_norm_alpha_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "norm.alpha")
dev.print(png, file = sprintf("sim02_norm_alpha_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist(hist, "norm.beta")
dev.print(png, file = sprintf("sim02_norm_beta_%s.png", datetime), width = 768, height = 768, res = 125)
plot.stats(hist, "norm.beta")
dev.print(png, file = sprintf("sim02_norm_beta_stats_%s.png", datetime), width = 768, height = 768, res = 125)

plot.hist2 <- function(hist, response, type = "all", ...) {
    # Extract final results from history
    sub <- na.omit(hist[c("iter", response, "method", "repetition")])
    sub <- aggregate(sub, list(sub$method, sub$repetition), tail, 1)

    # Setup ggplot
    p <- ggplot(hist, aes_(x = quote(iter),
                           y = as.name(response),
                           color = quote(method),
                           group = quote(interaction(method, repetition))))
    # Add requested layers
    if (type == "all") {
        p <- p + geom_line(na.rm = TRUE)
        p <- p + geom_point(data = sub)
    } else if (type == "mean") {
        p <- p + geom_line(alpha = 0.5, na.rm = TRUE, linetype = "dotted")
        p <- p + geom_point(data = sub, alpha = 0.5)
        p <- p + geom_line(aes(group = method),
                           stat = "summary", fun = "mean", na.rm = TRUE)
    } else if (type == "median") {
        p <- p + geom_line(alpha = 0.5, na.rm = TRUE, linetype = "dotted")
        p <- p + geom_point(data = sub, alpha = 0.5)
        p <- p + geom_line(aes(group = method),
                           stat = "summary", fun = "median", na.rm = TRUE)
    }
    # return with theme and annotations
    p + labs(...) + theme(legend.position = "bottom")
}

plot.hist2(hist, "dist.alpha", "all", title = "all") + coord_trans(x = "log1p")
plot.hist2(hist, "dist.alpha", "mean", title = "mean") + coord_trans(x = "log1p")
plot.hist2(hist, "dist.alpha", "median", title = "median") + coord_trans(x = "log1p")


################################################################################
###                                   EEG                                    ###
################################################################################

suppressPackageStartupMessages({
    library(pROC)
})

# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
# tpr: True positive rate.  P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
# tnr: True negative rate.  P(Yhat = - | Y = -).
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])

# Load EEG dataset
dataset <- readRDS('eeg_analysis/eeg_data.rds')

eeg_cross_validation <- function(nrFolds = 10L) {
    # Set dimenional parameters.
    n <- nrow(dataset)  # sample size (nr. of people)
    p <- 64L            # nr. of predictors (count of sensorce)
    t <- 256L           # nr. of time points (measurements)

    # Extract dimension names from X.
    nNames <- dataset$PersonID
    tNames <- as.character(seq(t))
    pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]

    # Split into X-y.
    X <- as.matrix(dataset[, -(1:2)])
    y <- dataset$Case_Control
    # Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
    # (Each of the n rows in X iterate over the time bevore switching sensorce.)
    dim(X) <- c(n, t, p)
    dimnames(X) <- list(nNames, tNames, pNames)

    # Setup Cross-Validation result
    CV <- data.frame(
        fold = (seq_len(n) %% nrFolds) + 1L,
        y_true = y,
        y_pred = NA
    )

    #

}

#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
#' @param tpc Number of "t"ime "p"rincipal "c"omponents.
egg_analysis_reduced <- function(methods, ppc, tpc) {
    # Set dimenional parameters.
    n <- nrow(dataset)  # sample size (nr. of people)
    p <- 64L            # nr. of predictors (count of sensorce)
    t <- 256L           # nr. of time points (measurements)

    # Extract dimension names from X.
    nNames <- dataset$PersonID
    tNames <- as.character(seq(t))
    pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]

    # Split into X-y.
    X <- as.matrix(dataset[, -(1:2)])
    y <- dataset$Case_Control
    # Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
    # (Each of the n rows in X iterate over the time bevore switching sensorce.)
    X <- array(X, dim = c(n, t, p),
                dimnames = list(nNames, tNames, pNames))
    # Reorder axis to (p, t, n) = (predictors, timesteps, samples).
    X <- aperm(X, c(3, 2, 1))

    # Compute Mean of X.
    X_mean <- apply(X, c(1, 2), mean)
    X_center <- X - as.vector(X_mean)

    # Compute "left" and "right" cov-matrices.
    Sigma_t <- matrix(apply(apply(X_center, 3,  crossprod), 1, mean), t, t)
    Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p)
    # Get "left", "right" principal components.
    V_p <- svd(Sigma_p, ppc, 0L)$u
    V_t <- svd(Sigma_t, tpc, 0L)$u

    # Reduce dimension.
    X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t))
    dim(X_reduced) <- c(ppc, tpc, n)

    # Vectorize to shape of (predictors * timesteps, samples) and transpose to
    # (samples, predictors * timesteps).
    X_vec <- t(matrix(X_reduced, ppc * tpc, n))

    loo.cv <- expand.grid(method = names(methods), fold = 1:n)
    loo.cv$y_true <- y[loo.cv$fold]
    loo.cv$y_pred <- NA

    # Performe LOO cross-validation for each method.
    for (i in 1L:n) {
        # Print progress.
        cat(sprintf("\rCross-Validation (p-PC: %d, t-PC: %d): %4d/%d",
                    ppc, tpc, i, n))
        # Leave Out the i-th element.
        X_train <- X_vec[-i, ]
        X_test <- X_vec[i, ]
        y_train <- y[-i]
        # Center y.
        y_train <- scale(y_train, center = TRUE, scale = FALSE)

        # For each method.
        for (method.name in names(methods)) {
            method <- methods[[method.name]]
            # Compute reduction using current method under common API.
            sdr <- method(X_train, y_train, ppc, tpc)
            B <- kronecker(sdr$alpha, sdr$beta)
            # Fit a linear model (which ensures a common sdr direction if possible).
            model <- glm(y ~ x, family = binomial(link = "logit"),
                        data = data.frame(y = y[-i], x = X_train %*% B))
            # Predict out of sample and store in LOO CV data.frame.
            y_pred <- predict(model, data.frame(x = X_test %*% B), type = "response")
            loo.cv[loo.cv$method == method.name & loo.cv$fold == i, 'y_pred'] <- y_pred
        }
    }

    for (method.name in names(methods)) {
        labels <- loo.cv[loo.cv$method == method.name, 'y_true']
        predictions <- loo.cv[loo.cv$method == method.name, 'y_pred']
        ROC <- roc(unlist(labels), unlist(predictions), quiet = TRUE)
        # Combined accuracy, error, ...
        cat("\nMethod: ", method.name, "\n",
            "acc: ", acc(unlist(labels), unlist(predictions)), "\n",
            "err: ", err(unlist(labels), unlist(predictions)), "\n",
            "fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
            "tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
            "fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
            "tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
            "auc: ", ROC$auc, "\n",
            "auc sd: ", sqrt(var(ROC)), "\n",
            sep = '')
    }

    loo.cv
}

methods <- list(
    KPIR_LS  = function(...) tensor_predictor(..., method = "KPIR_LS"),
    KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
    KPFC1    = function(...) tensor_predictor(..., method = "KPFC1"),
    KPFC2    = function(...) tensor_predictor(..., method = "KPFC2"),
    LSIR     = LSIR
)

#                 ppc, tpc
#              ------------
params <- list( c(  4,   3)
              , c( 15,  15)
              , c( 30,  20)
)

for (param in params) {
    c(ppc, tpc) %<-% param
    sim <- egg_analysis_reduced(methods, ppc, tpc)

    attr(sim, 'param') <- c(ppc = ppc, tpc = tpc)

    saveRDS(sim, file = sprintf('eeg_analysis_reduced_%d_%d.rds', ppc, tpc))
}


# plot.hist(hist, "loss",
#     title = bquote(paste("Optimization Objective: negative log-likelihood ",
#         l(hat(alpha), hat(beta)))),
#     subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#         "20 repetitions, ", n == .(n), ", ",
#         p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#     x = "nr. of iterations",
#     y = expression(l(hat(alpha), hat(beta)))
# )
# plot.stats(hist, "loss",
#     title = bquote(paste("Optimization Objective: negative log-likelihood ",
#         l(hat(alpha), hat(beta)))),
#     subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#         "20 repetitions, ", n == .(n), ", ",
#         p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#     x = "nr. of iterations",
#     y = expression(l(hat(alpha), hat(beta)))
# )


# dev.print(png, file = sprintf("sim01_loss_stat_%s.png", datetime),
#           width = 768, height = 768, res = 125)


# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
#     geom_line(aes(y = dist)) +
#     geom_point(data = with(sub <- subset(hist, !is.na(dist)),
#         aggregate(sub, list(method, repetition), tail, 1)
#     ), aes(y = dist)) +
#     labs(
#         title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_dist_%s.png", datetime),
#           width = 768, height = 768, res = 125)

# ggplot(hist, aes(x = iter, y = dist, color = method, group = method)) +
#     geom_ribbon(aes(color = NULL, fill = method), alpha = 0.2,
#                 stat = "summary", fun.min = "min", fun.max = "max", na.rm = TRUE) +
#     geom_ribbon(aes(color = NULL, fill = method), alpha = 0.4,
#                 stat = "summary", fun.min = function(y) quantile(y, 0.25),
#                                   fun.max = function(y) quantile(y, 0.75), na.rm = TRUE) +
#     geom_line(stat = "summary", fun = "mean", na.rm = TRUE) +
#     labs(
#         title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_dist_stat_%s.png", datetime),
#           width = 768, height = 768, res = 125)


# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
#     geom_line(aes(y = dist.alpha)) +
#     geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)),
#         aggregate(sub, list(method, repetition), tail, 1)
#     ), aes(y = dist.alpha)) +
#     labs(
#         title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_dist_alpha_%s.png", datetime),
#           width = 768, height = 768, res = 125)


# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
#     geom_line(aes(y = dist.beta)) +
#     geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)),
#         aggregate(sub, list(method, repetition), tail, 1)
#     ), aes(y = dist.beta)) +
#     labs(
#         title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_dist_beta_%s.png", datetime),
#           width = 768, height = 768, res = 125)


# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
#     geom_line(aes(y = norm.alpha)) +
#     geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)),
#         aggregate(sub, list(method, repetition), tail, 1)
#     ), aes(y = norm.alpha)) +
#     labs(
#         title = expression(paste("Norm of ", hat(alpha))),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(hat(alpha))[F]),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_norm_alpha_%s.png", datetime),
#           width = 768, height = 768, res = 125)

# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
#     geom_line(aes(y = norm.beta)) +
#     geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)),
#         aggregate(sub, list(method, repetition), tail, 1)
#     ), aes(y = norm.beta)) +
#     labs(
#         title = expression(paste("Norm of ", hat(beta))),
#         subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
#             "20 repetitions, ", n == .(n), ", ",
#             p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
#         x = "nr. of iterations",
#         y = expression(abs(hat(beta))[F]),
#         color = "method"
#     ) +
#     theme(legend.position = "bottom")

# dev.print(png, file = sprintf("sim01_norm_beta_%s.png", datetime),
#           width = 768, height = 768, res = 125)


# local({
#     par(mfrow = c(2, 3), oma = c(2, 1, 1, 1), mar = c(3.1, 2.1, 2.1, 1.1), lwd = 2)
#     plot(c(1, max.iter), range(c(hist.base$loss, hist.new$loss, hist.momentum$loss, hist.kron$loss), na.rm = TRUE),
#         type = "n", log = "x", main = "loss")
#     lines(    hist.base$loss, col = 2)
#     lines(     hist.new$loss, col = 3)
#     lines(hist.momentum$loss, col = 4)
#     lines(    hist.kron$loss, col = 5)
#     yrange <- range(c(hist.base$step.size, hist.new$step.size, hist.momentum$step.size, hist.kron$step.size),
#                     na.rm = TRUE)
#     plot(c(1, max.iter), yrange,
#         type = "n", log = "x", main = "step.size")
#     lines(    hist.base$step.size, col = 2)
#     lines(     hist.new$step.size, col = 3)
#     lines(hist.momentum$step.size, col = 4)
#     lines(    hist.kron$step.size, col = 5)
#     # lines(    hist.base$step.size, col = 4) # there is no step.size
#     plot(0, 0, type = "l", bty = "n", xaxt = "n", yaxt = "n")
#     legend("topleft", legend = c("Base", "GD", "GD + Momentum", "Kron + GD + Momentum"), col = 2:5,
#         lwd = par("lwd"), xpd = TRUE, horiz = FALSE, cex = 1.2, bty = "n",
#         x.intersp = 1, y.intersp = 1.5)
#         # xpd = TRUE makes the legend plot to the figure
#     plot(c(1, max.iter), range(c(hist.base$dist, hist.new$dist, hist.momentum$dist, hist.kron$dist), na.rm = TRUE),
#         type = "n", log = "x", main = "dist")
#     lines(    hist.base$dist, col = 2)
#     lines(     hist.new$dist, col = 3)
#     lines(hist.momentum$dist, col = 4)
#     lines(    hist.kron$dist, col = 5)
#     plot(c(1, max.iter), range(c(hist.base$dist.alpha, hist.new$dist.alpha, hist.momentum$dist.alpha, hist.kron$dist.alpha), na.rm = TRUE),
#         type = "n", log = "x", main = "dist.alpha")
#     lines(    hist.base$dist.alpha, col = 2)
#     lines(     hist.new$dist.alpha, col = 3)
#     lines(hist.momentum$dist.alpha, col = 4)
#     lines(    hist.kron$dist.alpha, col = 5)
#     plot(c(1, max.iter), range(c(hist.base$dist.beta, hist.new$dist.beta, hist.momentum$dist.beta, hist.kron$dist.beta), na.rm = TRUE),
#         type = "n", log = "x", main = "dist.beta")
#     lines(    hist.base$dist.beta, col = 2)
#     lines(     hist.new$dist.beta, col = 3)
#     lines(hist.momentum$dist.beta, col = 4)
#     lines(    hist.kron$dist.beta, col = 5)
#     # par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
#     # plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
#     # legend('bottom', legend = c('GD', 'GD + Nesterov Momentum', 'Alternating'), col = 2:4,
#     #     lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = 'n')
#     #     # xpd = TRUE makes the legend plot to the figure
# })
# dev.print(png, file = "loss.png", width = 768, height = 768, res = 125)

# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
#           b1 = beta,  b2 = fit.base$beta,  b3 = fit.new$beta,  b4 = fit.momentum$beta), {
#     par(mfrow = c(2, 4))
#     a2 <- sign(sum(sign(a1 * a2))) * a2
#     a3 <- sign(sum(sign(a1 * a3))) * a3
#     a4 <- sign(sum(sign(a1 * a4))) * a4
#     b2 <- sign(sum(sign(b1 * b2))) * b2
#     b3 <- sign(sum(sign(b1 * b3))) * b3
#     b4 <- sign(sum(sign(b1 * b4))) * b4

#     matrixImage(a1, main = expression(alpha))
#     matrixImage(a2, main = expression(paste(hat(alpha)["Base"])))
#     matrixImage(a3, main = expression(paste(hat(alpha)["GD"])))
#     matrixImage(a4, main = expression(paste(hat(alpha)["GD+Nest"])))
#     matrixImage(b1, main = expression(beta))
#     matrixImage(b2, main = expression(paste(hat(beta)["Base"])))
#     matrixImage(b3, main = expression(paste(hat(beta)["GD"])))
#     matrixImage(b4, main = expression(paste(hat(beta)["GD+Nest"])))
# })
# dev.print(png, file = "estimates.png", width = 768, height = 768, res = 125)


# with(list(d1 = Delta, d2 = fit.base$Delta, d3 = fit.new$Delta, d4 = fit.momentum$Delta), {
#     par(mfrow = c(2, 2))

#     matrixImage(d1, main = expression(Delta))
#     matrixImage(d2, main = expression(hat(Delta)["Base"]))
#     matrixImage(d3, main = expression(hat(Delta)["GD"]))
#     matrixImage(d4, main = expression(hat(Delta)["GD+Nest"]))
# })
# dev.print(png, file = "Delta.png", width = 768, height = 768, res = 125)

# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
#           b1 = beta,  b2 = fit.base$beta,  b3 = fit.new$beta,  b4 = fit.momentum$beta), {
#     par(mfrow = c(2, 2))

#     matrixImage(kronecker(a1, b1), main = expression(B))
#     matrixImage(kronecker(a2, b2), main = expression(hat(B)["Base"]))
#     matrixImage(kronecker(a3, b3), main = expression(hat(B)["GD"]))
#     matrixImage(kronecker(a4, b4), main = expression(hat(B)["GD+Nest"]))
# })
# dev.print(png, file = "B.png", width = 768, height = 768, res = 125)

# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
#           b1 = beta,  b2 = fit.base$beta,  b3 = fit.new$beta,  b4 = fit.momentum$beta), {
#     par(mfrow = c(3, 1), lwd = 1)

#     d2 <- kronecker(a1, b1) - kronecker(a2, b2)
#     d3 <- kronecker(a1, b1) - kronecker(a3, b3)
#     d4 <- kronecker(a1, b1) - kronecker(a4, b4)
#     xlim <- c(-1, 1) * max(abs(c(d2, d3, d4)))
#     breaks <- seq(xlim[1], xlim[2], len = 41)

#     hist(d2, main = expression(paste(base,      (B - hat(B))[i])),
#          breaks = breaks, xlim = xlim, freq = FALSE)
#     lines(density(d2), col = 2)
#     abline(v = range(d2), lty = 2)
#     hist(d3, main = expression(paste(GD,        (B - hat(B))[i])),
#          breaks = breaks, xlim = xlim, freq = FALSE)
#     lines(density(d3), col = 3)
#     abline(v = range(d3), lty = 2)
#     hist(d4, main = expression(paste(GD + Nest, (B - hat(B))[i])),
#          breaks = breaks, xlim = xlim, freq = FALSE)
#     lines(density(d4), col = 4)
#     abline(v = range(d4), lty = 2)
# })
# dev.print(png, file = "hist.png", width = 768, height = 768, res = 125)


# options(width = 300)
# print(pr <- prof.tree::prof.tree("./Rprof.out"), limit = NULL
#     , pruneFun = function(x) x$percent > 0.01)

# par(mfrow = c(2, 2))
# matrixImage(alpha, main = "alpha")
# matrixImage(fit$alpha, main = "fit$alpha")
# matrixImage(beta, main = "beta")
# matrixImage(fit$beta, main = "fit$beta")

# if (diff(dim(alpha) * dim(beta)) > 0) {
#     par(mfrow = c(2, 1))
# } else {
#     par(mfrow = c(1, 2))
# }
# matrixImage(kronecker(alpha, beta), main = "kronecker(alpha, beta)")
# matrixImage(kronecker(fit$alpha, fit$beta), main = "kronecker(fit$alpha, fit$beta)")

# matrixImage(Delta, main = "Delta")
# matrixImage(fit$Delta, main = "fit$Delta")


# local({
#     a <- (-1 * (sum(sign(fit$alpha) * sign(alpha)) < 0)) * fit$alpha / mean(fit$alpha^2)
#     b <- alpha / mean(alpha^2)

#     norm(a - b, "F")
# })
# local({
#     a <- (-1 * (sum(sign(fit$beta) * sign(beta)) < 0)) * fit$beta / mean(fit$beta^2)
#     b <- beta / mean(beta^2)

#     norm(a - b, "F")
# })


# # Which Sequence?
# x <- y <- 1
# replicate(40, x <<- (y <<- x + y) - x)


# # Face-Splitting Product
# n <- 100
# p <- 3
# q <- 500
# A <- matrix(rnorm(n * p), n)
# B <- matrix(rnorm(n * q), n)

# faceSplit <- function(A, B) {
#     C <- vapply(seq_len(ncol(A)), function(i) A[, i] * B, B)
#     dim(C) <- c(nrow(A), ncol(A) * ncol(B))
#     C
# }

# all.equal(
#     tkhatriRao(A, B),
#     faceSplit(A, B)
# )
# microbenchmark::microbenchmark(
#     tkhatriRao(A, B),
#     faceSplit(A, B)
# )


# dist.kron <- function(a0, b0, a1, b1) {
#     sqrt(sum(a0^2) * sum(b0^2) -
#          2 * sum(diag(crossprod(a0, a1))) * sum(diag(crossprod(b0, b1))) +
#          sum(a1^2) * sum(b1^2))
# }


# alpha <- matrix(rnorm(q * r), q)
# beta  <- matrix(rnorm(p * k), p)
# alpha.true <- matrix(rnorm(q * r), q)
# beta.true  <- matrix(rnorm(p * k), p)
# all.equal(
#     dist.kron(alpha, beta, alpha.true, beta.true),
#     norm(kronecker(alpha, beta) - kronecker(alpha.true, beta.true), "F")
# )

# A <- matrix(rnorm(p^2), p)
# B <- matrix(rnorm(p^2), p)

# tr <- function(A) sum(diag(A))
# tr(crossprod(A, B))
# tr(tcrossprod(B, A))

# tr(crossprod(A, A))
# tr(tcrossprod(A, A))
# sum(A^2)

# alpha <- matrix(rnorm(q * r), q)
# beta  <- matrix(rnorm(p * k), p)

# norm(kronecker(alpha, beta), "F")^2
# norm(alpha, "F")^2 * norm(beta, "F")^2
# tr(crossprod(kronecker(alpha, beta)))
# tr(tcrossprod(kronecker(alpha, beta)))
# tr(crossprod(kronecker(t(alpha), t(beta))))
# tr(crossprod(alpha)) * tr(crossprod(beta))
# tr(tcrossprod(alpha)) * tr(tcrossprod(beta))
# tr(crossprod(alpha)) * tr(tcrossprod(beta))
# sum(alpha^2) * sum(beta^2)

# alpha <- matrix(rnorm(q * r), q)
# beta  <- matrix(rnorm(p * k), p)
# alpha.true <- matrix(rnorm(q * r), q)
# beta.true  <- matrix(rnorm(p * k), p)
# microbenchmark::microbenchmark(
#     norm(kronecker(alpha, beta), "F")^2,
#     norm(alpha, "F")^2 * norm(beta, "F")^2,
#     tr(crossprod(kronecker(alpha, beta))),
#     tr(tcrossprod(kronecker(alpha, beta))),
#     tr(crossprod(kronecker(t(alpha), t(beta)))),
#     tr(crossprod(alpha)) * tr(crossprod(beta)),
#     tr(tcrossprod(alpha)) * tr(tcrossprod(beta)),
#     tr(crossprod(alpha)) * tr(tcrossprod(beta)),
#     sum(alpha^2) * sum(beta^2),

#     setup = {
#         p <- sample(1:10, 1)
#         q <- sample(1:10, 1)
#         k <- sample(1:10, 1)
#         r <- sample(1:10, 1)
#         assign("alpha", matrix(rnorm(q * r), q), .GlobalEnv)
#         assign("beta", matrix(rnorm(p * k), p), .GlobalEnv)
#         assign("alpha.true", matrix(rnorm(q * r), q), .GlobalEnv)
#         assign("beta.true", matrix(rnorm(p * k), p), .GlobalEnv)
#     }
# )


# p <- sample(1:15, 1) # 11
# q <- sample(1:15, 1) # 3
# k <- sample(1:15, 1) # 7
# r <- sample(1:15, 1) # 5
# A <- matrix(rnorm(q * r), q)
# B <- matrix(rnorm(p * k), p)
# a <- matrix(rnorm(q * r), q)
# b <- matrix(rnorm(p * k), p)

# all.equal(
#     kronecker(A + a, B + b),
#     kronecker(A, B) + kronecker(A, b) + kronecker(a, B) + kronecker(a, b)
# )


# p <- 200L
# n <- 100L
# R <- matrix(rnorm(n * p), n)
# A <- matrix(rnorm(p^2), p)              # Base Matrix
# B <- A + 0.01 * matrix(rnorm(p^2), p)   # Distortion / Update of A
# A.inv <- solve(A)

# microbenchmark::microbenchmark(
#     solve = R %*% solve(B),
#     neumann.raw = R %*% (A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv),
#     neumann.fun = {
#         AD <- A.inv %*% (A - B)
#         res <- A.inv + AD %*% A.inv
#         res <- A.inv + AD %*% res
#         R %*% res
#     }
# )

# all.equal(
#     A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
#     {
#         DA <- (A - B) %*% A.inv
#         res <- A.inv + A.inv %*% DA
#         res <- A.inv + res %*% DA
#         res
#     }
# )

# all.equal(
#     A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
#     {
#         AD <- A.inv %*% (A - B)
#         res <- A.inv + AD %*% A.inv
#         res <- A.inv + AD %*% res
#         res
#     }
# )

# #####
# sym <- function(A) A + t(A)
# n <- 101
# p <- 7
# q <- 11
# r <- 3
# k <- 5
# R <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
# beta  <- array(rnorm(p * k), dim = c(p = p, k = k))
# Delta.1 <- sym(matrix(rnorm(q * q), q, q))
# dim(Delta.1) <- c(q = q, q = q)
# Delta.2 <- sym(matrix(rnorm(p * p), p, p))
# dim(Delta.2) <- c(p = p, p = p)

# Delta <- kronecker(Delta.1, Delta.2)

# grad.alpha.1 <- local({
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
#     dim(.R) <- c(q, p, n)
#     .F <- sapply(seq_len(n), function(i) beta %*% F[i, , ])
#     dim(.F) <- c(p, r, n)

#     .C <- sapply(seq_len(n), function(i) .R[i, , ] %*% .F[i, , ])
#     dim(.C) <- c(n, q, r)
#     colSums(.C)
# })
# grad.alpha.2 <- local({
#     # Delta.1^-1 R' Delta.2^-1
#     .R <- aperm(R, c(2, 1, 3))
#     dim(.R) <- c(q, n * p)
#     .R <- solve(Delta.1) %*% .R
#     dim(.R) <- c(q, n, p)
#     .R <- aperm(.R, c(3, 2, 1))
#     dim(.R) <- c(p, n * q)
#     .R <- solve(Delta.2) %*% .R
#     dim(.R) <- c(p, n, q)
#     .R <- aperm(.R, c(2, 3, 1)) # n x q x p

#     # beta F
#     .F <- aperm(F, c(2, 1, 3))
#     dim(.F) <- c(k, n * r)
#     .F <- beta %*% .F
#     dim(.F) <- c(p, n, r)
#     .F <- aperm(.F, c(2, 1, 3)) # n x p x r

#     # (Delta.1^-1 R' Delta.2^-1) (beta F)
#     .R <- aperm(.R, c(1, 3, 2))
#     dim(.R) <- c(n * p, q)
#     dim(.F) <- c(n * p, r)
#     crossprod(.R, .F)
# })

# all.equal(
#     grad.alpha.1,
#     grad.alpha.2
# )

# all.equal({
#     .R <- matrix(0, q, p)
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     for (i in 1:n) {
#         .R <- .R + Di.1 %*% t(R[i, , ]) %*% Di.2
#     }
#     .R
# }, {
#     .R <- R
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
#     dim(.R) <- c(q, p, n)
#     .R <- aperm(.R, c(3, 1, 2))
#     colSums(.R)
# })
# all.equal({
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
#     dim(.R) <- c(q, p, n)
#     .R <- aperm(.R, c(3, 1, 2))
#     .R
# }, {
#     .R <- R
#     dim(.R) <- c(n * p, q)
#     .R <- .R %*% solve(Delta.1)
#     dim(.R) <- c(n, p, q)
#     .R <- aperm(.R, c(1, 3, 2))
#     dim(.R) <- c(n * q, p)
#     .R <- .R %*% solve(Delta.2)
#     dim(.R) <- c(n, q, p)
#     .R
# })
# all.equal({
#     .F <- matrix(0, p, r)
#     for (i in 1:n) {
#         .F <- .F + beta %*% F[i, , ]
#     }
#     .F
# }, {
#     .F <- apply(F, 1, function(Fi) beta %*% Fi)
#     dim(.F) <- c(p, r, n)
#     .F <- aperm(.F, c(3, 1, 2))
#     colSums(.F)
# })
# all.equal({
#     .F <- apply(F, 1, function(Fi) beta %*% Fi)
#     dim(.F) <- c(p, r, n)
#     .F <- aperm(.F, c(3, 1, 2))
#     colSums(.F)
# }, {
#     .F <- aperm(F, c(1, 3, 2))
#     dim(.F) <- c(n * r, k)
#     .F <- tcrossprod(.F, beta)
#     dim(.F) <- c(n, r, p)
#     t(colSums(.F))
# })

# all.equal({
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     grad.alpha <- 0
#     grad.beta  <- 0
#     dim(R) <- c(n, p, q)
#     dim(F) <- c(n, k, r)
#     for (i in 1:n) {
#         grad.alpha <- grad.alpha + (
#             Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
#         )
#         grad.beta <- grad.beta + (
#             Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
#         )
#     }

#     g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
# }, {
#     # Note that the order is important since for grad.beta the residuals do NOT
#     # need to be transposes.

#     # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
#     dim(R) <- c(n * p, q)
#     .R <- R %*% solve(Delta.1)
#     dim(.R) <- c(n, p, q)
#     .R <- aperm(.R, c(1, 3, 2))
#     dim(.R) <- c(n * q, p)
#     .R <- .R %*% solve(Delta.2)
#     dim(.R) <- c(n, q, p)

#     # gradient with respect to beta
#     # Responces times beta (alpha f_i')
#     dim(F) <- c(n * k, r)
#     .F <- tcrossprod(F, alpha)
#     dim(.F) <- c(n, k, q)
#     .F <- aperm(.F, c(1, 3, 2))
#     # Matricize
#     dim(.R) <- c(n * q, p)
#     dim(.F) <- c(n * q, k)
#     grad.beta <- crossprod(.R, .F)

#     # gradient with respect to beta
#     # Responces times alpha
#     dim(F) <- c(n, k, r)
#     .F <- aperm(F, c(1, 3, 2))
#     dim(.F) <- c(n * r, k)
#     .F <- tcrossprod(.F, beta)
#     dim(.F) <- c(n, r, p)
#     .F <- aperm(.F, c(1, 3, 2))
#     # Transpose stand. residuals
#     dim(.R) <- c(n, q, p)
#     .R <- aperm(.R, c(1, 3, 2))
#     # Matricize
#     dim(.R) <- c(n * p, q)
#     dim(.F) <- c(n * p, r)
#     grad.alpha <- crossprod(.R, .F)

#     g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
# })


# microbenchmark::microbenchmark(R1 = {
#     Di.1 <- solve(Delta.1)
#     Di.2 <- solve(Delta.2)
#     grad.alpha <- 0 # matrix(0, q, r)
#     grad.beta  <- 0 # matrix(0, p, k)
#     dim(R) <- c(n, p, q)
#     dim(F) <- c(n, k, r)
#     for (i in 1:n) {
#         grad.alpha <- grad.alpha + (
#             Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
#         )
#         grad.beta <- grad.beta + (
#             Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
#         )
#     }

#     g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
# }, R3 = {
#     # Note that the order is important since for grad.beta the residuals do NOT
#     # need to be transposes.

#     # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
#     dim(R) <- c(n * p, q)
#     .R <- R %*% solve(Delta.1)
#     dim(.R) <- c(n, p, q)
#     .R <- aperm(.R, c(1, 3, 2))
#     dim(.R) <- c(n * q, p)
#     .R <- .R %*% solve(Delta.2)
#     dim(.R) <- c(n, q, p)

#     # gradient with respect to beta
#     # Responces times beta (alpha f_i')
#     dim(F) <- c(n * k, r)
#     .F <- tcrossprod(F, alpha)
#     dim(.F) <- c(n, k, q)
#     .F <- aperm(.F, c(1, 3, 2))
#     # Matricize
#     dim(.R) <- c(n * q, p)
#     dim(.F) <- c(n * q, k)
#     grad.beta <- crossprod(.R, .F)

#     # gradient with respect to beta
#     # Responces times alpha
#     dim(F) <- c(n, k, r)
#     .F <- aperm(F, c(1, 3, 2))
#     dim(.F) <- c(n * r, k)
#     .F <- tcrossprod(.F, beta)
#     dim(.F) <- c(n, r, p)
#     .F <- aperm(.F, c(1, 3, 2))
#     # Transpose stand. residuals
#     dim(.R) <- c(n, q, p)
#     .R <- aperm(.R, c(1, 3, 2))
#     # Matricize
#     dim(.R) <- c(n * p, q)
#     dim(.F) <- c(n * p, r)
#     grad.alpha <- crossprod(.R, .F)

#     g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
# })


# n <- 100
# p <- 7
# q <- 11
# k <- 3
# r <- 5

# X <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
# beta  <- array(rnorm(p * k), dim = c(p = p, k = k))

# all.equal({
#     R <- array(NA, dim = c(n, p, q))
#     for (i in 1:n) {
#         R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
#     }
#     R
# }, {
#     X - (F %x_3% alpha %x_2% beta)
# }, check.attributes = FALSE)

# microbenchmark::microbenchmark(base = {
#     R <- array(NA, dim = c(n, p, q))
#     for (i in 1:n) {
#         R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
#     }
#     R
# }, ttm = {
#     X - (F %x_3% alpha %x_2% beta)
# })


# n <- 100; p <- 7; q <- 11; k <- 3; r <- 5
# sym <- function(x) t(x) + x
# Di.1 <- sym(matrix(rnorm(q^2), q, q))
# Di.2 <- sym(matrix(rnorm(p^2), p, p))
# R <- array(rnorm(n, p, q), dim = c(n, p, q))
# F <- array(rnorm(n, k, r), dim = c(n, k, r))
# alpha <- matrix(rnorm(q * r), q, r)
# beta <- matrix(rnorm(p * k), p, k)

# all.equal({
#     .R <- array(NA, dim = c(n, p, q))
#     for (i in 1:n) {
#         .R[i, , ] <- Di.2 %*% R[i, , ] %*% Di.1
#     }
#     .R
# }, {
#     R %x_3% Di.1 %x_2% Di.2
# })
# all.equal({
#     .Rt <- array(NA, dim = c(n, q, p))
#     for (i in 1:n) {
#         .Rt[i, , ] <- Di.1 %*% t(R[i, , ]) %*% Di.2
#     }
#     .Rt
# }, {
#     .Rt <- R %x_3% Di.1 %x_2% Di.2
#     aperm(.Rt, c(1, 3, 2))
# })
# all.equal({
#     .Fa <- array(NA, dim = c(n, q, k))
#     for (i in 1:n) {
#         .Fa[i, , ] <- alpha %*% t(F[i, , ])
#     }
#     .Fa
# }, {
#     aperm(F %x_3% alpha, c(1, 3, 2))
# })
# all.equal({
#     .Fb <- array(NA, dim = c(n, p, r))
#     for (i in 1:n) {
#         .Fb[i, , ] <- beta %*% F[i, , ]
#     }
#     .Fb
# }, {
#     F %x_2% beta
# })
# all.equal({
#     .F <- array(NA, dim = c(n, p, q))
#     for (i in 1:n) {
#         .F[i, , ] <- beta %*% F[i, , ] %*% t(alpha)
#     }
#     .F
# }, {
#     F %x_3% alpha %x_2% beta
# })
# all.equal({
#     .Ga <- 0
#     for (i in 1:n) {
#         .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
#     }
#     .Ga
# }, {
#     .R <- R %x_3% Di.1 %x_2% Di.2
#     dim(.R) <- c(n * p, q)
#     .Fb <- F %x_2% beta
#     dim(.Fb) <- c(n * p, r)
#     crossprod(.R, .Fb)
# })
# all.equal({
#     .Gb <- 0
#     for (i in 1:n) {
#         .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
#     }
#     .Gb
# }, {
#     .Rt <- aperm(R %x_3% Di.1 %x_2% Di.2, c(1, 3, 2))
#     dim(.Rt) <- c(n * q, p)
#     .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))
#     dim(.Fa) <- c(n * q, k)
#     crossprod(.Rt, .Fa)
# })
# all.equal({
#     .Ga <- 0
#     for (i in 1:n) {
#         .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
#     }
#     .Gb <- 0
#     for (i in 1:n) {
#         .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
#     }
#     c(.Ga, .Gb)
# }, {
#     .R <- R %x_3% Di.1 %x_2% Di.2
#     .Fb <- F %x_2% beta
#     .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))

#     dim(.R)  <- c(n * p, q)
#     dim(.Fb) <- c(n * p, r)
#     .Ga <- crossprod(.R, .Fb)

#     dim(.R) <- c(n, p, q)
#     .R <- aperm(.R, c(1, 3, 2))
#     dim(.R)  <- c(n * q, p)
#     dim(.Fa) <- c(n * q, k)
#     .Gb <- crossprod(.R, .Fa)

#     c(.Ga, .Gb)
# })
# all.equal({
#     .Ga <- 0
#     for (i in 1:n) {
#         .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
#     }
#     .Gb <- 0
#     for (i in 1:n) {
#         .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
#     }
#     c(.Ga, .Gb)
# }, {
#     .R <- R %x_3% Di.1 %x_2% Di.2

#     .Ga <- tcrossprod(mat(.R, 3), mat(F %x_2% beta, 3))
#     .Gb <- tcrossprod(mat(.R, 2), mat(F %x_3% alpha, 2))

#     c(.Ga, .Gb)
# })


# n <- 101; p <- 5; q <- 7

# sym <- function(x) crossprod(x)
# D1 <- sym(matrix(rnorm(q^2), q, q))
# D2 <- sym(matrix(rnorm(p^2), p, p))

# X <- tensorPredictors:::rmvnorm(n, sigma = kronecker(D1, D2))
# dim(X) <- c(n, p, q)

# D1.hat <- tcrossprod(mat(X, 3)) / n
# D2.hat <- tcrossprod(mat(X, 2)) / n

# local({
#     par(mfrow = c(2, 2))
#     matrixImage(D1,     main = "D1")
#     matrixImage(D1.hat, main = "D1.hat")
#     matrixImage(D2,     main = "D2")
#     matrixImage(D2.hat, main = "D2.hat")
# })

# sum(X^2) / n
# sum(diag(D1.hat))
# sum(diag(D2.hat))
# sum(diag(kronecker(D1, D2)))
# sum(diag(kronecker(D1.hat / sqrt(sum(diag(D1.hat))),
#                    D2.hat / sqrt(sum(diag(D1.hat))))))

# all.equal({
#     mat(X, 1) %*% kronecker(D1.hat, D2.hat)
# }, {
#     mat(X %x_3% D1.hat %x_2% D2.hat, 1)
# })
# all.equal({
#     C <- mat(X, 1) %*% kronecker(D1.hat, D2.hat) * (n / sum(X^2))
#     dim(C) <- c(n, p, q)
#     C
# }, {
#     (X %x_3% D1.hat %x_2% D2.hat) / sum(diag(D1.hat))
# })


# D.1 <- tcrossprod(mat(X, 3))
# D.2 <- tcrossprod(mat(X, 2))
# tr <- sum(diag(D.1))
# D.1 <- D.1 / sqrt(n * tr)
# D.2 <- D.2 / sqrt(n * tr)

# sum(diag(kronecker(D1, D2)))
# sum(diag(kronecker(D.1, D.2)))
# det(kronecker(D1, D2))
# det(kronecker(D.1, D.2))
# det(D.1)^p * det(D.2)^q

# log(det(kronecker(D.1, D.2)))
# p * log(det(D.1)) + q * log(det(D.2))


     d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
     d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)

     # Orientation follows the discrete axis
     ggplot(mtcars, aes(mpg, factor(cyl))) +
       geom_point() +
       stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)

     # You can supply individual functions to summarise the value at
     # each x:
     d + stat_summary(fun = "median", colour = "red", size = 2, geom = "point")
     d + stat_summary(fun = "mean", colour = "red", size = 2, geom = "point")
     d + aes(colour = factor(vs)) + stat_summary(fun = mean, geom="line")

     d + stat_summary(fun = mean, fun.min = min, fun.max = max, colour = "red")

     d <- ggplot(diamonds, aes(cut))
     d + geom_bar()
     d + stat_summary(aes(y = price), fun = "mean", geom = "bar")

     # Orientation of stat_summary_bin is ambiguous and must be specified directly
     ggplot(diamonds, aes(carat, price)) +
       stat_summary_bin(fun = "mean", geom = "bar", orientation = 'y')


     # Don't use ylim to zoom into a summary plot - this throws the
     # data away
     p <- ggplot(mtcars, aes(cyl, mpg)) +
       stat_summary(fun = "mean", geom = "point")
     p
     p + ylim(15, 30)
     # Instead use coord_cartesian
     p + coord_cartesian(ylim = c(15, 30))

     # A set of useful summary functions is provided from the Hmisc package:
     stat_sum_df <- function(fun, geom="crossbar", ...) {
       stat_summary(fun.data = fun, colour = "red", geom = geom, width = 0.2, ...)
     }
     d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
     # The crossbar geom needs grouping to be specified when used with
     # a continuous x axis.
     d + stat_sum_df("mean_cl_boot", mapping = aes(group = cyl))
     d + stat_sum_df("mean_sdl", mapping = aes(group = cyl))
     d + stat_sum_df("mean_sdl", fun.args = list(mult = 1), mapping = aes(group = cyl))
     d + stat_sum_df("median_hilow", mapping = aes(group = cyl))

     # An example with highly skewed distributions:
     if (require("ggplot2movies")) {
     set.seed(596)
     mov <- movies[sample(nrow(movies), 1000), ]
      m2 <-
        ggplot(mov, aes(x = factor(round(rating)), y = votes)) +
        geom_point()
      m2 <-
        m2 +
        stat_summary(
          fun.data = "mean_cl_boot",
          geom = "crossbar",
          colour = "red", width = 0.3
        ) +
        xlab("rating")
     m2
     # Notice how the overplotting skews off visual perception of the mean
     # supplementing the raw data with summary statistics is _very_ important

     # Next, we'll look at votes on a log scale.

     # Transforming the scale means the data are transformed
     # first, after which statistics are computed:
     m2 + scale_y_log10()
     # Transforming the coordinate system occurs after the
     # statistic has been computed. This means we're calculating the summary on the raw data
     # and stretching the geoms onto the log scale.  Compare the widths of the
     # standard errors.
     m2 + coord_trans(y="log10")
     }