library(tensorPredictors) library(dplyr) library(ggplot2) ## Logger callbacks log.prog <- function(max.iter) { function(iter, loss, alpha, beta, ...) { select <- as.integer(seq(1, max.iter, len = 30) <= iter) cat("\r[", paste(c(" ", "=")[1 + select], collapse = ""), "] ", iter, "/", max.iter, sep = "") } } ### Exec all methods for a given data set and collect logs ### sim <- function(X, Fy, alpha.true, beta.true, max.iter = 500L) { # Logger creator logger <- function(name) { eval(substitute(function(iter, loss, alpha, beta, ...) { tryCatch({ hist[iter + 1L, ] <<- c( iter = iter, loss = loss, dist = (dist <- dist.subspace(c(kronecker(alpha.true, beta.true)), c(kronecker(alpha, beta)))), dist.alpha = (dist.alpha <- dist.subspace(c(alpha.true), c(alpha))), dist.beta = (dist.beta <- dist.subspace(c( beta.true), c(beta ))), norm.alpha = norm(alpha, "F"), norm.beta = norm(beta, "F"), mse = mean((X - mlm(Fy, alpha, beta, modes = 3:2))^2) )}, error = function(e) { cat("Error in ", name, ", dim(alpha): ", dim(alpha), ", dim(alpha.true): ", dim(alpha.true), ", dim(beta)", dim(beta), ", dim(beta.true)", dim(beta.true), "\n") stop(e) }) cat(sprintf( "%s(%3d) | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n", name, iter, loss, dist, nrow(alpha), ncol(alpha), dist.alpha, nrow(beta), ncol(beta), dist.beta )) }, list(hist = as.symbol(paste0("hist.", name))))) } # Initialize logger history targets hist.base <- hist.new.vlp <- hist.new.ls <- hist.ls <- hist.momentum.vlp <- hist.momentum.ls <- hist.approx.vlp <- hist.approx.ls <- data.frame(iter = seq(0L, max.iter), loss = NA, dist = NA, dist.alpha = NA, dist.beta = NA, norm.alpha = NA, norm.beta = NA, mse = NA ) # Base (old) kpir.base(X, Fy, max.iter = max.iter, logger = logger("base")) # New (simple Gradient Descent, using VLP initialization) kpir.new(X, Fy, max.iter = max.iter, init.method = "vlp", logger = logger("new.vlp")) kpir.new(X, Fy, max.iter = max.iter, init.method = "ls", logger = logger("new.ls")) # Least Squares estimate (alternating estimation) kpir.ls(X, Fy, sample.mode = 1L, max.iter = max.iter, logger = logger("ls")) # Gradient Descent with Nesterov Momentum kpir.momentum(X, Fy, max.iter = max.iter, init.method = "vlp", logger = logger("momentum.vlp")) kpir.momentum(X, Fy, max.iter = max.iter, init.method = "ls", logger = logger("momentum.ls")) # Approximated MLE with Nesterov Momentum kpir.approx(X, Fy, max.iter = max.iter, init.method = "vlp", logger = logger("approx.vlp")) kpir.approx(X, Fy, max.iter = max.iter, init.method = "ls", logger = logger("approx.ls")) # Add method tags hist.base$method <- factor("base") hist.new.vlp$method <- factor("new") hist.new.ls$method <- factor("new") hist.ls$method <- factor("ls") hist.momentum.vlp$method <- factor("momentum") hist.momentum.ls$method <- factor("momentum") hist.approx.vlp$method <- factor("approx") hist.approx.ls$method <- factor("approx") # Add init. method tag hist.base$init <- factor("vlp") hist.new.vlp$init <- factor("vlp") hist.new.ls$init <- factor("ls") hist.ls$init <- factor("ls") hist.momentum.vlp$init <- factor("vlp") hist.momentum.ls$init <- factor("ls") hist.approx.vlp$init <- factor("vlp") hist.approx.ls$init <- factor("ls") # Combine results and return rbind( hist.base, hist.new.vlp, hist.new.ls, hist.ls, hist.momentum.vlp, hist.momentum.ls, hist.approx.vlp, hist.approx.ls ) } ## Plot helper function plot.hist2 <- function(hist, response, type = "all", ...) { # Extract final results from history sub <- na.omit(hist[c("iter", response, "method", "init", "repetition")]) sub <- aggregate(sub, list(sub$method, sub$init, sub$repetition), tail, 1) # Setup ggplot p <- ggplot(hist, aes_(x = quote(iter), y = as.name(response), color = quote(method), linetype = quote(init), group = quote(interaction(method, repetition, init)))) # Add requested layers if (type == "all") { p <- p + geom_line(na.rm = TRUE) p <- p + geom_point(data = sub) } else if (type == "mean") { p <- p + geom_line(alpha = 0.4, na.rm = TRUE, linetype = "dotted") p <- p + geom_point(data = sub, alpha = 0.4) p <- p + geom_line(aes(group = interaction(method, init)), stat = "summary", fun = "mean", na.rm = TRUE) } else if (type == "median") { p <- p + geom_line(alpha = 0.4, na.rm = TRUE, linetype = "dotted") p <- p + geom_point(data = sub, alpha = 0.4) p <- p + geom_line(aes(group = interaction(method, init)), stat = "summary", fun = "median", na.rm = TRUE) } # return with theme and annotations p + labs(...) + theme(legend.position = "bottom") } ################################################################################ ### Sim 1 / vec(X) has AR(0.5) Covariance ### ################################################################################ ## Generate some test data / DEBUG n <- 200 # Sample Size p <- sample(2:15, 1) # 11 q <- sample(2:15, 1) # 7 k <- min(sample(1:15, 1), p - 1) # 3 r <- min(sample(1:15, 1), q - 1) # 5 print(c(n, p, q, k, r)) hist <- NULL reps <- 20 for (rep in 1:reps) { cat(sprintf("%4d / %d simulation rep. started\n", rep, reps)) alpha.true <- alpha <- matrix(rnorm(q * r), q, r) beta.true <- beta <- matrix(rnorm(p * k), p, k) y <- rnorm(n) Fy <- do.call(cbind, Map(function(slope, offset) { sin(slope * y + offset) }, head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r), head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r) )) Delta <- 0.5^abs(outer(seq_len(p * q), seq_len(p * q), `-`)) X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta) dim(X) <- c(n, p, q) dim(Fy) <- c(n, k, r) hist.sim <- sim(X, Fy, alpha.true, beta.true) hist.sim$repetition <- rep hist <- rbind(hist, hist.sim) } # Save simulation results datetime <- format(Sys.time(), "%Y%m%dT%H%M") saveRDS(hist, file = sprintf("AR_%s.rds", datetime)) # for GGPlot2, as factors for grouping hist$repetition <- factor(hist$repetition) # Save simulation results sim.name <- "sim01" datetime <- format(Sys.time(), "%Y%m%dT%H%M") saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime)) # for GGPlot2, as factors for grouping hist$repetition <- factor(hist$repetition) for (response in c("loss", "dist", "dist.alpha", "dist.beta")) { for (fun in c("all", "mean", "median")) { print(plot.hist2(hist, response, fun, title = fun) + coord_trans(x = "log1p")) dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun), width = 768, height = 768, res = 125) } } ################################################################################ ### Sim 2 / X has AR(0.707) %x% AR(0.707) Covariance ### ################################################################################ n <- 200 # Sample Size p <- 11 # sample(1:15, 1) q <- 7 # sample(1:15, 1) k <- 3 # sample(1:15, 1) r <- 5 # sample(1:15, 1) print(c(n, p, q, k, r)) hist <- NULL reps <- 20 max.iter <- 2 Delta.1 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`)) Delta.2 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`)) for (rep in 1:reps) { cat(sprintf("\n\033[1m%4d / %d simulation rep. started\033[0m\n", rep, reps)) alpha.1.true <- alpha.1 <- matrix(rnorm(q * r), q, r) alpha.2.true <- alpha.2 <- matrix(rnorm(p * k), p, k) y <- rnorm(n) Fy <- do.call(cbind, Map(function(slope, offset) { sin(slope * y + offset) }, head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r), head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r) )) dim(Fy) <- c(n, k, r) X <- mlm(Fy, alpha.1, alpha.2, modes = 3:2) X <- X + rtensornorm(n, 0, Delta.1, Delta.2, sample.mode = 1L) hist.sim <- sim(X, Fy, alpha.1.true, alpha.2.true, max.iter = max.iter) hist.sim$repetition <- rep hist <- rbind(hist, hist.sim) } # Save simulation results sim.name <- "sim02" datetime <- format(Sys.time(), "%Y%m%dT%H%M") saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime)) # for GGPlot2, as factors for grouping hist$repetition <- factor(hist$repetition) for (response in c("loss", "mse", "dist", "dist.alpha", "dist.beta")) { for (fun in c("all", "mean", "median")) { title <- paste(fun, paste(c("n", "p", "q", "k", "r"), c(n, p, q, k, r), sep = "=", collapse = ", ")) print(plot.hist2(hist, response, fun, title = title) + coord_trans(x = "log1p")) dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun), width = 768, height = 768, res = 125) if (response != "loss") { print(plot.hist2(hist, response, fun, title = title) + coord_trans(x = "log1p", y = "log1p")) dev.print(png, file = sprintf("%s_%s_%s_%s_log.png", sim.name, datetime, response, fun), width = 768, height = 768, res = 125) } } } stats <- local({ # final result from history sub <- na.omit(hist) sub <- aggregate(sub, list( method = sub$method, init = sub$init, repetition = sub$repetition ), tail, 1) # aggregate statistics over repetitions stats.mean <- aggregate(subset(sub, select = c("loss", "mse", "dist.alpha", "dist.beta")), list(method = sub$method, init = sub$init), mean) stats.sd <- aggregate(subset(sub, select = c("loss", "mse", "dist.alpha", "dist.beta")), list(method = sub$method, init = sub$init), sd) # merge mean and sd stats together merge(stats.mean, stats.sd, by = c("method", "init"), suffixes = c(".mean", ".sd")) }) print(stats, digits = 2) # method init loss.mean mse.mean dist.alpha.mean dist.beta.mean loss.sd mse.sd dist.alpha.sd dist.beta.sd # 1 approx ls 5457 0.99 0.033 0.030 163 0.025 0.017 0.012 # 2 approx vlp 6819 3.99 0.267 0.287 1995 12.256 0.448 0.457 # 3 base vlp -2642 1.82 0.248 0.271 1594 2.714 0.447 0.458 # 4 momentum ls -3479 0.99 0.037 0.035 95 0.025 0.017 0.015 # 5 momentum vlp -2704 1.78 0.233 0.260 1452 2.658 0.438 0.448 # 6 new ls -3479 0.99 0.037 0.035 95 0.025 0.017 0.015 # 7 new vlp -2704 1.78 0.233 0.260 1452 2.658 0.438 0.448 ################################################################################ ### Sim 3 ### ################################################################################ n <- 200 p <- c(7, 11, 5) # response dimensions (order 3) q <- c(3, 6, 2) # predictor dimensions (order 3) # currently only kpir.ls suppoert higher orders (order > 2) sim3 <- function(X, Fy, alphas.true, max.iter = 500L) { # Logger creator logger <- function(name) { eval(substitute(function(iter, loss, alpha, beta, ...) { hist[iter + 1L, ] <<- c( iter = iter, loss = loss, mse = (mse <- mean((X - mlm(Fy, alpha, beta, modes = 3:2))^2)), (dist <- unlist(Map(dist.subspace, alphas, alphas.true))) ) cat(sprintf( "%s(%3d) | loss: %-12.4f - mse: %-12.4f - sum(dist): %-.4e\n", name, iter, loss, sum(dist) )) }, list(hist = as.symbol(paste0("hist.", name))))) } # Initialize logger history targets hist.ls <- do.call(data.frame, c(list( iter = seq(0, r), loss = NA, mse = NA), dist = rep(NA, length(dim(X)) - 1L) )) # Approximated MLE with Nesterov Momentum kpir.ls(X, Fy, sample.mode = 1L, max.iter = max.iter, logger = logger("ls")) # Add method tags hist.ls$method <- factor("ls") # # Combine results and return # rbind(hist.base, hist.new, hist.momentum, hist.approx, hist.ls) hist.ls } sample.data3 <- function(n, p, q) { stopifnot(length(p) == length(q)) stopifnot(all(q <= p)) Deltas <- Map(function(nrow) { }, p) list(X, Fy, alphas, Deltas) } ################################################################################ ### WIP ### ################################################################################ n <- 200 # Sample Size p <- 11 # sample(1:15, 1) q <- 3 # sample(1:15, 1) k <- 7 # sample(1:15, 1) r <- 5 # sample(1:15, 1) print(c(n, p, q, k, r)) alpha.true <- alpha <- matrix(rnorm(q * r), q, r) beta.true <- beta <- matrix(rnorm(p * k), p, k) y <- rnorm(n) Fy <- do.call(cbind, Map(function(slope, offset) { sin(slope * y + offset) }, head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r), head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r) )) X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta) Delta.1 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`)) Delta.2 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`)) Delta <- kronecker(Delta.1, Delta.2) shape <- c(p, q, k, r) # Base (old) Rprof(fit.base <- kpir.base(X, Fy, shape, max.iter = 500, logger = prog(500))) # New (simple Gradient Descent) Rprof(fit.new <- kpir.new(X, Fy, shape, max.iter = 500, logger = prog(500))) # Gradient Descent with Nesterov Momentum Rprof(fit.momentum <- kpir.momentum(X, Fy, shape, max.iter = 500, logger = prog(500))) # # Residual Covariance Kronecker product assumpton version # Rprof(fit.kron <- kpir.kron(X, Fy, shape, max.iter = 500, logger = prog(500))) # Approximated MLE with Nesterov Momentum Rprof("kpir.approx.Rprof") fit.approx <- kpir.approx(X, Fy, shape, max.iter = 500, logger = prog(500)) summaryRprof("kpir.approx.Rprof") par(mfrow = c(2, 2)) matrixImage(Delta, main = expression(Delta)) matrixImage(fit.base$Delta, main = expression(hat(Delta)), sub = "base") matrixImage(fit.momentum$Delta, main = expression(hat(Delta)), sub = "momentum") matrixImage(kronecker(fit.approx$Delta.1, fit.approx$Delta.2), main = expression(hat(Delta)), sub = "approx") par(mfrow = c(2, 2)) matrixImage(Delta.1, main = expression(Delta[1])) matrixImage(fit.approx$Delta.1, main = expression(hat(Delta)[1]), sub = "approx") matrixImage(Delta.2, main = expression(Delta[2])) matrixImage(fit.approx$Delta.2, main = expression(hat(Delta)[2]), sub = "approx") par(mfrow = c(2, 2)) matrixImage(alpha.true, main = expression(alpha)) matrixImage(fit.base$alpha, main = expression(hat(alpha)), sub = "base") matrixImage(fit.momentum$alpha, main = expression(hat(alpha)), sub = "momentum") matrixImage(fit.approx$alpha, main = expression(hat(alpha)), sub = "approx") par(mfrow = c(2, 2)) matrixImage(beta.true, main = expression(beta)) matrixImage(fit.base$beta, main = expression(hat(beta)), sub = "base") matrixImage(fit.momentum$beta, main = expression(hat(beta)), sub = "momentum") matrixImage(fit.approx$beta, main = expression(hat(beta)), sub = "approx") ################################################################################ ### EEG ### ################################################################################ suppressPackageStartupMessages({ library(pROC) }) # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). acc <- function(y_true, y_pred) mean(round(y_pred) == y_true) # err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N). err <- function(y_true, y_pred) mean(round(y_pred) != y_true) # fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout. fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0]) # tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall. tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1]) # fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss. fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1]) # tnr: True negative rate. P(Yhat = - | Y = -). tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0]) # Load EEG dataset dataset <- readRDS('eeg_analysis/eeg_data.rds') eeg_cross_validation <- function(nrFolds = 10L) { # Set dimenional parameters. n <- nrow(dataset) # sample size (nr. of people) p <- 64L # nr. of predictors (count of sensorce) t <- 256L # nr. of time points (measurements) # Extract dimension names from X. nNames <- dataset$PersonID tNames <- as.character(seq(t)) pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)] # Split into X-y. X <- as.matrix(dataset[, -(1:2)]) y <- dataset$Case_Control # Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors. # (Each of the n rows in X iterate over the time bevore switching sensorce.) dim(X) <- c(n, t, p) dimnames(X) <- list(nNames, tNames, pNames) # Setup Cross-Validation result CV <- data.frame( fold = (seq_len(n) %% nrFolds) + 1L, y_true = y, y_pred = NA ) # } #' @param ppc Number of "p"redictor "p"rincipal "c"omponents. #' @param tpc Number of "t"ime "p"rincipal "c"omponents. egg_analysis_reduced <- function(methods, ppc, tpc) { # Set dimenional parameters. n <- nrow(dataset) # sample size (nr. of people) p <- 64L # nr. of predictors (count of sensorce) t <- 256L # nr. of time points (measurements) # Extract dimension names from X. nNames <- dataset$PersonID tNames <- as.character(seq(t)) pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)] # Split into X-y. X <- as.matrix(dataset[, -(1:2)]) y <- dataset$Case_Control # Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors. # (Each of the n rows in X iterate over the time bevore switching sensorce.) X <- array(X, dim = c(n, t, p), dimnames = list(nNames, tNames, pNames)) # Reorder axis to (p, t, n) = (predictors, timesteps, samples). X <- aperm(X, c(3, 2, 1)) # Compute Mean of X. X_mean <- apply(X, c(1, 2), mean) X_center <- X - as.vector(X_mean) # Compute "left" and "right" cov-matrices. Sigma_t <- matrix(apply(apply(X_center, 3, crossprod), 1, mean), t, t) Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p) # Get "left", "right" principal components. V_p <- svd(Sigma_p, ppc, 0L)$u V_t <- svd(Sigma_t, tpc, 0L)$u # Reduce dimension. X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t)) dim(X_reduced) <- c(ppc, tpc, n) # Vectorize to shape of (predictors * timesteps, samples) and transpose to # (samples, predictors * timesteps). X_vec <- t(matrix(X_reduced, ppc * tpc, n)) loo.cv <- expand.grid(method = names(methods), fold = 1:n) loo.cv$y_true <- y[loo.cv$fold] loo.cv$y_pred <- NA # Performe LOO cross-validation for each method. for (i in 1L:n) { # Print progress. cat(sprintf("\rCross-Validation (p-PC: %d, t-PC: %d): %4d/%d", ppc, tpc, i, n)) # Leave Out the i-th element. X_train <- X_vec[-i, ] X_test <- X_vec[i, ] y_train <- y[-i] # Center y. y_train <- scale(y_train, center = TRUE, scale = FALSE) # For each method. for (method.name in names(methods)) { method <- methods[[method.name]] # Compute reduction using current method under common API. sdr <- method(X_train, y_train, ppc, tpc) B <- kronecker(sdr$alpha, sdr$beta) # Fit a linear model (which ensures a common sdr direction if possible). model <- glm(y ~ x, family = binomial(link = "logit"), data = data.frame(y = y[-i], x = X_train %*% B)) # Predict out of sample and store in LOO CV data.frame. y_pred <- predict(model, data.frame(x = X_test %*% B), type = "response") loo.cv[loo.cv$method == method.name & loo.cv$fold == i, 'y_pred'] <- y_pred } } for (method.name in names(methods)) { labels <- loo.cv[loo.cv$method == method.name, 'y_true'] predictions <- loo.cv[loo.cv$method == method.name, 'y_pred'] ROC <- roc(unlist(labels), unlist(predictions), quiet = TRUE) # Combined accuracy, error, ... cat("\nMethod: ", method.name, "\n", "acc: ", acc(unlist(labels), unlist(predictions)), "\n", "err: ", err(unlist(labels), unlist(predictions)), "\n", "fpr: ", fpr(unlist(labels), unlist(predictions)), "\n", "tpr: ", tpr(unlist(labels), unlist(predictions)), "\n", "fnr: ", fnr(unlist(labels), unlist(predictions)), "\n", "tnr: ", tnr(unlist(labels), unlist(predictions)), "\n", "auc: ", ROC$auc, "\n", "auc sd: ", sqrt(var(ROC)), "\n", sep = '') } loo.cv } methods <- list( KPIR_LS = function(...) tensor_predictor(..., method = "KPIR_LS"), KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"), KPFC1 = function(...) tensor_predictor(..., method = "KPFC1"), KPFC2 = function(...) tensor_predictor(..., method = "KPFC2"), LSIR = LSIR ) # ppc, tpc # ------------ params <- list( c( 4, 3) , c( 15, 15) , c( 30, 20) ) for (param in params) { c(ppc, tpc) %<-% param sim <- egg_analysis_reduced(methods, ppc, tpc) attr(sim, 'param') <- c(ppc = ppc, tpc = tpc) saveRDS(sim, file = sprintf('eeg_analysis_reduced_%d_%d.rds', ppc, tpc)) } # plot.hist(hist, "loss", # title = bquote(paste("Optimization Objective: negative log-likelihood ", # l(hat(alpha), hat(beta)))), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(l(hat(alpha), hat(beta))) # ) # plot.stats(hist, "loss", # title = bquote(paste("Optimization Objective: negative log-likelihood ", # l(hat(alpha), hat(beta)))), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(l(hat(alpha), hat(beta))) # ) # dev.print(png, file = sprintf("sim01_loss_stat_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) + # geom_line(aes(y = dist)) + # geom_point(data = with(sub <- subset(hist, !is.na(dist)), # aggregate(sub, list(method, repetition), tail, 1) # ), aes(y = dist)) + # labs( # title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(B * B^T - hat(B) * hat(B)^T)), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_dist_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, y = dist, color = method, group = method)) + # geom_ribbon(aes(color = NULL, fill = method), alpha = 0.2, # stat = "summary", fun.min = "min", fun.max = "max", na.rm = TRUE) + # geom_ribbon(aes(color = NULL, fill = method), alpha = 0.4, # stat = "summary", fun.min = function(y) quantile(y, 0.25), # fun.max = function(y) quantile(y, 0.75), na.rm = TRUE) + # geom_line(stat = "summary", fun = "mean", na.rm = TRUE) + # labs( # title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(B * B^T - hat(B) * hat(B)^T)), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_dist_stat_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) + # geom_line(aes(y = dist.alpha)) + # geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)), # aggregate(sub, list(method, repetition), tail, 1) # ), aes(y = dist.alpha)) + # labs( # title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_dist_alpha_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) + # geom_line(aes(y = dist.beta)) + # geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)), # aggregate(sub, list(method, repetition), tail, 1) # ), aes(y = dist.beta)) + # labs( # title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_dist_beta_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) + # geom_line(aes(y = norm.alpha)) + # geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)), # aggregate(sub, list(method, repetition), tail, 1) # ), aes(y = norm.alpha)) + # labs( # title = expression(paste("Norm of ", hat(alpha))), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(hat(alpha))[F]), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_norm_alpha_%s.png", datetime), # width = 768, height = 768, res = 125) # ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) + # geom_line(aes(y = norm.beta)) + # geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)), # aggregate(sub, list(method, repetition), tail, 1) # ), aes(y = norm.beta)) + # labs( # title = expression(paste("Norm of ", hat(beta))), # subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ", # "20 repetitions, ", n == .(n), ", ", # p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))), # x = "nr. of iterations", # y = expression(abs(hat(beta))[F]), # color = "method" # ) + # theme(legend.position = "bottom") # dev.print(png, file = sprintf("sim01_norm_beta_%s.png", datetime), # width = 768, height = 768, res = 125) # local({ # par(mfrow = c(2, 3), oma = c(2, 1, 1, 1), mar = c(3.1, 2.1, 2.1, 1.1), lwd = 2) # plot(c(1, max.iter), range(c(hist.base$loss, hist.new$loss, hist.momentum$loss, hist.kron$loss), na.rm = TRUE), # type = "n", log = "x", main = "loss") # lines( hist.base$loss, col = 2) # lines( hist.new$loss, col = 3) # lines(hist.momentum$loss, col = 4) # lines( hist.kron$loss, col = 5) # yrange <- range(c(hist.base$step.size, hist.new$step.size, hist.momentum$step.size, hist.kron$step.size), # na.rm = TRUE) # plot(c(1, max.iter), yrange, # type = "n", log = "x", main = "step.size") # lines( hist.base$step.size, col = 2) # lines( hist.new$step.size, col = 3) # lines(hist.momentum$step.size, col = 4) # lines( hist.kron$step.size, col = 5) # # lines( hist.base$step.size, col = 4) # there is no step.size # plot(0, 0, type = "l", bty = "n", xaxt = "n", yaxt = "n") # legend("topleft", legend = c("Base", "GD", "GD + Momentum", "Kron + GD + Momentum"), col = 2:5, # lwd = par("lwd"), xpd = TRUE, horiz = FALSE, cex = 1.2, bty = "n", # x.intersp = 1, y.intersp = 1.5) # # xpd = TRUE makes the legend plot to the figure # plot(c(1, max.iter), range(c(hist.base$dist, hist.new$dist, hist.momentum$dist, hist.kron$dist), na.rm = TRUE), # type = "n", log = "x", main = "dist") # lines( hist.base$dist, col = 2) # lines( hist.new$dist, col = 3) # lines(hist.momentum$dist, col = 4) # lines( hist.kron$dist, col = 5) # plot(c(1, max.iter), range(c(hist.base$dist.alpha, hist.new$dist.alpha, hist.momentum$dist.alpha, hist.kron$dist.alpha), na.rm = TRUE), # type = "n", log = "x", main = "dist.alpha") # lines( hist.base$dist.alpha, col = 2) # lines( hist.new$dist.alpha, col = 3) # lines(hist.momentum$dist.alpha, col = 4) # lines( hist.kron$dist.alpha, col = 5) # plot(c(1, max.iter), range(c(hist.base$dist.beta, hist.new$dist.beta, hist.momentum$dist.beta, hist.kron$dist.beta), na.rm = TRUE), # type = "n", log = "x", main = "dist.beta") # lines( hist.base$dist.beta, col = 2) # lines( hist.new$dist.beta, col = 3) # lines(hist.momentum$dist.beta, col = 4) # lines( hist.kron$dist.beta, col = 5) # # par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE) # # plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n') # # legend('bottom', legend = c('GD', 'GD + Nesterov Momentum', 'Alternating'), col = 2:4, # # lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = 'n') # # # xpd = TRUE makes the legend plot to the figure # }) # dev.print(png, file = "loss.png", width = 768, height = 768, res = 125) # with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha, # b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), { # par(mfrow = c(2, 4)) # a2 <- sign(sum(sign(a1 * a2))) * a2 # a3 <- sign(sum(sign(a1 * a3))) * a3 # a4 <- sign(sum(sign(a1 * a4))) * a4 # b2 <- sign(sum(sign(b1 * b2))) * b2 # b3 <- sign(sum(sign(b1 * b3))) * b3 # b4 <- sign(sum(sign(b1 * b4))) * b4 # matrixImage(a1, main = expression(alpha)) # matrixImage(a2, main = expression(paste(hat(alpha)["Base"]))) # matrixImage(a3, main = expression(paste(hat(alpha)["GD"]))) # matrixImage(a4, main = expression(paste(hat(alpha)["GD+Nest"]))) # matrixImage(b1, main = expression(beta)) # matrixImage(b2, main = expression(paste(hat(beta)["Base"]))) # matrixImage(b3, main = expression(paste(hat(beta)["GD"]))) # matrixImage(b4, main = expression(paste(hat(beta)["GD+Nest"]))) # }) # dev.print(png, file = "estimates.png", width = 768, height = 768, res = 125) # with(list(d1 = Delta, d2 = fit.base$Delta, d3 = fit.new$Delta, d4 = fit.momentum$Delta), { # par(mfrow = c(2, 2)) # matrixImage(d1, main = expression(Delta)) # matrixImage(d2, main = expression(hat(Delta)["Base"])) # matrixImage(d3, main = expression(hat(Delta)["GD"])) # matrixImage(d4, main = expression(hat(Delta)["GD+Nest"])) # }) # dev.print(png, file = "Delta.png", width = 768, height = 768, res = 125) # with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha, # b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), { # par(mfrow = c(2, 2)) # matrixImage(kronecker(a1, b1), main = expression(B)) # matrixImage(kronecker(a2, b2), main = expression(hat(B)["Base"])) # matrixImage(kronecker(a3, b3), main = expression(hat(B)["GD"])) # matrixImage(kronecker(a4, b4), main = expression(hat(B)["GD+Nest"])) # }) # dev.print(png, file = "B.png", width = 768, height = 768, res = 125) # with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha, # b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), { # par(mfrow = c(3, 1), lwd = 1) # d2 <- kronecker(a1, b1) - kronecker(a2, b2) # d3 <- kronecker(a1, b1) - kronecker(a3, b3) # d4 <- kronecker(a1, b1) - kronecker(a4, b4) # xlim <- c(-1, 1) * max(abs(c(d2, d3, d4))) # breaks <- seq(xlim[1], xlim[2], len = 41) # hist(d2, main = expression(paste(base, (B - hat(B))[i])), # breaks = breaks, xlim = xlim, freq = FALSE) # lines(density(d2), col = 2) # abline(v = range(d2), lty = 2) # hist(d3, main = expression(paste(GD, (B - hat(B))[i])), # breaks = breaks, xlim = xlim, freq = FALSE) # lines(density(d3), col = 3) # abline(v = range(d3), lty = 2) # hist(d4, main = expression(paste(GD + Nest, (B - hat(B))[i])), # breaks = breaks, xlim = xlim, freq = FALSE) # lines(density(d4), col = 4) # abline(v = range(d4), lty = 2) # }) # dev.print(png, file = "hist.png", width = 768, height = 768, res = 125) # options(width = 300) # print(pr <- prof.tree::prof.tree("./Rprof.out"), limit = NULL # , pruneFun = function(x) x$percent > 0.01) # par(mfrow = c(2, 2)) # matrixImage(alpha, main = "alpha") # matrixImage(fit$alpha, main = "fit$alpha") # matrixImage(beta, main = "beta") # matrixImage(fit$beta, main = "fit$beta") # if (diff(dim(alpha) * dim(beta)) > 0) { # par(mfrow = c(2, 1)) # } else { # par(mfrow = c(1, 2)) # } # matrixImage(kronecker(alpha, beta), main = "kronecker(alpha, beta)") # matrixImage(kronecker(fit$alpha, fit$beta), main = "kronecker(fit$alpha, fit$beta)") # matrixImage(Delta, main = "Delta") # matrixImage(fit$Delta, main = "fit$Delta") # local({ # a <- (-1 * (sum(sign(fit$alpha) * sign(alpha)) < 0)) * fit$alpha / mean(fit$alpha^2) # b <- alpha / mean(alpha^2) # norm(a - b, "F") # }) # local({ # a <- (-1 * (sum(sign(fit$beta) * sign(beta)) < 0)) * fit$beta / mean(fit$beta^2) # b <- beta / mean(beta^2) # norm(a - b, "F") # }) # # Which Sequence? # x <- y <- 1 # replicate(40, x <<- (y <<- x + y) - x) # # Face-Splitting Product # n <- 100 # p <- 3 # q <- 500 # A <- matrix(rnorm(n * p), n) # B <- matrix(rnorm(n * q), n) # faceSplit <- function(A, B) { # C <- vapply(seq_len(ncol(A)), function(i) A[, i] * B, B) # dim(C) <- c(nrow(A), ncol(A) * ncol(B)) # C # } # all.equal( # tkhatriRao(A, B), # faceSplit(A, B) # ) # microbenchmark::microbenchmark( # tkhatriRao(A, B), # faceSplit(A, B) # ) # dist.kron <- function(a0, b0, a1, b1) { # sqrt(sum(a0^2) * sum(b0^2) - # 2 * sum(diag(crossprod(a0, a1))) * sum(diag(crossprod(b0, b1))) + # sum(a1^2) * sum(b1^2)) # } # alpha <- matrix(rnorm(q * r), q) # beta <- matrix(rnorm(p * k), p) # alpha.true <- matrix(rnorm(q * r), q) # beta.true <- matrix(rnorm(p * k), p) # all.equal( # dist.kron(alpha, beta, alpha.true, beta.true), # norm(kronecker(alpha, beta) - kronecker(alpha.true, beta.true), "F") # ) # A <- matrix(rnorm(p^2), p) # B <- matrix(rnorm(p^2), p) # tr <- function(A) sum(diag(A)) # tr(crossprod(A, B)) # tr(tcrossprod(B, A)) # tr(crossprod(A, A)) # tr(tcrossprod(A, A)) # sum(A^2) # alpha <- matrix(rnorm(q * r), q) # beta <- matrix(rnorm(p * k), p) # norm(kronecker(alpha, beta), "F")^2 # norm(alpha, "F")^2 * norm(beta, "F")^2 # tr(crossprod(kronecker(alpha, beta))) # tr(tcrossprod(kronecker(alpha, beta))) # tr(crossprod(kronecker(t(alpha), t(beta)))) # tr(crossprod(alpha)) * tr(crossprod(beta)) # tr(tcrossprod(alpha)) * tr(tcrossprod(beta)) # tr(crossprod(alpha)) * tr(tcrossprod(beta)) # sum(alpha^2) * sum(beta^2) # alpha <- matrix(rnorm(q * r), q) # beta <- matrix(rnorm(p * k), p) # alpha.true <- matrix(rnorm(q * r), q) # beta.true <- matrix(rnorm(p * k), p) # microbenchmark::microbenchmark( # norm(kronecker(alpha, beta), "F")^2, # norm(alpha, "F")^2 * norm(beta, "F")^2, # tr(crossprod(kronecker(alpha, beta))), # tr(tcrossprod(kronecker(alpha, beta))), # tr(crossprod(kronecker(t(alpha), t(beta)))), # tr(crossprod(alpha)) * tr(crossprod(beta)), # tr(tcrossprod(alpha)) * tr(tcrossprod(beta)), # tr(crossprod(alpha)) * tr(tcrossprod(beta)), # sum(alpha^2) * sum(beta^2), # setup = { # p <- sample(1:10, 1) # q <- sample(1:10, 1) # k <- sample(1:10, 1) # r <- sample(1:10, 1) # assign("alpha", matrix(rnorm(q * r), q), .GlobalEnv) # assign("beta", matrix(rnorm(p * k), p), .GlobalEnv) # assign("alpha.true", matrix(rnorm(q * r), q), .GlobalEnv) # assign("beta.true", matrix(rnorm(p * k), p), .GlobalEnv) # } # ) # p <- sample(1:15, 1) # 11 # q <- sample(1:15, 1) # 3 # k <- sample(1:15, 1) # 7 # r <- sample(1:15, 1) # 5 # A <- matrix(rnorm(q * r), q) # B <- matrix(rnorm(p * k), p) # a <- matrix(rnorm(q * r), q) # b <- matrix(rnorm(p * k), p) # all.equal( # kronecker(A + a, B + b), # kronecker(A, B) + kronecker(A, b) + kronecker(a, B) + kronecker(a, b) # ) # p <- 200L # n <- 100L # R <- matrix(rnorm(n * p), n) # A <- matrix(rnorm(p^2), p) # Base Matrix # B <- A + 0.01 * matrix(rnorm(p^2), p) # Distortion / Update of A # A.inv <- solve(A) # microbenchmark::microbenchmark( # solve = R %*% solve(B), # neumann.raw = R %*% (A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv), # neumann.fun = { # AD <- A.inv %*% (A - B) # res <- A.inv + AD %*% A.inv # res <- A.inv + AD %*% res # R %*% res # } # ) # all.equal( # A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv, # { # DA <- (A - B) %*% A.inv # res <- A.inv + A.inv %*% DA # res <- A.inv + res %*% DA # res # } # ) # all.equal( # A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv, # { # AD <- A.inv %*% (A - B) # res <- A.inv + AD %*% A.inv # res <- A.inv + AD %*% res # res # } # ) # ##### # sym <- function(A) A + t(A) # n <- 101 # p <- 7 # q <- 11 # r <- 3 # k <- 5 # R <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q)) # F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r)) # alpha <- array(rnorm(q * r), dim = c(q = q, r = r)) # beta <- array(rnorm(p * k), dim = c(p = p, k = k)) # Delta.1 <- sym(matrix(rnorm(q * q), q, q)) # dim(Delta.1) <- c(q = q, q = q) # Delta.2 <- sym(matrix(rnorm(p * p), p, p)) # dim(Delta.2) <- c(p = p, p = p) # Delta <- kronecker(Delta.1, Delta.2) # grad.alpha.1 <- local({ # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2) # dim(.R) <- c(q, p, n) # .F <- sapply(seq_len(n), function(i) beta %*% F[i, , ]) # dim(.F) <- c(p, r, n) # .C <- sapply(seq_len(n), function(i) .R[i, , ] %*% .F[i, , ]) # dim(.C) <- c(n, q, r) # colSums(.C) # }) # grad.alpha.2 <- local({ # # Delta.1^-1 R' Delta.2^-1 # .R <- aperm(R, c(2, 1, 3)) # dim(.R) <- c(q, n * p) # .R <- solve(Delta.1) %*% .R # dim(.R) <- c(q, n, p) # .R <- aperm(.R, c(3, 2, 1)) # dim(.R) <- c(p, n * q) # .R <- solve(Delta.2) %*% .R # dim(.R) <- c(p, n, q) # .R <- aperm(.R, c(2, 3, 1)) # n x q x p # # beta F # .F <- aperm(F, c(2, 1, 3)) # dim(.F) <- c(k, n * r) # .F <- beta %*% .F # dim(.F) <- c(p, n, r) # .F <- aperm(.F, c(2, 1, 3)) # n x p x r # # (Delta.1^-1 R' Delta.2^-1) (beta F) # .R <- aperm(.R, c(1, 3, 2)) # dim(.R) <- c(n * p, q) # dim(.F) <- c(n * p, r) # crossprod(.R, .F) # }) # all.equal( # grad.alpha.1, # grad.alpha.2 # ) # all.equal({ # .R <- matrix(0, q, p) # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # for (i in 1:n) { # .R <- .R + Di.1 %*% t(R[i, , ]) %*% Di.2 # } # .R # }, { # .R <- R # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2) # dim(.R) <- c(q, p, n) # .R <- aperm(.R, c(3, 1, 2)) # colSums(.R) # }) # all.equal({ # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2) # dim(.R) <- c(q, p, n) # .R <- aperm(.R, c(3, 1, 2)) # .R # }, { # .R <- R # dim(.R) <- c(n * p, q) # .R <- .R %*% solve(Delta.1) # dim(.R) <- c(n, p, q) # .R <- aperm(.R, c(1, 3, 2)) # dim(.R) <- c(n * q, p) # .R <- .R %*% solve(Delta.2) # dim(.R) <- c(n, q, p) # .R # }) # all.equal({ # .F <- matrix(0, p, r) # for (i in 1:n) { # .F <- .F + beta %*% F[i, , ] # } # .F # }, { # .F <- apply(F, 1, function(Fi) beta %*% Fi) # dim(.F) <- c(p, r, n) # .F <- aperm(.F, c(3, 1, 2)) # colSums(.F) # }) # all.equal({ # .F <- apply(F, 1, function(Fi) beta %*% Fi) # dim(.F) <- c(p, r, n) # .F <- aperm(.F, c(3, 1, 2)) # colSums(.F) # }, { # .F <- aperm(F, c(1, 3, 2)) # dim(.F) <- c(n * r, k) # .F <- tcrossprod(.F, beta) # dim(.F) <- c(n, r, p) # t(colSums(.F)) # }) # all.equal({ # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # grad.alpha <- 0 # grad.beta <- 0 # dim(R) <- c(n, p, q) # dim(F) <- c(n, k, r) # for (i in 1:n) { # grad.alpha <- grad.alpha + ( # Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ] # ) # grad.beta <- grad.beta + ( # Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ]) # ) # } # g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta) # }, { # # Note that the order is important since for grad.beta the residuals do NOT # # need to be transposes. # # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n # dim(R) <- c(n * p, q) # .R <- R %*% solve(Delta.1) # dim(.R) <- c(n, p, q) # .R <- aperm(.R, c(1, 3, 2)) # dim(.R) <- c(n * q, p) # .R <- .R %*% solve(Delta.2) # dim(.R) <- c(n, q, p) # # gradient with respect to beta # # Responces times beta (alpha f_i') # dim(F) <- c(n * k, r) # .F <- tcrossprod(F, alpha) # dim(.F) <- c(n, k, q) # .F <- aperm(.F, c(1, 3, 2)) # # Matricize # dim(.R) <- c(n * q, p) # dim(.F) <- c(n * q, k) # grad.beta <- crossprod(.R, .F) # # gradient with respect to beta # # Responces times alpha # dim(F) <- c(n, k, r) # .F <- aperm(F, c(1, 3, 2)) # dim(.F) <- c(n * r, k) # .F <- tcrossprod(.F, beta) # dim(.F) <- c(n, r, p) # .F <- aperm(.F, c(1, 3, 2)) # # Transpose stand. residuals # dim(.R) <- c(n, q, p) # .R <- aperm(.R, c(1, 3, 2)) # # Matricize # dim(.R) <- c(n * p, q) # dim(.F) <- c(n * p, r) # grad.alpha <- crossprod(.R, .F) # g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta) # }) # microbenchmark::microbenchmark(R1 = { # Di.1 <- solve(Delta.1) # Di.2 <- solve(Delta.2) # grad.alpha <- 0 # matrix(0, q, r) # grad.beta <- 0 # matrix(0, p, k) # dim(R) <- c(n, p, q) # dim(F) <- c(n, k, r) # for (i in 1:n) { # grad.alpha <- grad.alpha + ( # Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ] # ) # grad.beta <- grad.beta + ( # Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ]) # ) # } # g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta) # }, R3 = { # # Note that the order is important since for grad.beta the residuals do NOT # # need to be transposes. # # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n # dim(R) <- c(n * p, q) # .R <- R %*% solve(Delta.1) # dim(.R) <- c(n, p, q) # .R <- aperm(.R, c(1, 3, 2)) # dim(.R) <- c(n * q, p) # .R <- .R %*% solve(Delta.2) # dim(.R) <- c(n, q, p) # # gradient with respect to beta # # Responces times beta (alpha f_i') # dim(F) <- c(n * k, r) # .F <- tcrossprod(F, alpha) # dim(.F) <- c(n, k, q) # .F <- aperm(.F, c(1, 3, 2)) # # Matricize # dim(.R) <- c(n * q, p) # dim(.F) <- c(n * q, k) # grad.beta <- crossprod(.R, .F) # # gradient with respect to beta # # Responces times alpha # dim(F) <- c(n, k, r) # .F <- aperm(F, c(1, 3, 2)) # dim(.F) <- c(n * r, k) # .F <- tcrossprod(.F, beta) # dim(.F) <- c(n, r, p) # .F <- aperm(.F, c(1, 3, 2)) # # Transpose stand. residuals # dim(.R) <- c(n, q, p) # .R <- aperm(.R, c(1, 3, 2)) # # Matricize # dim(.R) <- c(n * p, q) # dim(.F) <- c(n * p, r) # grad.alpha <- crossprod(.R, .F) # g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta) # }) # n <- 100 # p <- 7 # q <- 11 # k <- 3 # r <- 5 # X <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q)) # F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r)) # alpha <- array(rnorm(q * r), dim = c(q = q, r = r)) # beta <- array(rnorm(p * k), dim = c(p = p, k = k)) # all.equal({ # R <- array(NA, dim = c(n, p, q)) # for (i in 1:n) { # R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha) # } # R # }, { # X - (F %x_3% alpha %x_2% beta) # }, check.attributes = FALSE) # microbenchmark::microbenchmark(base = { # R <- array(NA, dim = c(n, p, q)) # for (i in 1:n) { # R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha) # } # R # }, ttm = { # X - (F %x_3% alpha %x_2% beta) # }) # n <- 100; p <- 7; q <- 11; k <- 3; r <- 5 # sym <- function(x) t(x) + x # Di.1 <- sym(matrix(rnorm(q^2), q, q)) # Di.2 <- sym(matrix(rnorm(p^2), p, p)) # R <- array(rnorm(n, p, q), dim = c(n, p, q)) # F <- array(rnorm(n, k, r), dim = c(n, k, r)) # alpha <- matrix(rnorm(q * r), q, r) # beta <- matrix(rnorm(p * k), p, k) # all.equal({ # .R <- array(NA, dim = c(n, p, q)) # for (i in 1:n) { # .R[i, , ] <- Di.2 %*% R[i, , ] %*% Di.1 # } # .R # }, { # R %x_3% Di.1 %x_2% Di.2 # }) # all.equal({ # .Rt <- array(NA, dim = c(n, q, p)) # for (i in 1:n) { # .Rt[i, , ] <- Di.1 %*% t(R[i, , ]) %*% Di.2 # } # .Rt # }, { # .Rt <- R %x_3% Di.1 %x_2% Di.2 # aperm(.Rt, c(1, 3, 2)) # }) # all.equal({ # .Fa <- array(NA, dim = c(n, q, k)) # for (i in 1:n) { # .Fa[i, , ] <- alpha %*% t(F[i, , ]) # } # .Fa # }, { # aperm(F %x_3% alpha, c(1, 3, 2)) # }) # all.equal({ # .Fb <- array(NA, dim = c(n, p, r)) # for (i in 1:n) { # .Fb[i, , ] <- beta %*% F[i, , ] # } # .Fb # }, { # F %x_2% beta # }) # all.equal({ # .F <- array(NA, dim = c(n, p, q)) # for (i in 1:n) { # .F[i, , ] <- beta %*% F[i, , ] %*% t(alpha) # } # .F # }, { # F %x_3% alpha %x_2% beta # }) # all.equal({ # .Ga <- 0 # for (i in 1:n) { # .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ] # } # .Ga # }, { # .R <- R %x_3% Di.1 %x_2% Di.2 # dim(.R) <- c(n * p, q) # .Fb <- F %x_2% beta # dim(.Fb) <- c(n * p, r) # crossprod(.R, .Fb) # }) # all.equal({ # .Gb <- 0 # for (i in 1:n) { # .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ]) # } # .Gb # }, { # .Rt <- aperm(R %x_3% Di.1 %x_2% Di.2, c(1, 3, 2)) # dim(.Rt) <- c(n * q, p) # .Fa <- aperm(F %x_3% alpha, c(1, 3, 2)) # dim(.Fa) <- c(n * q, k) # crossprod(.Rt, .Fa) # }) # all.equal({ # .Ga <- 0 # for (i in 1:n) { # .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ] # } # .Gb <- 0 # for (i in 1:n) { # .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ]) # } # c(.Ga, .Gb) # }, { # .R <- R %x_3% Di.1 %x_2% Di.2 # .Fb <- F %x_2% beta # .Fa <- aperm(F %x_3% alpha, c(1, 3, 2)) # dim(.R) <- c(n * p, q) # dim(.Fb) <- c(n * p, r) # .Ga <- crossprod(.R, .Fb) # dim(.R) <- c(n, p, q) # .R <- aperm(.R, c(1, 3, 2)) # dim(.R) <- c(n * q, p) # dim(.Fa) <- c(n * q, k) # .Gb <- crossprod(.R, .Fa) # c(.Ga, .Gb) # }) # all.equal({ # .Ga <- 0 # for (i in 1:n) { # .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ] # } # .Gb <- 0 # for (i in 1:n) { # .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ]) # } # c(.Ga, .Gb) # }, { # .R <- R %x_3% Di.1 %x_2% Di.2 # .Ga <- tcrossprod(mat(.R, 3), mat(F %x_2% beta, 3)) # .Gb <- tcrossprod(mat(.R, 2), mat(F %x_3% alpha, 2)) # c(.Ga, .Gb) # }) # n <- 101; p <- 5; q <- 7 # sym <- function(x) crossprod(x) # D1 <- sym(matrix(rnorm(q^2), q, q)) # D2 <- sym(matrix(rnorm(p^2), p, p)) # X <- tensorPredictors:::rmvnorm(n, sigma = kronecker(D1, D2)) # dim(X) <- c(n, p, q) # D1.hat <- tcrossprod(mat(X, 3)) / n # D2.hat <- tcrossprod(mat(X, 2)) / n # local({ # par(mfrow = c(2, 2)) # matrixImage(D1, main = "D1") # matrixImage(D1.hat, main = "D1.hat") # matrixImage(D2, main = "D2") # matrixImage(D2.hat, main = "D2.hat") # }) # sum(X^2) / n # sum(diag(D1.hat)) # sum(diag(D2.hat)) # sum(diag(kronecker(D1, D2))) # sum(diag(kronecker(D1.hat / sqrt(sum(diag(D1.hat))), # D2.hat / sqrt(sum(diag(D1.hat)))))) # all.equal({ # mat(X, 1) %*% kronecker(D1.hat, D2.hat) # }, { # mat(X %x_3% D1.hat %x_2% D2.hat, 1) # }) # all.equal({ # C <- mat(X, 1) %*% kronecker(D1.hat, D2.hat) * (n / sum(X^2)) # dim(C) <- c(n, p, q) # C # }, { # (X %x_3% D1.hat %x_2% D2.hat) / sum(diag(D1.hat)) # }) # D.1 <- tcrossprod(mat(X, 3)) # D.2 <- tcrossprod(mat(X, 2)) # tr <- sum(diag(D.1)) # D.1 <- D.1 / sqrt(n * tr) # D.2 <- D.2 / sqrt(n * tr) # sum(diag(kronecker(D1, D2))) # sum(diag(kronecker(D.1, D.2))) # det(kronecker(D1, D2)) # det(kronecker(D.1, D.2)) # det(D.1)^p * det(D.2)^q # log(det(kronecker(D.1, D.2))) # p * log(det(D.1)) + q * log(det(D.2)) d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point() d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2) # Orientation follows the discrete axis ggplot(mtcars, aes(mpg, factor(cyl))) + geom_point() + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2) # You can supply individual functions to summarise the value at # each x: d + stat_summary(fun = "median", colour = "red", size = 2, geom = "point") d + stat_summary(fun = "mean", colour = "red", size = 2, geom = "point") d + aes(colour = factor(vs)) + stat_summary(fun = mean, geom="line") d + stat_summary(fun = mean, fun.min = min, fun.max = max, colour = "red") d <- ggplot(diamonds, aes(cut)) d + geom_bar() d + stat_summary(aes(y = price), fun = "mean", geom = "bar") # Orientation of stat_summary_bin is ambiguous and must be specified directly ggplot(diamonds, aes(carat, price)) + stat_summary_bin(fun = "mean", geom = "bar", orientation = 'y') # Don't use ylim to zoom into a summary plot - this throws the # data away p <- ggplot(mtcars, aes(cyl, mpg)) + stat_summary(fun = "mean", geom = "point") p p + ylim(15, 30) # Instead use coord_cartesian p + coord_cartesian(ylim = c(15, 30)) # A set of useful summary functions is provided from the Hmisc package: stat_sum_df <- function(fun, geom="crossbar", ...) { stat_summary(fun.data = fun, colour = "red", geom = geom, width = 0.2, ...) } d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point() # The crossbar geom needs grouping to be specified when used with # a continuous x axis. d + stat_sum_df("mean_cl_boot", mapping = aes(group = cyl)) d + stat_sum_df("mean_sdl", mapping = aes(group = cyl)) d + stat_sum_df("mean_sdl", fun.args = list(mult = 1), mapping = aes(group = cyl)) d + stat_sum_df("median_hilow", mapping = aes(group = cyl)) # An example with highly skewed distributions: if (require("ggplot2movies")) { set.seed(596) mov <- movies[sample(nrow(movies), 1000), ] m2 <- ggplot(mov, aes(x = factor(round(rating)), y = votes)) + geom_point() m2 <- m2 + stat_summary( fun.data = "mean_cl_boot", geom = "crossbar", colour = "red", width = 0.3 ) + xlab("rating") m2 # Notice how the overplotting skews off visual perception of the mean # supplementing the raw data with summary statistics is _very_ important # Next, we'll look at votes on a log scale. # Transforming the scale means the data are transformed # first, after which statistics are computed: m2 + scale_y_log10() # Transforming the coordinate system occurs after the # statistic has been computed. This means we're calculating the summary on the raw data # and stretching the geoms onto the log scale. Compare the widths of the # standard errors. m2 + coord_trans(y="log10") }