#' HOPIR subroutine for the LS solution given preprocessed `X`, `Fy` and initial #' values `alphas`. #' #' @keywords internal HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) { # Get axis indices (observation modes) modes <- seq_along(dim(X))[-sample.axis] n <- dim(X)[sample.axis] # observation count (scalar) p <- dim(X)[-sample.axis] # predictor dimensions (vector) # Least Squares Deltas Estimates given alphas fun.Deltas <- function(alphas) { # Residuals R <- X - mlm(Fy, alphas, modes = modes) # `Delta` moment estimates Deltas <- Map(mcrossprod, list(R), mode = modes) Map(`*`, p / (n * prod(p)), Deltas) } # wrap logger, provide unified logger interface for all HOPIR subroutines if (is.function(logger)) { callback <- function(iter, alphas) { logger("ls", iter, alphas, fun.Deltas(alphas)) } } else { callback <- NULL } if (algorithm == "icu") { # Call (proper parameterized) Iterative Cyclic Updating Optimizer alphas <- ICU( # Optimization Objective (MSE) fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2), # Updating rule (optimal solution for `alpha_j` given the rest) fun.update = function(alphas, j) { Z <- mlm(Fy, alphas[-j], modes = modes[-j]) # least squares solution for `alpha_j | alpha_i, i != j` alphas[[j]] <- t(solve( mcrossprod(Z, Z, modes[j]), mcrossprod(Z, X, modes[j]) )) }, # Initial parameter estimates params = alphas, ..., callback = callback) } else { # Call (proper parameterized) Nesterov Accelerated Gradient Descent alphas <- NAGD( # Setup objective function (MSE) fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2), # Gradient of the objective with respect to the parameter matirces `alpha_j` fun.grad = function(alphas) { # Residuals R <- X - mlm(Fy, alphas, modes) # Gradients for each alpha Map(function(j) { # MLM of Fy with alpha_k, k in [r] \ j Fa <- mlm(Fy, alphas[-j], modes[-j]) # Gradient of the loss with respect to alpha_j (-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j]) }, seq_along(modes)) }, params = alphas, # Linear Combination of parameters, basically: a * lhs + b * rhs for each # combination of elements in the LHS and RHS lists with scalars a, b. fun.lincomb = function(a, LHS, b, RHS) { Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS) }, # squared norm of parameters fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2), ..., callback = callback) } # Final estimate includes Deltas list(alphas = alphas, Deltas = fun.Deltas(alphas)) } #' HPOIR subroutine for the MLE estimation given preprocessed data and initial #' alpha, Delta parameters #' #' @keywords internal HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) { # Get axis indices (observation modes) modes <- seq_along(dim(X))[-sample.axis] n <- dim(X)[sample.axis] # observation count (scalar) p <- dim(X)[-sample.axis] # predictor dimensions (vector) if (algorithm == "icu") { # Call (proper parameterized) Iterative Cyclic Updating Optimizer params <- ICU( # Optimization Objective (negative log-likelihood) fun.loss = function(params) { # residuals R <- X - mlm(Fy, params$alphas, modes) # negative log-likelihood (without additive constant term) 0.5 * ( n * prod(p) * sum(log(unlist(Map(det, params$Deltas))) / p) + sum(mlm(R, Map(solve, params$Deltas), modes) * R) ) }, # Updating rule, optimal solution for `alpha_j` or `Delta_j` given # all other parameters fun.update = function(params, index) { # residuals R <- X - mlm(Fy, params$alphas, modes) # mode (axis) index j <- index[2] if (index[1] == 1) { # compute subterms Delta.invs <- Map(solve, params$Deltas) Delta.inv.alphas <- Map(`%*%`, Delta.invs, alphas) XxDi <- mlm(X, Delta.invs[-j], modes[-j]) Fxa <- mlm(Fy, alphas[-j], modes[-j]) FxDia <- mlm(Fy, Delta.inv.alphas[-j], modes[-j]) # alpha update mcrossprod(XxDi, Fxa, modes[j]) %*% solve(mcrossprod(FxDia, Fxa, modes[j])) } else { # index[1] == 2 # Delta update (p[j] / (n * prod(p))) * mcrossprod( mlm(R, Map(solve, params$Deltas[-j]), modes[-j]), R, modes[j]) } }, # collection of initial alpha and Delta parameters params = list(alphas = alphas, Deltas = Deltas), # parameter "path"-indices, first index {1, 2} toggles between alphas # and Deltas, second index is the mode. # Example: `list(list("a1", "a2", "a3"), list("D1", "D2", "D3"))[[c(2, 1)]] == "D1"` indices = c(Map(c, 1L, seq_along(modes)), Map(c, 2L, seq_along(modes))), ..., callback = if (is.function(logger)) { function(iter, params) { logger("mle", iter, params$alphas, params$Deltas) } } else { NULL }) } else { # Call (proper parameterized) Nesterov Accelerated Gradient Descent # Note that only the `alphas` are subject of Gradient Descent and the # `Deltas` are additional parameters updated given the cueent `alphas`. # Meaning that the gradient is the gradient of the loss with respect to # the `alphas` only. params <- NAGD( # Setup objective function (negative log-likelihood) fun.loss = function(alphas, Deltas) { # residuals R <- X - mlm(Fy, alphas, modes) # negative log-likelihood (without additive constant term) 0.5 * ( n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) + sum(mlm(R, Map(solve, Deltas), modes) * R) ) }, # Gradient of the objective with respect to the parameter matirces # `alphas` only, only the first argument, the second are `more.params`. fun.grad = function(alphas, Deltas) { # Residuals R <- X - mlm(Fy, alphas, modes) # Gradients for each alpha Map(function(j) { # MLM of Fy with alpha_k, k in [r] \ j Fa <- mlm(Fy, alphas[-j], modes[-j]) # Gradient of the loss with respect to alpha_j (-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j]) }, seq_along(modes)) }, # Initial parameters (subject to Gradient Descent) params = alphas, # Initial additional parameters (subject to updating given `params`) more.params = Deltas, # Update `Deltas` given `alphas` fun.more.params = function(alphas, old.Deltas) { # Residuals R <- X - mlm(Fy, alphas, modes) # Solve cross dependent System of Delta equations using the ICU # algorithm (with a small number of iterations, should be enough) ICU( fun.loss = function(Deltas) { # negative log-likelihood (without additive constant term) 0.5 * ( n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) + sum(mlm(R, Map(solve, Deltas), modes) * R) ) }, fun.update = function(Deltas, j) { # Delta update (p[j] / (n * prod(p))) * mcrossprod( mlm(R, Map(solve, Deltas[-j]), modes[-j]), R, modes[j]) }, params = old.Deltas, max.iter = 5L) }, # Linear Combination of parameters, basically: a * lhs + b * rhs for each # combination of elements in the LHS and RHS lists with scalars a, b. fun.lincomb = function(a, LHS, b, RHS) { Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS) }, # squared norm of parameters fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2), ..., callback = if (is.function(logger)) { function(iter, alphas, Deltas) { logger("mle", iter, alphas, Deltas) } } else { NULL }) # Remap parameter names names(params) <- c("alphas", "Deltas") } params } #' Higher Order Parametric Inverse Regression #' #' @export HOPIR <- function(X, Fy, sample.axis, method = c("ls", "mle"), algorithm = c("icu", "nagd"), ..., center = TRUE, logger = NULL ) { # Predair and check input parameters method <- match.arg(method) algorithm <- match.arg(algorithm) # ensure response is tensor valued if (!is.array(Fy)) { # scalar response case (add new axis of size 1) dim(Fy) <- ifelse(seq_along(dim(X)) == sample.axis, dim(X)[sample.axis], 1L) } # Check dimensions and matching of axis (tensor order) stopifnot(exprs = { length(dim(X)) == length(dim(Fy)) dim(X)[sample.axis] == dim(Fy)[sample.axis] }) # warn about occurence of an axis without reduction if (any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])) { warning("Degenerate case 'any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])'") } # Set the default logger if (is.logical(logger) && logger) { start <- Sys.time() logger <- function(method, iter, ...) { if (iter) { cat(sprintf("%4d - %s - Elapsed: %s\n", iter, method, format(Sys.time() - start))) } } } # Get axis indices (observation modes) modes <- seq_along(dim(X))[-sample.axis] n <- dim(X)[sample.axis] # observation count (scalar) p <- dim(X)[-sample.axis] # predictor dimensions (vector) # center data (predictors and responses) if (center) { # Means for X and Fy (a.k.a. sum elements over the sample axis) meanX <- apply(X, modes, mean, simplify = TRUE) meanFy <- apply(Fy, modes, mean, simplify = TRUE) # Center both X and Fy X <- sweep(X, modes, meanX) Fy <- sweep(Fy, modes, meanFy) } else { meanX <- meanFy <- NA } ### Step 0: Initial parameter estimates HOPCA alphas <- Map(function(mode, ncol) { La.svd(mcrossprod(X, mode = mode), ncol)$u }, modes, dim(Fy)[modes]) ### Step 1: LS estimate ls <- HOPIR.ls(X, Fy, alphas, sample.axis, algorithm, ..., logger = logger) ### Step 2: MLE estimate if (method == "mle") { mle <- HOPIR.mle(X, Fy, ls$alphas, ls$Deltas, sample.axis, algorithm, ..., logger = logger) # add means and LS estimates as attribute structure( c(mle, list(meanX = meanX, meanFy = meanFy)), ls = ls ) } else { # add means to LS estimates c(ls, list(meanX = meanX, meanFy = meanFy)) } }