tensor_predictors/tensorPredictors/R/HOPIR.R

294 lines
12 KiB
R

#' HOPIR subroutine for the LS solution given preprocessed `X`, `Fy` and initial
#' values `alphas`.
#'
#' @keywords internal
HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
# Get axis indices (observation modes)
modes <- seq_along(dim(X))[-sample.axis]
n <- dim(X)[sample.axis] # observation count (scalar)
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
# Least Squares Deltas Estimates given alphas
fun.Deltas <- function(alphas) {
# Residuals
R <- X - mlm(Fy, alphas, modes = modes)
# `Delta` moment estimates
Deltas <- Map(mcrossprod, list(R), mode = modes)
Map(`*`, p / (n * prod(p)), Deltas)
}
# wrap logger, provide unified logger interface for all HOPIR subroutines
if (is.function(logger)) {
callback <- function(iter, alphas) {
logger("ls", iter, alphas, fun.Deltas(alphas))
}
} else {
callback <- NULL
}
if (algorithm == "icu") {
# Call (proper parameterized) Iterative Cyclic Updating Optimizer
alphas <- ICU(
# Optimization Objective (MSE)
fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2),
# Updating rule (optimal solution for `alpha_j` given the rest)
fun.update = function(alphas, j) {
Z <- mlm(Fy, alphas[-j], modes = modes[-j])
# least squares solution for `alpha_j | alpha_i, i != j`
alphas[[j]] <- t(solve(
mcrossprod(Z, Z, modes[j]), mcrossprod(Z, X, modes[j])
))
},
# Initial parameter estimates
params = alphas,
...,
callback = callback)
} else {
# Call (proper parameterized) Nesterov Accelerated Gradient Descent
alphas <- NAGD(
# Setup objective function (MSE)
fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2),
# Gradient of the objective with respect to the parameter matirces `alpha_j`
fun.grad = function(alphas) {
# Residuals
R <- X - mlm(Fy, alphas, modes)
# Gradients for each alpha
Map(function(j) {
# MLM of Fy with alpha_k, k in [r] \ j
Fa <- mlm(Fy, alphas[-j], modes[-j])
# Gradient of the loss with respect to alpha_j
(-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j])
}, seq_along(modes))
},
params = alphas,
# Linear Combination of parameters, basically: a * lhs + b * rhs for each
# combination of elements in the LHS and RHS lists with scalars a, b.
fun.lincomb = function(a, LHS, b, RHS) {
Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS)
},
# squared norm of parameters
fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2),
...,
callback = callback)
}
# Final estimate includes Deltas
list(alphas = alphas, Deltas = fun.Deltas(alphas))
}
#' HPOIR subroutine for the MLE estimation given proprocessed data and initial
#' alphas, Deltas paramters
#'
#' @keywords internal
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {
# Get axis indices (observation modes)
modes <- seq_along(dim(X))[-sample.axis]
n <- dim(X)[sample.axis] # observation count (scalar)
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
if (algorithm == "icu") {
# Call (proper parameterized) Iterative Cyclic Updating Optimizer
params <- ICU(
# Optimization Objective (negative log-likelihood)
fun.loss = function(params) {
# residuals
R <- X - mlm(Fy, params$alphas, modes)
# negative log-likelihood (without additive constant term)
0.5 * (
n * prod(p) * sum(log(unlist(Map(det, params$Deltas))) / p) +
sum(mlm(R, Map(solve, params$Deltas), modes) * R)
)
},
# Updating rule, optimal solution for `alpha_j` or `Delta_j` given
# all other parameters
fun.update = function(params, index) {
# residuals
R <- X - mlm(Fy, params$alphas, modes)
# mode (axis) index
j <- index[2]
if (index[1] == 1) {
# compute subterms
Delta.invs <- Map(solve, params$Deltas)
Delta.inv.alphas <- Map(`%*%`, Delta.invs, alphas)
XxDi <- mlm(X, Delta.invs[-j], modes[-j])
Fxa <- mlm(Fy, alphas[-j], modes[-j])
FxDia <- mlm(Fy, Delta.inv.alphas[-j], modes[-j])
# alpha update
mcrossprod(XxDi, Fxa, modes[j]) %*%
solve(mcrossprod(FxDia, Fxa, modes[j]))
} else { # index[1] == 2
# Delta update
(p[j] / (n * prod(p))) * mcrossprod(
mlm(R, Map(solve, params$Deltas[-j]), modes[-j]), R, modes[j])
}
},
# collection of initial alpha and Delta parameters
params = list(alphas = alphas, Deltas = Deltas),
# parameter "path"-indices, first index {1, 2} toggles between alphas
# and Deltas, second index is the mode.
# Example: `list(list("a1", "a2", "a3"), list("D1", "D2", "D3"))[[c(2, 1)]] == "D1"`
indices = c(Map(c, 1L, seq_along(modes)), Map(c, 2L, seq_along(modes))),
...,
callback = if (is.function(logger)) {
function(iter, params) {
logger("mle", iter, params$alphas, params$Deltas)
}
} else {
NULL
})
} else {
# Call (proper parameterized) Nesterov Accelerated Gradient Descent
# Note that only the `alphas` are subject of Gradient Descent and the
# `Deltas` are additional parameters updated given the cueent `alphas`.
# Meaning that the gradient is the gradient of the loss with respect to
# the `alphas` only.
params <- NAGD(
# Setup objective function (negative log-likelihood)
fun.loss = function(alphas, Deltas) {
# residuals
R <- X - mlm(Fy, alphas, modes)
# negative log-likelihood (without additive constant term)
0.5 * (
n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) +
sum(mlm(R, Map(solve, Deltas), modes) * R)
)
},
# Gradient of the objective with respect to the parameter matirces
# `alphas` only, only the first argument, the second are `more.params`.
fun.grad = function(alphas, Deltas) {
# Residuals
R <- X - mlm(Fy, alphas, modes)
# Gradients for each alpha
Map(function(j) {
# MLM of Fy with alpha_k, k in [r] \ j
Fa <- mlm(Fy, alphas[-j], modes[-j])
# Gradient of the loss with respect to alpha_j
(-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j])
}, seq_along(modes))
},
# Initial parameters (subject to Gradient Descent)
params = alphas,
# Initial additional parameters (subject to updating given `params`)
more.params = Deltas,
# Update `Deltas` given `alphas`
fun.more.params = function(alphas, old.Deltas) {
# Residuals
R <- X - mlm(Fy, alphas, modes)
# Solve cross dependent System of Delta equations using the ICU
# algorithm (with a small number of iterations, should be enough)
ICU(
fun.loss = function(Deltas) {
# negative log-likelihood (without additive constant term)
0.5 * (
n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) +
sum(mlm(R, Map(solve, Deltas), modes) * R)
)
},
fun.update = function(Deltas, j) {
# Delta update
(p[j] / (n * prod(p))) * mcrossprod(
mlm(R, Map(solve, Deltas[-j]), modes[-j]), R, modes[j])
},
params = old.Deltas,
max.iter = 5L)
},
# Linear Combination of parameters, basically: a * lhs + b * rhs for each
# combination of elements in the LHS and RHS lists with scalars a, b.
fun.lincomb = function(a, LHS, b, RHS) {
Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS)
},
# squared norm of parameters
fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2),
...,
callback = if (is.function(logger)) {
function(iter, alphas, Deltas) {
logger("mle", iter, alphas, Deltas)
}
} else {
NULL
})
# Remap parameter names
names(params) <- c("alphas", "Deltas")
}
params
}
#' Higher Order Parametric Inverse Regression
#'
#' @export
HOPIR <- function(X, Fy, sample.axis, method = c("ls", "mle"),
algorithm = c("icu", "nagd"), ..., center = TRUE, logger = NULL
) {
# Predair and check input parameters
method <- match.arg(method)
algorithm <- match.arg(algorithm)
# ensure response is tensor valued
if (!is.array(Fy)) {
# scalar response case (add new axis of size 1)
dim(Fy) <- ifelse(seq_along(dim(X)) == sample.axis, dim(X)[sample.axis], 1L)
}
# Check dimensions and matching of axis (tensor order)
stopifnot(exprs = {
length(dim(X)) == length(dim(Fy))
dim(X)[sample.axis] == dim(Fy)[sample.axis]
})
# warn about occurence of an axis without reduction
if (any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])) {
warning("Degenerate case 'any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])'")
}
# Set the default logger
if (is.logical(logger) && logger) {
start <- Sys.time()
logger <- function(method, iter, ...) {
if (iter) {
cat(sprintf("%4d - %s - Elapsed: %s\n",
iter, method, format(Sys.time() - start)))
}
}
}
# Get axis indices (observation modes)
modes <- seq_along(dim(X))[-sample.axis]
n <- dim(X)[sample.axis] # observation count (scalar)
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
# center data (predictors and responses)
if (center) {
# Means for X and Fy (a.k.a. sum elements over the sample axis)
meanX <- apply(X, modes, mean, simplify = TRUE)
meanFy <- apply(Fy, modes, mean, simplify = TRUE)
# Center both X and Fy
X <- sweep(X, modes, meanX)
Fy <- sweep(Fy, modes, meanFy)
} else {
meanX <- meanFy <- NA
}
### Step 0: Initial parameter estimates HOPCA
alphas <- Map(function(mode, ncol) {
La.svd(mcrossprod(X, mode = mode), ncol)$u
}, modes, dim(Fy)[modes])
### Step 1: LS estimate
ls <- HOPIR.ls(X, Fy, alphas, sample.axis,
algorithm, ..., logger = logger)
### Step 2: MLE estimate
if (method == "mle") {
mle <- HOPIR.mle(X, Fy, ls$alphas, ls$Deltas, sample.axis,
algorithm, ..., logger = logger)
# add means and LS estimates as attribute
structure(
c(mle, list(meanX = meanX, meanFy = meanFy)),
ls = ls
)
} else {
# add means to LS estimates
c(ls, list(meanX = meanX, meanFy = meanFy))
}
}