294 lines
12 KiB
R
294 lines
12 KiB
R
#' HOPIR subroutine for the LS solution given preprocessed `X`, `Fy` and initial
|
|
#' values `alphas`.
|
|
#'
|
|
#' @keywords internal
|
|
HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
|
|
# Get axis indices (observation modes)
|
|
modes <- seq_along(dim(X))[-sample.axis]
|
|
n <- dim(X)[sample.axis] # observation count (scalar)
|
|
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
|
|
|
|
# Least Squares Deltas Estimates given alphas
|
|
fun.Deltas <- function(alphas) {
|
|
# Residuals
|
|
R <- X - mlm(Fy, alphas, modes = modes)
|
|
# `Delta` moment estimates
|
|
Deltas <- Map(mcrossprod, list(R), mode = modes)
|
|
Map(`*`, p / (n * prod(p)), Deltas)
|
|
}
|
|
|
|
# wrap logger, provide unified logger interface for all HOPIR subroutines
|
|
if (is.function(logger)) {
|
|
callback <- function(iter, alphas) {
|
|
logger("ls", iter, alphas, fun.Deltas(alphas))
|
|
}
|
|
} else {
|
|
callback <- NULL
|
|
}
|
|
|
|
if (algorithm == "icu") {
|
|
# Call (proper parameterized) Iterative Cyclic Updating Optimizer
|
|
alphas <- ICU(
|
|
# Optimization Objective (MSE)
|
|
fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2),
|
|
# Updating rule (optimal solution for `alpha_j` given the rest)
|
|
fun.update = function(alphas, j) {
|
|
Z <- mlm(Fy, alphas[-j], modes = modes[-j])
|
|
# least squares solution for `alpha_j | alpha_i, i != j`
|
|
alphas[[j]] <- t(solve(
|
|
mcrossprod(Z, Z, modes[j]), mcrossprod(Z, X, modes[j])
|
|
))
|
|
},
|
|
# Initial parameter estimates
|
|
params = alphas,
|
|
...,
|
|
callback = callback)
|
|
} else {
|
|
# Call (proper parameterized) Nesterov Accelerated Gradient Descent
|
|
alphas <- NAGD(
|
|
# Setup objective function (MSE)
|
|
fun.loss = function(alphas) mean((X - mlm(Fy, alphas, modes))^2),
|
|
# Gradient of the objective with respect to the parameter matirces `alpha_j`
|
|
fun.grad = function(alphas) {
|
|
# Residuals
|
|
R <- X - mlm(Fy, alphas, modes)
|
|
# Gradients for each alpha
|
|
Map(function(j) {
|
|
# MLM of Fy with alpha_k, k in [r] \ j
|
|
Fa <- mlm(Fy, alphas[-j], modes[-j])
|
|
# Gradient of the loss with respect to alpha_j
|
|
(-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j])
|
|
}, seq_along(modes))
|
|
},
|
|
params = alphas,
|
|
# Linear Combination of parameters, basically: a * lhs + b * rhs for each
|
|
# combination of elements in the LHS and RHS lists with scalars a, b.
|
|
fun.lincomb = function(a, LHS, b, RHS) {
|
|
Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS)
|
|
},
|
|
# squared norm of parameters
|
|
fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2),
|
|
...,
|
|
callback = callback)
|
|
}
|
|
|
|
# Final estimate includes Deltas
|
|
list(alphas = alphas, Deltas = fun.Deltas(alphas))
|
|
}
|
|
|
|
#' HPOIR subroutine for the MLE estimation given preprocessed data and initial
|
|
#' alpha, Delta parameters
|
|
#'
|
|
#' @keywords internal
|
|
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {
|
|
# Get axis indices (observation modes)
|
|
modes <- seq_along(dim(X))[-sample.axis]
|
|
n <- dim(X)[sample.axis] # observation count (scalar)
|
|
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
|
|
|
|
if (algorithm == "icu") {
|
|
# Call (proper parameterized) Iterative Cyclic Updating Optimizer
|
|
params <- ICU(
|
|
# Optimization Objective (negative log-likelihood)
|
|
fun.loss = function(params) {
|
|
# residuals
|
|
R <- X - mlm(Fy, params$alphas, modes)
|
|
# negative log-likelihood (without additive constant term)
|
|
0.5 * (
|
|
n * prod(p) * sum(log(unlist(Map(det, params$Deltas))) / p) +
|
|
sum(mlm(R, Map(solve, params$Deltas), modes) * R)
|
|
)
|
|
},
|
|
# Updating rule, optimal solution for `alpha_j` or `Delta_j` given
|
|
# all other parameters
|
|
fun.update = function(params, index) {
|
|
# residuals
|
|
R <- X - mlm(Fy, params$alphas, modes)
|
|
# mode (axis) index
|
|
j <- index[2]
|
|
if (index[1] == 1) {
|
|
# compute subterms
|
|
Delta.invs <- Map(solve, params$Deltas)
|
|
Delta.inv.alphas <- Map(`%*%`, Delta.invs, alphas)
|
|
XxDi <- mlm(X, Delta.invs[-j], modes[-j])
|
|
Fxa <- mlm(Fy, alphas[-j], modes[-j])
|
|
FxDia <- mlm(Fy, Delta.inv.alphas[-j], modes[-j])
|
|
# alpha update
|
|
mcrossprod(XxDi, Fxa, modes[j]) %*%
|
|
solve(mcrossprod(FxDia, Fxa, modes[j]))
|
|
} else { # index[1] == 2
|
|
# Delta update
|
|
(p[j] / (n * prod(p))) * mcrossprod(
|
|
mlm(R, Map(solve, params$Deltas[-j]), modes[-j]), R, modes[j])
|
|
}
|
|
},
|
|
# collection of initial alpha and Delta parameters
|
|
params = list(alphas = alphas, Deltas = Deltas),
|
|
# parameter "path"-indices, first index {1, 2} toggles between alphas
|
|
# and Deltas, second index is the mode.
|
|
# Example: `list(list("a1", "a2", "a3"), list("D1", "D2", "D3"))[[c(2, 1)]] == "D1"`
|
|
indices = c(Map(c, 1L, seq_along(modes)), Map(c, 2L, seq_along(modes))),
|
|
...,
|
|
callback = if (is.function(logger)) {
|
|
function(iter, params) {
|
|
logger("mle", iter, params$alphas, params$Deltas)
|
|
}
|
|
} else {
|
|
NULL
|
|
})
|
|
} else {
|
|
# Call (proper parameterized) Nesterov Accelerated Gradient Descent
|
|
# Note that only the `alphas` are subject of Gradient Descent and the
|
|
# `Deltas` are additional parameters updated given the cueent `alphas`.
|
|
# Meaning that the gradient is the gradient of the loss with respect to
|
|
# the `alphas` only.
|
|
params <- NAGD(
|
|
# Setup objective function (negative log-likelihood)
|
|
fun.loss = function(alphas, Deltas) {
|
|
# residuals
|
|
R <- X - mlm(Fy, alphas, modes)
|
|
# negative log-likelihood (without additive constant term)
|
|
0.5 * (
|
|
n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) +
|
|
sum(mlm(R, Map(solve, Deltas), modes) * R)
|
|
)
|
|
},
|
|
# Gradient of the objective with respect to the parameter matirces
|
|
# `alphas` only, only the first argument, the second are `more.params`.
|
|
fun.grad = function(alphas, Deltas) {
|
|
# Residuals
|
|
R <- X - mlm(Fy, alphas, modes)
|
|
# Gradients for each alpha
|
|
Map(function(j) {
|
|
# MLM of Fy with alpha_k, k in [r] \ j
|
|
Fa <- mlm(Fy, alphas[-j], modes[-j])
|
|
# Gradient of the loss with respect to alpha_j
|
|
(-2 / prod(dim(X))) * mcrossprod(R, Fa, modes[j])
|
|
}, seq_along(modes))
|
|
},
|
|
# Initial parameters (subject to Gradient Descent)
|
|
params = alphas,
|
|
# Initial additional parameters (subject to updating given `params`)
|
|
more.params = Deltas,
|
|
# Update `Deltas` given `alphas`
|
|
fun.more.params = function(alphas, old.Deltas) {
|
|
# Residuals
|
|
R <- X - mlm(Fy, alphas, modes)
|
|
# Solve cross dependent System of Delta equations using the ICU
|
|
# algorithm (with a small number of iterations, should be enough)
|
|
ICU(
|
|
fun.loss = function(Deltas) {
|
|
# negative log-likelihood (without additive constant term)
|
|
0.5 * (
|
|
n * prod(p) * sum(log(unlist(Map(det, Deltas))) / p) +
|
|
sum(mlm(R, Map(solve, Deltas), modes) * R)
|
|
)
|
|
},
|
|
fun.update = function(Deltas, j) {
|
|
# Delta update
|
|
(p[j] / (n * prod(p))) * mcrossprod(
|
|
mlm(R, Map(solve, Deltas[-j]), modes[-j]), R, modes[j])
|
|
},
|
|
params = old.Deltas,
|
|
max.iter = 5L)
|
|
},
|
|
# Linear Combination of parameters, basically: a * lhs + b * rhs for each
|
|
# combination of elements in the LHS and RHS lists with scalars a, b.
|
|
fun.lincomb = function(a, LHS, b, RHS) {
|
|
Map(function(lhs, rhs) a * lhs + b * rhs, LHS, RHS)
|
|
},
|
|
# squared norm of parameters
|
|
fun.norm2 = function(params) sum(unlist(params, use.names = FALSE)^2),
|
|
...,
|
|
callback = if (is.function(logger)) {
|
|
function(iter, alphas, Deltas) {
|
|
logger("mle", iter, alphas, Deltas)
|
|
}
|
|
} else {
|
|
NULL
|
|
})
|
|
# Remap parameter names
|
|
names(params) <- c("alphas", "Deltas")
|
|
}
|
|
|
|
params
|
|
}
|
|
|
|
#' Higher Order Parametric Inverse Regression
|
|
#'
|
|
#' @export
|
|
HOPIR <- function(X, Fy, sample.axis, method = c("ls", "mle"),
|
|
algorithm = c("icu", "nagd"), ..., center = TRUE, logger = NULL
|
|
) {
|
|
# Predair and check input parameters
|
|
method <- match.arg(method)
|
|
algorithm <- match.arg(algorithm)
|
|
# ensure response is tensor valued
|
|
if (!is.array(Fy)) {
|
|
# scalar response case (add new axis of size 1)
|
|
dim(Fy) <- ifelse(seq_along(dim(X)) == sample.axis, dim(X)[sample.axis], 1L)
|
|
}
|
|
# Check dimensions and matching of axis (tensor order)
|
|
stopifnot(exprs = {
|
|
length(dim(X)) == length(dim(Fy))
|
|
dim(X)[sample.axis] == dim(Fy)[sample.axis]
|
|
})
|
|
# warn about occurence of an axis without reduction
|
|
if (any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])) {
|
|
warning("Degenerate case 'any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])'")
|
|
}
|
|
|
|
# Set the default logger
|
|
if (is.logical(logger) && logger) {
|
|
start <- Sys.time()
|
|
logger <- function(method, iter, ...) {
|
|
if (iter) {
|
|
cat(sprintf("%4d - %s - Elapsed: %s\n",
|
|
iter, method, format(Sys.time() - start)))
|
|
}
|
|
}
|
|
}
|
|
|
|
# Get axis indices (observation modes)
|
|
modes <- seq_along(dim(X))[-sample.axis]
|
|
n <- dim(X)[sample.axis] # observation count (scalar)
|
|
p <- dim(X)[-sample.axis] # predictor dimensions (vector)
|
|
|
|
# center data (predictors and responses)
|
|
if (center) {
|
|
# Means for X and Fy (a.k.a. sum elements over the sample axis)
|
|
meanX <- apply(X, modes, mean, simplify = TRUE)
|
|
meanFy <- apply(Fy, modes, mean, simplify = TRUE)
|
|
# Center both X and Fy
|
|
X <- sweep(X, modes, meanX)
|
|
Fy <- sweep(Fy, modes, meanFy)
|
|
} else {
|
|
meanX <- meanFy <- NA
|
|
}
|
|
|
|
|
|
### Step 0: Initial parameter estimates HOPCA
|
|
alphas <- Map(function(mode, ncol) {
|
|
La.svd(mcrossprod(X, mode = mode), ncol)$u
|
|
}, modes, dim(Fy)[modes])
|
|
|
|
### Step 1: LS estimate
|
|
ls <- HOPIR.ls(X, Fy, alphas, sample.axis,
|
|
algorithm, ..., logger = logger)
|
|
|
|
### Step 2: MLE estimate
|
|
if (method == "mle") {
|
|
mle <- HOPIR.mle(X, Fy, ls$alphas, ls$Deltas, sample.axis,
|
|
algorithm, ..., logger = logger)
|
|
|
|
# add means and LS estimates as attribute
|
|
structure(
|
|
c(mle, list(meanX = meanX, meanFy = meanFy)),
|
|
ls = ls
|
|
)
|
|
} else {
|
|
# add means to LS estimates
|
|
c(ls, list(meanX = meanX, meanFy = meanFy))
|
|
}
|
|
}
|