137 lines
4.4 KiB
R
137 lines
4.4 KiB
R
#' Using unbiased (but not MLE) estimates for the Kronecker decomposed
|
|
#' covariance matrices Delta_1, Delta_2 for approximating the log-likelihood
|
|
#' giving a closed form solution for the gradient.
|
|
#'
|
|
#' Delta_1 = n^-1 sum_i R_i' R_i,
|
|
#' Delta_2 = n^-1 sum_i R_i R_i'.
|
|
#'
|
|
#' @export
|
|
kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
|
max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
|
|
nesterov.scaling = function(a, t) 0.5 * (1 + sqrt(1 + (2 * a)^2)),
|
|
eps = .Machine$double.eps,
|
|
logger = NULL
|
|
) {
|
|
|
|
# Check if X and Fy have same number of observations
|
|
stopifnot(nrow(X) == NROW(Fy))
|
|
n <- nrow(X) # Number of observations
|
|
|
|
# Get and check predictor dimensions
|
|
if (length(dim(X)) == 2L) {
|
|
stopifnot(!missing(shape))
|
|
stopifnot(ncol(X) == prod(shape[1:2]))
|
|
p <- as.integer(shape[1]) # Predictor "height"
|
|
q <- as.integer(shape[2]) # Predictor "width"
|
|
} else if (length(dim(X)) == 3L) {
|
|
p <- dim(X)[2]
|
|
q <- dim(X)[3]
|
|
dim(X) <- c(n, p * q)
|
|
} else {
|
|
stop("'X' must be a matrix or 3-tensor")
|
|
}
|
|
|
|
# Get and check response dimensions
|
|
if (!is.array(Fy)) {
|
|
Fy <- as.array(Fy)
|
|
}
|
|
if (length(dim(Fy)) == 1L) {
|
|
k <- r <- 1L
|
|
dim(Fy) <- c(n, 1L)
|
|
} else if (length(dim(Fy)) == 2L) {
|
|
stopifnot(!missing(shape))
|
|
stopifnot(ncol(Fy) == prod(shape[3:4]))
|
|
k <- as.integer(shape[3]) # Response functional "height"
|
|
r <- as.integer(shape[4]) # Response functional "width"
|
|
} else if (length(dim(Fy)) == 3L) {
|
|
k <- dim(Fy)[2]
|
|
r <- dim(Fy)[3]
|
|
dim(Fy) <- c(n, k * r)
|
|
} else {
|
|
stop("'Fy' must be a vector, matrix or 3-tensor")
|
|
}
|
|
|
|
|
|
### Step 1: (Approx) Least Squares solution for `X = Fy B' + epsilon`
|
|
# Vectorize
|
|
dim(Fy) <- c(n, k * r)
|
|
dim(X) <- c(n, p * q)
|
|
# Solve
|
|
cpFy <- crossprod(Fy) # TODO: Check/Test and/or replace
|
|
if (n <= k * r || qr(cpFy)$rank < k * r) {
|
|
# In case of under-determined system replace the inverse in the normal
|
|
# equation by the Moore-Penrose Pseudo Inverse
|
|
B <- t(matpow(cpFy, -1) %*% crossprod(Fy, X))
|
|
} else {
|
|
# Compute OLS estimate by the Normal Equation
|
|
B <- t(solve(cpFy, crossprod(Fy, X)))
|
|
}
|
|
|
|
# De-Vectroize (from now on tensor arithmetics)
|
|
dim(Fy) <- c(n, k, r)
|
|
dim(X) <- c(n, p, q)
|
|
|
|
# Decompose `B = alpha x beta` into `alpha` and `beta`
|
|
c(alpha0, beta0) %<-% approx.kronecker(B, c(q, r), c(p, k))
|
|
|
|
# Compute residuals
|
|
R <- X - (Fy %x_3% alpha0 %x_2% beta0)
|
|
|
|
# Covariance estimates and scaling factor
|
|
Delta.1 <- tcrossprod(mat(R, 3))
|
|
Delta.2 <- tcrossprod(mat(R, 2))
|
|
s <- sum(diag(Delta.1))
|
|
|
|
# Inverse Covariances
|
|
Delta.1.inv <- solve(Delta.1)
|
|
Delta.2.inv <- solve(Delta.2)
|
|
|
|
# cross dependent covariance estimates
|
|
S.1 <- n^-1 * tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3))
|
|
S.2 <- n^-1 * tcrossprod(mat(R, 2), mat(R %x_3% Delta.1.inv, 2))
|
|
|
|
# Evaluate negative log-likelihood (2 pi term dropped)
|
|
loss <- -0.5 * n * (p * q * log(s) - p * log(det(Delta.1)) -
|
|
q * log(det(Delta.2)) - s * sum(S.1 * Delta.1.inv))
|
|
|
|
# Gradient "generating" tensor
|
|
G <- (sum(S.1 * Delta.1.inv) - p * q / s) * R
|
|
G <- G + R %x_2% ((diag(q, p, p) - s * (Delta.2.inv %*% S.2)) %*% Delta.2.inv)
|
|
G <- G + R %x_3% ((diag(p, q, q) - s * (Delta.1.inv %*% S.1)) %*% Delta.1.inv)
|
|
G <- G + s * (R %x_2% Delta.2.inv %x_3% Delta.1.inv)
|
|
|
|
# Call history callback (logger) before the first iteration
|
|
if (is.function(logger)) {
|
|
logger(0L, loss, alpha0, beta0, Delta.1, Delta.2, NA)
|
|
}
|
|
|
|
|
|
### Step 2: MLE estimate with LS solution as starting value
|
|
a0 <- 0
|
|
a1 <- 1
|
|
alpha1 <- alpha0
|
|
beta1 <- beta0
|
|
|
|
# main descent loop
|
|
no.nesterov <- TRUE
|
|
break.reason <- NA
|
|
for (iter in seq_len(max.iter)) {
|
|
if (no.nesterov) {
|
|
# without extrapolation as fallback
|
|
alpha.moment <- alpha1
|
|
beta.moment <- beta1
|
|
} else {
|
|
# extrapolation using previous direction
|
|
alpha.moment <- alpha1 + ((a0 - 1) / a1) * (alpha1 - alpha0)
|
|
beta.moment <- beta1 + ((a0 - 1) / a1) * ( beta1 - beta0)
|
|
}
|
|
}
|
|
|
|
# Extrapolated residuals
|
|
R <- X - (Fy %x_3% alpha.moment %x_2% beta.moment)
|
|
|
|
|
|
|
|
list(loss = loss, alpha = alpha1, beta = beta1, Delta = Delta)
|
|
}
|