tensor_predictors/tensorPredictors/R/gmlm_ising.R

#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
#'
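#' The parameter descriptions below are derived from the implementation:
#'
#' @param X array of binary observations, the last axis enumerating the samples
#' @param F array of predictor values with dimensions matching `X`
#' @param sample.axis axis enumerating the observations (the implementation
#'   currently assumes the last axis)
#' @param max.iter maximum number of gradient iterations
#' @param eps small positive constant added to denominators for numerical
#'   stability
#' @param step.size step size, either a scalar or a function of the iteration
#' @param zig.zag.threshold number of accumulated alternating loss-difference
#'   signs at which the optimization is stopped as unstable
#' @param patience number of non-improving iterations before stopping
#' @param nr.slices number of slices; only used by the (currently disabled)
#'   slicing branch for univariate `F(y) = y`
#' @param slice.method slicing method; only used by the (currently disabled)
#'   slicing branch for univariate `F(y) = y` with `y` a factor or integer
#' @param logger callback invoked once per iteration with the iteration
#'   number, loss, parameters, and gradients
#'
#' @return a list with components `betas` and `Omegas`
#'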
#' @export
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
    max.iter = 1000L,
    eps = sqrt(.Machine$double.eps),
    step.size = function(iter) 1e-2 * (1000 / (iter + 1000)),
    zig.zag.threshold = 5L,
    patience = 5L,
    nr.slices = 10L,                    # only for univariate `F(y) = y`
    slice.method = c("cut", "ecdf"),    # only for univariate `F(y) = y` and `y` is a factor or integer
    logger = function(...) { }
) {
    # # Special case for a univariate response `vec F(y) = y`.
    # # Due to high computational costs we use slicing.
    # if (length(F) == prod(dim(F))) {
    #     y <- as.vector(F)
    #     if (!(is.factor(y) || is.integer(y))) {
    #         slice.method <- match.arg(slice.method)
    #         if (slice.method == "ecdf") {
    #             y <- cut(ecdf(y)(y), nr.slices)
    #         } else {
    #             y <- cut(y, nr.slices)
    #         }
    #     }
    #     slices <- split(seq_len(sample.size), y, drop = TRUE)
    # } else {
    #     slices <- seq_len(sample.size)
    # }
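    # (In the disabled slicing branch above, `cut(ecdf(y)(y), nr.slices)`
    # would yield approximately equally populated slices by cutting the
    # empirical distribution function, while `cut(y, nr.slices)` cuts the
    # range of `y` into intervals of equal width.)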
    dimX <- head(dim(X), -1)
    dimF <- head(dim(F), -1)
    # the implementation assumes the observations to be enumerated by the
    # last axis, overriding the `sample.axis` argument
    sample.axis <- length(dim(X))
    modes <- seq_len(length(dim(X)) - 1)
    sample.size <- tail(dim(X), 1)
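    # Initialization: i.i.d. standard normal entries for the `betas`,
    # identity matrices for the `Omegas`, and zero-initialized accumulators
    # for the running squared gradients used in the adaptive step size below.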
    betas <- Map(matrix, Map(rnorm, dimX * dimF), dimX)
    Omegas <- Map(diag, dimX)

    grad2_betas <- Map(array, 0, Map(dim, betas))
    grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
    # Keep track of the last loss to accumulate loss-difference sign changes
    # indicating optimization instabilities as a sign to stop
    last_loss <- Inf
    accum_sign <- 1

    # non-improving iteration counter
    non_improving <- 0L
    # technical access points to dynamically access a multi-dimensional array
    # with its last index
    `X[..., i]` <- slice.expr(X, sample.axis, index = i)
    `F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
    # the next expression accesses the precomputed `mlm(F, betas)`
    `BF[..., i]` <- slice.expr(BF, sample.axis, nr.axis = sample.axis, drop = FALSE)    # BF[..., i]
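    # (`slice.expr` builds an unevaluated subsetting call; for a 3-D `X` the
    # expression `X[..., i]` should correspond to `X[, , i]` and is evaluated
    # later with the loop index `i` in scope.)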
    # Iterate till a break condition triggers or till max. nr. of iterations
    for (iter in seq_len(max.iter)) {
        grad_betas <- Map(matrix, 0, dimX, dimF)
        Omega <- Reduce(kronecker, rev(Omegas))

        R2 <- array(0, dim = c(dimX, dimX))

        # negative log-likelihood
        loss <- 0

        BF <- mlm(F, betas)
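        # (`BF` is the multi-linear product of `F` with all `betas`, one
        # factor per mode except the sample axis; its slices `BF[..., i]`
        # are reused for every observation below.)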
        for (i in seq_len(sample.size)) {
            params_i <- Omega + diag(as.vector(eval(`BF[..., i]`)))
            m2_i <- ising_m2(params_i)
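            # (`ising_m2` is assumed to return the Ising model second moment
            # matrix `E[X X']` for the natural parameters `params_i`, with
            # the zero-event probability `P(X = 0)`, the reciprocal of the
            # normalization constant, attached as the attribute "prob_0".)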
            # accumulate loss
            x_i <- as.vector(eval(`X[..., i]`))
            loss <- loss - (sum(x_i * (params_i %*% x_i)) + log(attr(m2_i, "prob_0")))

            R2_i <- tcrossprod(x_i) - m2_i
            R1_i <- diag(R2_i)
            dim(R1_i) <- dimX
            for (j in modes) {
                grad_betas[[j]] <- grad_betas[[j]] +
                    mcrossprod(
                        R1_i, mlm(eval(`F[..., i]`), betas[-j], modes[-j]), j,
                        dimB = ifelse(j != modes, dimX, dimF)
                    )
            }

            R2 <- R2 + as.vector(R2_i)
        }
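        # (`R2` now holds the accumulated residual second moments
        # `sum_i (x_i x_i' - E[X X'])`; `kronperm` is assumed to permute it
        # to match the Kronecker structure `Omega_r %x% ... %x% Omega_1` so
        # that the mode-wise `Omega` gradients factor out below.)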
        grad_Omegas <- Map(function(j) {
            grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
            dim(grad) <- dim(Omegas[[j]])
            grad
        }, modes)
        # update the accumulated alternating loss-difference signs
        accum_sign <- sign(last_loss - loss) - accum_sign
        # check if the accumulated alternating signs exceed the stopping threshold
        if (abs(accum_sign) > zig.zag.threshold) { break }

        # increment the non-improving counter if the loss did not decrease
        if (!(loss < last_loss)) {
            non_improving <- non_improving + 1L
        } else {
            non_improving <- 0L
        }
        if (non_improving > patience) { break }

        # store the current loss for the next iteration
        last_loss <- loss
        # Accumulate the mean squared gradients (an RMSprop-style
        # exponentially weighted moving average; the root is taken in the
        # parameter update below)
        grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
            grad2_betas, grad_betas)
        grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
            grad2_Omegas, grad_Omegas)
        # logging (before the parameter update)
        logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)

        # gradually decrease the step size
        step <- if (is.function(step.size)) step.size(iter) else step.size

        # Update Parameters
        betas <- Map(function(beta, grad, m2) {
            beta + (step / (sqrt(m2) + eps)) * grad
        }, betas, grad_betas, grad2_betas)
        Omegas <- Map(function(Omega, grad, m2) {
            Omega + (step / (sqrt(m2) + eps)) * grad
        }, Omegas, grad_Omegas, grad2_Omegas)
    }
    list(betas = betas, Omegas = Omegas)
}
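
# Usage sketch (the shapes below are assumptions for illustration, not part
# of the interface): `n` observations of a 2 x 3 binary array `X` with
# matching predictor values `F`, the sample axis being the last one.
#
#     n <- 100
#     X <- array(rbinom(2 * 3 * n, 1, 0.5), dim = c(2, 3, n))
#     F <- array(rnorm(2 * 3 * n), dim = c(2, 3, n))
#     fit <- gmlm_ising(X, F, max.iter = 100L,
#         logger = function(iter, loss, ...) {
#             cat(sprintf("iter %4d, loss %10.4f\n", iter, loss))
#         })
#     str(fit$betas)   # list of a 2 x 2 and a 3 x 3 matrix
#     str(fit$Omegas)  # list of a 2 x 2 and a 3 x 3 matrix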