tensor_predictors/tensorPredictors/R/kpir_new.R

#' Gradient Descent Bases Tensor Predictors method
#'
#' @export
kpir.new <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
    max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
    max.init.iter = 20L, init.method = c("ls", "vlp"),
    eps = .Machine$double.eps,
    logger = NULL
) {

    # Check if X and Fy have same number of observations
    stopifnot(nrow(X) == NROW(Fy))
    n <- nrow(X)                        # Number of observations

    # Get and check predictor dimensions
    if (length(dim(X)) == 2L) {
        stopifnot(!missing(shape))
        stopifnot(ncol(X) == prod(shape[1:2]))
        p <- as.integer(shape[1])       # Predictor "height"
        q <- as.integer(shape[2])       # Predictor "width"
    } else if (length(dim(X)) == 3L) {
        p <- dim(X)[2]
        q <- dim(X)[3]
        dim(X) <- c(n, p * q)
    } else {
        stop("'X' must be a matrix or 3-tensor")
    }

    # Get and check response dimensions
    if (!is.array(Fy)) {
        Fy <- as.array(Fy)
    }
    if (length(dim(Fy)) == 1L) {
        k <- r <- 1L
        dim(Fy) <- c(n, 1L)
    } else if (length(dim(Fy)) == 2L) {
        stopifnot(!missing(shape))
        stopifnot(ncol(Fy) == prod(shape[3:4]))
        k <- as.integer(shape[3])       # Response functional "height"
        r <- as.integer(shape[4])       # Response functional "width"
    } else if (length(dim(Fy)) == 3L) {
        k <- dim(Fy)[2]
        r <- dim(Fy)[3]
        dim(Fy) <- c(n, k * r)
    } else {
        stop("'Fy' must be a vector, matrix or 3-tensor")
    }

    ### Step 1: (Approx) Least Squares initial estimate
    init.method <- match.arg(init.method)
    if (init.method == "ls") {
        dim(X) <- c(n, p, q)
        dim(Fy) <- c(n, k, r)
        ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.axis = 1L, eps = eps)
        c(beta, alpha) %<-% ls$alphas
        dim(X) <- c(n, p * q)
        dim(Fy) <- c(n, k * r)
    } else { # Van Loan and Pitsianis
        # solution for `X = Fy B' + epsilon`
        cpFy <- crossprod(Fy)               # TODO: Check/Test and/or replace
        if (n <= k * r || qr(cpFy)$rank < k * r) {
            # In case of under-determined system replace the inverse in the normal
            # equation by the Moore-Penrose Pseudo Inverse
            B <- t(matpow(cpFy, -1) %*% crossprod(Fy, X))
        } else {
            # Compute OLS estimate by the Normal Equation
            B <- t(solve(cpFy, crossprod(Fy, X)))
        }

        # Decompose `B = alpha x beta` into `alpha` and `beta`
        c(alpha, beta) %<-% approx.kronecker(B, c(q, r), c(p, k))
    }

    # Compute residuals
    resid <- X - tcrossprod(Fy, kronecker(alpha, beta))

    # Covariance estimate
    Delta <- crossprod(resid) / n

    # Transformed Residuals (using `matpow` as robust inversion algo,
    # uses Moore-Penrose Pseudo Inverse in case of singular `Delta`)
    resid.trans <- resid %*% matpow(Delta, -1)

    # Evaluate negative log-likelihood
    loss <- 0.5 * (n * log(det(Delta)) + sum(resid.trans * resid))

    # Call history callback (logger) before the first iterate
    if (is.function(logger)) {
        logger(0L, loss, alpha, beta, Delta, NA)
    }


    ### Step 2: MLE with LS solution as starting value
    for (iter in seq_len(max.iter)) {
        # Sum over kronecker prod by observation (Face-Splitting Product)
        KR <- colSums(rowKronecker(Fy, resid.trans))
        dim(KR) <- c(p, q, k, r)

        # `alpha` Gradient
        R.Alpha <- aperm(KR, c(2, 4, 1, 3))
        dim(R.Alpha) <- c(q * r, p * k)
        grad.alpha <- c(R.Alpha %*% c(beta))

        # `beta` Gradient
        R.Beta <- aperm(KR, c(1, 3, 2, 4))
        dim(R.Beta) <- c(p * k, q * r)
        grad.beta <- c(R.Beta %*% c(alpha))

        # Line Search (Armijo type)
        # The `inner.prod` is used in the Armijo break condition but does not
        # depend on the step size.
        inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)

        # Line Search loop
        for (delta in step.size * 0.618034^seq.int(0L, length.out = max.line.iter)) {
            # Update `alpha` and `beta` (note: add(+), the gradients are already
            # pointing into the negative slope direction of the loss cause they are
            # the gradients of the log-likelihood [NOT the negative log-likelihood])
            alpha.temp <- alpha + delta * grad.alpha
            beta.temp  <- beta  + delta * grad.beta

            # Update Residuals, Covariance and transformed Residuals
            resid <- X - tcrossprod(Fy, kronecker(alpha.temp, beta.temp))
            Delta <- crossprod(resid) / n
            resid.trans <- resid %*% matpow(Delta, -1)

            # Evaluate negative log-likelihood
            loss.temp <- 0.5 * (n * log(det(Delta)) + sum(resid.trans * resid))

            # Armijo line search break condition
            if (loss.temp <= loss - 0.1 * delta * inner.prod) {
                break
            }
        }

        # Call logger (invoce history callback)
        if (is.function(logger)) {
            logger(iter, loss.temp, alpha.temp, beta.temp, Delta, delta)
        }

        # Ensure descent
        if (loss.temp < loss) {
            alpha <- alpha.temp
            beta  <- beta.temp

            # check break conditions (in descent case)
            if (mean(abs(alpha)) + mean(abs(beta)) < eps) {
                break   # basically, estimates are zero -> stop
            }
            if (inner.prod < eps * (p * q + r * k)) {
                break   # mean squared gradient is smaller than epsilon -> stop
            }
            if (abs(loss.temp - loss) < eps) {
                break   # decrease is too small (slow) -> stop
            }

            loss  <- loss.temp
        } else {
            break
        }

        # Set next iter starting step.size to line searched step size
        # (while allowing it to encrease)
        step.size <- 1.618034 * delta

    }

    list(loss = loss, alpha = alpha, beta = beta, Delta = Delta)
}
wip: reimplementation of KPIR 2022-03-22 15:26:24 +00:00			`#' Gradient Descent Bases Tensor Predictors method`
			`#'`
			`#' @export`
			`kpir.new <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),`
			`max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,`
add: kpir.ls as initial value estimate (alternative to VLP) 2022-05-18 16:33:28 +00:00			`max.init.iter = 20L, init.method = c("ls", "vlp"),`
wip: reimplementation of KPIR 2022-03-22 15:26:24 +00:00			`eps = .Machine$double.eps,`
			`logger = NULL`
			`) {`

			`# Check if X and Fy have same number of observations`
			`stopifnot(nrow(X) == NROW(Fy))`
			`n <- nrow(X) # Number of observations`

			`# Get and check predictor dimensions`
			`if (length(dim(X)) == 2L) {`
			`stopifnot(!missing(shape))`
			`stopifnot(ncol(X) == prod(shape[1:2]))`
			`p <- as.integer(shape[1]) # Predictor "height"`
			`q <- as.integer(shape[2]) # Predictor "width"`
			`} else if (length(dim(X)) == 3L) {`
			`p <- dim(X)[2]`
			`q <- dim(X)[3]`
			`dim(X) <- c(n, p * q)`
			`} else {`
			`stop("'X' must be a matrix or 3-tensor")`
			`}`

			`# Get and check response dimensions`
			`if (!is.array(Fy)) {`
			`Fy <- as.array(Fy)`
			`}`
			`if (length(dim(Fy)) == 1L) {`
			`k <- r <- 1L`
			`dim(Fy) <- c(n, 1L)`
			`} else if (length(dim(Fy)) == 2L) {`
			`stopifnot(!missing(shape))`
			`stopifnot(ncol(Fy) == prod(shape[3:4]))`
			`k <- as.integer(shape[3]) # Response functional "height"`
			`r <- as.integer(shape[4]) # Response functional "width"`
			`} else if (length(dim(Fy)) == 3L) {`
			`k <- dim(Fy)[2]`
			`r <- dim(Fy)[3]`
			`dim(Fy) <- c(n, k * r)`
			`} else {`
			`stop("'Fy' must be a vector, matrix or 3-tensor")`
			`}`

add: kpir.ls as initial value estimate (alternative to VLP) 2022-05-18 16:33:28 +00:00			`### Step 1: (Approx) Least Squares initial estimate`
			`init.method <- match.arg(init.method)`
			`if (init.method == "ls") {`
			`dim(X) <- c(n, p, q)`
			`dim(Fy) <- c(n, k, r)`
add: eeg_sim, add: Higher Order PCA (hopca) 2022-05-24 19:07:40 +00:00			`ls <- kpir.ls(X, Fy, max.iter = max.init.iter, sample.axis = 1L, eps = eps)`
add: kpir.ls as initial value estimate (alternative to VLP) 2022-05-18 16:33:28 +00:00			`c(beta, alpha) %<-% ls$alphas`
			`dim(X) <- c(n, p * q)`
			`dim(Fy) <- c(n, k * r)`
			`} else { # Van Loan and Pitsianis`
			# solution for `X = Fy B' + epsilon`
			`cpFy <- crossprod(Fy) # TODO: Check/Test and/or replace`
			`if (n <= k * r \|\| qr(cpFy)$rank < k * r) {`
			`# In case of under-determined system replace the inverse in the normal`
			`# equation by the Moore-Penrose Pseudo Inverse`
			`B <- t(matpow(cpFy, -1) %*% crossprod(Fy, X))`
			`} else {`
			`# Compute OLS estimate by the Normal Equation`
			`B <- t(solve(cpFy, crossprod(Fy, X)))`
			`}`
wip: reimplementation of KPIR 2022-03-22 15:26:24 +00:00
add: kpir.ls as initial value estimate (alternative to VLP) 2022-05-18 16:33:28 +00:00			# Decompose `B = alpha x beta` into `alpha` and `beta`
			`c(alpha, beta) %<-% approx.kronecker(B, c(q, r), c(p, k))`
			`}`
wip: reimplementation of KPIR 2022-03-22 15:26:24 +00:00
			`# Compute residuals`
			`resid <- X - tcrossprod(Fy, kronecker(alpha, beta))`

			`# Covariance estimate`
			`Delta <- crossprod(resid) / n`

			# Transformed Residuals (using `matpow` as robust inversion algo,
			# uses Moore-Penrose Pseudo Inverse in case of singular `Delta`)
			`resid.trans <- resid %*% matpow(Delta, -1)`

			`# Evaluate negative log-likelihood`
			`loss <- 0.5 * (n * log(det(Delta)) + sum(resid.trans * resid))`

			`# Call history callback (logger) before the first iterate`
			`if (is.function(logger)) {`
			`logger(0L, loss, alpha, beta, Delta, NA)`
			`}`


			`### Step 2: MLE with LS solution as starting value`
			`for (iter in seq_len(max.iter)) {`
			`# Sum over kronecker prod by observation (Face-Splitting Product)`
			`KR <- colSums(rowKronecker(Fy, resid.trans))`
			`dim(KR) <- c(p, q, k, r)`

			# `alpha` Gradient
			`R.Alpha <- aperm(KR, c(2, 4, 1, 3))`
			`dim(R.Alpha) <- c(q * r, p * k)`
			`grad.alpha <- c(R.Alpha %*% c(beta))`

			# `beta` Gradient
			`R.Beta <- aperm(KR, c(1, 3, 2, 4))`
			`dim(R.Beta) <- c(p * k, q * r)`
			`grad.beta <- c(R.Beta %*% c(alpha))`

			`# Line Search (Armijo type)`
			# The `inner.prod` is used in the Armijo break condition but does not
			`# depend on the step size.`
			`inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)`

			`# Line Search loop`
add: mvbernoulli, wip: tensorPredictors, add: GMLM 2022-10-06 12:25:40 +00:00			`for (delta in step.size * 0.618034^seq.int(0L, length.out = max.line.iter)) {`
wip: reimplementation of KPIR 2022-03-22 15:26:24 +00:00			# Update `alpha` and `beta` (note: add(+), the gradients are already
			`# pointing into the negative slope direction of the loss cause they are`
			`# the gradients of the log-likelihood [NOT the negative log-likelihood])`
			`alpha.temp <- alpha + delta * grad.alpha`
			`beta.temp <- beta + delta * grad.beta`

			`# Update Residuals, Covariance and transformed Residuals`
			`resid <- X - tcrossprod(Fy, kronecker(alpha.temp, beta.temp))`
			`Delta <- crossprod(resid) / n`
			`resid.trans <- resid %*% matpow(Delta, -1)`

			`# Evaluate negative log-likelihood`
			`loss.temp <- 0.5 * (n * log(det(Delta)) + sum(resid.trans * resid))`

			`# Armijo line search break condition`
			`if (loss.temp <= loss - 0.1 * delta * inner.prod) {`
			`break`
			`}`
			`}`

			`# Call logger (invoce history callback)`
			`if (is.function(logger)) {`
			`logger(iter, loss.temp, alpha.temp, beta.temp, Delta, delta)`
			`}`

			`# Ensure descent`
			`if (loss.temp < loss) {`
			`alpha <- alpha.temp`
			`beta <- beta.temp`

			`# check break conditions (in descent case)`
			`if (mean(abs(alpha)) + mean(abs(beta)) < eps) {`
			`break # basically, estimates are zero -> stop`
			`}`
			`if (inner.prod < eps * (p * q + r * k)) {`
			`break # mean squared gradient is smaller than epsilon -> stop`
			`}`
			`if (abs(loss.temp - loss) < eps) {`
			`break # decrease is too small (slow) -> stop`
			`}`

			`loss <- loss.temp`
			`} else {`
			`break`
			`}`

			`# Set next iter starting step.size to line searched step size`
			`# (while allowing it to encrease)`
			`step.size <- 1.618034 * delta`

			`}`

			`list(loss = loss, alpha = alpha, beta = beta, Delta = Delta)`
			`}`