fix: momentum bug,
wip: datasets, notes, ...
parent 4b68c245a6
commit 300fc11f3f
@@ -289,7 +289,7 @@ cve.call <- function(X, Y, method = "simple",
     } else {
         tol <- as.double(tol)
     }
-    if (!is.numeric(slack) || length(slack) > 1L || slack < 0.0) {
+    if (!is.numeric(slack) || length(slack) > 1L) {
         stop("Break condition slack 'slack' must be not negative number.")
     } else {
         slack <- as.double(slack)
@@ -1,3 +1,83 @@
+#'
+#' @param n number of samples.
+#' @param mu mean
+#' @param sigma covariance matrix.
+#'
+#' @returns a \eqn{n\times p} matrix with samples in its rows.
+#'
+#' @examples
+#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
+#' rmvnorm(20, mu = c(3, -1, 2))
+rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
+    if (!missing(sigma)) {
+        p <- nrow(sigma)
+    } else if (!missing(mu)) {
+        mu <- matrix(mu, ncol = 1)
+        p <- nrow(mu)
+    } else {
+        stop("At least one of 'mu' or 'sigma' must be supplied.")
+    }
+
+    # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
+    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
+}
+
+#' Samples from the multivariate t distribution (student distribution).
+#'
+#' @param n number of samples.
+#' @param mu mean, ... TODO:
+#' @param sigma a \eqn{k\times k} positive definite matrix. If the degree
+#' \eqn{\nu} if bigger than 2 the created covariance is
+#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
+#' for \eqn{\nu > 2}.
+#' @param df degree of freedom \eqn{\nu}.
+#'
+#' @returns a \eqn{n\times p} matrix with samples in its rows.
+#'
+#' @examples
+#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
+#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
+#' rmvt(20, mu = c(3, -1, 2), 3)
+rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
+    if (!missing(sigma)) {
+        p <- nrow(sigma)
+    } else if (!missing(mu)) {
+        mu <- matrix(mu, ncol = 1)
+        p <- nrow(mu)
+    } else {
+        stop("At least one of 'mu' or 'sigma' must be supplied.")
+    }
+
+    if (df == Inf) {
+        Z <- 1
+    } else {
+        Z <- sqrt(df / rchisq(n, df))
+    }
+
+    return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
+}
+
+
+#' Generalized Normal Distribution.
+#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
+rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
+    if (alpha <= 0 | beta <= 0) {
+        stop("alpha and beta must be positive.")
+    }
+    lambda <- (1 / alpha)^beta
+    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
+    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
+}
+
+#' Laplace distribution
+#' see: https://en.wikipedia.org/wiki/Laplace_distribution
+rlaplace <- function(n = 1, mu = 0, sigma = 1) {
+    U <- runif(n, -0.5, 0.5)
+    scale <- sigma / sqrt(2)
+
+    return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
+}
+
 #' Generates test datasets.
 #'
 #' Provides sample datasets. There are 5 different datasets named
@@ -41,72 +121,73 @@
 #' @import stats
 #' @importFrom stats rnorm rbinom
 #' @export
-dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
-    # validate parameters
-    stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
-    # set default values if not supplied
-    if (missing(n)) {
-        n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
-    }
-    if (missing(B)) {
-        p <- 12
+dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) {
+    name <- toupper(name)
+    if (nchar(name) == 1) { name <- paste0("M", name) }
+
     if (name == "M1") {
+        if (missing(n)) { n <- 100 }
+        # B ... `p x 1`
+        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
+        X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
+        beta <- 0.5
+        Y <- cos(X %*% B) + rgnorm(n, 0,
+            alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)),
+            beta = beta
+        )
+    } else if (name == "M2") {
+        if (missing(n)) { n <- 100 }
+        prob <- 0.3
+        lambda <- 1 # dispersion
+        # B ... `p x 1`
+        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
+        Z <- 2 * rbinom(n, 1, prob) - 1
+        X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
+        Y <- cos(X %*% B) + rnorm(n, 0, sigma)
+    } else if (name == "M3") {
+        if (missing(n)) { n <- 200 }
+        # B ... `p x 1`
+        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
+        X <- matrix(rnorm(n * p), n)
+        Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2)
+    } else if (name == "M4") {
+        if (missing(n)) { n <- 200 }
+        # B ... `p x 2`
         B <- cbind(
-                c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
-                c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
+            c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
+            c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
+        )
+        X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
+        XB <- X %*% B
+        Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2)
+    } else if (name == "M5") {
+        if (missing(n)) { n <- 200 }
+        # B ... `p x 2`
+        B <- cbind(
+            c(rep(1,        6), rep(0, p - 6)),
+            c(rep(c(1, -1), 3), rep(0, p - 6))
         ) / sqrt(6)
-        } else if (name == "M2") {
-            B <- cbind(
-                c(c(1, 0), rep(0, 10)),
-                c(c(0, 1), rep(0, 10))
-            )
+        X <- matrix(runif(n * p), n)
+        XB <- X %*% B
+        Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2)
+    } else if (name == "M6") {
+        if (missing(n)) { n <- 200 }
+        # B ... `p x 3`
+        B <- diag(p)[, -(3:(p - 1))]
+        X <- matrix(rnorm(n * p), n)
+        Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2)
+    } else if (name == "M7") {
+        if (missing(n)) { n <- 400 }
+        # B ... `p x 4`
+        B <- diag(p)[, -(4:(p - 1))]
+        # "R"andom "M"ulti"V"ariate "S"tudent
+        X <- rmvt(n = n, sigma = diag(p), df = 3)
+        XB <- X %*% B
+        Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
+        Y <- Y + rlaplace(n, 0, sigma)
     } else {
-            B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
+        stop("Got unknown dataset name.")
     }
-    } else {
-        p <- nrow(B)
-        # validate col. nr to match dataset `k = ncol(B)`
-        stopifnot(
-            name %in% c("M1", "M2") && ncol(B) == 2,
-            name %in% c("M3", "M4", "M5") && ncol(B) == 1
-        )
-    }
-
-    # set link function `g` for model `Y ~ g(B'X) + epsilon`
-    if (name == "M1") {
-        g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
-    } else if (name == "M2") {
-        g <- function(BX) { BX[1] * BX[2]^2 }
-    } else if (name %in% c("M3", "M4")) {
-        g <- function(BX) { cos(BX[1]) }
-    } else { # name == "M5"
-        g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
-    }
-
-    # compute X
-    if (name != "M4") {
-        # compute root of the covariance matrix according the dataset
-        if (name %in% c("M1", "M3")) {
-            # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
-            # `\Sigma_{i, j} = 0.5^{|i - j|}`.
-            Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
-            # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
-            Sigma.root <- chol(Sigma)
-        } else { # name %in% c("M2", "M5")
-            Sigma.root <- diag(rep(1, p)) # d-dim identity
-        }
-        # data `X` as multivariate random normal variable with
-        # variance matrix `Sigma`.
-        X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
-    } else { # name == "M4"
-        X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
-    }
-
-    # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)`
-    Y <- apply(X, 1, function(X_i) {
-        g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
-    })
+
     return(list(X = X, Y = Y, B = B, name = name))
 }
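As a quick illustration of the generators added above (not part of the commit), a minimal R usage sketch; the calls mirror the roxygen examples and the new dataset() signature:

set.seed(42)
# samples from the new multivariate normal / t generators
X1 <- rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
X2 <- rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
# sanity check: the empirical covariance should be close to the requested sigma
round(cov(rmvnorm(10000, sigma = matrix(c(2, 1, 1, 2), 2))), 1)
# one of the new datasets; returns a list with X, Y, B and the dataset name
ds <- dataset("M7", n = 400)
str(ds)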
@@ -11,6 +11,17 @@
 #' @param X data matrix with samples in its rows.
 #' @param k Dimension of lower dimensional projection.
 #' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper.
+#' @param version either \code{1} or \code{2}, where
+#' \itemize{
+#'      \item 1: uses the following formula:
+#'          \deqn{%
+#'          h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
+#'          h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
+#'      \item 2: uses
+#'          \deqn{%
+#'          h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{%
+#'          h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}
+#'  }
 #'
 #' @return Estimated bandwidth \code{h}.
 #'
@@ -34,12 +45,17 @@
 #' print(cve.obj.simple$res$'1'$h)
 #' print(estimate.bandwidth(x, k = k))
 #' @export
-estimate.bandwidth <- function(X, k, nObs) {
+estimate.bandwidth <- function (X, k, nObs, version = 1L) {
     n <- nrow(X)
     p <- ncol(X)
+    if (version == 1) {
         X_centered <- scale(X, center = TRUE, scale = FALSE)
         Sigma <- crossprod(X_centered, X_centered)/n
-
         return((2 * sum(diag(Sigma))/p) * (1.2 * n^(-1/(4 + k)))^2)
+    } else if (version == 2) {
+        X_c <- scale(X, center = TRUE, scale = FALSE)
+        return(2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p))
+    } else {
+        stop("Unknown version.")
+    }
 }
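A brief illustration (not from the commit) of calling the extended estimate.bandwidth() with both rules; the data and parameter values are made up:

set.seed(21)
X <- matrix(rnorm(200 * 6), 200, 6)
estimate.bandwidth(X, k = 2, nObs = sqrt(nrow(X)))                 # version 1: trace-based rule
estimate.bandwidth(X, k = 2, nObs = sqrt(nrow(X)), version = 2L)   # version 2: chi-squared quantile rule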
@@ -1,35 +1,4 @@
-#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
-#'
-#' @param object instance of class \code{cve} (result of \code{cve},
-#'  \code{cve.call}).
-#' @param ... ignored.
-#'
-#' @return list with
-#' \itemize{
-#'    \item MSE: Mean Square Error,
-#'    \item k: predicted dimensions.
-#' }
-#'
-#' @examples
-#' # create B for simulation
-#' B <- rep(1, 5) / sqrt(5)
-#' 
-#' set.seed(21)
-#' # creat predictor data x ~ N(0, I_p)
-#' x <- matrix(rnorm(500), 100)
-#' 
-#' # simulate response variable
-#' #    y = f(B'x) + err
-#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
-#' y <- x %*% B + 0.25 * rnorm(100)
-#'
-#' # Calculate cve for unknown k between min.dim and max.dim.
-#' cve.obj.simple <- cve(y ~ x) 
-#'
-#' predict_dim(cve.obj.simple)
-#'
-#' @export
-predict_dim <- function(object, ...) {
+predict_dim_cv <- function(object) {
     # Get centered training data and dimensions
     X <- scale(object$X, center = TRUE, scale = FALSE)
     n <- nrow(object$X) # umber of training data samples
@@ -59,3 +28,173 @@ predict_dim <- function(object, ...) {
         k = as.integer(names(which.min(MSE)))
     ))
 }
+# TODO: write doc
+predict_dim_elbow <- function(object) {
+    # extract original data from object (cve result)
+    X <- object$X
+    Y <- object$Y
+    # Get dimensions
+    n <- nrow(X)
+    p <- ncol(X)
+    # Compute persistent data.
+    i = rep(1:n, n)
+    j = rep(1:n, each = n)
+    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
+
+    losses <- vector("double", length(object$res))
+    names(losses) <- names(object$res)
+    # Compute per sample losses with alternative bandwidth for each dimension.
+    for (dr.k in object$res) {
+        # extract dimension specific estimates and dimensions.
+        k <- dr.k$k
+        V <- dr.k$V
+        q <- ncol(V)
+        # estimate bandwidth according alternative formula (see: TODO: see)
+        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
+        # Projected `X`
+        XV <- X %*% V
+        # Devectorized distance matrix
+        # (inefficient in R but fast in C)
+        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
+        D <- D.eucl - D
+        # Apply kernel
+        K <- exp((-0.5 / h^2) * D^2)
+        # sum columns
+        colSumsK <- colSums(K)
+        # compute weighted and square meighted reponses
+        y1 <- (K %*% Y) / colSumsK
+        y2 <- (K %*% Y^2) / colSumsK
+        # total loss
+        losses[[as.character(k)]] <- mean(y2 - y1^2)
+    }
+
+    return(list(
+        losses = losses,
+        k = as.integer(names(which.min(losses)))
+    ))
+}
+
+predict_dim_wilcoxon <- function(object, p.value = 0.05) {
+    # extract original data from object (cve result)
+    X <- object$X
+    Y <- object$Y
+    # Get dimensions
+    n <- nrow(X)
+    p <- ncol(X)
+    # Compute persistent data.
+    i = rep(1:n, n)
+    j = rep(1:n, each = n)
+    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
+
+    L <- matrix(NA, n, length(object$res))
+    colnames(L) <- names(object$res)
+    # Compute per sample losses with alternative bandwidth for each dimension.
+    for (dr.k in object$res) {
+        # extract dimension specific estimates and dimensions.
+        k <- dr.k$k
+        V <- dr.k$V
+        q <- ncol(V)
+        # estimate bandwidth according alternative formula (see: TODO: see)
+        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
+        # Projected `X`
+        XV <- X %*% V
+        # Devectorized distance matrix
+        # (inefficient in R but fast in C)
+        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
+        D <- D.eucl - D
+        # Apply kernel
+        K <- exp((-0.5 / h^2) * D^2)
+        # sum columns
+        colSumsK <- colSums(K)
+        # compute weighted and square meighted reponses
+        y1 <- (K %*% Y) / colSumsK
+        y2 <- (K %*% Y^2) / colSumsK
+        # element-wise L for dim. k
+        L[, as.character(k)] <- y2 - y1^2
+    }
+
+    for (ind in seq_len(length(object$res) - 1L)) {
+        p.test <- wilcox.test(L[, ind], L[, ind + 1L],
+                              alternative = "less")$p.value
+        if (p.test < p.value) {
+            return(list(
+                p.value = p.test,
+                k = object$res[[ind]]$k
+            ))
+        }
+    }
+
+    return(list(
+        p.value = NA,
+        k = object$res[[length(object$res)]]$k
+    ))
+}
+
+#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
+#'  TODO: rewrite!!!
+#' 
+#' @param object instance of class \code{cve} (result of \code{cve},
+#'  \code{cve.call}).
+#' @param ... ignored.
+#'
+#' @return list with
+#' \itemize{
+#'    \item MSE: Mean Square Error,
+#'    \item k: predicted dimensions.
+#' }
+#'
+#' @section cv:
+#'  Cross-validation ... TODO:
+#'
+#' @section elbow:
+#'  Cross-validation ... TODO:
+#'
+#' @section wilcoxon:
+#'  Cross-validation ... TODO:
+#'
+#' @examples
+#' # create B for simulation
+#' B <- rep(1, 5) / sqrt(5)
+#' 
+#' set.seed(21)
+#' # creat predictor data x ~ N(0, I_p)
+#' x <- matrix(rnorm(500), 100)
+#' 
+#' # simulate response variable
+#' #    y = f(B'x) + err
+#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
+#' y <- x %*% B + 0.25 * rnorm(100)
+#'
+#' # Calculate cve for unknown k between min.dim and max.dim.
+#' cve.obj.simple <- cve(y ~ x) 
+#'
+#' predict_dim(cve.obj.simple)
+#'
+#' @export
+predict_dim <- function(object, ..., method = "CV") {
+    # Check if there are dimensions to select.
+    if (length(object$res) == 1L) {
+        return(list(
+            message = "Only one dim. estimated.",
+            k = as.integer(names(object$res))
+        ))
+    }
+
+    # Determine method "fuzzy".
+    methods <- c("cv", "elbow", "wilcoxon")
+    names(methods) <- methods
+    method <- methods[[tolower(method), exact = FALSE]]
+    if (is.null(method)) {
+        stop('Unable to determine method.')
+    }
+
+    if (method == "cv") {
+        return(predict_dim_cv(object))
+    } else if (method == "elbow") {
+        return(predict_dim_elbow(object))
+    } else if (method == "wilcoxon") {
+        return(predict_dim_wilcoxon(object))
+    } else {
+        stop("Unable to determine method.")
+    }
+}
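A short usage sketch of the reworked predict_dim() (adapted from the roxygen example above; not part of the diff). The method argument is matched case-insensitively against "cv", "elbow" and "wilcoxon":

B <- rep(1, 5) / sqrt(5)
set.seed(21)
x <- matrix(rnorm(500), 100)
y <- x %*% B + 0.25 * rnorm(100)
cve.obj.simple <- cve(y ~ x)
predict_dim(cve.obj.simple)                      # default: cross-validation via mda::mars
predict_dim(cve.obj.simple, method = "elbow")    # smallest re-estimated loss
predict_dim(cve.obj.simple, method = "wilcoxon") # one-sided Wilcoxon test on per-sample losses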
							
								
								
									
CVE_C/man/predict_dim.Rd (new file, 43)
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict_dim.R
+\name{predict_dim}
+\alias{predict_dim}
+\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
+\usage{
+predict_dim(object, ...)
+}
+\arguments{
+\item{object}{instance of class \code{cve} (result of \code{cve},
+\code{cve.call}).}
+
+\item{...}{ignored.}
+}
+\value{
+list with
+\itemize{
+   \item MSE: Mean Square Error,
+   \item k: predicted dimensions.
+}
+}
+\description{
+Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
+}
+\examples{
+# create B for simulation
+B <- rep(1, 5) / sqrt(5)
+
+set.seed(21)
+# creat predictor data x ~ N(0, I_p)
+x <- matrix(rnorm(500), 100)
+
+# simulate response variable
+#    y = f(B'x) + err
+# with f(x1) = x1 and err ~ N(0, 0.25^2)
+y <- x \%*\% B + 0.25 * rnorm(100)
+
+# Calculate cve for unknown k between min.dim and max.dim.
+cve.obj.simple <- cve(y ~ x) 
+
+predict_dim(cve.obj.simple)
+
+}
@@ -17,6 +17,7 @@ void cve(const mat *X, const mat *Y, const double h,
     double loss, loss_last, loss_best, err, tau;
     double tol = tol_init * sqrt((double)(2 * q));
     double agility = -2.0 * (1.0 - momentum) / (h * h);
+    double sumK;
     double c = agility / (double)n;
 
     // TODO: check parameters! dim, ...
@@ -87,8 +88,9 @@ void cve(const mat *X, const mat *Y, const double h,
             S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem);
         } else if (method == weighted) {
             colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
-            loss_last = dot(L, '/', colSumsK);
-            c = agility / sum(colSumsK);
+            sumK = sum(colSumsK);
+            loss_last = dot(L, '*', colSumsK) / sumK;
+            c = agility / sumK;
             /* Calculate the scaling matrix S */
             S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
         } else {
@@ -100,10 +102,8 @@ void cve(const mat *X, const mat *Y, const double h,
         G = matrixprod(c, tmp2, V, 0.0, G);
 
         if (logger) {
-            callLogger(logger, loggerEnv,
-                       attempt, /* iter <- 0L */ -1,
-                       L, V, G,
-                       loss_last, /* err <- NA */ -1.0, tau);
+            callLogger(logger, loggerEnv, attempt, /* iter <- 0L */ -1,
+                       L, V, G, loss_last, /* err <- NA */ -1.0, tau);
         }
 
         /* Compute Skew-Symmetric matrix `A` used in Cayley transform.
@@ -120,9 +120,6 @@ void cve(const mat *X, const mat *Y, const double h,
             /* Move `V` along the gradient direction. */
             V_tau = cayleyTransform(A, V, V_tau, workMem);
 
-            // Rprintf("Start attempt(%2d), iter (%2d): err: %f, loss: %f, tau: %f\n",
-            //         attempt, iter, dist(V, V_tau), loss_last, tau);
-
             /* Embed X_i's in V space */
             XV = matrixprod(1.0, X, V_tau, 0.0, XV);
             /* Compute embedded distances */
@@ -146,7 +143,8 @@ void cve(const mat *X, const mat *Y, const double h,
                 loss = mean(L);
             } else if (method == weighted) {
                 colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
-                loss = dot(L, '/', colSumsK);
+                sumK = sum(colSumsK);
+                loss = dot(L, '*', colSumsK) / sumK;
             } else {
                 // TODO: error handling!
             }
@@ -154,22 +152,26 @@ void cve(const mat *X, const mat *Y, const double h,
             /* Check if step is appropriate, iff not reduce learning rate. */
             if ((loss - loss_last) > loss_last * slack) {
                 tau *= gamma;
+                iter -= 1;
                 A = elemApply(A, '*', gamma, A); // scale A by gamma
                 continue;
+            } else {
+                tau /= gamma;
             }
 
             /* Compute error, use workMem. */
             err = dist(V, V_tau);
 
+            // Rprintf("%2d - iter: %2d, loss: %1.3f, err: %1.3f, tau: %1.3f, norm(G) = %1.3f\n",
+            //         attempt, iter, loss, err, tau, sqrt(squareSum(G)));
+
             /* Shift next step to current step and store loss to last. */
             V = copy(V_tau, V);
             loss_last = loss;
 
             if (logger) {
-                callLogger(logger, loggerEnv,
-                           attempt, iter,
-                           L, V, G,
-                           loss, err, tau);
+                callLogger(logger, loggerEnv, attempt, iter,
+                           L, V, G, loss, err, tau);
             }
 
             /* Check Break condition. */
@@ -183,7 +185,7 @@ void cve(const mat *X, const mat *Y, const double h,
             } else if (method == weighted) {
                 /* Calculate the scaling matrix S */
                 S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
-                c = agility / sum(colSumsK);
+                c = agility / sumK; // n removed previousely
             } else {
                 // TODO: error handling!
             }
@@ -198,6 +200,8 @@ void cve(const mat *X, const mat *Y, const double h,
             A = skew(tau, G, V, 0.0, A);
         }
 
+        // Rprintf("\n");
+
         /* Check if current attempt improved previous ones */
         if (attempt == 0 || loss < loss_best) {
             loss_best = loss;
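For reference, a tiny R rendering of the corrected weighted loss in the hunks above, under the assumption that dot(a, '*', b) sums the element-wise product and elemApply(colSumsK, '-', 1.0, colSumsK) subtracts the self-weight; the numbers are made up:

L        <- c(0.40, 0.20, 0.30)   # per-column losses
colSumsK <- c(2.0, 1.5, 2.5)      # kernel column sums (including the self-weight 1)
w    <- colSumsK - 1              # elemApply(colSumsK, '-', 1.0, colSumsK)
sumK <- sum(w)                    # sum(colSumsK) after the shift
sum(L * w) / sumK                 # new: weighted mean of the losses
sum(L / w)                        # old expression that the commit replaces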
							
								
								
									
LaTeX/notes.tex (379)
@@ -4,58 +4,369 @@
 \usepackage[T1]{fontenc}
 \usepackage{amsmath, amsfonts, amssymb, amsthm}
 \usepackage{tikz}
+\usepackage{listings}
 \usepackage{fullpage}
+
+
+\lstdefinelanguage{PseudoCode} {
+    morekeywords={
+        for,
+        while,
+        repeat,
+        from,
+        each,
+        foreach,
+        break,
+        continue,
+        in,
+        do,
+        as,
+        and,
+        or,
+        end,
+        return,
+        if,
+        then,
+        else,
+        function,
+        begin,
+        to,
+        new,
+        input,
+        output
+    },
+    morecomment=[l]{/*},
+    morecomment=[l]{//},
+    % basicstyle=\ttfamily,
+    % keywordstyle=\color{blue}, %\ttfamily,
+    commentstyle=\color{gray}\it,
+    keywordstyle=\bf,
+    rulecolor=\color{black},
+    literate=%
+        {!=}{{$\neq$}}1
+        {<=}{{$\leq$}}1
+        {>=}{{$\geq$}}1
+        {->}{{$\rightarrow$}}1
+        {<-}{{$\leftarrow$}}1
+}
+
+%	},
+% 	tabsize=3,
+% 	sensitive=false,
+% 	morecomment=[l]{#},
+% 	morestring=[b]",
+% 	extendedchars=true,
+% 	inputencoding=utf8,
+% 	literate=%
+% 		{!=}{{$\neq$}}1
+% 		{<=}{{$\leq$}}1
+% 		{>=}{{$\geq$}}1
+% 		{<>}{{$\neq$}}1
+% 		{:=}{{$\ \leftarrow\quad$}}1
+% 		{Ö}{{\"O}}1
+% 		{Ä}{{\"A}}1
+% 		{Ü}{{\"U}}1
+% 		{ß}{{\ss{}}}1
+% 		{ü}{{\"u}}1
+% 		{ä}{{\"a}}1
+% 		{ö}{{\"o}}1
+% 		{~}{{\textasciitilde}}1,
+% 	texcl=true % use all chars from \usepackage[utf8]{inputenc}
+% }
+\lstset{
+    tabsize=4,
+    xleftmargin=0pt,    % left margin
+    numbers=left,       % linenumber position
+    numbersep=15pt,     % left linenumber padding
+    numberstyle=\tiny,
+    basicstyle=\ttfamily,
+    keywordstyle=\color{black!60},
+    commentstyle=\ttfamily\color{gray!70},
+    breaklines=true,
+    literate=
+}
+
+\renewcommand{\epsilon}{\varepsilon}
+
 \newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
 \newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
+
+\renewcommand{\vec}{\operatorname{vec}}
+\newcommand{\devec}{\operatorname*{devec}}
+\newcommand{\svec}{\operatorname{svec}}
+\newcommand{\sym}{\operatorname{sym}}
+\renewcommand{\skew}{\operatorname{skew}}
+\newcommand{\rowSums}{\operatorname{rowSums}}
+\newcommand{\colSums}{\operatorname{colSums}}
+\newcommand{\diag}{\operatorname{diag}}
+
 \begin{document}
-Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as
+
+\section{Kronecker Product Properties}
+The \emph{mixed-product} property for matrices $A, B, C, D$ holds if and only if the following matrix products are well defined
 \begin{displaymath}
-    A = \begin{pmatrix}
-        a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
-        a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
-        a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
-        \vdots & \vdots & \vdots & \ddots & \vdots \\
-        a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
-    \end{pmatrix}
+    (A\otimes B)(C \otimes D) = (A C) \otimes (B C).
+\end{displaymath}
+In combination with the \emph{Hadamard product} (element-wise multiplication) for matrices $A, C$ of the same size as well as $B, D$ of the same size is
+\begin{displaymath}
+    (A\otimes B)\circ (C \otimes D) = (A \circ C) \otimes (B \circ D).
+\end{displaymath}
+The \emph{transpose} of the Kronecker product fulfills
+\begin{displaymath}
+    (A\otimes B)^T = A^T \otimes B^T
 \end{displaymath}
-A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is givne in the following form
-\begin{displaymath}
-    S = \begin{pmatrix}
-        0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
-        s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
-        s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
-        \vdots & \vdots & \vdots & \ddots & \vdots \\
-        s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
-    \end{pmatrix}
-\end{displaymath}
-Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further alrogithmic shortcuts (sometime they are more expencife) the symmetric matrix $S$ is stored in packed form, meanin in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ opperator defined as
+
+\section{Distance Computation}
+The pair-wise distances $d_V(X_{i,:}, X_{j,:})$ arranged in the distance matrix $D\in\mathbb{R}^{n\times n}$ can be written as
+\begin{align*}
+    \vec(D) = \rowSums(((X Q)\otimes 1_n - 1_n \otimes (X Q))^2)
+\end{align*}
+This can be computed in $\mathcal{O}(n^2p + np^2)$ time (vectorization and devectorization takes $\mathcal{O}(1)$).
+
+The matrices $K, W$ are define through there elements as
 \begin{displaymath}
-    \vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T
+    k_{i j} = \exp\left(-\frac{d_{i j}^2}{2 h^2}\right),\qquad w_{i j} = \frac{k_{i j}}{\sum_{m} k_{m j}}.
 \end{displaymath}
-The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by
-
+
+Next are $\bar{y}^{(m)}$ and the ``element-wise'' loss $l_i = L_n(V, X_i)$.
 \begin{displaymath}
-    (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
+    \bar{y}^{(m)} = W^T Y^m,\qquad l = \bar{y}^{(2)} - (\bar{y}^{(1)})^2
 \end{displaymath}
+
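Not part of notes.tex: a minimal R sketch of the vectorized distance computation just added, assuming X is an n x p data matrix and Q a p x q projection (both placeholder names):

pairwise_dist_sq <- function(X, Q) {
    n <- nrow(X)
    XQ <- X %*% Q
    i <- rep(seq_len(n), each  = n)   # rows of (X Q) \otimes 1_n
    j <- rep(seq_len(n), times = n)   # rows of 1_n \otimes (X Q)
    # rowSums of the squared row differences, devectorized into an n x n matrix
    matrix(rowSums((XQ[i, , drop = FALSE] - XQ[j, , drop = FALSE])^2), n, n)
}
# example: 5 samples in R^3 projected onto a random 2-dimensional subspace
X <- matrix(rnorm(15), 5, 3)
Q <- qr.Q(qr(matrix(rnorm(6), 3, 2)))
D <- pairwise_dist_sq(X, Q)
max(abs(D - as.matrix(dist(X %*% Q))^2))   # ~ 0, i.e. the usual squared Euclidean distances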
-\begin{center}
-    \begin{tikzpicture}[xscale=1,yscale=-1]
-        % \foreach \i in {0,...,5} {
-        %     \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$};
+\section{Gradient Computation}
+The model
+\begin{displaymath}
+    Y \sim g(B^T X) + \epsilon.
+\end{displaymath}
+
+Assume a data set $(X_i, Y_i)$ for $i = 1, ..., n$ with $X$ a $n\times p$ matrix such that each row represents one sample. Now let $l_i = L_n(V, X_i)$, $\bar{y}^{(1)}_j = (W^T Y)_j$ as well as $d_{i j}, w_{i j}$ the distance and weight matrix components. Then the gradient for the ``simple'' CVE method is given as
+\begin{displaymath}
+    \nabla L_n(V) = \frac{1}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} (l_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j} \nabla_V d_V(X_{i,:}, X_{j,:}).
+\end{displaymath}
+This representation is cumbersome and a direct implementation has a asymptotic run-time of $\Theta(n^2p^2)$ because it is a double sum over $n$, therefore quadratic in $n$, and the form of $\nabla_V d_V$.
+
+This can be optimized and written in matrix notation. First the distance gradient is given as
+\begin{displaymath}
+    \nabla_V d_V(X_{i,:}, X_{j,:}) = -2 (X_{i,:} - X_{j,:})^T (X_{i,:} - X_{j,:}) V
+\end{displaymath}
+(Note: $X_{i,:}\in\mathbb{R}^{1\times p}$, aka a row representing one sample). In addition define the $n\times n$ matrix $S$ through its elements
+\begin{displaymath}
+    s_{i j} = (l_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j}.
+\end{displaymath}
+Substitution in the gradient leads to
+\begin{align*}
+    \nabla L_n(V)
+        &= -\frac{2}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} s_{i j} (X_{i,:} - X_{j,:})^T (X_{i,:} - X_{j,:}) V \\
+        &= -\frac{2}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} s_{i j} \left( X_{i,:}^T X_{i,:} - X_{i,:}^T X_{j,:} - X_{j,:}^T X_{i,:} + X_{j,:}^T X_{j,:} \right) V \\
+        &= -\frac{2}{nh^2} \left( \sum_{i = 1}^{n}\sum_{j = 1}^{n} (s_{i j} + s_{j i}) X_{i,:}^T X_{i,:} - \sum_{i = 1}^{n}\sum_{j = 1}^{n} (s_{i j} + s_{j i}) X_{i,:}^T X_{j,:} \right) V \\
+        &= -\frac{2}{nh^2} \left( X^T \diag(\colSums(S + S^T)) X - X^T (S + S^T) X \right) V \\
+        &= -\frac{2}{nh^2} X^T \left( \diag(\colSums(S + S^T)) - (S + S^T) \right) X V
+\end{align*}
+
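And a direct, unoptimized R transcription of the final matrix form of the gradient, again only a sketch (not part of the notes); all object names are placeholders and d_{ij} is taken to be the squared projected distance, as in the distance section above:

grad_Ln <- function(X, Y, V, h) {
    n <- nrow(X)
    XV <- X %*% V
    i <- rep(seq_len(n), each = n)
    j <- rep(seq_len(n), times = n)
    # d_ij: squared distances between projected samples
    D <- matrix(rowSums((XV[i, , drop = FALSE] - XV[j, , drop = FALSE])^2), n, n)
    K <- exp(-D^2 / (2 * h^2))                 # k_ij = exp(-d_ij^2 / (2 h^2))
    W <- sweep(K, 2, colSums(K), "/")          # w_ij = k_ij / sum_m k_mj
    y1 <- drop(crossprod(W, Y))                # \bar{y}^{(1)} = W^T Y
    L  <- drop(crossprod(W, Y^2)) - y1^2       # l = \bar{y}^{(2)} - (\bar{y}^{(1)})^2
    # s_ij = (l_j - (Y_i - \bar{y}^{(1)}_j)^2) w_ij d_ij
    S  <- (matrix(L, n, n, byrow = TRUE) - outer(Y, y1, function(a, b) (a - b)^2)) * W * D
    M  <- S + t(S)
    (-2 / (n * h^2)) * crossprod(X, (diag(colSums(M)) - M) %*% XV)
}
# example call with random data and an orthonormal start value V
X <- matrix(rnorm(100 * 4), 100, 4); Y <- rnorm(100)
V <- qr.Q(qr(matrix(rnorm(8), 4, 2)))
G <- grad_Ln(X, Y, V, h = 1)
dim(G)   # 4 x 2, same shape as V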
 | 
					\begin{center}{\bf
 | 
				
			||||||
 | 
						ATTENTION: The given R examples are to illustrate the inplementation in C which is 0-indexed!
 | 
				
			||||||
 | 
					}\end{center}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The \emph{vertorization} operation maps a matrix $A\in\mathbb{R}^{n\times m}$ into $\mathbb{R}^{nm}$ by stacking the columns of $A$;
 | 
				
			||||||
 | 
					\begin{displaymath}
 | 
				
			||||||
 | 
						\vec(A) = (a_{0,0}, a_{0,1}, a_{0,2},...,a_{0,n-1},a_{1,0},a_{1,1},...,a_{n-1,n-1})^T.
 | 
				
			||||||
 | 
					\end{displaymath}
 | 
				
			||||||
 | 
					The relation $\vec(A)_k = a_{i,j}$ holds for $k=nj+i$ such that $0\leq k < n^2$ and $0\leq i < n, 0 \leq j < m$. This operation is obviously a bijection. When going ``backwards'' the dimension of the original space is required, therefore let $\devec_n$ be the operation such that $\devec_n(\vec(A)) = A$ for $A\in\mathbb{R}^{n\times m}$.\footnote{Note that for $B\in\mathbb{R}^{p\times q}$ such that $pq = nm$ the $\devec_n(\vec(B))\in\mathbb{R}^{n\times m}$.}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For symmetric matrices the information stored in $a_{i,j} = a_{j,i}$ is twice stored in $A=A^T\in\mathbb{R}^{n\times n}$, to remove this redundency the \emph{symmetric vectorization} is defined which saves the main diagonal and the lower triangular part of the symmetric matrix according the scema
 | 
				
			||||||
 | 
					\begin{displaymath}
 | 
				
			||||||
 | 
						\svec(A) = (a_{0,0},2a_{1,0},2a_{2,n},...,2a_{n-1,0},a_{1,1},2a_{2,1},...,2a_{n-1,1},a_{2,2},...,a_{n-1,n-1})
 | 
				
			||||||
 | 
					\end{displaymath}
 | 
				
			||||||
 | 
					A it more formal
 | 
				
			||||||
 | 
					\begin{displaymath}
 | 
				
			||||||
 | 
						\svec(A)_{k} = (2-\delta_{i,j})a_{i,j} \quad\text{for}\quad k = n j + i - \frac{j(j + 1)}{2}, 0\leq j \leq i < n^2.
 | 
				
			||||||
 | 
					\end{displaymath}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
\begin{lstlisting}[language=R]
n <- 3
# 0-indexed position of entry (i, j) in the packed (svec) storage
k <- function(i, j, n) { (j * n) + i - (j * (j + 1) / 2) }
# 0-indexed row and column indices in column-major order
i <- function(n) { rep(1:n - 1, n) }
j <- function(n) { rep(1:n - 1, each = n) }
A <- matrix(k(i(n), j(n), n), n)
A[which(j(n) > i(n))] <- NA   # the strict upper triangle is not stored
A
#      [,1] [,2] [,3]
# [1,]    0   NA   NA
# [2,]    1    3   NA
# [3,]    2    4    5
vec <- function(A) { as.vector(A) }
svec <- function(A) {
	n <- nrow(A)
	# double the off-diagonal entries, keep lower triangle incl. diagonal
	((2 - (i(n) == j(n))) * A)[i(n) >= j(n)]
}
svec(matrix(1, n, n))
# [1] 1 2 2 1 2 1
devec <- function(vec, n) { matrix(vec, n) }
\end{lstlisting}

For a square matrix $A\in\mathbb{R}^{n\times n}$ we define
\begin{displaymath}
	\sym(A) := \frac{A + A^T}{2}, \qquad \skew(A) := \frac{A - A^T}{2}.
\end{displaymath}
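A minimal R sketch of these two operators (the function names are chosen for illustration); every square matrix decomposes as $A = \sym(A) + \skew(A)$.
\begin{lstlisting}[language=R]
# Sketch: sym()/skew() as defined above; A = sym(A) + skew(A).
sym  <- function(A) (A + t(A)) / 2
skew <- function(A) (A - t(A)) / 2
A <- matrix(rnorm(9), 3)
all.equal(A, sym(A) + skew(A))   # TRUE
\end{lstlisting}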

% For a Matrix $A\in\mathbb{R}^{n\times n}$ the \emph{vectorization} operation is defined as a mapping from the matrices into a 

% Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as
% \begin{displaymath}
%     A = \begin{pmatrix}
%         a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\
%         a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\
%         a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\
%         \vdots & \vdots & \vdots & \ddots & \vdots \\
%         a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1}
%     \end{pmatrix}
% \end{displaymath}

% A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is given in the following form
% \begin{displaymath}
%     S = \begin{pmatrix}
%         0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\
%         s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\
%         s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\
%         \vdots & \vdots & \vdots & \ddots & \vdots \\
%         s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0
%     \end{pmatrix}
% \end{displaymath}
% Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further algorithmic shortcuts (sometime they are more expensive) the symmetric matrix $S$ is stored in packed form, meaning in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ operator defined as

% \begin{displaymath}
%     \vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T
% \end{displaymath}

% The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by

% \begin{displaymath}
%     (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
% \end{displaymath}

% \begin{center}
%     \begin{tikzpicture}[xscale=1,yscale=-1]
%         % \foreach \i in {0,...,5} {
%         %     \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$};
%         % }
%         \foreach \i in {1,...,4} {
%             \foreach \j in {1,...,\i} {
%                 \node at (\j, \i) {$\i,\j$};
%             }
%         }

%     \end{tikzpicture}
% \end{center}

\newpage
\section{Algorithm}
The basic algorithm reads as follows:

With
\begin{displaymath}
	X_{diff} := X\otimes 1_n - 1_n\otimes X
\end{displaymath}
it holds that
\begin{displaymath}
	X_{diff}Q = (X\otimes 1_n - 1_n\otimes X)Q = XQ\otimes 1_n - 1_n\otimes XQ.
\end{displaymath}
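This is the mixed-product property of the Kronecker product, where $1_n$ denotes the column vector of ones. A small R sketch (all names chosen for illustration) that checks the identity for a random $X$ and the projection $Q = I_p - VV^T$ used by the algorithm:
\begin{lstlisting}[language=R]
# Sketch: check X.diff %*% Q == kronecker(X Q, 1_n) - kronecker(1_n, X Q).
n <- 4; p <- 3; q <- 1
X <- matrix(rnorm(n * p), n)
V <- qr.Q(qr(matrix(rnorm(p * q), p)))
Q <- diag(p) - tcrossprod(V)              # projection onto span(V)^perp
ones <- matrix(1, n, 1)

X.diff <- kronecker(X, ones) - kronecker(ones, X)
lhs <- X.diff %*% Q
rhs <- kronecker(X %*% Q, ones) - kronecker(ones, X %*% Q)
all.equal(lhs, rhs)                       # TRUE
\end{lstlisting}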

\newcommand{\rStiefel}{\operatorname{rStiefel}}
% \lstset{language=PseudoCode}
% \begin{lstlisting}[mathescape, caption=First phase of \texttt{HDE} (see \cite{HDE}), label=code:HDE, captionpos=b]
% \begin{lstlisting}[mathescape]
% // Hello world
% /* Hello comment */
% $X_{diff} \leftarrow X\otimes 1_n - 1_n\otimes X$

% for attempt from 1 to attempts do
% 	if $\exists V_{init}$ then
% 		$V \leftarrow V_{init}$
% 	else
% 		$V \leftarrow \rStiefel(p, q)$
% 	end if

% 	/* Projection matrix into null space */
% 	$Q \leftarrow I_p - VV^T$

% 	/* Pair-wise distances (row sum of squared elements) */
% 	$D \leftarrow$ foreach $i,j=1,...,n$ as $D_{i,j}\leftarrow \|(X_{i,:}-X_{j,:})Q\|_2^2$

% 	/* Weights */
% 	$W \leftarrow$ foreach $i,j=1,...,n$ as $W_{i,j} \leftarrow \frac{k(D_{i,j})}{\sum_{i} k(D_{i,j})}$

% 	$\bar{y}_1 \leftarrow W^TY$
% 	$\bar{y}_2 \leftarrow W^T(Y\odot Y)$

% 	/* Element-wise losses */
% 	$L \leftarrow \bar{y}_2 - \bar{y}_1^2$

% 	for epoch from 1 to epochs do

% 		$G_t \leftarrow \gamma G_{t-1} + (1-\gamma) \nabla_c L(V)$

% 	end for
% end for
% \end{lstlisting}

The gradient of the loss at a given position $V$ is
\begin{displaymath}
	\nabla L_n(V) = \frac{1}{nh^2}\sum_{i = 0}^{n - 1} \sum_{j = 0}^{n - 1} (L_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j} \nabla_V d_V(X_{i,:}, X_{j,:})
\end{displaymath}
Now let the matrix $S$ be defined through its coefficients
\begin{displaymath}
	s_{i j} = (L_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j}
\end{displaymath}
This matrix is \underline{not} symmetric, but we can work with the symmetric matrix $S + S^T$, which has a zero main diagonal because $D$ has a zero main diagonal, i.e. $s_{i i} = 0$ since $d_{i i} = 0$ for each $i$. Together with $\nabla_V d_V(X_{i,:}, X_{j,:}) = \nabla_V d_V(X_{j,:}, X_{i,:})$ this yields
\begin{displaymath}
	\nabla L_n(V) = \frac{1}{nh^2}\sum_{j = 0}^{n - 1} \sum_{i = j}^{n - 1} (s_{i j} + s_{j i}) \nabla_V d_V(X_{i,:}, X_{j,:})
\end{displaymath}
Note the summation indices $0 \leq j \leq i < n$. Substituting $\nabla_V d_V(X_{i,:}, X_{j,:}) = -2 (X_{i,:} - X_{j,:})^T(X_{i,:} - X_{j,:}) V$ gives
\begin{displaymath}
	\nabla L_n(V) = -\frac{2}{nh^2}\sum_{j = 0}^{n - 1} \sum_{i = j}^{n - 1} (s_{i j} + s_{j i}) (X_{i,:} - X_{j,:})^T(X_{i,:} - X_{j,:}) V
\end{displaymath}
Let $X_{-}$ be the matrix containing all pairwise differences $X_{i,:} - X_{j,:}$, using the same row indexing scheme as the symmetric vectorization,
\begin{displaymath}
	(X_{-})_{k,:} = X_{i,:} - X_{j,:} \quad\text{for}\quad k = n j + i - \frac{j(j + 1)}{2}, \quad 0\leq j \leq i < n.
\end{displaymath}
With the matrix $X_{-}$ the above double sum can be written in matrix notation as\footnote{only valid because $s_{i i} = 0$}
\begin{displaymath}
	\nabla L_n(V) = -\frac{2}{nh^2} X_{-}^T(\svec(\sym(S)) \circ_r X_{-}) V
\end{displaymath}
where $\circ_r$ denotes the ``recycled'' Hadamard product: for a vector $x\in\mathbb{R}^n$ and a matrix $M\in\mathbb{R}^{n\times m}$ it is the element-wise multiplication of each column of $M$ with $x$, or equivalently $x\circ_r M = \underbrace{(x, x, ..., x)}_{n\times m} \circ M$ where $\circ$ is the element-wise product.
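In R the recycled Hadamard product needs no special treatment: multiplying a length-$k$ vector with a $k\times m$ matrix recycles the vector down each column. The following minimal sketch (illustration only) checks the packed form above against the matrix form $X^T(\diag(\colSums(S + S^T)) - (S + S^T))X$ derived earlier, comparing the $p\times p$ cores, i.e. omitting the common factor $V$, for an arbitrary $S$ with zero diagonal.
\begin{lstlisting}[language=R]
# Sketch: packed form X_-^T (svec(sym(S)) o_r X_-) vs. the matrix form.
n <- 4; p <- 3; h <- 1
X <- matrix(rnorm(n * p), n)
S <- matrix(rnorm(n * n), n); diag(S) <- 0

# all pairs i >= j in the same order as the symmetric vectorization
pairs <- which(lower.tri(diag(n), diag = TRUE), arr.ind = TRUE)
X.minus <- X[pairs[, "row"], ] - X[pairs[, "col"], ]

# svec(sym(S)) reduces to the lower triangle of S + t(S) (zero diagonal)
s <- (S + t(S))[lower.tri(S, diag = TRUE)]

lhs <- -2 / (n * h^2) * t(X.minus) %*% (s * X.minus)   # s * X.minus is o_r
M <- S + t(S)
rhs <- -2 / (n * h^2) * t(X) %*% (diag(colSums(M)) - M) %*% X
all.equal(lhs, rhs)                                    # TRUE
\end{lstlisting}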
\begin{lstlisting}[mathescape, language=PseudoCode]
/* Starting value and initial gradient. */
$V_1 \leftarrow V_{init}$ if $\exists V_{init}$ else $\rStiefel(p, q)$
$G_1 \leftarrow (1 - \mu) \nabla L_n(V_1)$

/* Optimization loop */
$t \leftarrow 1$
while $t\leq\,$max.iter do

	/* Update on Stiefel manifold. */
	$A \leftarrow G_tV_t^T - V_tG_t^T$
	$V_{t+1} \leftarrow (I_p + \tau A)^{-1}(I_p - \tau A)V_{t}$

	/* Check break condition. */
	if $\|V_{t+1}V_{t+1}^T - V_{t}V_{t}^T\|_2^2 \leq \sqrt{2q}\,$tol then
		break
	end if

	/* Check for decrease. */
	if $L_n(V_{t+1}) - L_n(V_{t}) > L_n(V_{t})\,$slack then // TODO: slack?
		/* Reduce step-size. */
		$\tau \leftarrow \gamma\tau$
	else
		/* Gradient at next position (with momentum). */
		$G_{t+1} \leftarrow \mu G_{t} + (1 - \mu) \nabla L_n(V_{t+1})$
		/* Increase step index */
		$t \leftarrow t + 1$
	end if

end while
\end{lstlisting}
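The update step is a Cayley transform: since $A$ is skew-symmetric, $(I_p + \tau A)^{-1}(I_p - \tau A)$ is orthogonal, so $V_{t+1}$ stays on the Stiefel manifold. A minimal R sketch of a single update step (illustration only, not the package implementation):
\begin{lstlisting}[language=R]
# Sketch: one curvilinear (Cayley) step; the iterate stays on the manifold.
p <- 6; q <- 2; tau <- 0.1
V <- qr.Q(qr(matrix(rnorm(p * q), p)))    # V^T V = I_q
G <- matrix(rnorm(p * q), p)              # stand-in for the gradient

A <- G %*% t(V) - V %*% t(G)              # skew-symmetric by construction
V.next <- solve(diag(p) + tau * A, (diag(p) - tau * A) %*% V)

all.equal(crossprod(V.next), diag(q))     # TRUE up to numerical error
\end{lstlisting}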

\end{document}
@ -1,13 +1,31 @@
# Usage:
# ~$ Rscript runtime_test.R

textplot <- function(...) {
    text <- unlist(list(...))
    if (length(text) > 20) {
        text <- c(text[1:17],
                  '   ...... (skipped, text too long) ......',
                  text[c(-1, 0) + length(text)])
    }

    plot(NA, xlim = c(0, 1), ylim = c(0, 1),
         bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')

    for (i in seq_along(text)) {
        text(0, 1 - (i / 20),
             text[[i]], pos = 4)
    }
}

# library(CVEpureR) # load CVE's pure R implementation
library(CVE) # load CVE

#' Writes log information to console. (to not get bored^^)
tell.user <- function(name, start, i, length) {
    cat("\rRunning Test (", name, "):",
        i, "/", length,
        " - elapsed:", format(Sys.time() - start), "\033[K")
}
#' Computes "distance" of spanned subspaces.
#' @param B1 Semi-orthonormal basis matrix
@ -29,19 +47,14 @@ MAXIT <- 50L
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10L
# set names of datasets
ds.names <- paste0("M", seq(7))
# Set used CVE method
methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd")

# Setup error and time tracking variables
error <- matrix(NA, SIM.NR, length(methods) * length(ds.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0)
colnames(time) <- colnames(error)

# Create new log file and write CSV (actually TSV) header.
@ -56,13 +69,12 @@ cat('Plotting to file:', path, '\n')

# only for telling user (to stdout)
count <- 0
start <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
    # Repeat for each dataset.
    for (name in ds.names) {
        tell.user(name, start, (count <- count + 1), SIM.NR * length(ds.names))

        # Create a new dataset
        ds <- dataset(name)
@ -71,35 +83,20 @@ for (sim in 1:SIM.NR) {
        X <- ds$X
        data <- cbind(Y, X)
        # get dimensions
        k <- ncol(ds$B)

        for (method in methods) {
            dr.time <- system.time(
                dr <- cve.call(X, Y,
                    method = method,
                    k = k,
                    attempts = ATTEMPTS
                )
            )
            dr$B <- coef(dr, k)

            key <- paste0(name, '-', method)
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k)
            time[sim, key] <- dr.time["elapsed"]

            # Log results to file (mostly for long running simulations)
test.R
@ -1,3 +1,19 @@
textplot <- function(...) {
    text <- unlist(list(...))
    if (length(text) > 20) {
        text <- c(text[1:17],
                  '   ...... (skipped, text too long) ......',
                  text[c(-1, 0) + length(text)])
    }

    plot(NA, xlim = c(0, 1), ylim = c(0, 1),
         bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')

    for (i in seq_along(text)) {
        text(0, 1 - (i / 20),
             text[[i]], pos = 4)
    }
}

args <- commandArgs(TRUE)
if (length(args) > 0L) {
@ -10,11 +26,12 @@ if (length(args) > 1L) {
} else {
    momentum <- 0.0
}
seed <- 42
max.iter <- 50L
attempts <- 25L

library(CVE)
path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf')

# Define logger for `cve()` method.
logger <- function(attempt, iter, data) {
@ -29,12 +46,14 @@ logger <- function(attempt, iter, data) {
    true.error.history[iter + 1, attempt] <<- true.error
}

pdf(path, width = 8.27, height = 11.7) # width, height unit is inches -> A4
layout(matrix(c(1, 1,
                2, 3,
                4, 5), nrow = 3, byrow = TRUE))

for (name in paste0("M", seq(7))) {
    # Seed random number generator
    set.seed(seed)

    # Create a dataset
    ds <- dataset(name)
@ -52,11 +71,37 @@ for (name in paste0("M", seq(5))) {
    tau.history        <- matrix(NA, max.iter + 1, attempts)
    true.error.history <- matrix(NA, max.iter + 1, attempts)

    time <- system.time(
        dr <- cve(Y ~ X, k = k, method = method,
                momentum = momentum,
                max.iter = max.iter, attempts = attempts,
                logger = logger)
    )["elapsed"]

    # Extract finally selected values:
    B.est <- coef(dr, k)
    true.error <- norm(tcrossprod(B.est) - tcrossprod(B), 'F') / sqrt(2 * k)
    loss <- dr$res[[as.character(k)]]$loss

    # Write metadata.
    textplot(
        paste0("Seed value: ",     seed),
               "",
        paste0("Dataset Name: ",   ds$name),
        paste0("dim(X) = (", nrow(X), ", ", ncol(X), ")"),
        paste0("dim(B) = (", nrow(B), ", ", ncol(B), ")"),
               "",
        paste0("CVE method: ",     dr$method),
        paste0("Max Iterations: ", max.iter),
        paste0("Attempts: ",       attempts),
        paste0("Momentum: ",       momentum),
               "CVE call:",
               paste0("  > ", format(dr$call)),
               "",
        paste0("True Error: ", round(true.error, 3)),
        paste0("loss: ",       round(loss, 3)),
        paste0("time: ",       round(time, 3), " s")
    )
    # Plot histories
    matplot(loss.history,       type = 'l', log = 'y', xlab = 'i (iteration)',
        main = paste('loss', name),
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user