diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R
index 8246733..32ead7d 100644
--- a/CVE_C/R/CVE.R
+++ b/CVE_C/R/CVE.R
@@ -289,7 +289,7 @@ cve.call <- function(X, Y, method = "simple",
     } else {
         tol <- as.double(tol)
     }
-    if (!is.numeric(slack) || length(slack) > 1L || slack < 0.0) {
-        stop("Break condition slack 'slack' must be not negative number.")
+    if (!is.numeric(slack) || length(slack) > 1L) {
+        stop("Break condition 'slack' must be a single number.")
     } else {
         slack <- as.double(slack)
diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R
index 0aea91f..29ea0b5 100644
--- a/CVE_C/R/datasets.R
+++ b/CVE_C/R/datasets.R
@@ -1,3 +1,83 @@
+#' Samples from the multivariate normal distribution.
+#'
+#' @param n number of samples.
+#' @param mu mean vector.
+#' @param sigma covariance matrix.
+#'
+#' @returns an \eqn{n\times p} matrix with samples in its rows.
+#'
+#' @examples
+#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
+#' rmvnorm(20, mu = c(3, -1, 2))
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
+    if (!missing(sigma)) {
+        p <- nrow(sigma)
+    } else if (!missing(mu)) {
+        mu <- matrix(mu, ncol = 1)
+        p <- nrow(mu)
+    } else {
+        stop("At least one of 'mu' or 'sigma' must be supplied.")
+    }
+
+    # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
+    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
+}
+
+#' Samples from the multivariate t distribution (student distribution).
+#'
+#' @param n number of samples.
+#' @param mu mean, ... TODO:
+#' @param sigma a \eqn{k\times k} positive definite matrix. If the degrees of
+#'  freedom \eqn{\nu} are bigger than 2, the covariance of the samples is
+#'  \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
+#'  for \eqn{\nu > 2}.
+#' @param df degrees of freedom \eqn{\nu}.
+#'
+#' @returns an \eqn{n\times p} matrix with samples in its rows.
+#'
+#' @examples
+#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
+#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
+#' rmvt(20, mu = c(3, -1, 2), df = 3)
+rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
+    if (!missing(sigma)) {
+        p <- nrow(sigma)
+    } else if (!missing(mu)) {
+        mu <- matrix(mu, ncol = 1)
+        p <- nrow(mu)
+    } else {
+        stop("At least one of 'mu' or 'sigma' must be supplied.")
+    }
+
+    if (df == Inf) {
+        Z <- 1
+    } else {
+        Z <- sqrt(df / rchisq(n, df))
+    }
+
+    return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
+}
+
+
+#' Generalized Normal Distribution.
+#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
+rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
+    if (alpha <= 0 || beta <= 0) {
+        stop("alpha and beta must be positive.")
+    }
+    lambda <- (1 / alpha)^beta
+    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
+    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
+}
+
+#' Laplace distribution
+#' see: https://en.wikipedia.org/wiki/Laplace_distribution
+rlaplace <- function(n = 1, mu = 0, sigma = 1) {
+    U <- runif(n, -0.5, 0.5)
+    scale <- sigma / sqrt(2)
+
+    return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
+}
+
 #' Generates test datasets.
 #'
 #' Provides sample datasets. There are 5 different datasets named
@@ -41,72 +121,73 @@
 #' @import stats
 #' @importFrom stats rnorm rbinom
 #' @export
-dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
-    # validate parameters
-    stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
+dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...)
{ + name <- toupper(name) + if (nchar(name) == 1) { name <- paste0("M", name) } - # set default values if not supplied - if (missing(n)) { - n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 - } - if (missing(B)) { - p <- 12 - if (name == "M1") { - B <- cbind( - c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), - c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) - ) / sqrt(6) - } else if (name == "M2") { - B <- cbind( - c(c(1, 0), rep(0, 10)), - c(c(0, 1), rep(0, 10)) - ) - } else { - B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) - } - } else { - p <- nrow(B) - # validate col. nr to match dataset `k = ncol(B)` - stopifnot( - name %in% c("M1", "M2") && ncol(B) == 2, - name %in% c("M3", "M4", "M5") && ncol(B) == 1 - ) - } - - # set link function `g` for model `Y ~ g(B'X) + epsilon` if (name == "M1") { - g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`))) + beta <- 0.5 + Y <- cos(X %*% B) + rgnorm(n, 0, + alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)), + beta = beta + ) } else if (name == "M2") { - g <- function(BX) { BX[1] * BX[2]^2 } - } else if (name %in% c("M3", "M4")) { - g <- function(BX) { cos(BX[1]) } - } else { # name == "M5" - g <- function(BX) { 2 * log(abs(BX[1]) + 1) } + if (missing(n)) { n <- 100 } + prob <- 0.3 + lambda <- 1 # dispersion + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + Z <- 2 * rbinom(n, 1, prob) - 1 + X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n) + Y <- cos(X %*% B) + rnorm(n, 0, sigma) + } else if (name == "M3") { + if (missing(n)) { n <- 200 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- matrix(rnorm(n * p), n) + Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2) + } else if (name == "M4") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1 / sqrt(6), 6), rep(0, p - 6)), + c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6)) + ) + X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`))) + XB <- X %*% B + Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2) + } else if (name == "M5") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1, 6), rep(0, p - 6)), + c(rep(c(1, -1), 3), rep(0, p - 6)) + ) / sqrt(6) + X <- matrix(runif(n * p), n) + XB <- X %*% B + Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2) + } else if (name == "M6") { + if (missing(n)) { n <- 200 } + # B ... `p x 3` + B <- diag(p)[, -(3:(p - 1))] + X <- matrix(rnorm(n * p), n) + Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2) + } else if (name == "M7") { + if (missing(n)) { n <- 400 } + # B ... `p x 4` + B <- diag(p)[, -(4:(p - 1))] + # "R"andom "M"ulti"V"ariate "S"tudent + X <- rmvt(n = n, sigma = diag(p), df = 3) + XB <- X %*% B + Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4]) + Y <- Y + rlaplace(n, 0, sigma) + } else { + stop("Got unknown dataset name.") } - # compute X - if (name != "M4") { - # compute root of the covariance matrix according the dataset - if (name %in% c("M1", "M3")) { - # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with - # `\Sigma_{i, j} = 0.5^{|i - j|}`. 
-        Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
-        # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
-        Sigma.root <- chol(Sigma)
-    } else { # name %in% c("M2", "M5")
-        Sigma.root <- diag(rep(1, p)) # d-dim identity
-    }
-    # data `X` as multivariate random normal variable with
-    # variance matrix `Sigma`.
-    X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
-    } else { # name == "M4"
-        X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
-    }
-
-    # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)`
-    Y <- apply(X, 1, function(X_i) {
-        g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
-    })
-
     return(list(X = X, Y = Y, B = B, name = name))
 }
diff --git a/CVE_C/R/estimateBandwidth.R b/CVE_C/R/estimateBandwidth.R
index 15543dc..fa521dc 100644
--- a/CVE_C/R/estimateBandwidth.R
+++ b/CVE_C/R/estimateBandwidth.R
@@ -11,6 +11,17 @@
 #' @param X data matrix with samples in its rows.
 #' @param k Dimension of lower dimensional projection.
 #' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper.
+#' @param version either \code{1} or \code{2}, where
+#' \itemize{
+#'    \item 1: uses the following formula:
+#'      \deqn{%
+#'        h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
+#'        h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
+#'    \item 2: uses
+#'      \deqn{%
+#'        h = (2 * tr(\Sigma) / p) * \chi_k^{-1}((nObs - 1) / (n - 1))}{%
+#'        h = (2 * tr(Sigma) / p) * qchisq((nObs - 1) / (n - 1), k)}
+#' }
 #'
 #' @return Estimated bandwidth \code{h}.
 #'
@@ -34,12 +45,17 @@
 #' print(cve.obj.simple$res$'1'$h)
 #' print(estimate.bandwidth(x, k = k))
 #' @export
-estimate.bandwidth <- function(X, k, nObs) {
+estimate.bandwidth <- function(X, k, nObs, version = 1L) {
     n <- nrow(X)
     p <- ncol(X)
-
-    X_centered <- scale(X, center = TRUE, scale = FALSE)
-    Sigma <- crossprod(X_centered, X_centered) / n
-
-    return((2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2)
+    if (version == 1) {
+        X_centered <- scale(X, center = TRUE, scale = FALSE)
+        Sigma <- crossprod(X_centered, X_centered) / n
+        return((2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2)
+    } else if (version == 2) {
+        X_c <- scale(X, center = TRUE, scale = FALSE)
+        return(2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p))
+    } else {
+        stop("Unknown version.")
+    }
 }
diff --git a/CVE_C/R/predict_dim.R b/CVE_C/R/predict_dim.R
index 5784f92..4fec72e 100644
--- a/CVE_C/R/predict_dim.R
+++ b/CVE_C/R/predict_dim.R
@@ -1,35 +1,4 @@
-#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
-#'
-#' @param object instance of class \code{cve} (result of \code{cve},
-#'    \code{cve.call}).
-#' @param ... ignored.
-#'
-#' @return list with
-#' \itemize{
-#'    \item MSE: Mean Square Error,
-#'    \item k: predicted dimensions.
-#' }
-#'
-#' @examples
-#' # create B for simulation
-#' B <- rep(1, 5) / sqrt(5)
-#'
-#' set.seed(21)
-#' # creat predictor data x ~ N(0, I_p)
-#' x <- matrix(rnorm(500), 100)
-#'
-#' # simulate response variable
-#' #     y = f(B'x) + err
-#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
-#' y <- x %*% B + 0.25 * rnorm(100)
-#'
-#' # Calculate cve for unknown k between min.dim and max.dim.
-#' cve.obj.simple <- cve(y ~ x)
-#'
-#' predict_dim(cve.obj.simple)
-#'
-#' @export
-predict_dim <- function(object, ...)
{
+predict_dim_cv <- function(object) {
     # Get centered training data and dimensions
     X <- scale(object$X, center = TRUE, scale = FALSE)
     n <- nrow(object$X) # umber of training data samples
@@ -59,3 +28,173 @@ predict_dim <- function(object, ...) {
         k = as.integer(names(which.min(MSE)))
     ))
 }
+# TODO: write doc
+predict_dim_elbow <- function(object) {
+    # extract original data from object (cve result)
+    X <- object$X
+    Y <- object$Y
+    # Get dimensions
+    n <- nrow(X)
+    p <- ncol(X)
+    # Compute persistent data.
+    i <- rep(1:n, n)
+    j <- rep(1:n, each = n)
+    D.eucl <- matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
+
+    losses <- vector("double", length(object$res))
+    names(losses) <- names(object$res)
+    # Compute per sample losses with alternative bandwidth for each dimension.
+    for (dr.k in object$res) {
+        # extract dimension specific estimates and dimensions.
+        k <- dr.k$k
+        V <- dr.k$V
+        q <- ncol(V)
+        # estimate bandwidth according to the alternative formula (see: TODO:)
+        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
+        # Projected `X`
+        XV <- X %*% V
+        # Devectorized distance matrix
+        # (inefficient in R but fast in C)
+        D <- matrix((XV[i, , drop = FALSE] - XV[j, , drop = FALSE])^2 %*% rep(1, q), n)
+        D <- D.eucl - D
+        # Apply kernel
+        K <- exp((-0.5 / h^2) * D^2)
+        # sum columns
+        colSumsK <- colSums(K)
+        # compute weighted and squared weighted responses
+        y1 <- (K %*% Y) / colSumsK
+        y2 <- (K %*% Y^2) / colSumsK
+        # total loss
+        losses[[as.character(k)]] <- mean(y2 - y1^2)
+    }
+
+    return(list(
+        losses = losses,
+        k = as.integer(names(which.min(losses)))
+    ))
+}
+
+predict_dim_wilcoxon <- function(object, p.value = 0.05) {
+    # extract original data from object (cve result)
+    X <- object$X
+    Y <- object$Y
+    # Get dimensions
+    n <- nrow(X)
+    p <- ncol(X)
+    # Compute persistent data.
+    i <- rep(1:n, n)
+    j <- rep(1:n, each = n)
+    D.eucl <- matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
+
+    L <- matrix(NA, n, length(object$res))
+    colnames(L) <- names(object$res)
+    # Compute per sample losses with alternative bandwidth for each dimension.
+    for (dr.k in object$res) {
+        # extract dimension specific estimates and dimensions.
+        k <- dr.k$k
+        V <- dr.k$V
+        q <- ncol(V)
+        # estimate bandwidth according to the alternative formula (see: TODO:)
+        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
+        # Projected `X`
+        XV <- X %*% V
+        # Devectorized distance matrix
+        # (inefficient in R but fast in C)
+        D <- matrix((XV[i, , drop = FALSE] - XV[j, , drop = FALSE])^2 %*% rep(1, q), n)
+        D <- D.eucl - D
+        # Apply kernel
+        K <- exp((-0.5 / h^2) * D^2)
+        # sum columns
+        colSumsK <- colSums(K)
+        # compute weighted and squared weighted responses
+        y1 <- (K %*% Y) / colSumsK
+        y2 <- (K %*% Y^2) / colSumsK
+        # element-wise L for dim. k
+        L[, as.character(k)] <- y2 - y1^2
+    }
+
+    for (ind in seq_len(length(object$res) - 1L)) {
+        p.test <- wilcox.test(L[, ind], L[, ind + 1L],
+                              alternative = "less")$p.value
+        if (p.test < p.value) {
+            return(list(
+                p.value = p.test,
+                k = object$res[[ind]]$k
+            ))
+        }
+    }
+
+    return(list(
+        p.value = NA,
+        k = object$res[[length(object$res)]]$k
+    ))
+}
+
+#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
+#' TODO: rewrite!!!
+#'
+#' @param object instance of class \code{cve} (result of \code{cve},
+#'    \code{cve.call}).
+#' @param ... ignored.
+#'
+#' @return list with
+#' \itemize{
+#'    \item MSE: Mean Square Error,
+#'    \item k: predicted dimensions.
+#' }
+#'
+#' @section cv:
+#' Cross-validation ... TODO:
+#'
+#' @section elbow:
+#' Cross-validation ...
TODO:
+#'
+#' @section wilcoxon:
+#' Cross-validation ... TODO:
+#'
+#' @examples
+#' # create B for simulation
+#' B <- rep(1, 5) / sqrt(5)
+#'
+#' set.seed(21)
+#' # create predictor data x ~ N(0, I_p)
+#' x <- matrix(rnorm(500), 100)
+#'
+#' # simulate response variable
+#' #     y = f(B'x) + err
+#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
+#' y <- x %*% B + 0.25 * rnorm(100)
+#'
+#' # Calculate cve for unknown k between min.dim and max.dim.
+#' cve.obj.simple <- cve(y ~ x)
+#'
+#' predict_dim(cve.obj.simple)
+#'
+#' @export
+predict_dim <- function(object, ..., method = "CV") {
+    # Check if there are dimensions to select.
+    if (length(object$res) == 1L) {
+        return(list(
+            message = "Only one dim. estimated.",
+            k = as.integer(names(object$res))
+        ))
+    }
+
+    # Determine method by fuzzy (partial) matching.
+    methods <- c("cv", "elbow", "wilcoxon")
+    names(methods) <- methods
+    method <- methods[pmatch(tolower(method), names(methods))]
+    if (is.na(method)) {
+        stop("Unable to determine method.")
+    }
+
+    if (method == "cv") {
+        return(predict_dim_cv(object))
+    } else if (method == "elbow") {
+        return(predict_dim_elbow(object))
+    } else if (method == "wilcoxon") {
+        return(predict_dim_wilcoxon(object))
+    } else {
+        stop("Unable to determine method.")
+    }
+}
diff --git a/CVE_C/man/predict_dim.Rd b/CVE_C/man/predict_dim.Rd
new file mode 100644
index 0000000..134f7ba
--- /dev/null
+++ b/CVE_C/man/predict_dim.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/predict_dim.R
+\name{predict_dim}
+\alias{predict_dim}
+\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
+\usage{
+predict_dim(object, ...)
+}
+\arguments{
+\item{object}{instance of class \code{cve} (result of \code{cve},
+\code{cve.call}).}
+
+\item{...}{ignored.}
+}
+\value{
+list with
+\itemize{
+    \item MSE: Mean Square Error,
+    \item k: predicted dimensions.
+}
+}
+\description{
+Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
+}
+\examples{
+# create B for simulation
+B <- rep(1, 5) / sqrt(5)
+
+set.seed(21)
+# create predictor data x ~ N(0, I_p)
+x <- matrix(rnorm(500), 100)
+
+# simulate response variable
+#     y = f(B'x) + err
+# with f(x1) = x1 and err ~ N(0, 0.25^2)
+y <- x \%*\% B + 0.25 * rnorm(100)
+
+# Calculate cve for unknown k between min.dim and max.dim.
+cve.obj.simple <- cve(y ~ x)
+
+predict_dim(cve.obj.simple)
+
+}
diff --git a/CVE_C/src/cve.c b/CVE_C/src/cve.c
index d13d1a6..e9e925b 100644
--- a/CVE_C/src/cve.c
+++ b/CVE_C/src/cve.c
@@ -17,6 +17,7 @@ void cve(const mat *X, const mat *Y, const double h,
     double loss, loss_last, loss_best, err, tau;
     double tol = tol_init * sqrt((double)(2 * q));
     double agility = -2.0 * (1.0 - momentum) / (h * h);
+    double sumK;
     double c = agility / (double)n;

     // TODO: check parameters! dim, ...
@@ -87,8 +88,9 @@ void cve(const mat *X, const mat *Y, const double h,
             S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem);
         } else if (method == weighted) {
             colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
-            loss_last = dot(L, '/', colSumsK);
-            c = agility / sum(colSumsK);
+            sumK = sum(colSumsK);
+            loss_last = dot(L, '*', colSumsK) / sumK;
+            c = agility / sumK;
             /* Calculate the scaling matrix S */
             S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
         } else {
@@ -100,10 +102,8 @@ void cve(const mat *X, const mat *Y, const double h,
         G = matrixprod(c, tmp2, V, 0.0, G);

         if (logger) {
-            callLogger(logger, loggerEnv,
-                       attempt, /* iter <- 0L */ -1,
-                       L, V, G,
-                       loss_last, /* err <- NA */ -1.0, tau);
+            callLogger(logger, loggerEnv, attempt, /* iter <- 0L */ -1,
+                       L, V, G, loss_last, /* err <- NA */ -1.0, tau);
         }

         /* Compute Skew-Symmetric matrix `A` used in Cayley transform.
@@ -120,9 +120,6 @@ void cve(const mat *X, const mat *Y, const double h,
             /* Move `V` along the gradient direction. */
             V_tau = cayleyTransform(A, V, V_tau, workMem);

-            // Rprintf("Start attempt(%2d), iter (%2d): err: %f, loss: %f, tau: %f\n",
-            //         attempt, iter, dist(V, V_tau), loss_last, tau);
-
             /* Embed X_i's in V space */
             XV = matrixprod(1.0, X, V_tau, 0.0, XV);
             /* Compute embedded distances */
@@ -146,7 +143,8 @@ void cve(const mat *X, const mat *Y, const double h,
                 loss = mean(L);
             } else if (method == weighted) {
                 colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
-                loss = dot(L, '/', colSumsK);
+                sumK = sum(colSumsK);
+                loss = dot(L, '*', colSumsK) / sumK;
             } else {
                 // TODO: error handling!
             }
@@ -154,22 +152,26 @@ void cve(const mat *X, const mat *Y, const double h,
             /* Check if step is appropriate, iff not reduce learning rate. */
             if ((loss - loss_last) > loss_last * slack) {
                 tau *= gamma;
+                iter -= 1;
                 A = elemApply(A, '*', gamma, A); // scale A by gamma
                 continue;
+            } else {
+                tau /= gamma;
             }

             /* Compute error, use workMem. */
             err = dist(V, V_tau);

+            // Rprintf("%2d - iter: %2d, loss: %1.3f, err: %1.3f, tau: %1.3f, norm(G) = %1.3f\n",
+            //         attempt, iter, loss, err, tau, sqrt(squareSum(G)));
+
             /* Shift next step to current step and store loss to last. */
             V = copy(V_tau, V);
             loss_last = loss;

             if (logger) {
-                callLogger(logger, loggerEnv,
-                           attempt, iter,
-                           L, V, G,
-                           loss, err, tau);
+                callLogger(logger, loggerEnv, attempt, iter,
+                           L, V, G, loss, err, tau);
             }

             /* Check Break condition. */
@@ -183,7 +185,7 @@ void cve(const mat *X, const mat *Y, const double h,
             } else if (method == weighted) {
                 /* Calculate the scaling matrix S */
                 S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
-                c = agility / sum(colSumsK);
+                c = agility / sumK; // n removed previously
             } else {
                 // TODO: error handling!
            }
@@ -198,6 +200,8 @@ void cve(const mat *X, const mat *Y, const double h,
         A = skew(tau, G, V, 0.0, A);
     }

+    // Rprintf("\n");
+
     /* Check if current attempt improved previous ones */
     if (attempt == 0 || loss < loss_best) {
         loss_best = loss;
diff --git a/LaTeX/notes.tex b/LaTeX/notes.tex
index acd4690..6b4dac9 100644
--- a/LaTeX/notes.tex
+++ b/LaTeX/notes.tex
@@ -4,58 +4,369 @@
 \usepackage[T1]{fontenc}
 \usepackage{amsmath, amsfonts, amssymb, amsthm}
 \usepackage{tikz}
+\usepackage{listings}
 \usepackage{fullpage}
+
+\lstdefinelanguage{PseudoCode} {
+    morekeywords={
+        for,
+        while,
+        repeat,
+        from,
+        each,
+        foreach,
+        break,
+        continue,
+        in,
+        do,
+        as,
+        and,
+        or,
+        end,
+        return,
+        if,
+        then,
+        else,
+        function,
+        begin,
+        to,
+        new,
+        input,
+        output
+    },
+    morecomment=[l]{/*},
+    morecomment=[l]{//},
+    % basicstyle=\ttfamily,
+    % keywordstyle=\color{blue}, %\ttfamily,
+    commentstyle=\color{gray}\it,
+    keywordstyle=\bf,
+    rulecolor=\color{black},
+    literate=%
+        {!=}{{$\neq$}}1
+        {<=}{{$\leq$}}1
+        {>=}{{$\geq$}}1
+        {->}{{$\rightarrow$}}1
+        {<-}{{$\leftarrow$}}1
+}
+
+% },
+%     tabsize=3,
+%     sensitive=false,
+%     morecomment=[l]{#},
+%     morestring=[b]",
+%     extendedchars=true,
+%     inputencoding=utf8,
+%     literate=%
+%         {!=}{{$\neq$}}1
+%         {<=}{{$\leq$}}1
+%         {>=}{{$\geq$}}1
+%         {<>}{{$\neq$}}1
+%         {:=}{{$\ \leftarrow\quad$}}1
+%         {Ö}{{\"O}}1
+%         {Ä}{{\"A}}1
+%         {Ü}{{\"U}}1
+%         {ß}{{\ss{}}}1
+%         {ü}{{\"u}}1
+%         {ä}{{\"a}}1
+%         {ö}{{\"o}}1
+%         {~}{{\textasciitilde}}1,
+%     texcl=true % use all chars from \usepackage[utf8]{inputenc}
+% }
+\lstset{
+    tabsize=4,
+    xleftmargin=0pt,    % left margin
+    numbers=left,       % linenumber position
+    numbersep=15pt,     % left linenumber padding
+    numberstyle=\tiny,
+    basicstyle=\ttfamily,
+    keywordstyle=\color{black!60},
+    commentstyle=\ttfamily\color{gray!70},
+    breaklines=true,
+    literate=
+}
+
+\renewcommand{\epsilon}{\varepsilon}
+
 \newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
 \newcommand{\Sym}{\ensuremath{\operatorname{Sym}}}
+\renewcommand{\vec}{\operatorname{vec}}
+\newcommand{\devec}{\operatorname*{devec}}
+\newcommand{\svec}{\operatorname{svec}}
+\newcommand{\sym}{\operatorname{sym}}
+\renewcommand{\skew}{\operatorname{skew}}
+\newcommand{\rowSums}{\operatorname{rowSums}}
+\newcommand{\colSums}{\operatorname{colSums}}
+\newcommand{\diag}{\operatorname{diag}}
+
 \begin{document}
+\section{Kronecker Product Properties}
+The \emph{mixed-product} property holds for matrices $A, B, C, D$ whenever the matrix products on the right-hand side are well defined:
+\begin{displaymath}
+    (A\otimes B)(C \otimes D) = (A C) \otimes (B D).
+\end{displaymath}
+In combination with the \emph{Hadamard product} (element-wise multiplication), for matrices $A, C$ of the same size and $B, D$ of the same size it holds that
+\begin{displaymath}
+    (A\otimes B)\circ (C \otimes D) = (A \circ C) \otimes (B \circ D).
+\end{displaymath}
+The \emph{transpose} of the Kronecker product fulfills
+\begin{displaymath}
+    (A\otimes B)^T = A^T \otimes B^T
+\end{displaymath}

+\section{Distance Computation}
+The pair-wise distances $d_V(X_{i,:}, X_{j,:})$ arranged in the distance matrix $D\in\mathbb{R}^{n\times n}$ can be written as
+\begin{align*}
+    \vec(D) = \rowSums(((X Q)\otimes 1_n - 1_n \otimes (X Q))^2)
+\end{align*}
+This can be computed in $\mathcal{O}(n^2p + np^2)$ time (vectorization and devectorization take $\mathcal{O}(1)$).
+The matrices $K, W$ are defined through their elements as
+\begin{displaymath}
+    k_{i j} = \exp\left(-\frac{d_{i j}^2}{2 h^2}\right),\qquad w_{i j} = \frac{k_{i j}}{\sum_{m} k_{m j}}.
+\end{displaymath}
+Next are $\bar{y}^{(m)}$ and the ``element-wise'' loss $l_i = L_n(V, X_i)$.
+\begin{displaymath}
+    \bar{y}^{(m)} = W^T Y^m,\qquad l = \bar{y}^{(2)} - (\bar{y}^{(1)})^2
+\end{displaymath}

+\section{Gradient Computation}
+The model is
+\begin{displaymath}
+    Y \sim g(B^T X) + \epsilon.
+\end{displaymath}
+
+Assume a data set $(X_i, Y_i)$ for $i = 1, ..., n$ with $X$ an $n\times p$ matrix such that each row represents one sample. Now let $l_i = L_n(V, X_i)$, $\bar{y}^{(1)}_j = (W^T Y)_j$ as well as $d_{i j}, w_{i j}$ the distance and weight matrix components. Then the gradient for the ``simple'' CVE method is given as
+\begin{displaymath}
+    \nabla L_n(V) = \frac{1}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} (l_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j} \nabla_V d_V(X_{i,:}, X_{j,:}).
+\end{displaymath}
+This representation is cumbersome: a direct implementation has an asymptotic run-time of $\Theta(n^2p^2)$, since it is a double sum over $n$ (hence quadratic in $n$) and every summand involves the gradient $\nabla_V d_V$.
+
+This can be optimized and written in matrix notation. First, the distance gradient is given as
+\begin{displaymath}
+    \nabla_V d_V(X_{i,:}, X_{j,:}) = -2 (X_{i,:} - X_{j,:})^T (X_{i,:} - X_{j,:}) V
+\end{displaymath}
+(Note: $X_{i,:}\in\mathbb{R}^{1\times p}$, i.e. a row representing one sample).
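+
+As a quick sanity check (an illustrative sketch, not taken from the implementation; all names below are ad hoc), the analytic distance gradient can be compared against central finite differences. Here $d_V$ is written as $\|\Delta\|^2 - \|\Delta V\|^2$ with $\Delta = X_{i,:} - X_{j,:}$, which agrees with the projection form for semi-orthonormal $V$ and stays smooth in an unconstrained $V$:
+\begin{lstlisting}[language=R]
+set.seed(1)
+p <- 4; q <- 2
+delta <- rnorm(p)                          # difference X[i, ] - X[j, ]
+V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # random semi-orthonormal p x q
+d_V <- function(V) sum(delta^2) - sum((delta %*% V)^2)
+G <- -2 * tcrossprod(delta) %*% V          # analytic: -2 delta^T delta V
+G.num <- matrix(NA, p, q)                  # numeric gradient, entry-wise
+eps <- 1e-6
+for (l in seq_len(p * q)) {
+    E <- matrix(0, p, q)
+    E[l] <- eps
+    G.num[l] <- (d_V(V + E) - d_V(V - E)) / (2 * eps)
+}
+all.equal(G, G.num, tolerance = 1e-4)      # expected: TRUE
+\end{lstlisting}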
In addition define the $n\times n$ matrix $S$ through its elements
+\begin{displaymath}
+    s_{i j} = (l_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j}.
+\end{displaymath}
+Substitution in the gradient leads to
+\begin{align*}
+    \nabla L_n(V)
+        &= -\frac{2}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} s_{i j} (X_{i,:} - X_{j,:})^T (X_{i,:} - X_{j,:}) V \\
+        &= -\frac{2}{nh^2}\sum_{i = 1}^{n} \sum_{j = 1}^{n} s_{i j} \left( X_{i,:}^T X_{i,:} - X_{i,:}^T X_{j,:} - X_{j,:}^T X_{i,:} + X_{j,:}^T X_{j,:} \right) V \\
+        &= -\frac{2}{nh^2} \left( \sum_{i = 1}^{n}\sum_{j = 1}^{n} (s_{i j} + s_{j i}) X_{i,:}^T X_{i,:} - \sum_{i = 1}^{n}\sum_{j = 1}^{n} (s_{i j} + s_{j i}) X_{i,:}^T X_{j,:} \right) V \\
+        &= -\frac{2}{nh^2} \left( X^T \diag(\colSums(S + S^T)) X - X^T (S + S^T) X \right) V \\
+        &= -\frac{2}{nh^2} X^T \left( \diag(\colSums(S + S^T)) - (S + S^T) \right) X V
+\end{align*}
+
+\begin{center}{\bf
+    ATTENTION: The given R examples are to illustrate the implementation in C, which is 0-indexed!
+}\end{center}
+
+The \emph{vectorization} operation maps a matrix $A\in\mathbb{R}^{n\times m}$ into $\mathbb{R}^{nm}$ by stacking the columns of $A$;
+\begin{displaymath}
+    \vec(A) = (a_{0,0}, a_{1,0}, a_{2,0},...,a_{n-1,0},a_{0,1},a_{1,1},...,a_{n-1,m-1})^T.
+\end{displaymath}
+The relation $\vec(A)_k = a_{i,j}$ holds for $k=nj+i$ such that $0\leq k < nm$ and $0\leq i < n, 0 \leq j < m$. This operation is obviously a bijection. When going ``backwards'' the dimension of the original space is required, therefore let $\devec_n$ be the operation such that $\devec_n(\vec(A)) = A$ for $A\in\mathbb{R}^{n\times m}$.\footnote{Note that for $B\in\mathbb{R}^{p\times q}$ such that $pq = nm$ we get $\devec_n(\vec(B))\in\mathbb{R}^{n\times m}$.}
+
+For symmetric matrices the information in $a_{i,j} = a_{j,i}$ is stored twice in $A=A^T\in\mathbb{R}^{n\times n}$. To remove this redundancy, the \emph{symmetric vectorization} is defined, which saves the main diagonal and the lower triangular part of the symmetric matrix according to the schema
+\begin{displaymath}
+    \svec(A) = (a_{0,0},2a_{1,0},2a_{2,0},...,2a_{n-1,0},a_{1,1},2a_{2,1},...,2a_{n-1,1},a_{2,2},...,a_{n-1,n-1})
+\end{displaymath}
+A bit more formally,
+\begin{displaymath}
+    \svec(A)_{k} = (2-\delta_{i,j})a_{i,j} \quad\text{for}\quad k = n j + i - \frac{j(j + 1)}{2}, 0\leq j \leq i < n.
+\end{displaymath}
+
+\begin{lstlisting}[language=R]
+n <- 3
+k <- function(i, j, n) { (j * n) + i - (j * (j + 1) / 2) }
+i <- function(n) { rep(1:n - 1, n) }
+j <- function(n) { rep(1:n - 1, each = n) }
+A <- matrix(k(i(n), j(n), n), n)
+A[which(j(n) > i(n))] <- NA
+A
+#      [,1] [,2] [,3]
+# [1,]    0   NA   NA
+# [2,]    1    3   NA
+# [3,]    2    4    5
+vec <- function(A) { as.vector(A) }
+svec <- function(A) {
+    n <- nrow(A)
+    ((2 - (i(n) == j(n))) * A)[i(n) >= j(n)]
+}
+svec(matrix(1, n, n))
+# [1] 1 2 2 1 2 1
+devec <- function(vec, n) { matrix(vec, n) }
+\end{lstlisting}
+
+For a square matrix $A\in\mathbb{R}^{n\times n}$ we define
+\begin{displaymath}
+    \sym(A) := \frac{A + A^T}{2}, \qquad \skew(A) := \frac{A - A^T}{2}.
+\end{displaymath} + +% For a Matrix $A\in\mathbb{R}^{n\times n}$ the \emph{vectorization} operation is defined as a mapping from the matrices into a + +% Indexing a given matrix $A = (a_{ij})_{i,j = 1, ..., n} \in \mathbb{R}^{n\times n}$ given as +% \begin{displaymath} +% A = \begin{pmatrix} +% a_{0,0} & a_{0,1} & a_{0,2} & \ldots & a_{0,n-1} \\ +% a_{1,0} & a_{1,1} & a_{1,2} & \ldots & a_{1,n-1} \\ +% a_{2,0} & a_{2,1} & a_{2,2} & \ldots & a_{2,n-1} \\ +% \vdots & \vdots & \vdots & \ddots & \vdots \\ +% a_{n-1,0} & a_{n-1,1} & a_{n-1,2} & \ldots & a_{n-1,n-1} +% \end{pmatrix} +% \end{displaymath} + +% A symmetric matrix with zero main diagonal, meaning a matrix $S = S^T$ with $S_{i,i} = 0,\ \forall i = 1,..,n$ is given in the following form +% \begin{displaymath} +% S = \begin{pmatrix} +% 0 & s_{1,0} & s_{2,0} & \ldots & s_{n-1,0} \\ +% s_{1,0} & 0 & s_{2,1} & \ldots & s_{n-1,1} \\ +% s_{2,0} & s_{2,1} & 0 & \ldots & s_{n-1,2} \\ +% \vdots & \vdots & \vdots & \ddots & \vdots \\ +% s_{n-1,0} & s_{n-1,1} & s_{n-1,2} & \ldots & 0 +% \end{pmatrix} +% \end{displaymath} +% Therefore its sufficient to store only the lower triangular part, for memory efficiency and some further algorithmic shortcuts (sometime they are more expensive) the symmetric matrix $S$ is stored in packed form, meaning in a vector of the length $\frac{n(n-1)}{2}$. We use (like for matrices) a column-major order of elements and define the $\vecl:\Sym(n)\to \mathbb{R}^{n(n-1) / 2}$ operator defined as + +% \begin{displaymath} +% \vecl(S) = (s_{1,0}, s_{2,0},\cdots,s_{n-1,0},s_{2,1}\cdots,s_{n-1,n-2})^T +% \end{displaymath} + +% The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given by + +% \begin{displaymath} +% (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n. 
% \end{displaymath}
+
+% \begin{center}
+%     \begin{tikzpicture}[xscale=1,yscale=-1]
+%         % \foreach \i in {0,...,5} {
+%         %     \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$};
+%         % }
+%         \foreach \i in {1,...,4} {
+%             \foreach \j in {1,...,\i} {
+%                 \node at (\j, \i) {$\i,\j$};
+%             }
+%         }

-    \end{tikzpicture}
-\end{center}
+%     \end{tikzpicture}
+% \end{center}
+
+\newpage
+\section{Algorithm}
+The basic algorithm reads as follows.
+
+With
+\begin{displaymath}
+    X_{diff} := X\otimes 1_n - 1_n\otimes X
+\end{displaymath}
+it holds that
+\begin{displaymath}
+    X_{diff}Q := (X\otimes 1_n - 1_n\otimes X)Q = XQ\otimes 1_n - 1_n\otimes XQ
+\end{displaymath}
+
+\newcommand{\rStiefel}{\operatorname{rStiefel}}
+% \lstset{language=PseudoCode}
+% \begin{lstlisting}[mathescape, caption=Erste Phase von \texttt{HDE} (siehe \cite{HDE}), label=code:HDE, captionpos=b]
+% \begin{lstlisting}[mathescape]
+% // Hallo Welt
+% /* Hallo comment */
+% $X_{diff} \leftarrow X\otimes 1_n - 1_n\otimes X$
+
+% for attempt from 1 to attempts do
+%     if $\exists V_{init}$ then
+%         $V \leftarrow V_{init}$
+%     else
+%         $V \leftarrow \rStiefel(p, q)$
+%     end if
+
+%     /* Projection matrix into null space */
+%     $Q \leftarrow I_p - VV^T$
+
+%     /* Pair-wise distances (row sum of squared elements) */
+%     $D \leftarrow$ foreach $i,j=1,...,n$ as $D_{i,j}\leftarrow \|(X_{i,:}-X_{j,:})Q\|_2^2$
+
+%     /* Weights */
+%     $W \leftarrow$ foreach $i,j=1,...,n$ as $W_{i,j} \leftarrow \frac{k(D_{i,j})}{\sum_{i} k(D_{i,j})}$
+
+%     $\bar{y}_1 \leftarrow W^TY$
+%     $\bar{y}_2 \leftarrow W^T(Y\odot Y)$
+
+%     /* Element-wise losses */
+%     $L \leftarrow \bar{y}_2 - \bar{y}_1^2$
+
+%     for epoch from 1 to epochs do
+
+%         $G_t \leftarrow \gamma G_{t-1} + (1-\gamma) \nabla_c L(V)$
+
+%     end for
+% end for
+% \end{lstlisting}
+
+The gradient at the current position is
+\begin{displaymath}
+    \nabla L_n(V) = \frac{1}{nh^2}\sum_{i = 0}^{n - 1} \sum_{j = 0}^{n - 1} (L_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j} \nabla_V d_V(X_{i,:}, X_{j,:})
+\end{displaymath}
+Now let the matrix $S$ be defined through its coefficients
+\begin{displaymath}
+    s_{i j} = (L_j - (Y_i - \bar{y}^{(1)}_j)^2) w_{i j} d_{i j}
+\end{displaymath}
+This matrix is \underline{not} symmetric, but we can consider the symmetric $S + S^T$, which has a zero main diagonal: $s_{i i} = 0$ because $d_{i i} = 0$ for each $i$. Therefore the following holds due to the fact that $\nabla_V d_V(X_{i,:}, X_{j,:}) = \nabla_V d_V(X_{j,:}, X_{i,:})$.
+\begin{displaymath}
+    \nabla L_n(V) = \frac{1}{nh^2}\sum_{j = 0}^{n - 1} \sum_{i = j}^{n - 1} (s_{i j} + s_{j i}) \nabla_V d_V(X_{i,:}, X_{j,:})
+\end{displaymath}
+Note the summation indices $0 \leq j \leq i < n$. Substitution with $\nabla_V d_V(X_{i,:}, X_{j,:}) = -2 (X_{i,:} - X_{j,:})^T(X_{i,:} - X_{j,:}) V$ evaluates to
+\begin{displaymath}
+    \nabla L_n(V) = -\frac{2}{nh^2}\sum_{j = 0}^{n - 1} \sum_{i = j}^{n - 1} (s_{i j} + s_{j i}) (X_{i,:} - X_{j,:})^T(X_{i,:} - X_{j,:}) V
+\end{displaymath}
+Let $X_{-}$ be the matrix containing all pairwise differences $X_{i,:} - X_{j,:}$, using the same row indexing scheme as the symmetric vectorization.
+\begin{displaymath}
+    (X_{-})_{k,:} = X_{i,:} - X_{j,:} \quad\text{for}\quad k = n j + i - \frac{j(j + 1)}{2}, 0\leq j \leq i < n
+\end{displaymath}
+With the $X_{-}$ matrix the above double sum can be formalized in matrix notation as follows\footnote{only valid because $s_{i i} = 0$}
+\begin{displaymath}
+    \nabla L_n(V) = -\frac{2}{nh^2} X_{-}^T(\svec(\sym(S)) \circ_r X_{-}) V
+\end{displaymath}
+where $\circ_r$ denotes the ``recycled'' Hadamard product: for a vector $x\in\mathbb{R}^n$ and a matrix $M\in\mathbb{R}^{n\times m}$ it is the element-wise multiplication of each column of $M$ with $x$, or equivalently $x\circ_r M = \underbrace{(x, x, ..., x)}_{n\times m} \circ M$ where $\circ$ is the element-wise product.
+
+
+\begin{lstlisting}[mathescape, language=PseudoCode]
+/* Starting value and initial gradient. */
+$V_1 \leftarrow V_{init}$ if $\exists V_{init}$ else $\rStiefel(p, q)$
+$G_1 \leftarrow (1 - \mu) \nabla L_n(V_1)$
+
+/* Optimization loop */
+$t \leftarrow 1$
+while $t\leq\,$max.iter do
+
+    /* Update on Stiefel manifold. */
+    $A \leftarrow G_tV_t^T - V_tG_t^T$
+    $V_{t+1} \leftarrow (I_p + \tau A)^{-1}(I_p - \tau A)V_{t}$
+
+    /* Check break condition. */
+    if $\|V_{t+1}V_{t+1}^T - V_{t}V_{t}^T\|_2^2 \leq \sqrt{2q}\,$tol then
+        break
+    end if
+
+    /* Check for decrease. */
+    if $L_n(V_{t+1}) - L_n(V_{t}) > L_n(V_{t})\,$slack then // TODO: slack?
+        /* Reduce step-size. */
+        $\tau \leftarrow \gamma\tau$
+    else
+        /* Gradient at next position (with momentum). */
+        $G_{t+1} \leftarrow \mu G_{t} + (1 - \mu) \nabla L_n(V_{t+1})$
+        /* Increase step index */
+        $t \leftarrow t + 1$
+    end if
+
+end while
+\end{lstlisting}
+
 \end{document}
\ No newline at end of file
diff --git a/runtime_test.R b/runtime_test.R
index 9589537..c73d9a6 100644
--- a/runtime_test.R
+++ b/runtime_test.R
@@ -1,13 +1,31 @@
 # Usage:
 #   ~$ Rscript runtime_test.R
+
+textplot <- function(...) {
+    text <- unlist(list(...))
+    if (length(text) > 20) {
+        text <- c(text[1:17],
+                  '    ...... (skipped, text too long) ......',
+                  text[c(-1, 0) + length(text)])
+    }
+
+    plot(NA, xlim = c(0, 1), ylim = c(0, 1),
+         bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
+
+    for (i in seq_along(text)) {
+        text(0, 1 - (i / 20),
+             text[[i]], pos = 4)
+    }
+}
+
 # library(CVEpureR) # load CVE's pure R implementation
 library(CVE)        # load CVE

 #' Writes log information to console. (to not get bored^^)
-tell.user <- function(name, start.time, i, length) {
+tell.user <- function(name, start, i, length) {
     cat("\rRunning Test (", name, "):", i, "/", length,
-        " - elapsed:", format(Sys.time() - start.time), "\033[K")
+        " - elapsed:", format(Sys.time() - start), "\033[K")
 }

 #' Computes "distance" of spanned subspaces.
#' @param B1 Semi-orthonormal basis matrix @@ -29,19 +47,14 @@ MAXIT <- 50L # number of arbitrary starting values for curvilinear optimization ATTEMPTS <- 10L # set names of datasets -dataset.names <- c("M1", "M2", "M3", "M4", "M5") +ds.names <- paste0("M", seq(7)) # Set used CVE method -methods <- c("simple") # c("legacy", "simple", "linesearch", "sgd") - -if ("legacy" %in% methods) { - # Source legacy code (but only if needed) - source("CVE_legacy/function_script.R") -} +methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd") # Setup error and time tracking variables -error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names)) +error <- matrix(NA, SIM.NR, length(methods) * length(ds.names)) time <- matrix(NA, SIM.NR, ncol(error)) -colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0) +colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0) colnames(time) <- colnames(error) # Create new log file and write CSV (actualy TSV) header. @@ -56,13 +69,12 @@ cat('Plotting to file:', path, '\n') # only for telling user (to stdout) count <- 0 -start.time <- Sys.time() +start <- Sys.time() # Start simulation loop. for (sim in 1:SIM.NR) { # Repeat for each dataset. - for (name in dataset.names) { - count <- count + 1 - tell.user(name, start.time, count, SIM.NR * length(dataset.names)) + for (name in ds.names) { + tell.user(name, start, (count <- count + 1), SIM.NR * length(ds.names)) # Create a new dataset ds <- dataset(name) @@ -71,35 +83,20 @@ for (sim in 1:SIM.NR) { X <- ds$X data <- cbind(Y, X) # get dimensions - dim <- ncol(X) - truedim <- ncol(ds$B) + k <- ncol(ds$B) for (method in methods) { - if (tolower(method) == "legacy") { - dr.time <- system.time( - dr <- stiefel_opt(data, - k = dim - truedim, - k0 = ATTEMPTS, - h = estimate.bandwidth(X, - k = truedim, - nObs = sqrt(nrow(X))), - maxit = MAXIT - ) + dr.time <- system.time( + dr <- cve.call(X, Y, + method = method, + k = k, + attempts = ATTEMPTS ) - dr$B <- fill_base(dr$est_base)[, 1:truedim] - } else { - dr.time <- system.time( - dr <- cve.call(X, Y, - method = method, - k = truedim, - attempts = ATTEMPTS - ) - ) - dr$B <- coef(dr, truedim) - } + ) + dr$B <- coef(dr, k) key <- paste0(name, '-', method) - error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim) + error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k) time[sim, key] <- dr.time["elapsed"] # Log results to file (mostly for long running simulations) diff --git a/test.R b/test.R index bd714f6..364ac37 100644 --- a/test.R +++ b/test.R @@ -1,3 +1,19 @@ +textplot <- function(...) { + text <- unlist(list(...)) + if (length(text) > 20) { + text <- c(text[1:17], + ' ...... (skipped, text too long) ......', + text[c(-1, 0) + length(text)]) + } + + plot(NA, xlim = c(0, 1), ylim = c(0, 1), + bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '') + + for (i in seq_along(text)) { + text(0, 1 - (i / 20), + text[[i]], pos = 4) + } +} args <- commandArgs(TRUE) if (length(args) > 0L) { @@ -10,11 +26,12 @@ if (length(args) > 1L) { } else { momentum <- 0.0 } +seed <- 42 max.iter <- 50L attempts <- 25L library(CVE) -path <- paste0('~/Projects/CVE/tmp/logger_', method, '_', momentum, '.C.pdf') +path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf') # Define logger for `cve()` method. 
logger <- function(attempt, iter, data) {
@@ -29,12 +46,14 @@
     true.error.history[iter + 1, attempt] <<- true.error
 }

-pdf(path)
-par(mfrow = c(2, 2))
+pdf(path, width = 8.27, height = 11.7) # width and height are in inches -> A4
+layout(matrix(c(1, 1,
+                2, 3,
+                4, 5), nrow = 3, byrow = TRUE))

-for (name in paste0("M", seq(5))) {
+for (name in paste0("M", seq(7))) {
     # Seed random number generator
-    set.seed(42)
+    set.seed(seed)

     # Create a dataset
     ds <- dataset(name)
@@ -52,11 +71,37 @@
     tau.history <- matrix(NA, max.iter + 1, attempts)
     true.error.history <- matrix(NA, max.iter + 1, attempts)

-    dr <- cve(Y ~ X, k = k, method = method,
-              momentum = momentum,
-              max.iter = max.iter, attempts = attempts,
-              logger = logger)
+    time <- system.time(
+        dr <- cve(Y ~ X, k = k, method = method,
+                  momentum = momentum,
+                  max.iter = max.iter, attempts = attempts,
+                  logger = logger)
+    )["elapsed"]

+    # Extract finally selected values:
+    B.est <- coef(dr, k)
+    true.error <- norm(tcrossprod(B.est) - tcrossprod(B), 'F') / sqrt(2 * k)
+    loss <- dr$res[[as.character(k)]]$loss
+
+    # Write metadata.
+    textplot(
+        paste0("Seed value: ", seed),
+        "",
+        paste0("Dataset Name: ", ds$name),
+        paste0("dim(X) = (", nrow(X), ", ", ncol(X), ")"),
+        paste0("dim(B) = (", nrow(B), ", ", ncol(B), ")"),
+        "",
+        paste0("CVE method: ", dr$method),
+        paste0("Max Iterations: ", max.iter),
+        paste0("Attempts: ", attempts),
+        paste0("Momentum: ", momentum),
+        "CVE call:",
+        paste0("    > ", format(dr$call)),
+        "",
+        paste0("True Error: ", round(true.error, 3)),
+        paste0("loss: ", round(loss, 3)),
+        paste0("time: ", round(time, 3), " s")
+    )

     # Plot history's
     matplot(loss.history, type = 'l', log = 'y', xlab = 'i (iteration)',
            main = paste('loss', name),