diff --git a/CVE_C/DESCRIPTION b/CVE/DESCRIPTION similarity index 100% rename from CVE_C/DESCRIPTION rename to CVE/DESCRIPTION diff --git a/CVE_C/NAMESPACE b/CVE/NAMESPACE similarity index 85% rename from CVE_C/NAMESPACE rename to CVE/NAMESPACE index 0f9e2ff..36aaac2 100644 --- a/CVE_C/NAMESPACE +++ b/CVE/NAMESPACE @@ -9,15 +9,10 @@ export(cve) export(cve.call) export(dataset) export(directions) -export(elem.pairs) export(estimate.bandwidth) export(null) export(predict_dim) -export(projTangentStiefel) export(rStiefel) -export(retractStiefel) -export(skew) -export(sym) import(stats) importFrom(graphics,boxplot) importFrom(graphics,lines) diff --git a/CVE_C/R/CVE.R b/CVE/R/CVE.R similarity index 80% rename from CVE_C/R/CVE.R rename to CVE/R/CVE.R index 32ead7d..e7fdc7b 100644 --- a/CVE_C/R/CVE.R +++ b/CVE/R/CVE.R @@ -20,7 +20,7 @@ #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} #' is an unknown, continuous non-constant function, #' and \eqn{B = (b_1, ..., b_k)} is -#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. +#' a real \eqn{p \times k}{p x k} of rank \eqn{k \leq p}{k <= p}. #' Without loss of generality \eqn{B} is assumed to be orthonormal. #' #' @author Daniel Kapla, Lukas Fertl, Bura Efstathia @@ -36,26 +36,46 @@ #' @inherit CVE-package description #' #' @param formula an object of class \code{"formula"} which is a symbolic -#' description of the model to be fitted. +#' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where +#' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and +#' \eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors. #' @param data an optional data frame, containing the data for the formula if -#' supplied. -#' @param method specifies the CVE method variation as one of +#' supplied like \code{data <- data.frame(Y, X)} with dimension +#' \eqn{n \times (p + 1)}{n x (p + 1)}. 
By default the variables are taken from +#' the environment from which \code{cve} is called. +#' @param method This character string specifies the method of fitting. The +#' options are #' \itemize{ -#' \item "simple" exact implementation as described in the paper listed -#' below. -#' \item "weighted" variation with addaptive weighting of slices. +#' \item "simple" implementation as described in the paper. +#' \item "weighted" variation with adaptive weighting of slices. +#' } +#' see paper. #' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied). -#' @param ... Parameters passed on to \code{cve.call}. +#' @param ... optional parameters passed on to \code{cve.call}. #' #' @return an S3 object of class \code{cve} with components: #' \describe{ -#' \item{X}{Original training data,} -#' \item{Y}{Responce of original training data,} +#' \item{X}{design matrix of predictor vector used for calculating +#' cve-estimate,} +#' \item{Y}{\eqn{n}-dimensional vector of responses used for calculating +#' cve-estimate,} #' \item{method}{Name of used method,} #' \item{call}{the matched call,} -#' \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for -#' each \eqn{k=min.dim,...,max.dim} (dimension).} +#' \item{res}{list of components \code{V, L, B, loss, h} for +#' each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the +#' call \code{min.dim = max.dim = k}. +#' \itemize{ +#' \item \code{B} is the cve-estimate with dimension +#' \eqn{p\times k}{p x k}. +#' \item \code{V} is the orthogonal complement of \eqn{B}. +#' \item \code{L} is the loss for each sample separately such that +#' its mean is \code{loss}. +#' \item \code{loss} is the value of the target function that is +#' minimized, evaluated at \eqn{V}. +#' \item \code{h} bandwidth parameter used to calculate +#' \code{B, V, loss, L}. 
+#' } +#' } #' } #' #' @examples @@ -66,7 +86,7 @@ #' b1 <- rep(1 / sqrt(p), p) #' b2 <- (-1)^seq(1, p) / sqrt(p) #' B <- cbind(b1, b2) -#' # samplsize +#' # sample size #' n <- 200 #' set.seed(21) #' # creat predictor data x ~ N(0, I_p) @@ -139,10 +159,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { #' @inherit cve title #' @inherit cve description #' +#' @param X Design matrix with dimension \eqn{n\times p}{n x p}. +#' @param Y numeric array of length \eqn{n} of Responses. +#' @param h bandwidth or function to estimate bandwidth, defaults to internaly +#' estimated bandwidth. #' @param nObs parameter for choosing bandwidth \code{h} using #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied). -#' @param X data matrix with samples in its rows. -#' @param Y Responses (1 dimensional). #' @param method specifies the CVE method variation as one of #' \itemize{ #' \item "simple" exact implementation as described in the paper listed @@ -156,19 +178,23 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { #' @param tau Initial step-size. #' @param tol Tolerance for break condition. #' @param max.iter maximum number of optimization steps. -#' @param attempts number of arbitrary different starting points. -#' @param logger a logger function (only for advanced user, significantly slows -#' down the computation). -#' @param h bandwidth or function to estimate bandwidth, defaults to internaly -#' estimated bandwidth. -#' @param momentum number of [0, 1) giving the ration of momentum for eucledian -#' gradient update with a momentum term. +#' @param attempts If \code{V.init} not supplied, the optimization is carried +#' out \code{attempts} times with starting values drawn from the invariant +#' measure on the Stiefel manifold (see \code{\link{rStiefel}}). +#' @param momentum number of \eqn{[0, 1)} giving the ration of momentum for +#' eucledian gradient update with a momentum term. 
\code{momentum = 0} +#' corresponds to normal gradient descend. #' @param slack Positive scaling to allow small increases of the loss while -#' optimizing. -#' @param gamma step-size reduction multiple. +#' optimizing, i.e. \code{slack = 0.1} allows the target function to +#' increase up to \eqn{10 \%} in one optimization step. +#' @param gamma step-size reduction multiple. If gradient step with step size +#' \code{tau} is not accepted \code{gamma * tau} is set to the next step +#' size. #' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k) -#' as optimization starting value. (If supplied, \code{attempts} is -#' set to 1 and \code{k} to match dimension) +#' used as starting value in the optimization. (If supplied, +#' \code{attempts} is set to 0 and \code{k} to match dimension). +#' @param logger a logger function (only for advanced user, slows down the +#' computation). #' #' @inherit cve return #' @@ -253,6 +279,7 @@ cve.call <- function(X, Y, method = "simple", stop("Dimension missmatch of 'V.init' and 'X'") } min.dim <- max.dim <- ncol(X) - ncol(V.init) + storage.mode(V.init) <- "double" attempts <- 0L } else if (missing(k) || is.null(k)) { min.dim <- as.integer(min.dim) @@ -320,6 +347,9 @@ cve.call <- function(X, Y, method = "simple", } } + # Convert numerical values to "double". + storage.mode(X) <- storage.mode(Y) <- "double" + if (is.function(logger)) { loggerEnv <- environment(logger) } else { diff --git a/CVE_C/R/coef.R b/CVE/R/coef.R similarity index 91% rename from CVE_C/R/coef.R rename to CVE/R/coef.R index 6d71308..5bc63c9 100644 --- a/CVE_C/R/coef.R +++ b/CVE/R/coef.R @@ -1,8 +1,10 @@ #' Gets estimated SDR basis. #' -#' Returns the SDR basis matrix for SDR dimension(s). +#' Returns the SDR basis matrix for dimension \code{k}, i.e. returns the +#' cve-estimate with dimension \eqn{p\times k}{p x k}. 
+#' #' @param object instance of \code{cve} as output from \code{\link{cve}} or -#' \code{\link{cve.call}} +#' \code{\link{cve.call}}. #' @param k the SDR dimension. #' @param ... ignored. #' diff --git a/CVE/R/datasets.R b/CVE/R/datasets.R new file mode 100644 index 0000000..14556ef --- /dev/null +++ b/CVE/R/datasets.R @@ -0,0 +1,279 @@ +#' Multivariate Normal Distribution. +#' +#' Random generation for the multivariate normal distribution. +#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)} +#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma covariance matrix. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. +#' +#' @examples +#' \dontrun{ +#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2)) +#' rmvnorm(20, mu = c(3, -1, 2)) +#' } +#' @keywords internal +rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution + return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma)) +} + +#' Multivariate t distribution. +#' +#' Random generation from multivariate t distribution (student distribution). +#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degree +#' \eqn{\nu} if bigger than 2 the created covariance is +#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}} +#' for \eqn{\nu > 2}. +#' @param df degree of freedom \eqn{\nu}. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. 
+#' +#' @examples +#' \dontrun{ +#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3) +#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3) +#' rmvt(20, mu = c(3, -1, 2), 3) +#' } +#' @keywords internal +rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + if (df == Inf) { + Z <- 1 + } else { + Z <- sqrt(df / rchisq(n, df)) + } + + return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n)) +} + +#' Generalized Normal Distribution. +#' +#' Random generation for generalized Normal Distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param alpha first shape parameter. +#' @param beta second shape parameter. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution +#' @keywords internal +rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) { + if (alpha <= 0 | beta <= 0) { + stop("alpha and beta must be positive.") + } + lambda <- (1 / alpha)^beta + scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta) + return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu) +} + +#' Laplace distribution +#' +#' Random generation for Laplace distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param sd standard deviation. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution +#' @keywords internal +rlaplace <- function(n = 1, mu = 0, sd = 1) { + U <- runif(n, -0.5, 0.5) + scale <- sd / sqrt(2) + + return(mu - scale * sign(U) * log(1 - 2 * abs(U))) +} + +#' Generates test datasets. +#' +#' Provides sample datasets M1-M7 used in the paper Conditional variance +#' estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura. 
+#' The general model is given by: +#' \deqn{Y = g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4",} +#' \code{"M5"}, \code{"M6"} or \code{"M7"}. Alternative just the dataset number +#' 1-7. +#' @param n number of samples. +#' @param p Dimension of random variable \eqn{X}. +#' @param sd standard diviation for error term \eqn{\epsilon}. +#' @param ... Additional parameters only for "M2" (namely \code{pmix} and +#' \code{lambda}), see: below. +#' +#' @return List with elements +#' \itemize{ +#' \item{X}{data, a \eqn{n\times p}{n x p} matrix.} +#' \item{Y}{response.} +#' \item{B}{the dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } +#' +#' @section M1: +#' The predictors are distributed as +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with +#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is +#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is +#' distributed as generalized normal distribution with location 0, +#' shape-parameter 0.5, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.5}. +#' @section M2: +#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}. with +#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where +#' \eqn{1_p} is the \eqn{p}-dimensional vector of one's, for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. 
+#' Defaults for \code{pmix} is 0.3 and \code{lambda} defaults to 1. +#' @section M3: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is +#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. +#' @section M4: +#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)} +#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M5: +#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)} +#' where \eqn{U([0, 1]^p)} is the uniform distribution with +#' independent components on the \eqn{p}-dimensional hypercube for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. 
+#' @section M6: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data +#' point. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M7: +#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where +#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of +#' freedom, for a subspace dimension of \eqn{k = 4} with a default of +#' \eqn{n = 200} data points. +#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon} +#' where \eqn{\epsilon} is distributed as generalized normal distribution with +#' location 0, shape-parameter 1, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.25}. +#' +#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance +#' Estimation for Sufficient Dimension Reduction. Working Paper. +#' +#' @import stats +#' @importFrom stats rnorm rbinom +#' @export +dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) { + name <- toupper(name) + if (nchar(name) == 1) { name <- paste0("M", name) } + + if (name == "M1") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + beta <- 0.5 + Y <- cos(X %*% B) + rgnorm(n, 0, + alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)), + beta = beta + ) + } else if (name == "M2") { + if (missing(n)) { n <- 100 } + params <- list(...) 
+ pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix } + lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + Z <- 2 * rbinom(n, 1, pmix) - 1 + X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n) + Y <- cos(X %*% B) + rnorm(n, 0, sd) + } else if (name == "M3") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- matrix(rnorm(n * p), n) + Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd) + } else if (name == "M4") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1 / sqrt(6), 6), rep(0, p - 6)), + c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6)) + ) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + XB <- X %*% B + Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd) + } else if (name == "M5") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1, 6), rep(0, p - 6)), + c(rep(c(1, -1), 3), rep(0, p - 6)) + ) / sqrt(6) + X <- matrix(runif(n * p), n) + XB <- X %*% B + Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd) + } else if (name == "M6") { + if (missing(n)) { n <- 200 } + # B ... `p x 3` + B <- diag(p)[, -(3:(p - 1))] + X <- matrix(rnorm(n * p), n) + Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd) + } else if (name == "M7") { + if (missing(n)) { n <- 400 } + # B ... 
`p x 4` + B <- diag(p)[, -(4:(p - 1))] + # "R"andom "M"ulti"V"ariate "S"tudent + X <- rmvt(n = n, sigma = diag(p), df = 3) + XB <- X %*% B + Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4]) + Y <- Y + rlaplace(n, 0, sd) + } else { + stop("Got unknown dataset name.") + } + + return(list(X = X, Y = Y, B = B, name = name)) +} diff --git a/CVE_C/R/directions.R b/CVE/R/directions.R similarity index 76% rename from CVE_C/R/directions.R rename to CVE/R/directions.R index 4546539..ba648e5 100644 --- a/CVE_C/R/directions.R +++ b/CVE/R/directions.R @@ -5,9 +5,15 @@ directions <- function(dr, k) { #' Computes projected training data \code{X} for given dimension `k`. #' -#' @param dr Instance of 'cve' as returned by \code{cve}. +#' Projects the dimensional design matrix \eqn{X} on the columnspace of the +#' cve-estimate for given dimension \eqn{k}. +#' +#' @param dr Instance of \code{'cve'} as returned by \code{\link{cve}}. #' @param k SDR dimension to use for projection. #' +#' @return the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B} +#' is the cve-estimate for dimension \eqn{k}. +#' #' @examples #' # create B for simulation (k = 1) #' B <- rep(1, 5) / sqrt(5) diff --git a/CVE_C/R/estimateBandwidth.R b/CVE/R/estimateBandwidth.R similarity index 56% rename from CVE_C/R/estimateBandwidth.R rename to CVE/R/estimateBandwidth.R index fa521dc..bc671c9 100644 --- a/CVE_C/R/estimateBandwidth.R +++ b/CVE/R/estimateBandwidth.R @@ -1,27 +1,22 @@ #' Bandwidth estimation for CVE. 
#' -#' Estimates a bandwidth \code{h} according +#' If no bandwidth or function for calculating it is supplied, the CVE method +#' defaults to using the following formula (version 1) #' \deqn{% -#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% -#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} -#' with \eqn{n} the sample size, \eqn{p} its dimension -#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} -#' which is \code{(n-1)/n} times the sample covariance estimate. +#' h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{% +#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2} +#' Alternative version 2 is used for dimension prediction which is given by +#' \deqn{% +#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{% +#' h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))} +#' with \eqn{n} the sample size, \eqn{p} its dimension and the +#' covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample +#' covariance estimate. #' -#' @param X data matrix with samples in its rows. +#' @param X a \eqn{n\times p}{n x p} matrix with samples in its rows. #' @param k Dimension of lower dimensional projection. -#' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper. -#' @param version either \code{1} or \code{2}, where -#' \itemize{ -#' \item 1: uses the following formula: -#' \deqn{% -#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% -#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} -#' \item 2: uses -#' \deqn{% -#' h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{% -#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})} -#' } +#' @param nObs number of points in a slice, only for version 2. +#' @param version either \code{1} or \code{2}. #' #' @return Estimated bandwidth \code{h}. 
#' diff --git a/CVE_C/R/plot.R b/CVE/R/plot.R similarity index 79% rename from CVE_C/R/plot.R rename to CVE/R/plot.R index d981fff..df3a43c 100644 --- a/CVE_C/R/plot.R +++ b/CVE/R/plot.R @@ -1,6 +1,9 @@ #' Loss distribution elbow plot. #' -#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. +#' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from +#' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds +#' to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of +#' \eqn{L_n(V)}, for further details see the paper. #' #' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). #' @param ... Pass through parameters to [\code{\link{plot}}] and @@ -31,6 +34,9 @@ #' # elbow plot #' plot(cve.obj.simple) #' +#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance +#' Estimation for Sufficient Dimension Reduction. Working Paper. +#' #' @seealso see \code{\link{par}} for graphical parameters to pass through #' as well as \code{\link{plot}}, the standard plot utility. #' @method plot cve diff --git a/CVE_C/R/predict.R b/CVE/R/predict.R similarity index 88% rename from CVE_C/R/predict.R rename to CVE/R/predict.R index f882d27..703395f 100644 --- a/CVE_C/R/predict.R +++ b/CVE/R/predict.R @@ -1,6 +1,7 @@ #' Predict method for CVE Fits. #' -#' Predict responces using reduced data with \code{\link{mars}}. +#' Predict response using projected data where the forward model \eqn{g(B'X)} +#' is estimated using \code{\link{mars}}. #' #' @param object instance of class \code{cve} (result of \code{cve}, #' \code{cve.call}). @@ -36,7 +37,7 @@ #' #' # plot prediction against y.test #' plot(yhat, y.test) -#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. +#' @seealso \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}. 
#' #' @rdname predict.cve #' diff --git a/CVE_C/R/predict_dim.R b/CVE/R/predict_dim.R similarity index 77% rename from CVE_C/R/predict_dim.R rename to CVE/R/predict_dim.R index 4fec72e..3e390b1 100644 --- a/CVE_C/R/predict_dim.R +++ b/CVE/R/predict_dim.R @@ -36,10 +36,6 @@ predict_dim_elbow <- function(object) { # Get dimensions n <- nrow(X) p <- ncol(X) - # Compute persistent data. - i = rep(1:n, n) - j = rep(1:n, each = n) - D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n) losses <- vector("double", length(object$res)) names(losses) <- names(object$res) @@ -48,16 +44,16 @@ predict_dim_elbow <- function(object) { # extract dimension specific estimates and dimensions. k <- dr.k$k V <- dr.k$V - q <- ncol(V) - # estimate bandwidth according alternative formula (see: TODO: see) + # estimate bandwidth according alternative formula. h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` - XV <- X %*% V - # Devectorized distance matrix - # (inefficient in R but fast in C) - D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) - D <- D.eucl - D + XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V') + # Compute distances + d2 <- tcrossprod(XQ) # XQ XQ' + d1 <- matrix(diag(d2), n, n) + D <- d1 - 2 * d2 + t(d1) # Apply kernel + # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2)) K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) @@ -81,11 +77,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) { # Get dimensions n <- nrow(X) p <- ncol(X) - # Compute persistent data. - i = rep(1:n, n) - j = rep(1:n, each = n) - D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n) - + L <- matrix(NA, n, length(object$res)) colnames(L) <- names(object$res) # Compute per sample losses with alternative bandwidth for each dimension. @@ -93,16 +85,16 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) { # extract dimension specific estimates and dimensions. 
k <- dr.k$k V <- dr.k$V - q <- ncol(V) - # estimate bandwidth according alternative formula (see: TODO: see) + # estimate bandwidth according alternative formula. h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` - XV <- X %*% V - # Devectorized distance matrix - # (inefficient in R but fast in C) - D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) - D <- D.eucl - D + XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V') + # Compute distances + d2 <- tcrossprod(XQ) # XQ XQ' + d1 <- matrix(diag(d2), n, n) + D <- d1 - 2 * d2 + t(d1) # Apply kernel + # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2)) K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) @@ -130,27 +122,24 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) { )) } -#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. -#' TODO: rewrite!!! +#' \code{"TODO: @Lukas"} #' -#' @param object instance of class \code{cve} (result of \code{cve}, -#' \code{cve.call}). +#' @param object instance of class \code{cve} (result of \code{\link{cve}}, +#' \code{\link{cve.call}}). +#' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}. #' @param ... ignored. #' -#' @return list with -#' \itemize{ -#' \item MSE: Mean Square Error, -#' \item k: predicted dimensions. -#' } +#' @return list with \code{"k"} the predicted dimension and method dependent +#' informatoin. #' -#' @section cv: -#' Cross-validation ... TODO: +#' @section Method cv: +#' TODO: \code{"TODO: @Lukas"}. #' -#' @section elbow: -#' Cross-validation ... TODO: +#' @section Method elbow: +#' TODO: \code{"TODO: @Lukas"}. #' -#' @section wilcoxon: -#' Cross-validation ... TODO: +#' @section Method wilcoxon: +#' TODO: \code{"TODO: @Lukas"}. 
#' #' @examples #' # create B for simulation diff --git a/CVE_C/R/summary.R b/CVE/R/summary.R similarity index 86% rename from CVE_C/R/summary.R rename to CVE/R/summary.R index a50d6d6..90a9cfe 100644 --- a/CVE_C/R/summary.R +++ b/CVE/R/summary.R @@ -1,5 +1,9 @@ #' Prints a summary of a \code{cve} result. -#' @param object Instance of 'cve' as returned by \code{cve}. +#' +#' Prints summary statistics of output \code{L} from \code{cve} for +#' \code{k = min.dim, ..., max.dim}. +#' +#' @param object Instance of \code{"cve"} as returned by \code{\link{cve}}. #' @param ... ignored. #' #' @examples diff --git a/CVE/R/util.R b/CVE/R/util.R new file mode 100644 index 0000000..c91954e --- /dev/null +++ b/CVE/R/util.R @@ -0,0 +1,24 @@ +#' Draws a sample from the invariant measure on the Stiefel manifold +#' \eqn{S(p, q)}. +#' +#' @param p row dimension +#' @param q col dimension +#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix. +#' @examples +#' V <- rStiefel(6, 4) +#' @export +rStiefel <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +#' Null space basis of given matrix `V` +#' +#' @param V `(p, q)` matrix +#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`. +#' @keywords internal +#' @export +null <- function(V) { + tmp <- qr(V) + set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) + return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) +} diff --git a/CVE_C/inst/doc/CVE_paper.pdf b/CVE/inst/doc/CVE_paper.pdf similarity index 100% rename from CVE_C/inst/doc/CVE_paper.pdf rename to CVE/inst/doc/CVE_paper.pdf diff --git a/CVE_C/man/CVE-package.Rd b/CVE/man/CVE-package.Rd similarity index 96% rename from CVE_C/man/CVE-package.Rd rename to CVE/man/CVE-package.Rd index 2ab4c5e..6ee7bb4 100644 --- a/CVE_C/man/CVE-package.Rd +++ b/CVE/man/CVE-package.Rd @@ -26,7 +26,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. 
\eqn{\epsilon} is a mean zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} is an unknown, continuous non-constant function, and \eqn{B = (b_1, ..., b_k)} is -a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. +a real \eqn{p \times k}{p x k} of rank \eqn{k \leq p}{k <= p}. Without loss of generality \eqn{B} is assumed to be orthonormal. } \references{ diff --git a/CVE_C/man/coef.cve.Rd b/CVE/man/coef.cve.Rd similarity index 90% rename from CVE_C/man/coef.cve.Rd rename to CVE/man/coef.cve.Rd index 6ca80b0..c3c4cff 100644 --- a/CVE_C/man/coef.cve.Rd +++ b/CVE/man/coef.cve.Rd @@ -8,7 +8,7 @@ } \arguments{ \item{object}{instance of \code{cve} as output from \code{\link{cve}} or -\code{\link{cve.call}}} +\code{\link{cve.call}}.} \item{k}{the SDR dimension.} @@ -18,7 +18,8 @@ dir the matrix of CS or CMS of given dimension } \description{ -Returns the SDR basis matrix for SDR dimension(s). +Returns the SDR basis matrix for dimension \code{k}, i.e. returns the +cve-estimate with dimension \eqn{p\times k}{p x k}. } \examples{ # set dimensions for simulation model diff --git a/CVE_C/man/cve.Rd b/CVE/man/cve.Rd similarity index 70% rename from CVE_C/man/cve.Rd rename to CVE/man/cve.Rd index 4e9a159..94f500c 100644 --- a/CVE_C/man/cve.Rd +++ b/CVE/man/cve.Rd @@ -8,31 +8,51 @@ cve(formula, data, method = "simple", max.dim = 10L, ...) } \arguments{ \item{formula}{an object of class \code{"formula"} which is a symbolic -description of the model to be fitted.} +description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where +\eqn{Y} is a \eqn{n}-dimensional vector of the response variable and +\eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.} \item{data}{an optional data frame, containing the data for the formula if -supplied.} +supplied like \code{data <- data.frame(Y, X)} with dimension +\eqn{n \times (p + 1)}{n x (p + 1)}. 
By default the variables are taken from +the environment from which \code{cve} is called.} -\item{method}{specifies the CVE method variation as one of +\item{method}{This character string specifies the method of fitting. The +options are \itemize{ - \item "simple" exact implementation as described in the paper listed - below. - \item "weighted" variation with addaptive weighting of slices. -}} + \item "simple" implementation as described in the paper. + \item "weighted" variation with adaptive weighting of slices. +} +see paper.} \item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).} -\item{...}{Parameters passed on to \code{cve.call}.} +\item{...}{optional parameters passed on to \code{cve.call}.} } \value{ an S3 object of class \code{cve} with components: \describe{ - \item{X}{Original training data,} - \item{Y}{Responce of original training data,} + \item{X}{design matrix of predictor vector used for calculating + cve-estimate,} + \item{Y}{\eqn{n}-dimensional vector of responses used for calculating + cve-estimate,} \item{method}{Name of used method,} \item{call}{the matched call,} - \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for - each \eqn{k=min.dim,...,max.dim} (dimension).} + \item{res}{list of components \code{V, L, B, loss, h} for + each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the + call \code{min.dim = max.dim = k}. + \itemize{ + \item \code{B} is the cve-estimate with dimension + \eqn{p\times k}{p x k}. + \item \code{V} is the orthogonal complement of \eqn{B}. + \item \code{L} is the loss for each sample separately such that + its mean is \code{loss}. + \item \code{loss} is the value of the target function that is + minimized, evaluated at \eqn{V}. + \item \code{h} bandwidth parameter used to calculate + \code{B, V, loss, L}. + } + } } } \description{ @@ -56,7 +76,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. 
\eqn{\epsilon} is a mean zero random variable with finite
\eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
-a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
+a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
 Without loss of generality \eqn{B} is assumed to be orthonormal.
 }
 \examples{
@@ -67,7 +87,7 @@ k <- 2
 b1 <- rep(1 / sqrt(p), p)
 b2 <- (-1)^seq(1, p) / sqrt(p)
 B <- cbind(b1, b2)
-# samplsize
+# sample size
 n <- 200
 set.seed(21)
 # creat predictor data x ~ N(0, I_p)
diff --git a/CVE_C/man/cve.call.Rd b/CVE/man/cve.call.Rd
similarity index 64%
rename from CVE_C/man/cve.call.Rd
rename to CVE/man/cve.call.Rd
index 9b6f0d4..f3bef08 100644
--- a/CVE_C/man/cve.call.Rd
+++ b/CVE/man/cve.call.Rd
@@ -10,9 +10,9 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
   max.iter = 50L, attempts = 10L, logger = NULL)
 }
 \arguments{
-\item{X}{data matrix with samples in its rows.}
+\item{X}{Design matrix with dimension \eqn{n\times p}{n x p}.}

-\item{Y}{Responses (1 dimensional).}
+\item{Y}{numeric array of length \eqn{n} of responses.}

 \item{method}{specifies the CVE method variation as one of
 \itemize{
@@ -34,38 +34,59 @@ estimated bandwidth.}
 \item{k}{Dimension of lower dimensional projection, if \code{k} is given
 only the specified dimension \code{B} matrix is estimated.}

-\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
-gradient update with a momentum term.}
+\item{momentum}{number of \eqn{[0, 1)} giving the ratio of momentum for
+euclidean gradient update with a momentum term. \code{momentum = 0}
+corresponds to normal gradient descent.}

 \item{tau}{Initial step-size.}

 \item{tol}{Tolerance for break condition.}

 \item{slack}{Positive scaling to allow small increases of the loss while
-optimizing.}
+optimizing, i.e. 
\code{slack = 0.1} allows the target function to +increase up to \eqn{10 \%} in one optimization step.} -\item{gamma}{step-size reduction multiple.} +\item{gamma}{step-size reduction multiple. If gradient step with step size +\code{tau} is not accepted \code{gamma * tau} is set to the next step +size.} \item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k) -as optimization starting value. (If supplied, \code{attempts} is -set to 1 and \code{k} to match dimension)} +used as starting value in the optimization. (If supplied, +\code{attempts} is set to 0 and \code{k} to match dimension).} \item{max.iter}{maximum number of optimization steps.} -\item{attempts}{number of arbitrary different starting points.} +\item{attempts}{If \code{V.init} not supplied, the optimization is carried +out \code{attempts} times with starting values drawn from the invariant +measure on the Stiefel manifold (see \code{\link{rStiefel}}).} -\item{logger}{a logger function (only for advanced user, significantly slows -down the computation).} +\item{logger}{a logger function (only for advanced user, slows down the +computation).} } \value{ an S3 object of class \code{cve} with components: \describe{ - \item{X}{Original training data,} - \item{Y}{Responce of original training data,} + \item{X}{design matrix of predictor vector used for calculating + cve-estimate,} + \item{Y}{\eqn{n}-dimensional vector of responses used for calculating + cve-estimate,} \item{method}{Name of used method,} \item{call}{the matched call,} - \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for - each \eqn{k=min.dim,...,max.dim} (dimension).} + \item{res}{list of components \code{V, L, B, loss, h} for + each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the + call \code{min.dim = max.dim = k}. + \itemize{ + \item \code{B} is the cve-estimate with dimension + \eqn{p\times k}{p x k}. + \item \code{V} is the orthogonal complement of \eqn{B}. 
+            \item \code{L} is the loss for each sample separately such that
+                its mean is \code{loss}.
+            \item \code{loss} is the value of the target function that is
+                minimized, evaluated at \eqn{V}.
+            \item \code{h} bandwidth parameter used to calculate
+                \code{B, V, loss, L}.
+        }
+    }
 }
 }
 \description{
@@ -89,7 +110,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}.
 \eqn{\epsilon} is a mean zero random variable with finite
 \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 is an unknown, continuous non-constant function,
 and \eqn{B = (b_1, ..., b_k)} is
-a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
+a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
 Without loss of generality \eqn{B} is assumed to be orthonormal.
 }
 \examples{
diff --git a/CVE/man/dataset.Rd b/CVE/man/dataset.Rd
new file mode 100644
index 0000000..69999bd
--- /dev/null
+++ b/CVE/man/dataset.Rd
@@ -0,0 +1,127 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasets.R
+\name{dataset}
+\alias{dataset}
+\title{Generates test datasets.}
+\usage{
+dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
+}
+\arguments{
+\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4",}
+\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset number
+1-7.}
+
+\item{n}{number of samples.}
+
+\item{p}{Dimension of random variable \eqn{X}.}
+
+\item{sd}{standard deviation for error term \eqn{\epsilon}.}
+
+\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
+\code{lambda}), see: below.}
+}
+\value{
+List with elements
+\itemize{
+    \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
+    \item{Y}{response.}
+    \item{B}{the dim-reduction matrix}
+    \item{name}{Name of the dataset (name parameter)}
+}
+\description{
+Provides sample datasets M1-M7 used in the paper Conditional variance
+estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura. 
+The general model is given by: +\deqn{Y = g(B'X) + \epsilon} +} +\section{M1}{ + +The predictors are distributed as +\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with +\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default +of \eqn{n = 100} data points. \eqn{p = 20}, +\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is +given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is +distributed as generalized normal distribution with location 0, +shape-parameter 0.5, and the scale-parameter is chosen such that +\eqn{Var(\epsilon) = 0.5}. +} + +\section{M2}{ + +The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}. with +\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where +\eqn{1_p} is the \eqn{p}-dimensional vector of one's, for a subspace +dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is +standard normal. +Defaults for \code{pmix} is 0.3 and \code{lambda} defaults to 1. +} + +\section{M3}{ + +The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +for a subspace +dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +and \eqn{Y} is +\deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is +standard normal. 
+} + +\section{M4}{ + +The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)} +with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default +of \eqn{n = 100} data points. \eqn{p = 20}, +\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon} +where \eqn{\epsilon} is standard normal. +} + +\section{M5}{ + +The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)} +where \eqn{U([0, 1]^p)} is the uniform distribution with +independent components on the \eqn{p}-dimensional hypercube for a subspace +dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +\eqn{p = 20}, +\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon} +where \eqn{\epsilon} is standard normal. +} + +\section{M6}{ + +The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data +point. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where +\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon} +where \eqn{\epsilon} is standard normal. 
+} + +\section{M7}{ + +The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where +\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of +freedom, for a subspace dimension of \eqn{k = 4} with a default of +\eqn{n = 200} data points. +\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where +\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon} +where \eqn{\epsilon} is distributed as generalized normal distribution with +location 0, shape-parameter 1, and the scale-parameter is chosen such that +\eqn{Var(\epsilon) = 0.25}. +} + +\references{ +Fertl Lukas, Bura Efstathia. (2019), Conditional Variance +Estimation for Sufficient Dimension Reduction. Working Paper. +} diff --git a/CVE_C/man/directions.cve.Rd b/CVE/man/directions.cve.Rd similarity index 73% rename from CVE_C/man/directions.cve.Rd rename to CVE/man/directions.cve.Rd index 3abd2db..acc8164 100644 --- a/CVE_C/man/directions.cve.Rd +++ b/CVE/man/directions.cve.Rd @@ -8,12 +8,17 @@ \method{directions}{cve}(dr, k) } \arguments{ -\item{dr}{Instance of 'cve' as returned by \code{cve}.} +\item{dr}{Instance of \code{'cve'} as returned by \code{\link{cve}}.} \item{k}{SDR dimension to use for projection.} } +\value{ +the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B} + is the cve-estimate for dimension \eqn{k}. +} \description{ -Computes projected training data \code{X} for given dimension `k`. +Projects the dimensional design matrix \eqn{X} on the columnspace of the +cve-estimate for given dimension \eqn{k}. 
} \examples{ # create B for simulation (k = 1) diff --git a/CVE_C/man/estimate.bandwidth.Rd b/CVE/man/estimate.bandwidth.Rd similarity index 50% rename from CVE_C/man/estimate.bandwidth.Rd rename to CVE/man/estimate.bandwidth.Rd index aa0b96e..d7f6cbe 100644 --- a/CVE_C/man/estimate.bandwidth.Rd +++ b/CVE/man/estimate.bandwidth.Rd @@ -4,26 +4,33 @@ \alias{estimate.bandwidth} \title{Bandwidth estimation for CVE.} \usage{ -estimate.bandwidth(X, k, nObs) +estimate.bandwidth(X, k, nObs, version = 1L) } \arguments{ -\item{X}{data matrix with samples in its rows.} +\item{X}{a \eqn{n\times p}{n x p} matrix with samples in its rows.} \item{k}{Dimension of lower dimensional projection.} -\item{nObs}{number of points in a slice, see \eqn{nObs} in CVE paper.} +\item{nObs}{number of points in a slice, only for version 2.} + +\item{version}{either \code{1} or \code{2}.} } \value{ Estimated bandwidth \code{h}. } \description{ -Estimates a bandwidth \code{h} according +If no bandwidth or function for calculating it is supplied, the CVE method +defaults to using the following formula (version 1) \deqn{% -h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% -h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} -with \eqn{n} the sample size, \eqn{p} its dimension -(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} -which is \code{(n-1)/n} times the sample covariance estimate. + h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{% + h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2} +Alternative version 2 is used for dimension prediction which is given by + \deqn{% + h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{% + h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))} +with \eqn{n} the sample size, \eqn{p} its dimension and the +covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample +covariance estimate. 
} \examples{ # set dimensions for simulation model diff --git a/CVE_C/man/null.Rd b/CVE/man/null.Rd similarity index 100% rename from CVE_C/man/null.Rd rename to CVE/man/null.Rd diff --git a/CVE_C/man/plot.cve.Rd b/CVE/man/plot.cve.Rd similarity index 75% rename from CVE_C/man/plot.cve.Rd rename to CVE/man/plot.cve.Rd index 8893782..18593ee 100644 --- a/CVE_C/man/plot.cve.Rd +++ b/CVE/man/plot.cve.Rd @@ -13,7 +13,10 @@ [\code{\link{lines}}]} } \description{ -Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. +Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from +\code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds +to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of +\eqn{L_n(V)}, for further details see the paper. } \examples{ # create B for simulation @@ -41,6 +44,10 @@ cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X))) # elbow plot plot(cve.obj.simple) +} +\references{ +Fertl Lukas, Bura Efstathia. (2019), Conditional Variance +Estimation for Sufficient Dimension Reduction. Working Paper. } \seealso{ see \code{\link{par}} for graphical parameters to pass through diff --git a/CVE_C/man/predict.cve.Rd b/CVE/man/predict.cve.Rd similarity index 86% rename from CVE_C/man/predict.cve.Rd rename to CVE/man/predict.cve.Rd index 7998f47..95813e6 100644 --- a/CVE_C/man/predict.cve.Rd +++ b/CVE/man/predict.cve.Rd @@ -20,7 +20,8 @@ prediced response of data \code{newdata}. } \description{ -Predict responces using reduced data with \code{\link{mars}}. +Predict response using projected data where the forward model \eqn{g(B'X)} +is estimated using \code{\link{mars}}. } \examples{ # create B for simulation @@ -50,5 +51,5 @@ yhat <- predict(cve.obj.simple, x.test, 1) plot(yhat, y.test) } \seealso{ -\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. +\code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}. 
}
diff --git a/CVE_C/man/predict_dim.Rd b/CVE/man/predict_dim.Rd
similarity index 53%
rename from CVE_C/man/predict_dim.Rd
rename to CVE/man/predict_dim.Rd
index 134f7ba..cb4b8b1 100644
--- a/CVE_C/man/predict_dim.Rd
+++ b/CVE/man/predict_dim.Rd
@@ -2,26 +2,40 @@
 % Please edit documentation in R/predict_dim.R
 \name{predict_dim}
 \alias{predict_dim}
-\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
+\title{\code{"TODO: @Lukas"}}
 \usage{
-predict_dim(object, ...)
+predict_dim(object, ..., method = "CV")
 }
 \arguments{
-\item{object}{instance of class \code{cve} (result of \code{cve},
-\code{cve.call}).}
+\item{object}{instance of class \code{cve} (result of \code{\link{cve}},
+\code{\link{cve.call}}).}

 \item{...}{ignored.}
+
+\item{method}{one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.}
 }
 \value{
-list with
-\itemize{
-    \item MSE: Mean Square Error,
-    \item k: predicted dimensions.
-}
+list with \code{"k"} the predicted dimension and method dependent
+    information.
 }
 \description{
-Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
+\code{"TODO: @Lukas"}
 }
+\section{Method cv}{
+
+TODO: \code{"TODO: @Lukas"}.
+}
+
+\section{Method elbow}{
+
+TODO: \code{"TODO: @Lukas"}.
+}
+
+\section{Method wilcoxon}{
+
+TODO: \code{"TODO: @Lukas"}. 
+} + \examples{ # create B for simulation B <- rep(1, 5) / sqrt(5) diff --git a/CVE_C/man/rStiefel.Rd b/CVE/man/rStiefel.Rd similarity index 81% rename from CVE_C/man/rStiefel.Rd rename to CVE/man/rStiefel.Rd index e2be197..470270a 100644 --- a/CVE_C/man/rStiefel.Rd +++ b/CVE/man/rStiefel.Rd @@ -2,7 +2,8 @@ % Please edit documentation in R/util.R \name{rStiefel} \alias{rStiefel} -\title{Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.} +\title{Draws a sample from the invariant measure on the Stiefel manifold +\eqn{S(p, q)}.} \usage{ rStiefel(p, q) } @@ -12,10 +13,11 @@ rStiefel(p, q) \item{q}{col dimension} } \value{ -\code{p} times \code{q} semi-orthogonal matrix. +\eqn{p \times q}{p x q} semi-orthogonal matrix. } \description{ -Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}. +Draws a sample from the invariant measure on the Stiefel manifold +\eqn{S(p, q)}. } \examples{ V <- rStiefel(6, 4) diff --git a/CVE/man/rgnorm.Rd b/CVE/man/rgnorm.Rd new file mode 100644 index 0000000..ed85d22 --- /dev/null +++ b/CVE/man/rgnorm.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datasets.R +\name{rgnorm} +\alias{rgnorm} +\title{Generalized Normal Distribution.} +\usage{ +rgnorm(n = 1, mu = 0, alpha = 1, beta = 1) +} +\arguments{ +\item{n}{Number of generated samples.} + +\item{mu}{mean.} + +\item{alpha}{first shape parameter.} + +\item{beta}{second shape parameter.} +} +\value{ +numeric array of length \eqn{n}. +} +\description{ +Random generation for generalized Normal Distribution. 
+} +\seealso{ +https://en.wikipedia.org/wiki/Generalized_normal_distribution +} +\keyword{internal} diff --git a/CVE/man/rlaplace.Rd b/CVE/man/rlaplace.Rd new file mode 100644 index 0000000..4cfcf52 --- /dev/null +++ b/CVE/man/rlaplace.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datasets.R +\name{rlaplace} +\alias{rlaplace} +\title{Laplace distribution} +\usage{ +rlaplace(n = 1, mu = 0, sd = 1) +} +\arguments{ +\item{n}{Number of generated samples.} + +\item{mu}{mean.} + +\item{sd}{standard deviation.} +} +\value{ +numeric array of length \eqn{n}. +} +\description{ +Random generation for Laplace distribution. +} +\seealso{ +https://en.wikipedia.org/wiki/Laplace_distribution +} +\keyword{internal} diff --git a/CVE/man/rmvnorm.Rd b/CVE/man/rmvnorm.Rd new file mode 100644 index 0000000..0b53ba2 --- /dev/null +++ b/CVE/man/rmvnorm.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datasets.R +\name{rmvnorm} +\alias{rmvnorm} +\title{Multivariate Normal Distribution.} +\usage{ +rmvnorm(n = 1, mu = rep(0, p), sigma = diag(p)) +} +\arguments{ +\item{n}{number of samples.} + +\item{mu}{mean} + +\item{sigma}{covariance matrix.} +} +\value{ +a \eqn{n\times p}{n x p} matrix with samples in its rows. +} +\description{ +Random generation for the multivariate normal distribution. 
+\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
+}
+\examples{
+\dontrun{
+rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
+rmvnorm(20, mu = c(3, -1, 2))
+}
+}
+\keyword{internal}
diff --git a/CVE/man/rmvt.Rd b/CVE/man/rmvt.Rd
new file mode 100644
index 0000000..38ea6f7
--- /dev/null
+++ b/CVE/man/rmvt.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/datasets.R
+\name{rmvt}
+\alias{rmvt}
+\title{Multivariate t distribution.}
+\usage{
+rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
+}
+\arguments{
+\item{n}{number of samples.}
+
+\item{mu}{mean}
+
+\item{sigma}{a \eqn{k\times k}{k x k} positive definite matrix. If the degree
+\eqn{\nu} is bigger than 2 the created covariance is
+\deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
+for \eqn{\nu > 2}.}
+
+\item{df}{degree of freedom \eqn{\nu}.}
+}
+\value{
+a \eqn{n\times p}{n x p} matrix with samples in its rows.
+}
+\description{
+Random generation from multivariate t distribution (student distribution).
+}
+\examples{
+\dontrun{
+rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
+rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
+rmvt(20, mu = c(3, -1, 2), 3)
+}
+}
+\keyword{internal}
diff --git a/CVE_C/man/summary.cve.Rd b/CVE/man/summary.cve.Rd
similarity index 79%
rename from CVE_C/man/summary.cve.Rd
rename to CVE/man/summary.cve.Rd
index 731bb66..3c46043 100644
--- a/CVE_C/man/summary.cve.Rd
+++ b/CVE/man/summary.cve.Rd
@@ -7,12 +7,13 @@
 \method{summary}{cve}(object, ...)
 }
 \arguments{
-\item{object}{Instance of 'cve' as returned by \code{cve}.}
+\item{object}{Instance of \code{"cve"} as returned by \code{\link{cve}}.}

 \item{...}{ignored.}
 }
 \description{
-Prints a summary of a \code{cve} result.
+Prints summary statistics of output \code{L} from \code{cve} for
+\code{k = min.dim, ..., max.dim}. 
} \examples{ # create B for simulation diff --git a/CVE_C/src/Makevars b/CVE/src/Makevars similarity index 100% rename from CVE_C/src/Makevars rename to CVE/src/Makevars diff --git a/CVE_C/src/Makevars.win b/CVE/src/Makevars.win similarity index 100% rename from CVE_C/src/Makevars.win rename to CVE/src/Makevars.win diff --git a/CVE_C/src/callLogger.c b/CVE/src/callLogger.c similarity index 100% rename from CVE_C/src/callLogger.c rename to CVE/src/callLogger.c diff --git a/CVE_C/src/cve.c b/CVE/src/cve.c similarity index 100% rename from CVE_C/src/cve.c rename to CVE/src/cve.c diff --git a/CVE_C/src/cve.h b/CVE/src/cve.h similarity index 100% rename from CVE_C/src/cve.h rename to CVE/src/cve.h diff --git a/CVE_C/src/cve_subroutines.c b/CVE/src/cve_subroutines.c similarity index 67% rename from CVE_C/src/cve_subroutines.c rename to CVE/src/cve_subroutines.c index 50ef975..693c21c 100644 --- a/CVE_C/src/cve_subroutines.c +++ b/CVE/src/cve_subroutines.c @@ -161,97 +161,3 @@ mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1, return mat_S; } - -// int getWorkLen(const int n, const int p, const int q) { -// int mpq; /**< Max of p and q */ -// int nn = ((n - 1) * n) / 2; - -// if (p > q) { -// mpq = p; -// } else { -// mpq = q; -// } -// if (nn * p < (mpq + 1) * mpq) { -// return 2 * (mpq + 1) * mpq; -// } else { -// return (nn + mpq) * mpq; -// } -// } - -// double cost(const unsigned int method, -// const int n, -// const double *Y, -// const double *vecK, -// const double *colSums, -// double *y1, double *L) { -// int i, j, k; -// double tmp, sum; - -// for (i = 0; i < n; ++i) { -// y1[i] = Y[i]; -// L[i] = Y[i] * Y[i]; -// } - -// for (k = j = 0; j < n; ++j) { -// for (i = j + 1; i < n; ++i, ++k) { -// y1[i] += Y[j] * vecK[k]; -// y1[j] += Y[i] * vecK[k]; -// L[i] += Y[j] * Y[j] * vecK[k]; -// L[j] += Y[i] * Y[i] * vecK[k]; -// } -// } - -// for (i = 0; i < n; ++i) { -// y1[i] /= colSums[i]; -// L[i] /= colSums[i]; -// } - -// tmp = 0.0; -// if 
(method == CVE_METHOD_WEIGHTED) { -// sum = 0.0; -// for (i = 0; i < n; ++i) { -// tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]); -// sum += colSums[i]; -// } -// return tmp / (sum - (double)n); // TODO: check for division by zero! -// } else { -// for (i = 0; i < n; ++i) { -// tmp += (L[i] -= y1[i] * y1[i]); -// } -// return tmp / (double)n; -// } -// } - -// void scaling(const unsigned int method, -// const int n, -// const double *Y, const double *y1, const double *L, -// const double *vecD, const double *vecK, -// const double *colSums, -// double *vecS) { -// int i, j, k, nn = (n * (n - 1)) / 2; -// double tmp; - -// if (method == CVE_METHOD_WEIGHTED) { -// for (k = j = 0; j < n; ++j) { -// for (i = j + 1; i < n; ++i, ++k) { -// tmp = Y[j] - y1[i]; -// vecS[k] = (L[i] - (tmp * tmp)); -// tmp = Y[i] - y1[j]; -// vecS[k] += (L[j] - (tmp * tmp)); -// } -// } -// } else { -// for (k = j = 0; j < n; ++j) { -// for (i = j + 1; i < n; ++i, ++k) { -// tmp = Y[j] - y1[i]; -// vecS[k] = (L[i] - (tmp * tmp)) / colSums[i]; -// tmp = Y[i] - y1[j]; -// vecS[k] += (L[j] - (tmp * tmp)) / colSums[j]; -// } -// } -// } - -// for (k = 0; k < nn; ++k) { -// vecS[k] *= vecK[k] * vecD[k]; -// } -// } diff --git a/CVE_C/src/export.c b/CVE/src/export.c similarity index 100% rename from CVE_C/src/export.c rename to CVE/src/export.c diff --git a/CVE_C/src/init.c b/CVE/src/init.c similarity index 88% rename from CVE_C/src/init.c rename to CVE/src/init.c index 8f480c4..d182d46 100644 --- a/CVE_C/src/init.c +++ b/CVE/src/init.c @@ -17,8 +17,8 @@ static const R_CallMethodDef CallEntries[] = { {NULL, NULL, 0} }; -/* Restrict C entrypoints to registered routines. */ -void R_initCVE(DllInfo *dll) { +/* Restrict C entry points to registered routines. 
*/ +void R_init_CVE(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/CVE_C/src/matrix.c b/CVE/src/matrix.c similarity index 99% rename from CVE_C/src/matrix.c rename to CVE/src/matrix.c index bc50f0b..5bb39ad 100644 --- a/CVE_C/src/matrix.c +++ b/CVE/src/matrix.c @@ -914,7 +914,7 @@ mat* laplace(mat *A, double *workMem) { * \_____/ \_____/ * IpA C = ImA B * \_______/ - * IpA C = Y ==> C = IpA^-1 Y + * IpA C = Y ==> C = IpA^-1 Y * * @param A Skew-Symmetric matrix of dimension `(n, n)`. * @param B Matrix of dimensions `(n, m)` with `m <= n`. diff --git a/CVE_C/src/rStiefel.c b/CVE/src/rStiefel.c similarity index 100% rename from CVE_C/src/rStiefel.c rename to CVE/src/rStiefel.c diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R deleted file mode 100644 index 29ea0b5..0000000 --- a/CVE_C/R/datasets.R +++ /dev/null @@ -1,193 +0,0 @@ -#' -#' @param n number of samples. -#' @param mu mean -#' @param sigma covariance matrix. -#' -#' @returns a \eqn{n\times p} matrix with samples in its rows. -#' -#' @examples -#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2)) -#' rmvnorm(20, mu = c(3, -1, 2)) -rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) { - if (!missing(sigma)) { - p <- nrow(sigma) - } else if (!missing(mu)) { - mu <- matrix(mu, ncol = 1) - p <- nrow(mu) - } else { - stop("At least one of 'mu' or 'sigma' must be supplied.") - } - - # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution - return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma)) -} - -#' Samples from the multivariate t distribution (student distribution). -#' -#' @param n number of samples. -#' @param mu mean, ... TODO: -#' @param sigma a \eqn{k\times k} positive definite matrix. If the degree -#' \eqn{\nu} if bigger than 2 the created covariance is -#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}} -#' for \eqn{\nu > 2}. -#' @param df degree of freedom \eqn{\nu}. 
-#' -#' @returns a \eqn{n\times p} matrix with samples in its rows. -#' -#' @examples -#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3) -#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3) -#' rmvt(20, mu = c(3, -1, 2), 3) -rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) { - if (!missing(sigma)) { - p <- nrow(sigma) - } else if (!missing(mu)) { - mu <- matrix(mu, ncol = 1) - p <- nrow(mu) - } else { - stop("At least one of 'mu' or 'sigma' must be supplied.") - } - - if (df == Inf) { - Z <- 1 - } else { - Z <- sqrt(df / rchisq(n, df)) - } - - return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n)) -} - - -#' Generalized Normal Distribution. -#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution -rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) { - if (alpha <= 0 | beta <= 0) { - stop("alpha and beta must be positive.") - } - lambda <- (1 / alpha)^beta - scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta) - return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu) -} - -#' Laplace distribution -#' see: https://en.wikipedia.org/wiki/Laplace_distribution -rlaplace <- function(n = 1, mu = 0, sigma = 1) { - U <- runif(n, -0.5, 0.5) - scale <- sigma / sqrt(2) - - return(mu - scale * sign(U) * log(1 - 2 * abs(U))) -} - -#' Generates test datasets. -#' -#' Provides sample datasets. There are 5 different datasets named -#' M1, M2, M3, M4 and M5 described in the paper references below. -#' The general model is given by: -#' \deqn{Y = g(B'X) + \epsilon} -#' -#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} -#' @param n nr samples -#' @param B SDR basis used for dataset creation if supplied. -#' @param p Dim. of random variable \code{X}. -#' @param p.mix Only for \code{"M4"}, see: below. -#' @param lambda Only for \code{"M4"}, see: below. 
-#' -#' @return List with elements -#' \itemize{ -#' \item{X}{data} -#' \item{Y}{response} -#' \item{B}{Used dim-reduction matrix} -#' \item{name}{Name of the dataset (name parameter)} -#' } -#' -#' @section M1: -#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace -#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. -#' The link function \eqn{g} is given as -#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{% -#' g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2} -#' @section M2: -#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a -#' default of \eqn{n = 200} data points. -#' The link function \eqn{g} is given as -#' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2} -#' @section M3: -#' \deqn{g(x) = cos(b_1^T X) + \epsilon / 2} -#' @section M4: -#' TODO: -#' @section M5: -#' TODO: -#' -#' @import stats -#' @importFrom stats rnorm rbinom -#' @export -dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) { - name <- toupper(name) - if (nchar(name) == 1) { name <- paste0("M", name) } - - if (name == "M1") { - if (missing(n)) { n <- 100 } - # B ... `p x 1` - B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) - X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`))) - beta <- 0.5 - Y <- cos(X %*% B) + rgnorm(n, 0, - alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)), - beta = beta - ) - } else if (name == "M2") { - if (missing(n)) { n <- 100 } - prob <- 0.3 - lambda <- 1 # dispersion - # B ... `p x 1` - B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) - Z <- 2 * rbinom(n, 1, prob) - 1 - X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n) - Y <- cos(X %*% B) + rnorm(n, 0, sigma) - } else if (name == "M3") { - if (missing(n)) { n <- 200 } - # B ... 
`p x 1` - B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) - X <- matrix(rnorm(n * p), n) - Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2) - } else if (name == "M4") { - if (missing(n)) { n <- 200 } - # B ... `p x 2` - B <- cbind( - c(rep(1 / sqrt(6), 6), rep(0, p - 6)), - c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6)) - ) - X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`))) - XB <- X %*% B - Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2) - } else if (name == "M5") { - if (missing(n)) { n <- 200 } - # B ... `p x 2` - B <- cbind( - c(rep(1, 6), rep(0, p - 6)), - c(rep(c(1, -1), 3), rep(0, p - 6)) - ) / sqrt(6) - X <- matrix(runif(n * p), n) - XB <- X %*% B - Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2) - } else if (name == "M6") { - if (missing(n)) { n <- 200 } - # B ... `p x 3` - B <- diag(p)[, -(3:(p - 1))] - X <- matrix(rnorm(n * p), n) - Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2) - } else if (name == "M7") { - if (missing(n)) { n <- 400 } - # B ... `p x 4` - B <- diag(p)[, -(4:(p - 1))] - # "R"andom "M"ulti"V"ariate "S"tudent - X <- rmvt(n = n, sigma = diag(p), df = 3) - XB <- X %*% B - Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4]) - Y <- Y + rlaplace(n, 0, sigma) - } else { - stop("Got unknown dataset name.") - } - - return(list(X = X, Y = Y, B = B, name = name)) -} diff --git a/CVE_C/R/util.R b/CVE_C/R/util.R deleted file mode 100644 index 3053ac1..0000000 --- a/CVE_C/R/util.R +++ /dev/null @@ -1,82 +0,0 @@ -#' Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}. -#' -#' @param p row dimension -#' @param q col dimension -#' @return \code{p} times \code{q} semi-orthogonal matrix. -#' @examples -#' V <- rStiefel(6, 4) -#' @export -rStiefel <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) -} - -#' Retraction to the manifold. -#' -#' @param A matrix. -#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold. 
-#' @keywords internal -#' @export -retractStiefel <- function(A) { - return(qr.Q(qr(A))) -} - -#' Skew-Symmetric matrix computed from `A` as -#' \eqn{1/2 (A - A^T)}. -#' @param A Matrix of dim `(p, q)` -#' @return Skew-Symmetric matrix of dim `(p, p)`. -#' @keywords internal -#' @export -skew <- function(A) { - 0.5 * (A - t(A)) -} - -#' Symmetric matrix computed from `A` as -#' \eqn{1/2 (A + A^T)}. -#' @param A Matrix of dim `(p, q)` -#' @return Symmetric matrix of dim `(p, p)`. -#' @keywords internal -#' @export -sym <- function(A) { - 0.5 * (A + t(A)) -} - -#' Orthogonal Projection onto the tangent space of the stiefel manifold. -#' -#' @param V Point on the stiefel manifold. -#' @param G matrix to be projected onto the tangent space at `V`. -#' @return `(p, q)` matrix as element of the tangent space at `V`. -#' @keywords internal -#' @export -projTangentStiefel <- function(V, G) { - Q <- diag(1, nrow(V)) - V %*% t(V) - return(Q %*% G + V %*% skew(t(V) %*% G)) -} - -#' Null space basis of given matrix `V` -#' -#' @param V `(p, q)` matrix -#' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. -#' @keywords internal -#' @export -null <- function(V) { - tmp <- qr(V) - set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) - return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) -} - -#' Creates a (numeric) matrix where each column contains -#' an element to element matching. -#' @param elements numeric vector of elements to match -#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. -#' @keywords internal -#' @examples -#' elem.pairs(seq.int(2, 5)) -#' @export -elem.pairs <- function(elements) { - # Number of elements to match. - n <- length(elements) - # Create all combinations. - pairs <- rbind(rep(elements, each=n), rep(elements, n)) - # Select unique combinations without self interaction. 
- return(pairs[, pairs[1, ] < pairs[2, ]]) -} diff --git a/CVE_C/man/dataset.Rd b/CVE_C/man/dataset.Rd deleted file mode 100644 index d7323d1..0000000 --- a/CVE_C/man/dataset.Rd +++ /dev/null @@ -1,68 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets.R -\name{dataset} -\alias{dataset} -\title{Generates test datasets.} -\usage{ -dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) -} -\arguments{ -\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} - -\item{n}{nr samples} - -\item{B}{SDR basis used for dataset creation if supplied.} - -\item{p.mix}{Only for \code{"M4"}, see: below.} - -\item{lambda}{Only for \code{"M4"}, see: below.} - -\item{p}{Dim. of random variable \code{X}.} -} -\value{ -List with elements -\itemize{ - \item{X}{data} - \item{Y}{response} - \item{B}{Used dim-reduction matrix} - \item{name}{Name of the dataset (name parameter)} -} -} -\description{ -Provides sample datasets. There are 5 different datasets named -M1, M2, M3, M4 and M5 described in the paper references below. -The general model is given by: -\deqn{Y = g(B'X) + \epsilon} -} -\section{M1}{ - -The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace -dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. -The link function \eqn{g} is given as -\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{% - g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2} -} - -\section{M2}{ - -\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a -default of \eqn{n = 200} data points. 
-The link function \eqn{g} is given as -\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2} -} - -\section{M3}{ - -\deqn{g(x) = cos(b_1^T X) + \epsilon / 2} -} - -\section{M4}{ - -TODO: -} - -\section{M5}{ - -TODO: -} - diff --git a/CVE_C/man/elem.pairs.Rd b/CVE_C/man/elem.pairs.Rd deleted file mode 100644 index 2d36c5d..0000000 --- a/CVE_C/man/elem.pairs.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{elem.pairs} -\alias{elem.pairs} -\title{Creates a (numeric) matrix where each column contains -an element to element matching.} -\usage{ -elem.pairs(elements) -} -\arguments{ -\item{elements}{numeric vector of elements to match} -} -\value{ -matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. -} -\description{ -Creates a (numeric) matrix where each column contains -an element to element matching. -} -\examples{ - elem.pairs(seq.int(2, 5)) -} -\keyword{internal} diff --git a/CVE_C/man/projTangentStiefel.Rd b/CVE_C/man/projTangentStiefel.Rd deleted file mode 100644 index e1e5402..0000000 --- a/CVE_C/man/projTangentStiefel.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{projTangentStiefel} -\alias{projTangentStiefel} -\title{Orthogonal Projection onto the tangent space of the stiefel manifold.} -\usage{ -projTangentStiefel(V, G) -} -\arguments{ -\item{V}{Point on the stiefel manifold.} - -\item{G}{matrix to be projected onto the tangent space at `V`.} -} -\value{ -`(p, q)` matrix as element of the tangent space at `V`. -} -\description{ -Orthogonal Projection onto the tangent space of the stiefel manifold. 
-} -\keyword{internal} diff --git a/CVE_C/man/retractStiefel.Rd b/CVE_C/man/retractStiefel.Rd deleted file mode 100644 index 583706e..0000000 --- a/CVE_C/man/retractStiefel.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{retractStiefel} -\alias{retractStiefel} -\title{Retraction to the manifold.} -\usage{ -retractStiefel(A) -} -\arguments{ -\item{A}{matrix.} -} -\value{ -`(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold. -} -\description{ -Retraction to the manifold. -} -\keyword{internal} diff --git a/CVE_C/man/skew.Rd b/CVE_C/man/skew.Rd deleted file mode 100644 index 9caf669..0000000 --- a/CVE_C/man/skew.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{skew} -\alias{skew} -\title{Skew-Symmetric matrix computed from `A` as -\eqn{1/2 (A - A^T)}.} -\usage{ -skew(A) -} -\arguments{ -\item{A}{Matrix of dim `(p, q)`} -} -\value{ -Skew-Symmetric matrix of dim `(p, p)`. -} -\description{ -Skew-Symmetric matrix computed from `A` as -\eqn{1/2 (A - A^T)}. -} -\keyword{internal} diff --git a/CVE_C/man/sym.Rd b/CVE_C/man/sym.Rd deleted file mode 100644 index 3d1d407..0000000 --- a/CVE_C/man/sym.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{sym} -\alias{sym} -\title{Symmetric matrix computed from `A` as -\eqn{1/2 (A + A^T)}.} -\usage{ -sym(A) -} -\arguments{ -\item{A}{Matrix of dim `(p, q)`} -} -\value{ -Symmetric matrix of dim `(p, p)`. -} -\description{ -Symmetric matrix computed from `A` as -\eqn{1/2 (A + A^T)}. 
-} -\keyword{internal} diff --git a/CVE_R/DESCRIPTION b/CVE_R/DESCRIPTION deleted file mode 100644 index c62b728..0000000 --- a/CVE_R/DESCRIPTION +++ /dev/null @@ -1,11 +0,0 @@ -Package: CVEpureR -Type: Package -Title: Conditional Variance Estimator for Sufficient Dimension Reduction -Version: 0.1 -Date: 2019-08-29 -Author: Loki -Maintainer: Loki -Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is writen in pure R. -License: GPL-3 -Encoding: UTF-8 -RoxygenNote: 6.1.1 diff --git a/CVE_R/NAMESPACE b/CVE_R/NAMESPACE deleted file mode 100644 index eb5d1c7..0000000 --- a/CVE_R/NAMESPACE +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -S3method(plot,cve) -S3method(summary,cve) -export(cve) -export(cve.call) -export(cve.grid.search) -export(cve_linesearch) -export(cve_sgd) -export(cve_simple) -export(dataset) -export(elem.pairs) -export(estimate.bandwidth) -export(grad) -export(null) -export(rStiefl) -import(stats) -importFrom(graphics,lines) -importFrom(graphics,plot) -importFrom(graphics,points) -importFrom(stats,model.frame) -importFrom(stats,rbinom) -importFrom(stats,rnorm) diff --git a/CVE_R/R/CVE.R b/CVE_R/R/CVE.R deleted file mode 100644 index c0ec00a..0000000 --- a/CVE_R/R/CVE.R +++ /dev/null @@ -1,265 +0,0 @@ -#' Conditional Variance Estimator (CVE) -#' -#' Conditional Variance Estimator for Sufficient Dimension -#' Reduction -#' -#' TODO: And some details -#' -#' -#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -#' -#' @docType package -#' @author Loki -#' @useDynLib CVE, .registration = TRUE -"_PACKAGE" - -#' Implementation of the CVE method. -#' -#' Conditional Variance Estimator (CVE) is a novel sufficient dimension -#' reduction (SDR) method assuming a model -#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} -#' where B'X is a lower dimensional projection of the predictors. 
-#' -#' @param formula Formel for the regression model defining `X`, `Y`. -#' See: \code{\link{formula}}. -#' @param data data.frame holding data for formula. -#' @param method The different only differe in the used optimization. -#' All of them are Gradient based optimization on a Stiefel manifold. -#' \itemize{ -#' \item "simple" Simple reduction of stepsize. -#' \item "sgd" stocastic gradient decent. -#' \item TODO: further -#' } -#' @param ... Further parameters depending on the used method. -#' @examples -#' library(CVE) -#' -#' # sample dataset -#' ds <- dataset("M5") -#' -#' # call ´cve´ with default method (aka "simple") -#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) -#' # plot optimization history (loss via iteration) -#' plot(dr.simple, main = "CVE M5 simple") -#' -#' # call ´cve´ with method "linesearch" using ´data.frame´ as data. -#' data <- data.frame(Y = ds$Y, X = ds$X) -#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´. -#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) -#' plot(dr.linesearch, main = "CVE M5 linesearch") -#' -#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -#' -#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on -#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} -#' @import stats -#' @importFrom stats model.frame -#' @export -cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { - # check for type of `data` if supplied and set default - if (missing(data)) { - data <- environment(formula) - } else if (!is.data.frame(data)) { - stop("Parameter 'data' must be a 'data.frame' or missing.") - } - - # extract `X`, `Y` from `formula` with `data` - model <- stats::model.frame(formula, data) - X <- as.matrix(model[ ,-1L, drop = FALSE]) - Y <- as.double(model[ , 1L]) - - # pass extracted data on to [cve.call()] - dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...) 
- - # overwrite `call` property from [cve.call()] - dr$call <- match.call() - return(dr) -} - -#' @param nObs as described in the Paper. -#' @param X Data -#' @param Y Responces -#' @param nObs Like in the paper. -#' @param k guess for SDR dimension. -#' @param ... Method specific parameters. -#' @rdname cve -#' @export -cve.call <- function(X, Y, method = "simple", - nObs = sqrt(nrow(X)), h = NULL, - min.dim = 1L, max.dim = 10L, k = NULL, - tau = 1.0, tol = 1e-3, - epochs = 50L, attempts = 10L, - logger = NULL) { - - # parameter checking - if (!(is.matrix(X) && is.numeric(X))) { - stop("Parameter 'X' should be a numeric matrices.") - } - if (!is.numeric(Y)) { - stop("Parameter 'Y' must be numeric.") - } - if (is.matrix(Y) || !is.double(Y)) { - Y <- as.double(Y) - } - if (nrow(X) != length(Y)) { - stop("Rows of 'X' and 'Y' elements are not compatible.") - } - if (ncol(X) < 2) { - stop("'X' is one dimensional, no need for dimension reduction.") - } - - if (missing(k) || is.null(k)) { - min.dim <- as.integer(min.dim) - max.dim <- as.integer(min(max.dim, ncol(X) - 1L)) - } else { - min.dim <- as.integer(k) - max.dim <- as.integer(k) - } - if (min.dim > max.dim) { - stop("'min.dim' bigger 'max.dim'.") - } - if (max.dim >= ncol(X)) { - stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.") - } - - if (is.function(h)) { - estimate.bandwidth <- h - h <- NULL - } - - if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) { - stop("Initial step-width 'tau' must be positive number.") - } else { - tau <- as.double(tau) - } - if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) { - stop("Break condition tolerance 'tol' must be not negative number.") - } else { - tol <- as.double(tol) - } - - if (!is.numeric(epochs) || length(epochs) > 1L) { - stop("Parameter 'epochs' must be positive integer.") - } else if (!is.integer(epochs)) { - epochs <- as.integer(epochs) - } - if (epochs < 1L) { - stop("Parameter 'epochs' must be at least 1L.") - } - if (!is.numeric(attempts) || 
length(attempts) > 1L) { - stop("Parameter 'attempts' must be positive integer.") - } else if (!is.integer(attempts)) { - attempts <- as.integer(attempts) - } - if (attempts < 1L) { - stop("Parameter 'attempts' must be at least 1L.") - } - - if (is.function(logger)) { - loggerEnv <- environment(logger) - } else { - loggerEnv <- NULL - } - - # Call specified method. - method <- tolower(method) - call <- match.call() - dr <- list() - for (k in min.dim:max.dim) { - - if (missing(h) || is.null(h)) { - h <- estimate.bandwidth(X, k, nObs) - } else if (is.numeric(h) && h > 0.0) { - h <- as.double(h) - } else { - stop("Bandwidth 'h' must be positive numeric.") - } - - if (method == 'simple') { - dr.k <- .Call('cve_simple', PACKAGE = 'CVE', - X, Y, k, h, - tau, tol, - epochs, attempts, - logger, loggerEnv) - # dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) - # } else if (method == 'linesearch') { - # dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) - # } else if (method == 'rcg') { - # dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...) - # } else if (method == 'momentum') { - # dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...) - # } else if (method == 'rmsprob') { - # dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...) - # } else if (method == 'sgdrmsprob') { - # dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...) - # } else if (method == 'sgd') { - # dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) - } else { - stop('Got unknown method.') - } - dr.k$B <- null(dr.k$V) - dr.k$loss <- mean(dr.k$L) - dr.k$h <- h - dr.k$k <- k - class(dr.k) <- "cve.k" - dr[[k]] <- dr.k - } - - # augment result information - dr$method <- method - dr$call <- call - class(dr) <- "cve" - return(dr) -} - -#' Ploting helper for objects of class \code{cve}. -#' -#' @param x Object of class \code{cve} (result of [cve()]). -#' @param content Specifies what to plot: -#' \itemize{ -#' \item "history" Plots the loss history from stiefel optimization -#' (default). -#' \item ... TODO: add (if there are any) -#' } -#' @param ... 
Pass through parameters to [plot()] and [lines()] -#' -#' @usage ## S3 method for class 'cve' -#' plot(x, content = "history", ...) -#' @seealso see \code{\link{par}} for graphical parameters to pass through -#' as well as \code{\link{plot}} for standard plot utility. -#' @importFrom graphics plot lines points -#' @method plot cve -#' @export -plot.cve <- function(x, ...) { - L <- c() - k <- c() - for (dr.k in x) { - if (class(dr.k) == 'cve.k') { - k <- c(k, paste0(dr.k$k)) - L <- c(L, dr.k$L) - } - } - L <- matrix(L, ncol = length(k)) - boxplot(L, main = "Loss ...", - xlab = "SDR dimension k", - ylab = expression(L(V, X[i])), - names = k) -} - -#' Prints a summary of a \code{cve} result. -#' @param object Instance of 'cve' as return of \code{cve}. -#' @method summary cve -#' @export -summary.cve <- function(object, ...) { - cat('Summary of CVE result - Method: "', object$method, '"\n', - '\n', - 'Dataset size: ', nrow(object$X), '\n', - 'Data Dimension: ', ncol(object$X), '\n', - 'SDR Dimension: ', object$k, '\n', - 'loss: ', object$loss, '\n', - '\n', - 'Called via:\n', - ' ', - sep='') - print(object$call) -} diff --git a/CVE_R/R/cve_linesearch.R b/CVE_R/R/cve_linesearch.R deleted file mode 100644 index d2eb115..0000000 --- a/CVE_R/R/cve_linesearch.R +++ /dev/null @@ -1,169 +0,0 @@ -#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -#' conditions. -#' -#' @keywords internal -#' @export -cve_linesearch <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-3, - rho1 = 0.1, - rho2 = 0.9, - slack = 0, - epochs = 50L, - attempts = 10L, - max.linesearch.iter = 10L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) - p <- ncol(X) - q <- p - k - - # Save initial learning rate `tau`. 
- tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) | !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Matrix of vectorized indices. (vec(index) -> seq) - index <- matrix(seq(n * n), n, n) - lower <- index[lower.tri(index)] - upper <- t(index)[lower] - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Initial loss and gradient. - loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Set last loss (aka, loss after applying the step). - loss.last <- loss - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) - } - - ## Start optimization loop. - for (epoch in 1:epochs) { - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - - # Directional derivative of the loss at current position, given - # as `Tr(G^T \cdot A \cdot V)`. - loss.prime <- -0.5 * norm(A, type = 'F')^2 - - # Linesearch - tau.upper <- Inf - tau.lower <- 0 - tau <- tau.init - for (iter in 1:max.linesearch.iter) { - # Apply learning rate `tau`. - A.tau <- (tau / 2) * A - # Parallet transport (on Stiefl manifold) into direction of `G`. 
- inv <- solve(I_p + A.tau) - V.tau <- inv %*% ((I_p - A.tau) %*% V) - - # Loss at position after a step. - loss <- Inf # aka loss.tau - G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) - - # Armijo condition. - if (loss > loss.last + (rho1 * tau * loss.prime)) { - tau.upper <- tau - tau <- (tau.lower + tau.upper) / 2 - next() - } - - V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) - loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) - - # Wolfe condition. - if (loss.prime.tau < rho2 * loss.prime) { - tau.lower <- tau - if (tau.upper == Inf) { - tau <- 2 * tau.lower - } else { - tau <- (tau.lower + tau.upper) / 2 - } - } else { - break() - } - } - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Check break condition (epoch check to skip ignored gradient calc). - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol | epoch >= epochs) { - # take last step and stop optimization. - V <- V.tau - # Final call to the logger before stopping optimization - if (is.function(logger)) { - G <- G.tau - logger(environment()) - } - break() - } - - # Perform the step and remember previous loss. - V <- V.tau - loss.last <- loss - G <- G.tau - - # Log after taking current step. - if (is.function(logger)) { - logger(environment()) - } - - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_momentum.R b/CVE_R/R/cve_momentum.R deleted file mode 100644 index c87df10..0000000 --- a/CVE_R/R/cve_momentum.R +++ /dev/null @@ -1,139 +0,0 @@ -#' Implementation of the CVE method as a Riemann Conjugated Gradient method. 
-#' -#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector -#' Transport for Optimization on the Stiefel Manifold -#' @keywords internal -#' @export -cve_momentum <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-4, - rho = 0.1, # Momentum update. - slack = 0, - epochs = 50L, - attempts = 10L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. - p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Reset learning rate `tau`. - tau <- tau.init - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Initial loss and gradient. 
- loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Set last loss (aka, loss after applying the step). - loss.last <- loss - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) - } - - M <- matrix(0, p, q) - ## Start optimization loop. - for (epoch in 1:epochs) { - # Apply learning rate `tau`. - A <- projTangentStiefl(V, G) - # Momentum update. - M <- A + rho * projTangentStiefl(V, M) - # Parallet transport (on Stiefl manifold) into direction of `G`. - V.tau <- retractStiefl(V - tau * M) - - # Loss at position after a step. - loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) - - # Check if step is appropriate, iff not reduce learning rate. - if ((loss - loss.last) > slack * loss.last) { - tau <- tau / 2 - next() # Keep position and try with smaller `tau`. - } - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Check break condition (epoch check to skip ignored gradient calc). - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol || epoch >= epochs) { - # take last step and stop optimization. - V <- V.tau - # Call logger last time befor stoping. - if (is.function(logger)) { - logger(environment()) - } - break() - } - - # Perform the step and remember previous loss. - V <- V.tau - loss.last <- loss - - # Call logger after taking a step. - if (is.function(logger)) { - logger(environment()) - } - - # Compute gradient at new position. 
- G <- grad(X, Y, V, h, persistent = TRUE) - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_rcg.R b/CVE_R/R/cve_rcg.R deleted file mode 100644 index 0a459f5..0000000 --- a/CVE_R/R/cve_rcg.R +++ /dev/null @@ -1,179 +0,0 @@ -#' Implementation of the CVE method as a Riemann Conjugated Gradient method. -#' -#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector -#' Transport for Optimization on the Stiefel Manifold -#' @keywords internal -#' @export -cve_rcg <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-4, - rho = 1e-4, # For Armijo condition. - slack = 0, - epochs = 50L, - attempts = 10L, - max.linesearch.iter = 20L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. - p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. 
- X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Reset learning rate `tau`. - tau <- tau.init - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Initial loss and gradient. - loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Set last loss (aka, loss after applying the step). - loss.last <- loss - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - A.last <- A - - W <- -A - Z <- W %*% V - - # Compute directional derivative. - loss.prime <- sum(G * Z) # Tr(G^T Z) - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) - } - - ## Start optimization loop. - for (epoch in 1:epochs) { - # New directional derivative. - loss.prime <- sum(G * Z) - - # Reset `tau` for step-size selection. - tau <- tau.init - for (iter in 1:max.linesearch.iter) { - V.tau <- retractStiefl(V + tau * Z) - # Loss at position after a step. - loss <- grad(X, Y, V.tau, h, - loss.only = TRUE, persistent = TRUE) - # Check Armijo condition. - if (loss <= loss.last + (rho * tau * loss.prime)) { - break() # Iff fulfilled stop linesearch. - } - # Reduce step-size and continue linesearch. - tau <- tau / 2 - } - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Perform step with found step-size - V <- V.tau - loss.last <- loss - - # Call logger. - if (is.function(logger)) { - logger(environment()) - } - - # Check break condition. - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol) { - break() - } - - # Compute Gradient at new position. 
- G <- grad(X, Y, V, h, persistent = TRUE) - # Store last `A` for `beta` computation. - A.last <- A - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - - # Check 2. break condition. - if (norm(A, type = 'F') < tol) { - break() - } - - # New directional derivative. - loss.prime <- sum(G * Z) - - # Reset beta if needed. - if (loss.prime < 0) { - # Compute `beta` as described in paper. - beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2 - beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2 - if (beta.PR < -beta.FR) { - beta <- -beta.FR - } else if (abs(beta.PR) < beta.FR) { - beta <- beta.PR - } else if (beta.PR > beta.FR) { - beta <- beta.FR - } else { - beta <- 0 - } - } else { - beta <- 0 - } - - # Update direction. - W <- -A + beta * W - Z <- W %*% V - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_rmsprob.R b/CVE_R/R/cve_rmsprob.R deleted file mode 100644 index 10b0a7c..0000000 --- a/CVE_R/R/cve_rmsprob.R +++ /dev/null @@ -1,121 +0,0 @@ -#' Implementation of the CVE method as a Riemann Conjugated Gradient method. -#' -#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector -#' Transport for Optimization on the Stiefel Manifold -#' @keywords internal -#' @export -cve_rmsprob <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 0.1, - tol = 1e-4, - rho = 0.1, # Momentum update. - slack = 0, - epochs = 50L, - attempts = 10L, - epsilon = 1e-7, - max.linesearch.iter = 20L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. 
- p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - epoch <- 0 # Set epoch count to 0 (only relevant for logging). - error <- NA - logger(environment()) - } - - M <- matrix(0, p, q) - ## Start optimization loop. - for (epoch in 1:epochs) { - # Compute gradient and loss at current position. - loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Projectd Gradient. - A <- projTangentStiefl(V, G) - # Projected element squared gradient. - Asq <- projTangentStiefl(V, G * G) - # Momentum update. - M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M) - # Parallet transport (on Stiefl manifold) into direction of `G`. 
- V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon)) - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Perform step. - V <- V.tau - - # Call logger after taking a step. - if (is.function(logger)) { - # Set tau to an step size estimate (only for logging) - tau <- tau.init / mean(sqrt(abs(M)) + epsilon) - logger(environment()) - } - - # Check break condition. - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol) { - break() - } - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_sgd.R b/CVE_R/R/cve_sgd.R deleted file mode 100644 index 3fee47f..0000000 --- a/CVE_R/R/cve_sgd.R +++ /dev/null @@ -1,129 +0,0 @@ -#' Simple implementation of the CVE method. 'Simple' means that this method is -#' a classic GD method unsing no further tricks. -#' -#' @keywords internal -#' @export -cve_sgd <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 0.01, - tol = 1e-3, - epochs = 50L, - batch.size = 16L, - attempts = 10L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. - p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. 
- pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - # Init a list of data indices (shuffled for batching). - indices <- seq(n) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Reset learning rate `tau`. - tau <- tau.init - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - # Keep track of last `V` for computing error after an epoch. - V.last <- V - - if (is.function(logger)) { - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - error <- NA - epoch <- 0 - logger(environment()) - } - - # Repeat `epochs` times - for (epoch in 1:epochs) { - # Shuffle batches - batch.shuffle <- sample(indices) - - # Make a step for each batch. - for (batch.start in seq(1, n, batch.size)) { - # Select batch data indices. - batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle)) - batch <- batch.shuffle[batch.start:batch.end] - - # Compute batch gradient. - loss <- NULL - G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) - - # Cayley transform matrix. - A <- (G %*% t(V)) - (V %*% t(G)) - - # Apply learning rate `tau`. - A.tau <- tau * A - # Parallet transport (on Stiefl manifold) into direction of `G`. - V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) - } - # And the error for the history. - error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") - V.last <- V - - if (is.function(logger)) { - # Compute loss at end of epoch for logging. 
- loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - logger(environment()) - } - - # Check break condition. - if (error < tol) { - break() - } - } - # Compute actual loss after finishing for comparing multiple attempts. - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - - # After each attempt, check if last attempt reached a better result. - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_sgdrmsprob.R b/CVE_R/R/cve_sgdrmsprob.R deleted file mode 100644 index 98ee20f..0000000 --- a/CVE_R/R/cve_sgdrmsprob.R +++ /dev/null @@ -1,133 +0,0 @@ -#' Simple implementation of the CVE method. 'Simple' means that this method is -#' a classic GD method unsing no further tricks. -#' -#' @keywords internal -#' @export -cve_sgdrmsprob <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 0.1, - tol = 1e-4, - rho = 0.1, - epochs = 50L, - batch.size = 16L, - attempts = 10L, - epsilon = 1e-7, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. - p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. 
- pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - # Init a list of data indices (shuffled for batching). - indices <- seq(n) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Reset learning rate `tau`. - tau <- tau.init - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - # Keep track of last `V` for computing error after an epoch. - V.last <- V - - if (is.function(logger)) { - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - error <- NA - epoch <- 0 - logger(environment()) - } - - M <- matrix(0, p, q) - # Repeat `epochs` times - for (epoch in 1:epochs) { - # Shuffle batches - batch.shuffle <- sample(indices) - - # Make a step for each batch. - for (batch.start in seq(1, n, batch.size)) { - # Select batch data indices. - batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle)) - batch <- batch.shuffle[batch.start:batch.end] - - # Compute batch gradient. - loss <- NULL - G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) - - # Projectd Gradient. - A <- projTangentStiefl(V, G) - # Projected element squared gradient. - Asq <- projTangentStiefl(V, G * G) - # Momentum update. - M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M) - # Parallet transport (on Stiefl manifold) into direction of `G`. - V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon)) - } - # And the error for the history. 
- error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") - V.last <- V - - if (is.function(logger)) { - # Compute loss at end of epoch for logging. - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - logger(environment()) - } - - # Check break condition. - if (error < tol) { - break() - } - } - # Compute actual loss after finishing for comparing multiple attempts. - loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) - - # After each attempt, check if last attempt reached a better result. - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/cve_simple.R b/CVE_R/R/cve_simple.R deleted file mode 100644 index 6526a70..0000000 --- a/CVE_R/R/cve_simple.R +++ /dev/null @@ -1,139 +0,0 @@ -#' Simple implementation of the CVE method. 'Simple' means that this method is -#' a classic GD method unsing no further tricks. -#' -#' @keywords internal -#' @export -cve_simple <- function(X, Y, k, - nObs = sqrt(nrow(X)), - h = NULL, - tau = 1.0, - tol = 1e-3, - slack = 0, - epochs = 50L, - attempts = 10L, - logger = NULL -) { - # Set `grad` functions environment to enable if to find this environments - # local variabels, needed to enable the manipulation of this local variables - # from within `grad`. - environment(grad) <- environment() - - # Get dimensions. - n <- nrow(X) # Number of samples. - p <- ncol(X) # Data dimensions - q <- p - k # Complement dimension of the SDR space. - - # Save initial learning rate `tau`. - tau.init <- tau - # Addapt tolearance for break condition. - tol <- sqrt(2 * q) * tol - - # Estaimate bandwidth if not given. - if (missing(h) || !is.numeric(h)) { - h <- estimate.bandwidth(X, k, nObs) - } - - # Compute persistent data. - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. 
- pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - # Identity matrix. - I_p <- diag(1, p) - - # Init tracking of current best (according multiple attempts). - V.best <- NULL - loss.best <- Inf - - # Start loop for multiple attempts. - for (attempt in 1:attempts) { - # Reset learning rate `tau`. - tau <- tau.init - - # Sample a `(p, q)` dimensional matrix from the stiefel manifold as - # optimization start value. - V <- rStiefl(p, q) - - # Initial loss and gradient. - loss <- Inf - G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) - # Set last loss (aka, loss after applying the step). - loss.last <- loss - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - - # Call logger with initial values before starting optimization. - if (is.function(logger)) { - logger(0L, attempt, loss, V, tau) - } - - ## Start optimization loop. - for (epoch in 1:epochs) { - # Apply learning rate `tau`. - A.tau <- tau * A - # Parallet transport (on Stiefl manifold) into direction of `G`. - V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) - - # Loss at position after a step. - loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) - - # Check if step is appropriate, iff not reduce learning rate. - if ((loss - loss.last) > slack * loss.last) { - tau <- tau / 2 - next() # Keep position and try with smaller `tau`. - } - - # Compute error. - error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") - - # Check break condition (epoch check to skip ignored gradient calc). - # Note: the devision by `sqrt(2 * k)` is included in `tol`. - if (error < tol || epoch >= epochs) { - # take last step and stop optimization. 
- V <- V.tau - # Call logger last time befor stoping. - if (is.function(logger)) { - logger(epoch, attempt, loss, V, tau) - } - break() - } - - # Perform the step and remember previous loss. - V <- V.tau - loss.last <- loss - - # Call logger after taking a step. - if (is.function(logger)) { - logger(epoch, attempt, loss, V, tau) - } - - # Compute gradient at new position. - G <- grad(X, Y, V, h, persistent = TRUE) - - # Cayley transform matrix `A` - A <- (G %*% t(V)) - (V %*% t(G)) - } - - # Check if current attempt improved previous ones - if (loss < loss.best) { - loss.best <- loss - V.best <- V - } - - } - - return(list( - loss = loss.best, - V = V.best, - B = null(V.best), - h = h - )) -} diff --git a/CVE_R/R/datasets.R b/CVE_R/R/datasets.R deleted file mode 100644 index 70e08b5..0000000 --- a/CVE_R/R/datasets.R +++ /dev/null @@ -1,109 +0,0 @@ -#' Generates test datasets. -#' -#' Provides sample datasets. There are 5 different datasets named -#' M1, M2, M3, M4 and M5 described in the paper references below. -#' The general model is given by: -#' \deqn{Y ~ g(B'X) + \epsilon} -#' -#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} -#' @param n nr samples -#' @param p Dim. of random variable \code{X}. -#' @param p.mix Only for \code{"M4"}, see: below. -#' @param lambda Only for \code{"M4"}, see: below. -#' -#' @return List with elements -#' \itemize{ -#' \item{X}{data} -#' \item{Y}{response} -#' \item{B}{Used dim-reduction matrix} -#' \item{name}{Name of the dataset (name parameter)} -#' } -#' -#' @section M1: -#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace -#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. 
-#' The link function \eqn{g} is given as -#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} -#' @section M2: -#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. -#' The link function \eqn{g} is given as -#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} -#' @section M3: -#' TODO: -#' @section M4: -#' TODO: -#' @section M5: -#' TODO: -#' -#' @import stats -#' @importFrom stats rnorm rbinom -#' @export -dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { - # validate parameters - stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) - - # set default values if not supplied - if (missing(n)) { - n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 - } - if (missing(B)) { - p <- 12 - if (name == "M1") { - B <- cbind( - c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), - c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) - ) / sqrt(6) - } else if (name == "M2") { - B <- cbind( - c(c(1, 0), rep(0, 10)), - c(c(0, 1), rep(0, 10)) - ) - } else { - B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) - } - } else { - p <- dim(B)[1] - # validate col. nr to match dataset `k = dim(B)[2]` - stopifnot( - name %in% c("M1", "M2") && dim(B)[2] == 2, - name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 - ) - } - - # set link function `g` for model `Y ~ g(B'X) + epsilon` - if (name == "M1") { - g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } - } else if (name == "M2") { - g <- function(BX) { BX[1] * BX[2]^2 } - } else if (name %in% c("M3", "M4")) { - g <- function(BX) { cos(BX[1]) } - } else { # name == "M5" - g <- function(BX) { 2 * log(abs(BX[1]) + 1) } - } - - # compute X - if (name != "M4") { - # compute root of the covariance matrix according the dataset - if (name %in% c("M1", "M3")) { - # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with - # `\Sigma_{i, j} = 0.5^{|i - j|}`. 
- Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p) - # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X` - Sigma.root <- chol(Sigma) - } else { # name %in% c("M2", "M5") - Sigma.root <- diag(rep(1, p)) # d-dim identity - } - # data `X` as multivariate random normal variable with - # variance matrix `Sigma`. - X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root - } else { # name == "M4" - X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1))) - } - - # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)` - Y <- apply(X, 1, function(X_i) { - g(t(B) %*% X_i) + rnorm(1, 0, 0.5) - }) - - return(list(X = X, Y = Y, B = B, name = name)) -} diff --git a/CVE_R/R/estimateBandwidth.R b/CVE_R/R/estimateBandwidth.R deleted file mode 100644 index 8e7edc7..0000000 --- a/CVE_R/R/estimateBandwidth.R +++ /dev/null @@ -1,27 +0,0 @@ -#' Estimated bandwidth for CVE. -#' -#' Estimates a propper bandwidth \code{h} according -#' \deqn{% -#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% -#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} -#' -#' @param X data matrix of dimension (n x p) with n data points X_i of dimension -#' q. Therefor each row represents a datapoint of dimension p. -#' @param k Guess for rank(B). -#' @param nObs Ether numeric of a function. If specified as numeric value -#' its used in the computation of the bandwidth directly. If its a function -#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not -#' supplied at all is to use \code{nObs <- nrow(x)^0.5}. 
-#' -#' @seealso [\code{\link{qchisq}}] -#' @export -estimate.bandwidth <- function(X, k, nObs) { - n <- nrow(X) - p <- ncol(X) - - X_centered <- scale(X, center=TRUE, scale=FALSE) - Sigma <- (1 / n) * t(X_centered) %*% X_centered - - quantil <- qchisq((nObs - 1) / (n - 1), k) - return(2 * quantil * sum(diag(Sigma)) / p) -} diff --git a/CVE_R/R/gradient.R b/CVE_R/R/gradient.R deleted file mode 100644 index f5cb257..0000000 --- a/CVE_R/R/gradient.R +++ /dev/null @@ -1,82 +0,0 @@ -#' Compute get gradient of `L(V)` given a dataset `X`. -#' -#' @param X Data matrix. -#' @param Y Responce. -#' @param V Position to compute the gradient at, aka point on Stiefl manifold. -#' @param h Bandwidth -#' @param loss.out Iff \code{TRUE} loss will be written to parent environment. -#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single -#' value loss is returned and \code{envir} is ignored. -#' @param persistent Determines if data indices and dependent calculations shall -#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only -#' intended for internal usage by carefully aligned functions! -#' @keywords internal -#' @export -grad <- function(X, Y, V, h, - loss.out = FALSE, - loss.only = FALSE, - persistent = FALSE) { - # Get number of samples and dimension. - n <- nrow(X) - p <- ncol(X) - - if (!persistent) { - # Compute lookup indexes for symmetrie, lower/upper - # triangular parts and vectorization. - pair.index <- elem.pairs(seq(n)) - i <- pair.index[1, ] # `i` indices of `(i, j)` pairs - j <- pair.index[2, ] # `j` indices of `(i, j)` pairs - # Index of vectorized matrix, for lower and upper triangular part. - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - - # Create all pairewise differences of rows of `X`. - X_diff <- X[i, , drop = F] - X[j, , drop = F] - } - - # Projection matrix onto `span(V)` - Q <- diag(1, p) - tcrossprod(V, V) - - # Vectorized distance matrix `D`. 
- vecD <- colSums(tcrossprod(Q, X_diff)^2) - - # Create Kernel matrix (aka. apply kernel to distances) - K <- matrix(1, n, n) # `exp(0) == 1` - K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part - K[upper] <- t(K)[upper] # Mirror lower tri. to upper - - # Weighted `Y` momentums - colSumsK <- colSums(K) - y1 <- (K %*% Y) / colSumsK - y2 <- (K %*% Y^2) / colSumsK - - # Per example loss `L(V, X_i)` - L <- y2 - y1^2 - if (loss.only) { - return(mean(L)) - } - if (loss.out) { - loss <<- mean(L) - } - - # Compute scaling vector `vecS` for `X_diff`. - tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2 - tmp <- as.vector(L) - tmp - tmp <- tmp * K / colSumsK - vecS <- (tmp + t(tmp))[lower] * vecD - - # The gradient. - # 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`, - # 2. `(X_diff %*% V) * vecS` is first a marix matrix mult. and then using - # recycling to scale each row with the values of `vecS`. - # Note that `vecS` is a vector and that `R` uses column-major ordering - # of matrices. - # (See: notes for more details) - # TODO: Depending on n, p, q decide which version to take (for current - # datasets "inner" is faster, see: notes). - # inner = crossprod(X_diff, X_diff * vecS) %*% V, - # outer = crossprod(X_diff, (X_diff %*% V) * vecS) - G <- crossprod(X_diff, X_diff * vecS) %*% V - G <- (-2 / (n * h^2)) * G - return(G) -} diff --git a/CVE_R/R/gridSearch.R b/CVE_R/R/gridSearch.R deleted file mode 100644 index 2b2d5e7..0000000 --- a/CVE_R/R/gridSearch.R +++ /dev/null @@ -1,43 +0,0 @@ - -#' Performs a grid search for parameters over a parameter grid. 
-#' @examples -#' args <- list( -#' h = c(0.05, 0.1, 0.2), -#' method = c("simple", "sgd"), -#' tau = c(0.5, 0.1, 0.01) -#' ) -#' cve.grid.search(args) -#' @export -cve.grid.search <- function(X, Y, k, args) { - - args$stringsAsFactors = FALSE - args$KEEP.OUT.ATTRS = FALSE - grid <- do.call(expand.grid, args) - grid.length <- length(grid[[1]]) - - print(grid) - - for (i in 1:grid.length) { - arguments <- as.list(grid[i, ]) - # Set required arguments - arguments$X <- X - arguments$Y <- Y - arguments$k <- k - # print(arguments) - dr <- do.call(cve.call, arguments) - print(dr$loss) - } -} - -# ds <- dataset() -# X <- ds$X -# Y <- ds$Y -# (k <- ncol(ds$B)) -# args <- list( -# h = c(0.05, 0.1, 0.2), -# method = c("simple", "sgd"), -# tau = c(0.5, 0.1, 0.01), -# attempts = c(1L) -# ) - -# cve.grid.search(X, Y, k, args) \ No newline at end of file diff --git a/CVE_R/R/util.R b/CVE_R/R/util.R deleted file mode 100644 index d9d9efe..0000000 --- a/CVE_R/R/util.R +++ /dev/null @@ -1,82 +0,0 @@ -#' Samples uniform from the Stiefl Manifold. -#' -#' @param p row dim. -#' @param q col dim. -#' @return `(p, q)` semi-orthogonal matrix -#' @examples -#' V <- rStiefel(6, 4) -#' @export -rStiefl <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) -} - -#' Retraction to the manifold. -#' -#' @param A matrix. -#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold. -#' @keywords internal -#' @export -retractStiefl <- function(A) { - return(qr.Q(qr(A))) -} - -#' Skew-Symmetric matrix computed from `A` as -#' \eqn{1/2 (A - A^T)}. -#' @param A Matrix of dim `(p, q)` -#' @return Skew-Symmetric matrix of dim `(p, p)`. -#' @keywords internal -#' @export -skew <- function(A) { - 0.5 * (A - t(A)) -} - -#' Symmetric matrix computed from `A` as -#' \eqn{1/2 (A + A^T)}. -#' @param A Matrix of dim `(p, q)` -#' @return Symmetric matrix of dim `(p, p)`. 
-#' @keywords internal -#' @export -sym <- function(A) { - 0.5 * (A + t(A)) -} - -#' Orthogonal Projection onto the tangent space of the stiefl manifold. -#' -#' @param V Point on the stiefl manifold. -#' @param G matrix to be projected onto the tangent space at `V`. -#' @return `(p, q)` matrix as element of the tangent space at `V`. -#' @keywords internal -#' @export -projTangentStiefl <- function(V, G) { - Q <- diag(1, nrow(V)) - V %*% t(V) - return(Q %*% G + V %*% skew(t(V) %*% G)) -} - -#' Null space basis of given matrix `V` -#' -#' @param V `(p, q)` matrix -#' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. -#' @keywords internal -#' @export -null <- function(V) { - tmp <- qr(V) - set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) - return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE]) -} - -#' Creates a (numeric) matrix where each column contains -#' an element to element matching. -#' @param elements numeric vector of elements to match -#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. -#' @keywords internal -#' @examples -#' elem.pairs(seq.int(2, 5)) -#' @export -elem.pairs <- function(elements) { - # Number of elements to match. - n <- length(elements) - # Create all combinations. - pairs <- rbind(rep(elements, each=n), rep(elements, n)) - # Select unique combinations without self interaction. - return(pairs[, pairs[1, ] < pairs[2, ]]) -} diff --git a/CVE_R/demo/00Index b/CVE_R/demo/00Index deleted file mode 100644 index 46dac9a..0000000 --- a/CVE_R/demo/00Index +++ /dev/null @@ -1,2 +0,0 @@ -runtime_test Runtime comparison of CVE against MAVE for M1 - M5 datasets. -logging Example of a logger function for cve algorithm analysis. diff --git a/CVE_R/demo/logging.R b/CVE_R/demo/logging.R deleted file mode 100644 index b5e9265..0000000 --- a/CVE_R/demo/logging.R +++ /dev/null @@ -1,43 +0,0 @@ -library(CVEpureR) - -# Setup histories. 
-(epochs <- 50) -(attempts <- 10) -loss.history <- matrix(NA, epochs + 1, attempts) -error.history <- matrix(NA, epochs + 1, attempts) -tau.history <- matrix(NA, epochs + 1, attempts) -true.error.history <- matrix(NA, epochs + 1, attempts) - -# Create a dataset -ds <- dataset("M1") -X <- ds$X -Y <- ds$Y -B <- ds$B # the true `B` -(k <- ncol(ds$B)) - -# True projection matrix. -P <- B %*% solve(t(B) %*% B) %*% t(B) -# Define the logger for the `cve()` method. -logger <- function(env) { - # Note the `<<-` assignement! - loss.history[env$epoch + 1, env$attempt] <<- env$loss - error.history[env$epoch + 1, env$attempt] <<- env$error - tau.history[env$epoch + 1, env$attempt] <<- env$tau - # Compute true error by comparing to the true `B` - B.est <- null(env$V) # Function provided by CVE - P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est) - true.error <- norm(P - P.est, 'F') / sqrt(2 * k) - true.error.history[env$epoch + 1, env$attempt] <<- true.error -} -# Performe SDR for ONE `k`. -dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts) -# Plot history's -par(mfrow = c(2, 2)) -matplot(loss.history, type = 'l', log = 'y', xlab = 'iter', - main = 'loss', ylab = expression(L(V[iter]))) -matplot(error.history, type = 'l', log = 'y', xlab = 'iter', - main = 'error', ylab = 'error') -matplot(tau.history, type = 'l', log = 'y', xlab = 'iter', - main = 'tau', ylab = 'tau') -matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter', - main = 'true error', ylab = 'true error') diff --git a/CVE_R/demo/runtime_test.R b/CVE_R/demo/runtime_test.R deleted file mode 100644 index 9e02407..0000000 --- a/CVE_R/demo/runtime_test.R +++ /dev/null @@ -1,89 +0,0 @@ -# Usage: -# ~$ Rscript runtime_test.R - -library(CVEpureR) # load CVE - -#' Writes progress to console. 
-tell.user <- function(name, start.time, i, length) { - cat("\rRunning Test (", name, "):", - i, "/", length, - " - elapsed:", format(Sys.time() - start.time), "\033[K") -} -subspace.dist <- function(B1, B2){ - P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1) - P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2) - return(norm(P1 - P2, type = 'F')) -} - -# Number of simulations -SIM.NR <- 50 -# maximal number of iterations in curvilinear search algorithm -MAXIT <- 50 -# number of arbitrary starting values for curvilinear optimization -ATTEMPTS <- 10 -# set names of datasets -dataset.names <- c("M1", "M2", "M3", "M4", "M5") -# Set used CVE method -methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch") - -# Setup error and time tracking variables -error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names)) -time <- matrix(NA, SIM.NR, ncol(error)) -colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0) -colnames(time) <- colnames(error) - -# only for telling user (to stdout) -count <- 0 -start.time <- Sys.time() -# Start simulation loop. -for (sim in 1:SIM.NR) { - # Repeat for each dataset. 
- for (name in dataset.names) { - count <- count + 1 - tell.user(name, start.time, count, SIM.NR * length(dataset.names)) - - # Create a new dataset - ds <- dataset(name) - # Prepare X, Y and combine to data matrix - Y <- ds$Y - X <- ds$X - data <- cbind(Y, X) - # get dimensions - dim <- ncol(X) - truedim <- ncol(ds$B) - - for (method in methods) { - dr.time <- system.time( - dr <- cve.call(X, Y, - method = method, - k = truedim, - attempts = ATTEMPTS - ) - ) - dr <- dr[[truedim]] - - key <- paste0(name, '-', method) - error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim) - time[sim, key] <- dr.time["elapsed"] - } - } -} - -cat("\n\n## Time [sec] Means:\n") -print(colMeans(time)) -cat("\n## Error Means:\n") -print(colMeans(error)) - -at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods)) -boxplot(error, - main = paste0("Error (Nr of simulations ", SIM.NR, ")"), - ylab = "Error", - las = 2, - at = at -) -boxplot(time, - main = paste0("Time (Nr of simulations ", SIM.NR, ")"), - ylab = "Time [sec]", - las = 2, - at = at -) diff --git a/CVE_R/inst/doc/CVE_paper.pdf b/CVE_R/inst/doc/CVE_paper.pdf deleted file mode 100755 index e870293..0000000 Binary files a/CVE_R/inst/doc/CVE_paper.pdf and /dev/null differ diff --git a/CVE_R/man/CVEpureR-package.Rd b/CVE_R/man/CVEpureR-package.Rd deleted file mode 100644 index 1cc78bf..0000000 --- a/CVE_R/man/CVEpureR-package.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R -\docType{package} -\name{CVEpureR-package} -\alias{CVEpureR} -\alias{CVEpureR-package} -\title{Conditional Variance Estimator (CVE)} -\description{ -Conditional Variance Estimator for Sufficient Dimension - Reduction -} -\details{ -TODO: And some details -} -\references{ -Fertl Likas, Bura Efstathia. 
Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -} -\author{ -Loki -} diff --git a/CVE_R/man/cve.Rd b/CVE_R/man/cve.Rd deleted file mode 100644 index 07d86a2..0000000 --- a/CVE_R/man/cve.Rd +++ /dev/null @@ -1,71 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R -\name{cve} -\alias{cve} -\alias{cve.call} -\title{Implementation of the CVE method.} -\usage{ -cve(formula, data, method = "simple", max.dim = 10, ...) - -cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1, - max.dim = 10, k, ...) -} -\arguments{ -\item{formula}{Formel for the regression model defining `X`, `Y`. -See: \code{\link{formula}}.} - -\item{data}{data.frame holding data for formula.} - -\item{method}{The different only differe in the used optimization. - All of them are Gradient based optimization on a Stiefel manifold. -\itemize{ - \item "simple" Simple reduction of stepsize. - \item "sgd" stocastic gradient decent. - \item TODO: further -}} - -\item{...}{Further parameters depending on the used method.} - -\item{X}{Data} - -\item{Y}{Responces} - -\item{nObs}{as described in the Paper.} - -\item{k}{guess for SDR dimension.} - -\item{nObs}{Like in the paper.} - -\item{...}{Method specific parameters.} -} -\description{ -Conditional Variance Estimator (CVE) is a novel sufficient dimension -reduction (SDR) method assuming a model -\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} -where B'X is a lower dimensional projection of the predictors. -} -\examples{ -library(CVE) - -# sample dataset -ds <- dataset("M5") - -# call ´cve´ with default method (aka "simple") -dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) -# plot optimization history (loss via iteration) -plot(dr.simple, main = "CVE M5 simple") - -# call ´cve´ with method "linesearch" using ´data.frame´ as data. -data <- data.frame(Y = ds$Y, X = ds$X) -# Note: ´Y, X´ are NOT defined, they are extracted from ´data´. 
-dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) -plot(dr.linesearch, main = "CVE M5 linesearch") - -} -\references{ -Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -} -\seealso{ -\code{\link{formula}}. For a complete parameters list (dependent on - the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} -} diff --git a/CVE_R/man/cve.grid.search.Rd b/CVE_R/man/cve.grid.search.Rd deleted file mode 100644 index 2e7ad79..0000000 --- a/CVE_R/man/cve.grid.search.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gridSearch.R -\name{cve.grid.search} -\alias{cve.grid.search} -\title{Performs a grid search for parameters over a parameter grid.} -\usage{ -cve.grid.search(X, Y, k, args) -} -\description{ -Performs a grid search for parameters over a parameter grid. -} -\examples{ -args <- list( - h = c(0.05, 0.1, 0.2), - method = c("simple", "sgd"), - tau = c(0.5, 0.1, 0.01) -) -cve.grid.search(args) -} diff --git a/CVE_R/man/cve_linesearch.Rd b/CVE_R/man/cve_linesearch.Rd deleted file mode 100644 index 0413346..0000000 --- a/CVE_R/man/cve_linesearch.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_linesearch.R -\name{cve_linesearch} -\alias{cve_linesearch} -\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -conditions.} -\usage{ -cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, - tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, - attempts = 10L, max.linesearch.iter = 10L, logger = NULL) -} -\description{ -Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe -conditions. 
-} -\keyword{internal} diff --git a/CVE_R/man/cve_sgd.Rd b/CVE_R/man/cve_sgd.Rd deleted file mode 100644 index fc00f7d..0000000 --- a/CVE_R/man/cve_sgd.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_sgd.R -\name{cve_sgd} -\alias{cve_sgd} -\title{Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks.} -\usage{ -cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01, - tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L, - logger = NULL) -} -\description{ -Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks. -} -\keyword{internal} diff --git a/CVE_R/man/cve_simple.Rd b/CVE_R/man/cve_simple.Rd deleted file mode 100644 index 67d185f..0000000 --- a/CVE_R/man/cve_simple.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cve_simple.R -\name{cve_simple} -\alias{cve_simple} -\title{Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks.} -\usage{ -cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, - tol = 0.001, slack = 0, epochs = 50L, attempts = 10L, - logger = NULL) -} -\description{ -Simple implementation of the CVE method. 'Simple' means that this method is -a classic GD method unsing no further tricks. 
-} -\keyword{internal} diff --git a/CVE_R/man/dataset.Rd b/CVE_R/man/dataset.Rd deleted file mode 100644 index 8e65402..0000000 --- a/CVE_R/man/dataset.Rd +++ /dev/null @@ -1,64 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/datasets.R -\name{dataset} -\alias{dataset} -\title{Generates test datasets.} -\usage{ -dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) -} -\arguments{ -\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} - -\item{n}{nr samples} - -\item{p.mix}{Only for \code{"M4"}, see: below.} - -\item{lambda}{Only for \code{"M4"}, see: below.} - -\item{p}{Dim. of random variable \code{X}.} -} -\value{ -List with elements -\itemize{ - \item{X}{data} - \item{Y}{response} - \item{B}{Used dim-reduction matrix} - \item{name}{Name of the dataset (name parameter)} -} -} -\description{ -Provides sample datasets. There are 5 different datasets named -M1, M2, M3, M4 and M5 described in the paper references below. -The general model is given by: -\deqn{Y ~ g(B'X) + \epsilon} -} -\section{M1}{ - -The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace -dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. -The link function \eqn{g} is given as -\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} -} - -\section{M2}{ - -\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. 
-The link function \eqn{g} is given as -\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} -} - -\section{M3}{ - -TODO: -} - -\section{M4}{ - -TODO: -} - -\section{M5}{ - -TODO: -} - diff --git a/CVE_R/man/elem.pairs.Rd b/CVE_R/man/elem.pairs.Rd deleted file mode 100644 index 2d36c5d..0000000 --- a/CVE_R/man/elem.pairs.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{elem.pairs} -\alias{elem.pairs} -\title{Creates a (numeric) matrix where each column contains -an element to element matching.} -\usage{ -elem.pairs(elements) -} -\arguments{ -\item{elements}{numeric vector of elements to match} -} -\value{ -matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. -} -\description{ -Creates a (numeric) matrix where each column contains -an element to element matching. -} -\examples{ - elem.pairs(seq.int(2, 5)) -} -\keyword{internal} diff --git a/CVE_R/man/estimate.bandwidth.Rd b/CVE_R/man/estimate.bandwidth.Rd deleted file mode 100644 index 4b2ae94..0000000 --- a/CVE_R/man/estimate.bandwidth.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/estimateBandwidth.R -\name{estimate.bandwidth} -\alias{estimate.bandwidth} -\title{Estimated bandwidth for CVE.} -\usage{ -estimate.bandwidth(X, k, nObs) -} -\arguments{ -\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension -q. Therefor each row represents a datapoint of dimension p.} - -\item{k}{Guess for rank(B).} - -\item{nObs}{Ether numeric of a function. If specified as numeric value -its used in the computation of the bandwidth directly. If its a function -`nObs` is evaluated as \code{nObs(nrow(x))}. 
The default behaviou if not -supplied at all is to use \code{nObs <- nrow(x)^0.5}.} -} -\description{ -Estimates a propper bandwidth \code{h} according -\deqn{% -h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% -h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} -} -\seealso{ -[\code{\link{qchisq}}] -} diff --git a/CVE_R/man/grad.Rd b/CVE_R/man/grad.Rd deleted file mode 100644 index d890e93..0000000 --- a/CVE_R/man/grad.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gradient.R -\name{grad} -\alias{grad} -\title{Compute get gradient of `L(V)` given a dataset `X`.} -\usage{ -grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE, - persistent = FALSE) -} -\arguments{ -\item{X}{Data matrix.} - -\item{Y}{Responce.} - -\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.} - -\item{h}{Bandwidth} - -\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.} - -\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single -value loss is returned and \code{envir} is ignored.} - -\item{persistent}{Determines if data indices and dependent calculations shall -be reused from the parent environment. ATTENTION: Do NOT set this flag, only -intended for internal usage by carefully aligned functions!} -} -\description{ -Compute get gradient of `L(V)` given a dataset `X`. -} -\keyword{internal} diff --git a/CVE_R/man/null.Rd b/CVE_R/man/null.Rd deleted file mode 100644 index 498b159..0000000 --- a/CVE_R/man/null.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{null} -\alias{null} -\title{Null space basis of given matrix `V`} -\usage{ -null(V) -} -\arguments{ -\item{V}{`(p, q)` matrix} -} -\value{ -Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. 
-} -\description{ -Null space basis of given matrix `V` -} -\keyword{internal} diff --git a/CVE_R/man/plot.cve.Rd b/CVE_R/man/plot.cve.Rd deleted file mode 100644 index 35f6921..0000000 --- a/CVE_R/man/plot.cve.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R -\name{plot.cve} -\alias{plot.cve} -\title{Ploting helper for objects of class \code{cve}.} -\usage{ -## S3 method for class 'cve' -plot(x, content = "history", ...) -} -\arguments{ -\item{x}{Object of class \code{cve} (result of [cve()]).} - -\item{...}{Pass through parameters to [plot()] and [lines()]} - -\item{content}{Specifies what to plot: -\itemize{ - \item "history" Plots the loss history from stiefel optimization - (default). - \item ... TODO: add (if there are any) -}} -} -\description{ -Ploting helper for objects of class \code{cve}. -} -\seealso{ -see \code{\link{par}} for graphical parameters to pass through - as well as \code{\link{plot}} for standard plot utility. -} diff --git a/CVE_R/man/rStiefl.Rd b/CVE_R/man/rStiefl.Rd deleted file mode 100644 index 31c40bb..0000000 --- a/CVE_R/man/rStiefl.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/util.R -\name{rStiefl} -\alias{rStiefl} -\title{Samples uniform from the Stiefel Manifold} -\usage{ -rStiefl(p, q) -} -\arguments{ -\item{p}{row dim.} - -\item{q}{col dim.} -} -\value{ -`(p, q)` semi-orthogonal matrix -} -\description{ -Samples uniform from the Stiefel Manifold -} -\examples{ - V <- rStiefel(6, 4) -} diff --git a/CVE_R/man/summary.cve.Rd b/CVE_R/man/summary.cve.Rd deleted file mode 100644 index 1151f98..0000000 --- a/CVE_R/man/summary.cve.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R -\name{summary.cve} -\alias{summary.cve} -\title{Prints a summary of a \code{cve} result.} -\usage{ -\method{summary}{cve}(object, ...) 
-} -\arguments{ -\item{object}{Instance of 'cve' as return of \code{cve}.} -} -\description{ -Prints a summary of a \code{cve} result. -} diff --git a/benchmark/benchmark.R b/benchmark/benchmark.R deleted file mode 100644 index 66777c8..0000000 --- a/benchmark/benchmark.R +++ /dev/null @@ -1,479 +0,0 @@ -library(microbenchmark) - -dyn.load("benchmark.so") - - -## rowSum* .call -------------------------------------------------------------- -rowSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSums', PACKAGE = 'benchmark', M) -} -rowSumsV2.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSumsV2', PACKAGE = 'benchmark', M) -} -rowSumsV3.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSumsV3', PACKAGE = 'benchmark', M) -} -colSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_colSums', PACKAGE = 'benchmark', M) -} -rowSquareSums.c <- function(M) { - stopifnot( - is.matrix(M), - is.numeric(M) - ) - if (!is.double(M)) { - M <- matrix(as.double(M), nrow = nrow(M)) - } - - .Call('R_rowSquareSums', PACKAGE = 'benchmark', M) -} -rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { - stopifnot( - is.vector(vecA), - is.numeric(vecA), - is.numeric(diag), - nrow * (nrow - 1) == length(vecA) * 2 - ) - if (!is.double(vecA)) { - vecA <- as.double(vecA) - } - .Call('R_rowSumsSymVec', PACKAGE = 'benchmark', - vecA, as.integer(nrow), as.double(diag)) -} -rowSweep.c <- function(A, v, op = '-') { - stopifnot( - is.matrix(A), - is.numeric(v) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.vector(v) || !is.double(v)) { - v <- 
as.double(v) - } - stopifnot( - nrow(A) == length(v), - op %in% c('+', '-', '*', '/') - ) - - .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op) -} - -## row*, col* tests ------------------------------------------------------------ -n <- 3000 -M <- matrix(runif(n * 12), n, 12) -stopifnot( - all.equal(rowSums(M^2), rowSums.c(M^2)), - all.equal(colSums(M), colSums.c(M)), - all.equal(rowSums(M), rowSumsV2.c(M)), - all.equal(rowSums(M), rowSumsV3.c(M)) -) -microbenchmark( - rowSums = rowSums(M), - rowSums.c = rowSums.c(M), - rowSumsV2.c = rowSumsV2.c(M), - rowSumsV3.c = rowSumsV3.c(M) -) -microbenchmark( - rowSums = rowSums(M^2), - rowSums.c = rowSums.c(M^2), - rowSqSums.c = rowSquareSums.c(M) -) -microbenchmark( - colSums = colSums(M), - colSums.c = colSums.c(M) -) - -sum = rowSums(M) -stopifnot(all.equal( - sweep(M, 1, sum, FUN = `/`), - rowSweep.c(M, sum, '/') # Col-Normalize) -), all.equal( - sweep(M, 1, sum, FUN = `/`), - M / sum -)) -microbenchmark( - sweep = sweep(M, 1, sum, FUN = `/`), - M / sum, - rowSweep.c = rowSweep.c(M, sum, '/') # Col-Normalize) -) - -# Create symmetric matrix with constant diagonal entries. -nrow <- 200 -diag <- 1.0 -Sym <- tcrossprod(runif(nrow)) -diag(Sym) <- diag -# Get vectorized lower triangular part of `Sym` matrix. 
-SymVec <- Sym[lower.tri(Sym)] -stopifnot(all.equal( - rowSums(Sym), - rowSumsSymVec.c(SymVec, nrow, diag) -)) -microbenchmark( - rowSums = rowSums(Sym), - rowSums.c = rowSums.c(Sym), - rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag) -) - -## Matrix-Matrix opperation .call --------------------------------------------- -transpose.c <- function(A) { - stopifnot( - is.matrix(A), is.numeric(A) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow(A), ncol(A)) - } - - .Call('R_transpose', PACKAGE = 'benchmark', A) -} - -sympMV.c <- function(vecA, x) { - stopifnot( - is.vector(vecA), is.numeric(vecA), - is.vector(x), is.numeric(x), - length(x) * (length(x) + 1) == 2 * length(vecA) - ) - if (!is.double(vecA)) { - vecA <- as.double(vecA) - } - if (!is.double(x)) { - x <- as.double(x) - } - - .Call('R_sympMV', PACKAGE = 'benchmark', vecA, x) -} - -matrixprod.c <- function(A, B) { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - ncol(A) == nrow(B) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_matrixprod', PACKAGE = 'benchmark', A, B) -} -crossprod.c <- function(A, B) { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - nrow(A) == nrow(B) - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_crossprod', PACKAGE = 'benchmark', A, B) -} -kronecker.c <- function(A, B, op = '*') { - stopifnot( - is.matrix(A), is.numeric(A), - is.matrix(B), is.numeric(B), - is.character(op), op %in% c('*', '+', '/', '-') - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_kronecker', PACKAGE = 'benchmark', A, B, op) -} -skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) { - stopifnot( - is.matrix(A), 
is.numeric(A), - is.matrix(B), is.numeric(B), - all(dim(A) == dim(B)), - is.numeric(alpha), length(alpha) == 1L, - is.numeric(beta), length(beta) == 1L - ) - if (!is.double(A)) { - A <- matrix(as.double(A), nrow = nrow(A)) - } - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B, - as.double(alpha), as.double(beta)) -} - -## Matrix-Matrix opperation tests --------------------------------------------- -n <- 200 -k <- 100 -m <- 300 - -A <- matrix(runif(n * k), n, k) -B <- matrix(runif(k * m), k, m) -stopifnot( - all.equal(t(A), transpose.c(A)) -) -microbenchmark( - t(A), - transpose.c(A) -) - -Sym <- tcrossprod(runif(n)) -vecSym <- Sym[lower.tri(Sym, diag = T)] -x <- runif(n) -stopifnot(all.equal( - as.double(Sym %*% x), - sympMV.c(vecSym, x) -)) -microbenchmark( - Sym %*% x, - sympMV.c = sympMV.c(vecSym, x) -) - -stopifnot( - all.equal(A %*% B, matrixprod.c(A, B)) -) -microbenchmark( - "%*%" = A %*% B, - matrixprod.c = matrixprod.c(A, B) -) - -A <- matrix(runif(k * n), k, n) -B <- matrix(runif(k * m), k, m) -stopifnot( - all.equal(crossprod(A, B), crossprod.c(A, B)) -) -microbenchmark( - crossprod = crossprod(A, B), - crossprod.c = crossprod.c(A, B) -) - -n <- 100L -m <- 12L -p <- 11L -q <- 10L -A <- matrix(runif(n * m), n, m) -B <- matrix(runif(p * q), p, q) - -stopifnot(all.equal( - kronecker(A, B), - kronecker.c(A, B) -)) -microbenchmark( - kronecker = kronecker(A, B), - kronecker.c = kronecker.c(A, B) -) - -n <- 12 -k <- 11 -A <- matrix(runif(n * k), n, k) -B <- matrix(runif(n * k), n, k) -stopifnot(all.equal( - A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B) -)) -microbenchmark( - A %*% t(B) - B %*% t(A), - skewSymRank2k.c(A, B) -) - -## Orthogonal projection onto null space .Call -------------------------------- -nullProj.c <- function(B) { - stopifnot( - is.matrix(B), is.numeric(B) - ) - if (!is.double(B)) { - B <- matrix(as.double(B), nrow = nrow(B)) - } - - .Call('R_nullProj', PACKAGE 
= 'benchmark', B) -} -## Orthogonal projection onto null space tests -------------------------------- -p <- 12 -q <- 10 -V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))) - -# Projection matrix onto `span(V)` -Q <- diag(1, p) - tcrossprod(V, V) -stopifnot( - all.equal(Q, nullProj.c(V)) -) -microbenchmark( - nullProj = diag(1, p) - tcrossprod(V, V), - nullProj.c = nullProj.c(V) -) - -# ## Kronecker optimizations ---------------------------------------------------- -# library(microbenchmark) - -# dist.1 <- function(X_diff, Q) { -# rowSums((X_diff %*% Q)^2) -# } -# dist.2 <- function(X, Q) { -# ones <- rep(1, nrow(X)) -# proj <- X %*% Q -# rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2) -# } - -# n <- 400L -# p <- 12L -# k <- 2L -# q <- p - k - -# X <- matrix(rnorm(n * p), n, p) -# Q <- diag(1, p) - tcrossprod(rnorm(p)) -# ones <- rep(1, n) -# X_diff <- kronecker(X, ones) - kronecker(ones, X) - -# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q))) - -# microbenchmark( -# dist.1(X_diff, Q), -# dist.2(X, Q), -# times = 10L -# ) -# # if (!persistent) { -# # pair.index <- elem.pairs(seq(n)) -# # i <- pair.index[, 1] # `i` indices of `(i, j)` pairs -# # j <- pair.index[, 2] # `j` indices of `(i, j)` pairs -# # lower <- ((i - 1) * n) + j -# # upper <- ((j - 1) * n) + i -# # X_diff <- X[i, , drop = F] - X[j, , drop = F] -# # } - -# # # Projection matrix onto `span(V)` -# # Q <- diag(1, p) - tcrossprod(V, V) -# # # Vectorized distance matrix `D`. -# # vecD <- rowSums((X_diff %*% Q)^2) - - - -# ## WIP for gradient. 
---------------------------------------------------------- - -grad.c <- function(X, X_diff, Y, V, h) { - stopifnot( - is.matrix(X), is.double(X), - is.matrix(X_diff), is.double(X_diff), - ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2, - is.vector(Y) || (is.matrix(Y) && pmin(dim(Y)) == 1L), is.double(Y), - length(Y) == nrow(X), - is.matrix(V), is.double(V), - nrow(V) == ncol(X), - is.vector(h), is.numeric(h), length(h) == 1 - ) - - .Call('R_grad', PACKAGE = 'benchmark', - X, X_diff, as.double(Y), V, as.double(h)); -} - -elem.pairs <- function(elements) { - # Number of elements to match. - n <- length(elements) - # Create all combinations. - pairs <- rbind(rep(elements, each=n), rep(elements, n)) - # Select unique combinations without self interaction. - return(pairs[, pairs[1, ] < pairs[2, ]]) -} -grad <- function(X, Y, V, h, persistent = TRUE) { - n <- nrow(X) - p <- ncol(X) - - if (!persistent) { - pair.index <- elem.pairs(seq(n)) - i <- pair.index[, 1] # `i` indices of `(i, j)` pairs - j <- pair.index[, 2] # `j` indices of `(i, j)` pairs - lower <- ((i - 1) * n) + j - upper <- ((j - 1) * n) + i - X_diff <- X[i, , drop = F] - X[j, , drop = F] - } - - # Projection matrix onto `span(V)` - Q <- diag(1, p) - tcrossprod(V, V) - # Vectorized distance matrix `D`. - vecD <- rowSums((X_diff %*% Q)^2) - - # Create Kernel matrix (aka. apply kernel to distances) - K <- matrix(1, n, n) # `exp(0) == 1` - K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part - K[upper] <- t(K)[upper] # Mirror lower tri. to upper - - # Weighted `Y` momentums - colSumsK <- colSums(K) - y1 <- (K %*% Y) / colSumsK - y2 <- (K %*% Y^2) / colSumsK - # Per example loss `L(V, X_i)` - L <- y2 - y1^2 - - # Compute scaling vector `vecS` for `X_diff`. 
- tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2 - tmp <- as.vector(L) - tmp - tmp <- tmp * K / colSumsK - vecS <- (tmp + t(tmp))[lower] * vecD - - G <- crossprod(X_diff, X_diff * vecS) %*% V - G <- (-2 / (n * h^2)) * G - return(G) -} -rStiefel <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) -} - -n <- 200 -p <- 12 -q <- 10 - -X <- matrix(runif(n * p), n, p) -Y <- runif(n) -V <- rStiefel(p, q) -h <- 0.1 - -pair.index <- elem.pairs(seq(n)) -i <- pair.index[1, ] # `i` indices of `(i, j)` pairs -j <- pair.index[2, ] # `j` indices of `(i, j)` pairs -lower <- ((i - 1) * n) + j -upper <- ((j - 1) * n) + i -X_diff <- X[i, , drop = F] - X[j, , drop = F] - -stopifnot(all.equal( - grad(X, Y, V, h), - grad.c(X, X_diff, Y, V, h) -)) -microbenchmark( - grad = grad(X, Y, V, h), - grad.c = grad.c(X, X_diff, Y, V, h) -) diff --git a/benchmark/benchmark.c b/benchmark/benchmark.c deleted file mode 100644 index 785ab2f..0000000 --- a/benchmark/benchmark.c +++ /dev/null @@ -1,510 +0,0 @@ -#include -#include // for `mem*` functions. - -#include -#include -#include -// #include - -#include "benchmark.h" - -void rowSums(const double *A, - const int nrow, const int ncol, - double *sum) { - int i, j, block_size, block_size_i; - const double *A_block = A; - const double *A_end = A + nrow * ncol; - - if (nrow > 508) { - block_size = 508; - } else { - block_size = nrow; - } - - // Iterate `(block_size_i, ncol)` submatrix blocks. - for (i = 0; i < nrow; i += block_size_i) { - // Reset `A` to new block beginning. - A = A_block; - // Take block size of eveything left and reduce to max size. - block_size_i = nrow - i; - if (block_size_i > block_size) { - block_size_i = block_size; - } - // Copy blocks first column. - for (j = 0; j < block_size_i; j += 4) { - sum[j] = A[j]; - sum[j + 1] = A[j + 1]; - sum[j + 2] = A[j + 2]; - sum[j + 3] = A[j + 3]; - } - for (; j < block_size_i; ++j) { - sum[j] = A[j]; - } - // Sum following columns to the first one. 
- for (A += nrow; A < A_end; A += nrow) { - for (j = 0; j < block_size_i; j += 4) { - sum[j] += A[j]; - sum[j + 1] += A[j + 1]; - sum[j + 2] += A[j + 2]; - sum[j + 3] += A[j + 3]; - } - for (; j < block_size_i; ++j) { - sum[j] += A[j]; - } - } - // Step one block forth. - A_block += block_size_i; - sum += block_size_i; - } -} - -void rowSumsV2(const double *A, - const int nrow, const int ncol, - double *sum) { - int i, j, block_size, block_size_i; - const double *A_block = A; - const double *A_end = A + nrow * ncol; - - if (nrow > CVE_MEM_CHUNK_SIZE) { - block_size = CVE_MEM_CHUNK_SIZE; - } else { - block_size = nrow; - } - - // Iterate `(block_size_i, ncol)` submatrix blocks. - for (i = 0; i < nrow; i += block_size_i) { - // Reset `A` to new block beginning. - A = A_block; - // Take block size of eveything left and reduce to max size. - block_size_i = nrow - i; - if (block_size_i > block_size) { - block_size_i = block_size; - } - // Compute first blocks column, - for (j = 0; j < block_size_i; ++j) { - sum[j] = A[j]; - } - // and sum the following columns to the first one. - for (A += nrow; A < A_end; A += nrow) { - for (j = 0; j < block_size_i; ++j) { - sum[j] += A[j]; - } - } - // Step one block forth. - A_block += block_size_i; - sum += block_size_i; - } -} -void rowSumsV3(const double *A, - const int nrow, const int ncol, - double *sum) { - int i, onei = 1; - double* ones = (double*)malloc(ncol * sizeof(double)); - const double one = 1.0; - const double zero = 0.0; - - for (i = 0; i < ncol; ++i) { - ones[i] = 1.0; - } - - matrixprod(A, nrow, ncol, ones, ncol, 1, sum); - free(ones); -} - -void colSums(const double *A, const int nrow, const int ncol, - double *sums) { - int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow. 
- double sum; - - for (j = 0; j < ncol; ++j) { - sum = 0.0; - for (i = 0; i < nrowb; i += 4) { - sum += A[i] - + A[i + 1] - + A[i + 2] - + A[i + 3]; - } - for (; i < nrow; ++i) { - sum += A[i]; - } - *(sums++) = sum; - A += nrow; - } -} - -void rowSquareSums(const double *A, - const int nrow, const int ncol, - double *sum) { - int i, j, block_size, block_size_i; - const double *A_block = A; - const double *A_end = A + nrow * ncol; - - if (nrow > 508) { - block_size = 508; - } else { - block_size = nrow; - } - - // Iterate `(block_size_i, ncol)` submatrix blocks. - for (i = 0; i < nrow; i += block_size_i) { - // Reset `A` to new block beginning. - A = A_block; - // Take block size of eveything left and reduce to max size. - block_size_i = nrow - i; - if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!! - block_size_i = block_size; - } /// ... - // TODO: - // Copy blocks first column. - for (j = 0; j < block_size_i; j += 4) { - sum[j] = A[j] * A[j]; - sum[j + 1] = A[j + 1] * A[j + 1]; - sum[j + 2] = A[j + 2] * A[j + 2]; - sum[j + 3] = A[j + 3] * A[j + 3]; - } - for (; j < block_size_i; ++j) { - sum[j] = A[j] * A[j]; - } - // Sum following columns to the first one. - for (A += nrow; A < A_end; A += nrow) { - for (j = 0; j < block_size_i; j += 4) { - sum[j] += A[j] * A[j]; - sum[j + 1] += A[j + 1] * A[j + 1]; - sum[j + 2] += A[j + 2] * A[j + 2]; - sum[j + 3] += A[j + 3] * A[j + 3]; - } - for (; j < block_size_i; ++j) { - sum[j] += A[j] * A[j]; - } - } - // Step one block forth. 
- A_block += block_size_i; - sum += block_size_i; - } -} - -void rowSumsSymVec(const double *Avec, const int nrow, - const double diag, - double *sum) { - int i, j; - - if (diag == 0.0) { - memset(sum, 0, nrow * sizeof(double)); - } else { - for (i = 0; i < nrow; ++i) { - sum[i] = diag; - } - } - - for (j = 0; j < nrow; ++j) { - for (i = j + 1; i < nrow; ++i, ++Avec) { - sum[j] += *Avec; - sum[i] += *Avec; - } - } -} - -#define ROW_SWEEP_ALG(op) \ - /* Iterate `(block_size_i, ncol)` submatrix blocks. */ \ - for (i = 0; i < nrow; i += block_size_i) { \ - /* Set `A` and `C` to block beginning. */ \ - A = A_block; \ - C = C_block; \ - /* Get current block's row size. */ \ - block_size_i = nrow - i; \ - if (block_size_i > block_size) { \ - block_size_i = block_size; \ - } \ - /* Perform element wise operation for block. */ \ - for (; A < A_end; A += nrow, C += nrow) { \ - for (j = 0; j < block_size_i; ++j) { \ - C[j] = (A[j]) op (v[j]); \ - } \ - } \ - /* Step one block forth. */ \ - A_block += block_size_i; \ - C_block += block_size_i; \ - v += block_size_i; \ - } - -/* C[, j] = A[, j] * v for each j = 1 to ncol */ -void rowSweep(const double *A, const int nrow, const int ncol, - const char* op, - const double *v, // vector of length nrow - double *C) { - int i, j, block_size, block_size_i; - const double *A_block = A; - double *C_block = C; - const double *A_end = A + nrow * ncol; - - if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache - block_size = CVE_MEM_CHUNK_SMALL; - } else { - block_size = nrow; - } - - if (*op == '+') { - ROW_SWEEP_ALG(+) - } else if (*op == '-') { - ROW_SWEEP_ALG(-) - } else if (*op == '*') { - ROW_SWEEP_ALG(*) - } else if (*op == '/') { - ROW_SWEEP_ALG(/) - } else { - error("Got unknown 'op' (opperation) argument."); - } -} - -void transpose(const double *A, const int nrow, const int ncol, double* T) { - int i, j, len = nrow * ncol; - - // Filling column-wise and accessing row-wise. 
- for (i = 0, j = 0; i < len; ++i, j += nrow) { - if (j >= len) { - j -= len - 1; - } - T[i] = A[j]; - } -} - -// Symmetric Packed matrix vector product. -// Computes -// y <- Ax -// where A is supplied as packed lower triangular part of a symmetric -// matrix A. Meaning that `vecA` is `vec_ltri(A)`. -void sympMV(const double* vecA, const int nrow, const double* x, double* y) { - double one = 1.0; - double zero = 0.0; - int onei = 1; - - F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei); -} - -void matrixprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C) { - const double one = 1.0; - const double zero = 0.0; - - // DGEMM with parameterization: - // C <- A %*% B - F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA, - &one, A, &nrowA, B, &nrowB, - &zero, C, &nrowA); -} - -void crossprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C) { - const double one = 1.0; - const double zero = 0.0; - - // DGEMM with parameterization: - // C <- t(A) %*% B - F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA, - &one, A, &nrowA, B, &nrowB, - &zero, C, &ncolA); -} - -#define KRONECKER_ALG(op) \ - for (j = 0; j < ncolA; ++j) { \ - for (l = 0; l < ncolB; ++l) { \ - colB = B + (l * nrowB); \ - for (i = 0; i < nrowA; ++i) { \ - for (k = 0; k < nrowB; ++k) { \ - *(C++) = (A[i]) op (colB[k]); \ - } \ - } \ - } \ - A += nrowA; \ - } - -void kronecker(const double * restrict A, const int nrowA, const int ncolA, - const double * restrict B, const int nrowB, const int ncolB, - const char* op, - double * restrict C) { - int i, j, k, l; - const double *colB; - - if (*op == '+') { - KRONECKER_ALG(+) - } else if (*op == '-') { - KRONECKER_ALG(-) - } else if (*op == '*') { - KRONECKER_ALG(*) - } else if (*op == '/') { - KRONECKER_ALG(/) - } else { - error("Got unknown 'op' (opperation) argument."); - } -} - -void nullProj(const double *B, const 
int nrowB, const int ncolB, - double *Q) { - const double minusOne = -1.0; - const double one = 1.0; - - // Initialize Q as identity matrix. - memset(Q, 0, sizeof(double) * nrowB * nrowB); - double *Q_diag, *Q_end = Q + nrowB * nrowB; - for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) { - *Q_diag = 1.0; - } - - // DGEMM with parameterization: - // Q <- (-1.0 * B %*% t(B)) + Q - F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB, - &minusOne, B, &nrowB, B, &nrowB, - &one, Q, &nrowB); -} - -void rangePairs(const int from, const int to, int *pairs) { - int i, j; - for (i = from; i < to; ++i) { - for (j = i + 1; j < to; ++j) { - pairs[0] = i; - pairs[1] = j; - pairs += 2; - } - } -} - -// A dence skwe-symmetric rank 2 update. -// Perform the update -// C := alpha (A * B^T - B * A^T) + beta C -void skewSymRank2k(const int nrow, const int ncol, - double alpha, const double *A, const double *B, - double beta, - double *C) { - F77_NAME(dgemm)("N", "T", - &nrow, &nrow, &ncol, - &alpha, A, &nrow, B, &nrow, - &beta, C, &nrow); - alpha *= -1.0; - beta = 1.0; - F77_NAME(dgemm)("N", "T", - &nrow, &nrow, &ncol, - &alpha, B, &nrow, A, &nrow, - &beta, C, &nrow); -} -// TODO: clarify -static inline double gaussKernel(const double x, const double scale) { - return exp(scale * x * x); -} - -// TODO: mutch potential for optimization!!! 
-static void weightedYandLoss(const int n, - const double *Y, - const double *vecD, - const double *vecW, - const double *colSums, - double *y1, double *L, double *vecS, - double *loss) { - int i, j, k, N = n * (n - 1) / 2; - double l; - - for (i = 0; i < n; ++i) { - y1[i] = Y[i]; - L[i] = Y[i] * Y[i]; - } - - for (k = j = 0; j < n; ++j) { - for (i = j + 1; i < n; ++i, ++k) { - y1[i] += Y[j] * vecW[k]; - y1[j] += Y[i] * vecW[k]; - L[i] += Y[j] * Y[j] * vecW[k]; - L[j] += Y[i] * Y[i] * vecW[k]; - } - } - - for (i = 0; i < n; ++i) { - y1[i] /= colSums[i]; - L[i] /= colSums[i]; - } - - l = 0.0; - for (i = 0; i < n; ++i) { - l += (L[i] -= y1[i] * y1[i]); - } - *loss = l / (double)n; - - for (k = j = 0; j < n; ++j) { - for (i = j + 1; i < n; ++i, ++k) { - l = Y[j] - y1[i]; - vecS[k] = (L[i] - (l * l)) / colSums[i]; - l = Y[i] - y1[j]; - vecS[k] += (L[j] - (l * l)) / colSums[j]; - } - } - - for (k = 0; k < N; ++k) { - vecS[k] *= vecW[k] * vecD[k]; - } -} - -void grad(const int n, const int p, const int q, - const double *X, - const double *X_diff, - const double *Y, - const double *V, - const double h, - double *G, double *loss) { - // Number of X_i to X_j not trivial pairs. - int i, N = (n * (n - 1)) / 2; - double scale = -0.5 / h; - - if (X_diff == (void*)0) { - // TODO: ... - } - - // Allocate and compute projection matrix `Q = I_p - V * V^T` - double *Q = (double*)malloc(p * p * sizeof(double)); - nullProj(V, p, q, Q); - - // allocate and compute vectorized distance matrix with a temporary - // projection of `X_diff`. - double *vecD = (double*)malloc(N * sizeof(double)); - double *X_proj; - if (p < 5) { // TODO: refine that! - X_proj = (double*)malloc(N * 5 * sizeof(double)); - } else { - X_proj = (double*)malloc(N * p * sizeof(double)); - } - matrixprod(X_diff, N, p, Q, p, p, X_proj); - rowSquareSums(X_proj, N, p, vecD); - - // Apply kernel to distence vector for weights computation. - double *vecK = X_proj; // reuse memory area, no longer needed. 
- for (i = 0; i < N; ++i) { - vecK[i] = gaussKernel(vecD[i], scale); - } - double *colSums = X_proj + N; // still allocated! - rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym - - // compute weighted responces of first end second momontum, aka y1, y2. - double *y1 = X_proj + N + n; - double *L = X_proj + N + (2 * n); - // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because - // used symultanious to X_proj in final gradient computation. - double *vecS = (double*)malloc(N * sizeof(double)); - weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss); - - // compute the gradient using X_proj for intermidiate scaled X_diff. - rowSweep(X_diff, N, p, "*", vecS, X_proj); - // reuse Q which has the required dim (p, p). - crossprod(X_diff, N, p, X_proj, N, p, Q); - // Product with V - matrixprod(Q, p, p, V, p, q, G); - // And final scaling (TODO: move into matrixprod!) - scale = -2.0 / (((double)n) * h * h); - N = p * q; - for (i = 0; i < N; ++i) { - G[i] *= scale; - } - - free(vecS); - free(X_proj); - free(vecD); - free(Q); -} diff --git a/benchmark/benchmark.h b/benchmark/benchmark.h deleted file mode 100644 index 4e65ea9..0000000 --- a/benchmark/benchmark.h +++ /dev/null @@ -1,219 +0,0 @@ -#ifndef CVE_INCLUDE_GUARD_ -#define CVE_INCLUDE_GUARD_ - -#include - -#define CVE_MEM_CHUNK_SMALL 1016 -#define CVE_MEM_CHUNK_SIZE 2032 - -void rowSums(const double *A, - const int nrow, const int ncol, - double *sum); -SEXP R_rowSums(SEXP A) { - SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); - - rowSums(REAL(A), nrows(A), ncols(A), REAL(sums)); - - UNPROTECT(1); - return sums; -} -void rowSumsV2(const double *A, - const int nrow, const int ncol, - double *sum); -SEXP R_rowSumsV2(SEXP A) { - SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); - - rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums)); - - UNPROTECT(1); - return sums; -} -void rowSumsV3(const double *A, - const int nrow, const int ncol, - double *sum); -SEXP 
R_rowSumsV3(SEXP A) { - SEXP sums = PROTECT(allocVector(REALSXP, nrows(A))); - - rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums)); - - UNPROTECT(1); - return sums; -} - -void colSums(const double *A, - const int nrow, const int ncol, - double *sum); -SEXP R_colSums(SEXP A) { - SEXP sums = PROTECT(allocVector(REALSXP, ncols(A))); - - colSums(REAL(A), nrows(A), ncols(A), REAL(sums)); - - UNPROTECT(1); - return sums; -} - -void rowSquareSums(const double*, const int, const int, double*); -SEXP R_rowSquareSums(SEXP A) { - SEXP result = PROTECT(allocVector(REALSXP, nrows(A))); - - rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result)); - - UNPROTECT(1); - return result; -} - -void rowSumsSymVec(const double *Avec, const int nrow, - const double diag, - double *sum); -SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) { - SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow))); - - rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum)); - - UNPROTECT(1); - return sum; -} - -void rowSweep(const double *A, const int nrow, const int ncol, - const char* op, - const double *v, // vector of length nrow - double *C); -SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) { - SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A))); - - rowSweep(REAL(A), nrows(A), ncols(A), - CHAR(STRING_ELT(op, 0)), - REAL(v), REAL(C)); - - UNPROTECT(1); - return C; -} - -void transpose(const double *A, const int nrow, const int ncol, double* T); -SEXP R_transpose(SEXP A) { - SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A))); - - transpose(REAL(A), nrows(A), ncols(A), REAL(T)); - - UNPROTECT(1); /* T */ - return T; -} - -void sympMV(const double* vecA, const int nrow, const double* x, double* y); -SEXP R_sympMV(SEXP vecA, SEXP x) { - SEXP y = PROTECT(allocVector(REALSXP, length(x))); - - sympMV(REAL(vecA), length(x), REAL(x), REAL(y)); - - UNPROTECT(1); /* y */ - return y; -} - -void matrixprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int 
nrowB, const int ncolB, - double *C); -SEXP R_matrixprod(SEXP A, SEXP B) { - SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B))); - - matrixprod(REAL(A), nrows(A), ncols(A), - REAL(B), nrows(B), ncols(B), - REAL(C)); - - UNPROTECT(1); - return C; -} - -void crossprod(const double* A, const int nrowA, const int ncolA, - const double* B, const int ncolB, const int nrowB, - double* C); -SEXP R_crossprod(SEXP A, SEXP B) { - SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B))); - - crossprod(REAL(A), nrows(A), ncols(A), - REAL(B), nrows(B), ncols(B), - REAL(C)); - - UNPROTECT(1); - return C; -} - -void kronecker(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - const char *op, - double *C); -SEXP R_kronecker(SEXP A, SEXP B, SEXP op) { - SEXP C = PROTECT(allocMatrix(REALSXP, - nrows(A) * nrows(B), - ncols(A) * ncols(B))); - - kronecker(REAL(A), nrows(A), ncols(A), - REAL(B), nrows(B), ncols(B), - CHAR(STRING_ELT(op, 0)), - REAL(C)); - - UNPROTECT(1); - return C; -} - -void skewSymRank2k(const int n, const int k, - double alpha, const double *A, const double *B, - double beta, - double *C); -SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) { - SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A))); - memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double)); - - skewSymRank2k(nrows(A), ncols(A), - *REAL(alpha), REAL(A), REAL(B), - *REAL(beta), REAL(C)); - - UNPROTECT(1); - return C; -} - -void nullProj(const double* B, const int nrowB, const int ncolB, - double* Q); -SEXP R_nullProj(SEXP B) { - SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B))); - - nullProj(REAL(B), nrows(B), ncols(B), REAL(Q)); - - UNPROTECT(1); - return Q; -} - -void rangePairs(const int from, const int to, int *pairs); -SEXP R_rangePairs(SEXP from, SEXP to) { - int start = asInteger(from); - int end = asInteger(to) + 1; - int n = end - start; - - SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2)); - 
rangePairs(start, end, INTEGER(out)); - - UNPROTECT(1); - return out; -} - -void grad(const int n, const int p, const int q, - const double *X, - const double *X_diff, - const double *Y, - const double *V, - const double h, - double *G, double *const loss); -SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { - int N = (nrows(X) * (nrows(X) - 1)) / 2; - - SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V))); - SEXP loss = PROTECT(allocVector(REALSXP, 1)); - - grad(nrows(X), ncols(X), ncols(V), - REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h), - REAL(G), REAL(loss)); - - UNPROTECT(2); - return G; -} - -#endif /* CVE_INCLUDE_GUARD_ */