
- update: doc,
- wip: cleanup,
- update: datasets,
- remove: old code
This commit is contained in:
Daniel Kapla 2019-12-16 17:34:35 +01:00
parent 300fc11f3f
commit 9edefe994d
84 changed files with 812 additions and 4077 deletions

View File

@ -9,15 +9,10 @@ export(cve)
export(cve.call) export(cve.call)
export(dataset) export(dataset)
export(directions) export(directions)
export(elem.pairs)
export(estimate.bandwidth) export(estimate.bandwidth)
export(null) export(null)
export(predict_dim) export(predict_dim)
export(projTangentStiefel)
export(rStiefel) export(rStiefel)
export(retractStiefel)
export(skew)
export(sym)
import(stats) import(stats)
importFrom(graphics,boxplot) importFrom(graphics,boxplot)
importFrom(graphics,lines) importFrom(graphics,lines)

View File

@ -20,7 +20,7 @@
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function, #' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is #' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. #' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal. #' Without loss of generality \eqn{B} is assumed to be orthonormal.
#' #'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia #' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
@ -36,26 +36,46 @@
#' @inherit CVE-package description #' @inherit CVE-package description
#' #'
#' @param formula an object of class \code{"formula"} which is a symbolic #' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted. #' description of the model to be fitted, e.g. \eqn{Y\sim X}{Y ~ X}, where
#' \eqn{Y} is an \eqn{n}-dimensional vector of the response variable and
#' \eqn{X} is an \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if #' @param data an optional data frame, containing the data for the formula if
#' supplied. #' supplied, e.g. \code{data <- data.frame(Y, X)}, with dimension
#' @param method specifies the CVE method variation as one of #' \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#' the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#' options are
#' \itemize{ #' \itemize{
#' \item "simple" exact implementation as described in the paper listed #' \item "simple" implementation as described in the paper.
#' below. #' \item "weighted" variation with adaptive weighting of slices.
#' \item "weighted" variation with addaptive weighting of slices.
#' } #' }
#' see the paper for details.
#' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied). #' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied).
#' @param ... Parameters passed on to \code{cve.call}. #' @param ... optional parameters passed on to \code{cve.call}.
#' #'
#' @return an S3 object of class \code{cve} with components: #' @return an S3 object of class \code{cve} with components:
#' \describe{ #' \describe{
#' \item{X}{Original training data,} #' \item{X}{design matrix of the predictors used for calculating the
#' \item{Y}{Responce of original training data,} #' cve-estimate,}
#' \item{Y}{\eqn{n}-dimensional vector of responses used for calculating
#' the cve-estimate,}
#' \item{method}{Name of used method,} #' \item{method}{Name of used method,}
#' \item{call}{the matched call,} #' \item{call}{the matched call,}
#' \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for #' \item{res}{list of components \code{V, L, B, loss, h} for
#' each \eqn{k=min.dim,...,max.dim} (dimension).} #' each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
#' call, then \code{min.dim = max.dim = k}.
#' \itemize{
#' \item \code{B} is the cve-estimate with dimension
#' \eqn{p\times k}{p x k}.
#' \item \code{V} is the orthogonal complement of \eqn{B}.
#' \item \code{L} is the loss for each sample separately, such that
#' its mean is \code{loss}.
#' \item \code{loss} is the value of the target function that is
#' minimized, evaluated at \eqn{V}.
#' \item \code{h} is the bandwidth parameter used to calculate
#' \code{B, V, loss, L}.
#' }
#' }
#' } #' }
#' #'
#' @examples #' @examples
@ -66,7 +86,7 @@
#' b1 <- rep(1 / sqrt(p), p) #' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p) #' b2 <- (-1)^seq(1, p) / sqrt(p)
#' B <- cbind(b1, b2) #' B <- cbind(b1, b2)
#' # samplsize #' # sample size
#' n <- 200 #' n <- 200
#' set.seed(21) #' set.seed(21)
#' # create predictor data x ~ N(0, I_p) #' # create predictor data x ~ N(0, I_p)
@ -139,10 +159,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @inherit cve title #' @inherit cve title
#' @inherit cve description #' @inherit cve description
#' #'
#' @param X Design matrix with dimension \eqn{n\times p}{n x p}.
#' @param Y numeric array of responses of length \eqn{n}.
#' @param h bandwidth or function to estimate the bandwidth, defaults to the
#' internally estimated bandwidth.
#' @param nObs parameter for choosing bandwidth \code{h} using #' @param nObs parameter for choosing bandwidth \code{h} using
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied). #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows.
#' @param Y Responses (1 dimensional).
#' @param method specifies the CVE method variation as one of #' @param method specifies the CVE method variation as one of
#' \itemize{ #' \itemize{
#' \item "simple" exact implementation as described in the paper listed #' \item "simple" exact implementation as described in the paper listed
@ -156,19 +178,23 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param tau Initial step-size. #' @param tau Initial step-size.
#' @param tol Tolerance for break condition. #' @param tol Tolerance for break condition.
#' @param max.iter maximum number of optimization steps. #' @param max.iter maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points. #' @param attempts If \code{V.init} is not supplied, the optimization is
#' @param logger a logger function (only for advanced user, significantly slows #' carried out \code{attempts} times with starting values drawn from the
#' down the computation). #' invariant measure on the Stiefel manifold (see \code{\link{rStiefel}}).
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly #' @param momentum number in \eqn{[0, 1)} giving the ratio of momentum for the
#' estimated bandwidth. #' euclidean gradient update with a momentum term. \code{momentum = 0}
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian #' corresponds to normal gradient descent.
#' gradient update with a momentum term.
#' @param slack Positive scaling to allow small increases of the loss while #' @param slack Positive scaling to allow small increases of the loss while
#' optimizing. #' optimizing, i.e. \code{slack = 0.1} allows the target function to
#' @param gamma step-size reduction multiple. #' increase up to \eqn{10 \%} in one optimization step.
#' @param gamma step-size reduction multiple. If gradient step with step size
#' \code{tau} is not accepted \code{gamma * tau} is set to the next step
#' size.
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' @param V.init Semi-orthogonal matrix of dimensions
#' as optimization starting value. (If supplied, \code{attempts} is #' \code{(ncol(X), ncol(X) - k)} used as starting value in the optimization.
#' set to 1 and \code{k} to match dimension) #' (If supplied, \code{attempts} is set to 0 and \code{k} to match dimension).
#' @param logger a logger function (only for advanced users, slows down the
#' computation).
#' #'
#' @inherit cve return #' @inherit cve return
#' #'
@ -253,6 +279,7 @@ cve.call <- function(X, Y, method = "simple",
stop("Dimension missmatch of 'V.init' and 'X'") stop("Dimension missmatch of 'V.init' and 'X'")
} }
min.dim <- max.dim <- ncol(X) - ncol(V.init) min.dim <- max.dim <- ncol(X) - ncol(V.init)
storage.mode(V.init) <- "double"
attempts <- 0L attempts <- 0L
} else if (missing(k) || is.null(k)) { } else if (missing(k) || is.null(k)) {
min.dim <- as.integer(min.dim) min.dim <- as.integer(min.dim)
@ -320,6 +347,9 @@ cve.call <- function(X, Y, method = "simple",
} }
} }
# Convert numerical values to "double".
storage.mode(X) <- storage.mode(Y) <- "double"
if (is.function(logger)) { if (is.function(logger)) {
loggerEnv <- environment(logger) loggerEnv <- environment(logger)
} else { } else {
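For orientation, a minimal usage sketch of the interface documented above (a sketch only: it assumes the built CVE package is attached, uses the dataset() helper added below in this commit, and relies on variables being taken from the calling environment as documented for the formula interface):

# Fit CVE for several target dimensions and inspect the losses.
library(CVE)                 # assumption: the built package is installed
ds <- dataset("M1")          # simulated data, see R/datasets.R below
X <- ds$X; Y <- ds$Y
dr <- cve(Y ~ X, max.dim = 5)
summary(dr)                  # loss statistics for k = min.dim, ..., max.dim
plot(dr)                     # loss distribution elbow plot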

View File

@ -1,8 +1,10 @@
#' Gets estimated SDR basis. #' Gets estimated SDR basis.
#' #'
#' Returns the SDR basis matrix for SDR dimension(s). #' Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
#' cve-estimate with dimension \eqn{p\times k}{p x k}.
#'
#' @param object instance of \code{cve} as output from \code{\link{cve}} or #' @param object instance of \code{cve} as output from \code{\link{cve}} or
#' \code{\link{cve.call}} #' \code{\link{cve.call}}.
#' @param k the SDR dimension. #' @param k the SDR dimension.
#' @param ... ignored. #' @param ... ignored.
#' #'

CVE/R/datasets.R (new file, 279 lines)
View File

@ -0,0 +1,279 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
#' }
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
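The sampler relies on the Cholesky identity: chol(sigma) returns the upper triangular factor R with Sigma = R'R, so the rows of Z R for standard normal Z have covariance Sigma. A quick empirical check (sketch):

set.seed(1)
S <- matrix(c(2, 1, 1, 2), 2)
X <- rmvnorm(10000, sigma = S)
round(cov(X), 1)   # should be close to S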
#' Multivariate t distribution.
#'
#' Random generation from multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the
#' degrees of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#' for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' rmvt(20, mu = c(3, -1, 2), df = 3)
#' }
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
if (df == Inf) {
Z <- 1
} else {
Z <- sqrt(df / rchisq(n, df))
}
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
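Because X = mu + Z * N(0, Sigma) with Z^2 = df / chisq(df), the sample covariance approaches Sigma * df / (df - 2) for df > 2, matching the sigma parameter description above; a quick check (sketch):

set.seed(1)
S <- matrix(c(2, 1, 1, 2), 2)
X <- rmvt(100000, sigma = S, df = 5)
round(cov(X) / (5 / (5 - 2)), 1)   # should be close to S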
#' Generalized Normal Distribution.
#'
#' Random generation for generalized Normal Distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param alpha scale parameter.
#' @param beta shape parameter.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
if (alpha <= 0 || beta <= 0) {
stop("alpha and beta must be positive.")
}
lambda <- (1 / alpha)^beta
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
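Here |X - mu|^beta follows a Gamma(1 / beta, scale = alpha^beta) distribution, which gives Var(X) = alpha^2 * gamma(3 / beta) / gamma(1 / beta); this is exactly the relation dataset() inverts below when it picks alpha so that Var(epsilon) = sd^2. A quick check (sketch):

set.seed(1)
a <- 1.5; b <- 0.5
x <- rgnorm(100000, mu = 0, alpha = a, beta = b)
c(empirical = var(x), theoretical = a^2 * gamma(3 / b) / gamma(1 / b))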
#' Laplace distribution
#'
#' Random generation for Laplace distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
U <- runif(n, -0.5, 0.5)
scale <- sd / sqrt(2)
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
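With scale b = sd / sqrt(2) the Laplace variance 2 * b^2 equals sd^2, so the sd argument is the actual standard deviation of the generated samples; for instance (sketch):

set.seed(1)
x <- rlaplace(100000, mu = 1, sd = 2)
c(mean(x), sd(x))   # should be close to (1, 2)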
#' Generates test datasets.
#'
#' Provides sample datasets M1-M7 used in the paper Conditional variance
#' estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
#' \code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
#' number 1-7.
#' @param n number of samples.
#' @param p Dimension of random variable \eqn{X}.
#' @param sd standard deviation of the error term \eqn{\epsilon}.
#' @param ... Additional parameters only for "M2" (namely \code{pmix} and
#' \code{lambda}), see below.
#'
#' @return List with elements
#' \itemize{
#' \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#' \item{Y}{response.}
#' \item{B}{the dim-reduction matrix}
#' \item{name}{Name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#' @section M2:
#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
#' \eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
#' of \eqn{n = 200} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
#' points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7:
#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
#' freedom, for a subspace dimension of \eqn{k = 4} with a default of
#' \eqn{n = 400} data points.
#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
#' where \eqn{\epsilon} is distributed as generalized normal distribution with
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
name <- toupper(name)
if (nchar(name) == 1) { name <- paste0("M", name) }
if (name == "M1") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
beta <- 0.5
Y <- cos(X %*% B) + rgnorm(n, 0,
alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
beta = beta
)
} else if (name == "M2") {
if (missing(n)) { n <- 100 }
params <- list(...)
pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
Z <- 2 * rbinom(n, 1, pmix) - 1
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
Y <- cos(X %*% B) + rnorm(n, 0, sd)
} else if (name == "M3") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- matrix(rnorm(n * p), n)
Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
} else if (name == "M4") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
)
X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
XB <- X %*% B
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
} else if (name == "M5") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1, 6), rep(0, p - 6)),
c(rep(c(1, -1), 3), rep(0, p - 6))
) / sqrt(6)
X <- matrix(runif(n * p), n)
XB <- X %*% B
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
} else if (name == "M6") {
if (missing(n)) { n <- 200 }
# B ... `p x 3`
B <- diag(p)[, -(3:(p - 1))]
X <- matrix(rnorm(n * p), n)
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
} else if (name == "M7") {
if (missing(n)) { n <- 400 }
# B ... `p x 4`
B <- diag(p)[, -(4:(p - 1))]
# "R"andom "M"ulti"V"ariate "S"tudent
X <- rmvt(n = n, sigma = diag(p), df = 3)
XB <- X %*% B
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
Y <- Y + rlaplace(n, 0, sd)
} else {
stop("Got unknown dataset name.")
}
return(list(X = X, Y = Y, B = B, name = name))
}
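A short usage sketch of the generator (M4 as an example; the component names follow the return value documented above):

set.seed(42)
ds <- dataset("M4")
dim(ds$X)                  # 200 x 20, the default n and p for M4
dim(ds$B)                  # 20 x 2, i.e. k = 2
round(crossprod(ds$B), 3)  # B is orthonormal: equals I_2 up to rounding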

View File

@ -5,9 +5,15 @@ directions <- function(dr, k) {
#' Computes projected training data \code{X} for given dimension `k`. #' Computes projected training data \code{X} for given dimension `k`.
#' #'
#' @param dr Instance of 'cve' as returned by \code{cve}. #' Projects the design matrix \eqn{X} onto the column space of the
#' cve-estimate for the given dimension \eqn{k}.
#'
#' @param dr Instance of \code{'cve'} as returned by \code{\link{cve}}.
#' @param k SDR dimension to use for projection. #' @param k SDR dimension to use for projection.
#' #'
#' @return the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
#' is the cve-estimate for dimension \eqn{k}.
#'
#' @examples #' @examples
#' # create B for simulation (k = 1) #' # create B for simulation (k = 1)
#' B <- rep(1, 5) / sqrt(5) #' B <- rep(1, 5) / sqrt(5)

View File

@ -1,27 +1,22 @@
#' Bandwidth estimation for CVE. #' Bandwidth estimation for CVE.
#' #'
#' Estimates a bandwidth \code{h} according #' If no bandwidth or function for calculating it is supplied, the CVE method
#' defaults to using the following formula (version 1)
#' \deqn{% #' \deqn{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% #' h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} #' h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
#' with \eqn{n} the sample size, \eqn{p} its dimension #' The alternative version 2, used for dimension prediction, is given by
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} #' \deqn{%
#' which is \code{(n-1)/n} times the sample covariance estimate. #' h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}((nObs - 1) / (n - 1))}
#' with \eqn{n} the sample size, \eqn{p} its dimension and the
#' covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
#' covariance estimate.
#' #'
#' @param X data matrix with samples in its rows. #' @param X a \eqn{n\times p}{n x p} matrix with samples in its rows.
#' @param k Dimension of lower dimensional projection. #' @param k Dimension of lower dimensional projection.
#' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper. #' @param nObs number of points in a slice, only for version 2.
#' @param version either \code{1} or \code{2}, where #' @param version either \code{1} or \code{2}.
#' \itemize{
#' \item 1: uses the following formula:
#' \deqn{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#' \item 2: uses
#' \deqn{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}
#' }
#' #'
#' @return Estimated bandwidth \code{h}. #' @return Estimated bandwidth \code{h}.
#' #'
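Spelled out in plain R, the two versions of the formula read as follows (a sketch of the documented formulas, not the package's internal implementation; reading \chi_k^{-1} as the chi-squared quantile function qchisq is an assumption):

bw.v1 <- function(X, k) {
  n <- nrow(X); p <- ncol(X)
  Sigma <- ((n - 1) / n) * cov(X)   # (n-1)/n times the sample covariance
  (2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2
}
bw.v2 <- function(X, k, nObs = sqrt(nrow(X))) {
  n <- nrow(X); p <- ncol(X)
  Sigma <- ((n - 1) / n) * cov(X)
  # chi_k^{-1} read as the quantile function of the chi-squared(k) law.
  (2 * sum(diag(Sigma)) / p) * qchisq((nObs - 1) / (n - 1), df = k)
}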

View File

@ -1,6 +1,9 @@
#' Loss distribution elbow plot. #' Loss distribution elbow plot.
#' #'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. #' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
#' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
#' to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
#' \eqn{L_n(V)}; see the paper for further details.
#' #'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). #' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and #' @param ... Pass through parameters to [\code{\link{plot}}] and
@ -31,6 +34,9 @@
#' # elbow plot #' # elbow plot
#' plot(cve.obj.simple) #' plot(cve.obj.simple)
#' #'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through #' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}}, the standard plot utility. #' as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve #' @method plot cve

View File

@ -1,6 +1,7 @@
#' Predict method for CVE Fits. #' Predict method for CVE Fits.
#' #'
#' Predict responces using reduced data with \code{\link{mars}}. #' Predict response using projected data where the forward model \eqn{g(B'X)}
#' is estimated using \code{\link{mars}}.
#' #'
#' @param object instance of class \code{cve} (result of \code{cve}, #' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}). #' \code{cve.call}).
@ -36,7 +37,7 @@
#' #'
#' # plot prediction against y.test #' # plot prediction against y.test
#' plot(yhat, y.test) #' plot(yhat, y.test)
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. #' @seealso \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
#' #'
#' @rdname predict.cve #' @rdname predict.cve
#' #'

View File

@ -36,10 +36,6 @@ predict_dim_elbow <- function(object) {
# Get dimensions # Get dimensions
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
# Compute persistent data.
i = rep(1:n, n)
j = rep(1:n, each = n)
D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
losses <- vector("double", length(object$res)) losses <- vector("double", length(object$res))
names(losses) <- names(object$res) names(losses) <- names(object$res)
@ -48,16 +44,16 @@ predict_dim_elbow <- function(object) {
# extract dimension specific estimates and dimensions. # extract dimension specific estimates and dimensions.
k <- dr.k$k k <- dr.k$k
V <- dr.k$V V <- dr.k$V
q <- ncol(V) # estimate bandwidth according to the alternative formula.
# estimate bandwidth according alternative formula (see: TODO: see)
h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
# Projected `X` # Projected `X`
XV <- X %*% V XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
# Devectorized distance matrix # Compute distances
# (inefficient in R but fast in C) d2 <- tcrossprod(XQ) # XQ XQ'
D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) d1 <- matrix(diag(d2), n, n)
D <- D.eucl - D D <- d1 - 2 * d2 + t(d1)
# Apply kernel # Apply kernel
# Note: for d = ||Q(X_i - X_j)||, CVE uses the kernel exp(-d^4 / (2 h^2)).
K <- exp((-0.5 / h^2) * D^2) K <- exp((-0.5 / h^2) * D^2)
# sum columns # sum columns
colSumsK <- colSums(K) colSumsK <- colSums(K)
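The refactored code computes all pairwise squared distances of the projected rows with a single cross-product via the identity ||x_i - x_j||^2 = ||x_i||^2 - 2 x_i'x_j + ||x_j||^2; a small sketch checking this against dist():

set.seed(1)
Z <- matrix(rnorm(5 * 3), 5)
d2 <- tcrossprod(Z)                            # Z Z'
d1 <- matrix(diag(d2), 5, 5)
D <- d1 - 2 * d2 + t(d1)                       # squared distances, as above
max(abs(sqrt(D[lower.tri(D)]) - c(dist(Z))))   # ~ 0 up to rounding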
@ -81,11 +77,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
# Get dimensions # Get dimensions
n <- nrow(X) n <- nrow(X)
p <- ncol(X) p <- ncol(X)
# Compute persistent data.
i = rep(1:n, n)
j = rep(1:n, each = n)
D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)
L <- matrix(NA, n, length(object$res)) L <- matrix(NA, n, length(object$res))
colnames(L) <- names(object$res) colnames(L) <- names(object$res)
# Compute per sample losses with alternative bandwidth for each dimension. # Compute per sample losses with alternative bandwidth for each dimension.
@ -93,16 +85,16 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
# extract dimension specific estimates and dimensions. # extract dimension specific estimates and dimensions.
k <- dr.k$k k <- dr.k$k
V <- dr.k$V V <- dr.k$V
q <- ncol(V) # estimate bandwidth according to the alternative formula.
# estimate bandwidth according alternative formula (see: TODO: see)
h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
# Projected `X` # Projected `X`
XV <- X %*% V XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
# Devectorized distance matrix # Compute distances
# (inefficient in R but fast in C) d2 <- tcrossprod(XQ) # XQ XQ'
D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) d1 <- matrix(diag(d2), n, n)
D <- D.eucl - D D <- d1 - 2 * d2 + t(d1)
# Apply kernel # Apply kernel
# Note: for d = ||Q(X_i - X_j)||, CVE uses the kernel exp(-d^4 / (2 h^2)).
K <- exp((-0.5 / h^2) * D^2) K <- exp((-0.5 / h^2) * D^2)
# sum columns # sum columns
colSumsK <- colSums(K) colSumsK <- colSums(K)
@ -130,27 +122,24 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
)) ))
} }
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. #' \code{"TODO: @Lukas"}
#' TODO: rewrite!!!
#' #'
#' @param object instance of class \code{cve} (result of \code{cve}, #' @param object instance of class \code{cve} (result of \code{\link{cve}},
#' \code{cve.call}). #' \code{\link{cve.call}}).
#' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.
#' @param ... ignored. #' @param ... ignored.
#' #'
#' @return list with #' @return list with \code{"k"}, the predicted dimension, and
#' \itemize{ #' method-dependent information.
#' \item MSE: Mean Square Error,
#' \item k: predicted dimensions.
#' }
#' #'
#' @section cv: #' @section Method cv:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @section elbow: #' @section Method elbow:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @section wilcoxon: #' @section Method wilcoxon:
#' Cross-validation ... TODO: #' TODO: \code{"TODO: @Lukas"}.
#' #'
#' @examples #' @examples
#' # create B for simulation #' # create B for simulation

View File

@ -1,5 +1,9 @@
#' Prints a summary of a \code{cve} result. #' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}. #'
#' Prints summary statistics of the output \code{L} from \code{cve} for
#' \code{k = min.dim, ..., max.dim}.
#'
#' @param object Instance of \code{"cve"} as returned by \code{\link{cve}}.
#' @param ... ignored. #' @param ... ignored.
#' #'
#' @examples #' @examples

CVE/R/util.R (new file, 24 lines)
View File

@ -0,0 +1,24 @@
#' Draws a sample from the invariant measure on the Stiefel manifold
#' \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal
#' complement of the column space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
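The two helpers fit together: null(V) completes V to an orthonormal basis of R^p, so V'V = I, N'N = I and V'N = 0 for N = null(V). A quick check (sketch, assuming the package is loaded):

set.seed(1)
V <- rStiefel(6, 2)
N <- null(V)
max(abs(crossprod(V) - diag(2)))   # V is semi-orthogonal, ~ 0
max(abs(crossprod(V, N)))          # N spans the orthogonal complement, ~ 0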

View File

@ -26,7 +26,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\references{ \references{

View File

@ -8,7 +8,7 @@
} }
\arguments{ \arguments{
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or \item{object}{instance of \code{cve} as output from \code{\link{cve}} or
\code{\link{cve.call}}} \code{\link{cve.call}}.}
\item{k}{the SDR dimension.} \item{k}{the SDR dimension.}
@ -18,7 +18,8 @@
dir the matrix of CS or CMS of given dimension dir the matrix of CS or CMS of given dimension
} }
\description{ \description{
Returns the SDR basis matrix for SDR dimension(s). Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
cve-estimate with dimension \eqn{p\times k}{p x k}.
} }
\examples{ \examples{
# set dimensions for simulation model # set dimensions for simulation model

View File

@ -8,31 +8,51 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
} }
\arguments{ \arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic \item{formula}{an object of class \code{"formula"} which is a symbolic
description of the model to be fitted.} description of the model to be fitted, e.g. \eqn{Y\sim X}{Y ~ X}, where
\eqn{Y} is an \eqn{n}-dimensional vector of the response variable and
\eqn{X} is an \eqn{n\times p}{n x p} matrix of the predictors.}
\item{data}{an optional data frame, containing the data for the formula if \item{data}{an optional data frame, containing the data for the formula if
supplied.} supplied, e.g. \code{data <- data.frame(Y, X)}, with dimension
\eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
the environment from which \code{cve} is called.}
\item{method}{specifies the CVE method variation as one of \item{method}{This character string specifies the method of fitting. The
options are
\itemize{ \itemize{
\item "simple" exact implementation as described in the paper listed \item "simple" implementation as described in the paper.
below. \item "weighted" variation with adaptive weighting of slices.
\item "weighted" variation with addaptive weighting of slices. }
}} see the paper for details.}
\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).} \item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}
\item{...}{Parameters passed on to \code{cve.call}.} \item{...}{optional parameters passed on to \code{cve.call}.}
} }
\value{ \value{
an S3 object of class \code{cve} with components: an S3 object of class \code{cve} with components:
\describe{ \describe{
\item{X}{Original training data,} \item{X}{design matrix of the predictors used for calculating the
\item{Y}{Responce of original training data,} cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
the cve-estimate,}
\item{method}{Name of used method,} \item{method}{Name of used method,}
\item{call}{the matched call,} \item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for \item{res}{list of components \code{V, L, B, loss, h} for
each \eqn{k=min.dim,...,max.dim} (dimension).} each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately, such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} is the bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
} }
} }
\description{ \description{
@ -56,7 +76,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\examples{ \examples{
@ -67,7 +87,7 @@ k <- 2
b1 <- rep(1 / sqrt(p), p) b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p) b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2) B <- cbind(b1, b2)
# samplsize # sample size
n <- 200 n <- 200
set.seed(21) set.seed(21)
# create predictor data x ~ N(0, I_p) # create predictor data x ~ N(0, I_p)

View File

@ -10,9 +10,9 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
max.iter = 50L, attempts = 10L, logger = NULL) max.iter = 50L, attempts = 10L, logger = NULL)
} }
\arguments{ \arguments{
\item{X}{data matrix with samples in its rows.} \item{X}{Design matrix with dimension \eqn{n\times p}{n x p}.}
\item{Y}{Responses (1 dimensional).} \item{Y}{numeric array of responses of length \eqn{n}.}
\item{method}{specifies the CVE method variation as one of \item{method}{specifies the CVE method variation as one of
\itemize{ \itemize{
@ -34,38 +34,59 @@ estimated bandwidth.}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given \item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.} only the specified dimension \code{B} matrix is estimated.}
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian \item{momentum}{number in \eqn{[0, 1)} giving the ratio of momentum for the
gradient update with a momentum term.} euclidean gradient update with a momentum term. \code{momentum = 0}
corresponds to normal gradient descent.}
\item{tau}{Initial step-size.} \item{tau}{Initial step-size.}
\item{tol}{Tolerance for break condition.} \item{tol}{Tolerance for break condition.}
\item{slack}{Positive scaling to allow small increases of the loss while \item{slack}{Positive scaling to allow small increases of the loss while
optimizing.} optimizing, i.e. \code{slack = 0.1} allows the target function to
increase up to \eqn{10 \%} in one optimization step.}
\item{gamma}{step-size reduction multiple.} \item{gamma}{step-size reduction multiple. If a gradient step with step size
\code{tau} is not accepted, the step size is reduced to
\code{gamma * tau}.}
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` \item{V.init}{Semi-orthogonal matrix of dimensions
as optimization starting value. (If supplied, \code{attempts} is \code{(ncol(X), ncol(X) - k)} used as starting value in the optimization.
set to 1 and \code{k} to match dimension)} (If supplied, \code{attempts} is set to 0 and \code{k} to match dimension).}
\item{max.iter}{maximum number of optimization steps.} \item{max.iter}{maximum number of optimization steps.}
\item{attempts}{number of arbitrary different starting points.} \item{attempts}{If \code{V.init} is not supplied, the optimization is carried
out \code{attempts} times with starting values drawn from the invariant
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
\item{logger}{a logger function (only for advanced user, significantly slows \item{logger}{a logger function (only for advanced users, slows down the
down the computation).} computation).}
} }
\value{ \value{
an S3 object of class \code{cve} with components: an S3 object of class \code{cve} with components:
\describe{ \describe{
\item{X}{Original training data,} \item{X}{design matrix of the predictors used for calculating the
\item{Y}{Responce of original training data,} cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
the cve-estimate,}
\item{method}{Name of used method,} \item{method}{Name of used method,}
\item{call}{the matched call,} \item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for \item{res}{list of components \code{V, L, B, loss, h} for
each \eqn{k=min.dim,...,max.dim} (dimension).} each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, then \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately, such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} is the bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
} }
} }
\description{ \description{
@ -89,7 +110,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g} zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\examples{ \examples{

CVE/man/dataset.Rd (new file, 127 lines)
View File

@ -0,0 +1,127 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
number 1-7.}
\item{n}{number of samples.}
\item{p}{Dimension of random variable \eqn{X}.}
\item{sd}{standard deviation of the error term \eqn{\epsilon}.}
\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
\code{lambda}), see below.}
}
\value{
List with elements
\itemize{
\item{X}{data, a \eqn{n\times p}{n x p} matrix.}
\item{Y}{response.}
\item{B}{the dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets M1-M7 used in the paper Conditional variance
estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{
The predictors are distributed as
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
distributed as generalized normal distribution with location 0,
shape-parameter 0.5, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}
\section{M2}{
The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
\eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
}
\section{M3}{
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is
\deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
}
\section{M4}{
The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
of \eqn{n = 200} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M5}{
The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
where \eqn{U([0, 1]^p)} is the uniform distribution with
independent components on the \eqn{p}-dimensional hypercube for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
\eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M6}{
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}
\section{M7}{
The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
freedom, for a subspace dimension of \eqn{k = 4} with a default of
\eqn{n = 400} data points.
\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
where \eqn{\epsilon} is distributed as generalized normal distribution with
location 0, shape-parameter 1, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}

View File

@ -8,12 +8,17 @@
\method{directions}{cve}(dr, k) \method{directions}{cve}(dr, k)
} }
\arguments{ \arguments{
\item{dr}{Instance of 'cve' as returned by \code{cve}.} \item{dr}{Instance of \code{'cve'} as returned by \code{\link{cve}}.}
\item{k}{SDR dimension to use for projection.} \item{k}{SDR dimension to use for projection.}
} }
\value{
the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
is the cve-estimate for dimension \eqn{k}.
}
\description{ \description{
Computes projected training data \code{X} for given dimension `k`. Projects the design matrix \eqn{X} onto the column space of the
cve-estimate for the given dimension \eqn{k}.
} }
\examples{ \examples{
# create B for simulation (k = 1) # create B for simulation (k = 1)

View File

@ -4,26 +4,33 @@
\alias{estimate.bandwidth} \alias{estimate.bandwidth}
\title{Bandwidth estimation for CVE.} \title{Bandwidth estimation for CVE.}
\usage{ \usage{
estimate.bandwidth(X, k, nObs) estimate.bandwidth(X, k, nObs, version = 1L)
} }
\arguments{ \arguments{
\item{X}{data matrix with samples in its rows.} \item{X}{a \eqn{n\times p}{n x p} matrix with samples in its rows.}
\item{k}{Dimension of lower dimensional projection.} \item{k}{Dimension of lower dimensional projection.}
\item{nObs}{number of points in a slice, see \eqn{nObs} in CVE paper.} \item{nObs}{number of points in a slice, only for version 2.}
\item{version}{either \code{1} or \code{2}.}
} }
\value{ \value{
Estimated bandwidth \code{h}. Estimated bandwidth \code{h}.
} }
\description{ \description{
Estimates a bandwidth \code{h} according If no bandwidth or function for calculating it is supplied, the CVE method
defaults to using the following formula (version 1)
\deqn{% \deqn{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2} h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
with \eqn{n} the sample size, \eqn{p} its dimension The alternative version 2, used for dimension prediction, is given by
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} \deqn{%
which is \code{(n-1)/n} times the sample covariance estimate. h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
h = (2 * tr(\Sigma) / p) * \chi_k^{-1}((nObs - 1) / (n - 1))}
with \eqn{n} the sample size, \eqn{p} its dimension and the
covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
covariance estimate.
} }
\examples{ \examples{
# set dimensions for simulation model # set dimensions for simulation model

View File

@ -13,7 +13,10 @@
[\code{\link{lines}}]} [\code{\link{lines}}]}
} }
\description{ \description{
Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
\code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
\eqn{L_n(V)}; see the paper for further details.
} }
\examples{ \examples{
# create B for simulation # create B for simulation
@ -41,6 +44,10 @@ cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
# elbow plot # elbow plot
plot(cve.obj.simple) plot(cve.obj.simple)
}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
} }
\seealso{ \seealso{
see \code{\link{par}} for graphical parameters to pass through see \code{\link{par}} for graphical parameters to pass through

View File

@ -20,7 +20,8 @@
predicted response of data \code{newdata}. predicted response of data \code{newdata}.
} }
\description{ \description{
Predict responces using reduced data with \code{\link{mars}}. Predict response using projected data where the forward model \eqn{g(B'X)}
is estimated using \code{\link{mars}}.
} }
\examples{ \examples{
# create B for simulation # create B for simulation
@ -50,5 +51,5 @@ yhat <- predict(cve.obj.simple, x.test, 1)
plot(yhat, y.test) plot(yhat, y.test)
} }
\seealso{ \seealso{
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
} }

View File

@ -2,26 +2,40 @@
% Please edit documentation in R/predict_dim.R % Please edit documentation in R/predict_dim.R
\name{predict_dim} \name{predict_dim}
\alias{predict_dim} \alias{predict_dim}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.} \title{\code{"TODO: @Lukas"}}
\usage{ \usage{
predict_dim(object, ...) predict_dim(object, ..., method = "CV")
} }
\arguments{ \arguments{
\item{object}{instance of class \code{cve} (result of \code{cve}, \item{object}{instance of class \code{cve} (result of \code{\link{cve}},
\code{cve.call}).} \code{\link{cve.call}}).}
\item{...}{ignored.} \item{...}{ignored.}
\item{method}{one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.}
} }
\value{ \value{
list with list with \code{"k"}, the predicted dimension, and
\itemize{ method-dependent information.
\item MSE: Mean Square Error,
\item k: predicted dimensions.
}
} }
\description{ \description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. \code{"TODO: @Lukas"}
} }
\section{Method cv}{
TODO: \code{"TODO: @Lukas"}.
}
\section{Method elbow}{
TODO: \code{"TODO: @Lukas"}.
}
\section{Method wilcoxon}{
TODO: \code{"TODO: @Lukas"}.
}
\examples{ \examples{
# create B for simulation # create B for simulation
B <- rep(1, 5) / sqrt(5) B <- rep(1, 5) / sqrt(5)

View File

@ -2,7 +2,8 @@
% Please edit documentation in R/util.R % Please edit documentation in R/util.R
\name{rStiefel} \name{rStiefel}
\alias{rStiefel} \alias{rStiefel}
\title{Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.} \title{Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.}
\usage{ \usage{
rStiefel(p, q) rStiefel(p, q)
} }
@ -12,10 +13,11 @@ rStiefel(p, q)
\item{q}{col dimension} \item{q}{col dimension}
} }
\value{ \value{
\code{p} times \code{q} semi-orthogonal matrix. \eqn{p \times q}{p x q} semi-orthogonal matrix.
} }
\description{ \description{
Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}. Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.
} }
\examples{ \examples{
V <- rStiefel(6, 4) V <- rStiefel(6, 4)

CVE/man/rgnorm.Rd (new file, 27 lines)
View File

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rgnorm}
\alias{rgnorm}
\title{Generalized Normal Distribution.}
\usage{
rgnorm(n = 1, mu = 0, alpha = 1, beta = 1)
}
\arguments{
\item{n}{Number of generated samples.}
\item{mu}{mean.}
\item{alpha}{scale parameter.}
\item{beta}{shape parameter.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for generalized Normal Distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Generalized_normal_distribution
}
\keyword{internal}

CVE/man/rlaplace.Rd (new file, 25 lines)
View File

@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rlaplace}
\alias{rlaplace}
\title{Laplace distribution}
\usage{
rlaplace(n = 1, mu = 0, sd = 1)
}
\arguments{
\item{n}{Number of generated samples.}
\item{mu}{mean.}
\item{sd}{standard deviation.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for Laplace distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Laplace_distribution
}
\keyword{internal}

CVE/man/rmvnorm.Rd (new file, 29 lines)
View File

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvnorm}
\alias{rmvnorm}
\title{Multivariate Normal Distribution.}
\usage{
rmvnorm(n = 1, mu = rep(0, p), sigma = diag(p))
}
\arguments{
\item{n}{number of samples.}
\item{mu}{mean vector.}
\item{sigma}{covariance matrix.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation for the multivariate normal distribution.
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
}
\examples{
\dontrun{
rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
rmvnorm(20, mu = c(3, -1, 2))
}
}
\keyword{internal}

CVE/man/rmvt.Rd (new file, 34 lines)
View File

@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvt}
\alias{rmvt}
\title{Multivariate t distribution.}
\usage{
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
}
\arguments{
\item{n}{number of samples.}
\item{mu}{mean vector.}
\item{sigma}{a \eqn{k\times k}{k x k} positive definite matrix. If the
degrees of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
\deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
for \eqn{\nu > 2}.}
\item{df}{degrees of freedom \eqn{\nu}.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation from multivariate t distribution (student distribution).
}
\examples{
\dontrun{
rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
rmvt(20, mu = c(3, -1, 2), df = 3)
}
}
\keyword{internal}

View File

@ -7,12 +7,13 @@
\method{summary}{cve}(object, ...) \method{summary}{cve}(object, ...)
} }
\arguments{ \arguments{
\item{object}{Instance of 'cve' as returned by \code{cve}.} \item{object}{Instance of \code{"cve"} as returned by \code{\link{cve}}.}
\item{...}{ignored.} \item{...}{ignored.}
} }
\description{ \description{
Prints a summary of a \code{cve} result. Prints summary statistics of the output \code{L} from \code{cve} for
\code{k = min.dim, ..., max.dim}.
} }
\examples{ \examples{
# create B for simulation # create B for simulation

View File

@ -161,97 +161,3 @@ mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1,
return mat_S; return mat_S;
} }
// int getWorkLen(const int n, const int p, const int q) {
// int mpq; /**< Max of p and q */
// int nn = ((n - 1) * n) / 2;
// if (p > q) {
// mpq = p;
// } else {
// mpq = q;
// }
// if (nn * p < (mpq + 1) * mpq) {
// return 2 * (mpq + 1) * mpq;
// } else {
// return (nn + mpq) * mpq;
// }
// }
// double cost(const unsigned int method,
// const int n,
// const double *Y,
// const double *vecK,
// const double *colSums,
// double *y1, double *L) {
// int i, j, k;
// double tmp, sum;
// for (i = 0; i < n; ++i) {
// y1[i] = Y[i];
// L[i] = Y[i] * Y[i];
// }
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// y1[i] += Y[j] * vecK[k];
// y1[j] += Y[i] * vecK[k];
// L[i] += Y[j] * Y[j] * vecK[k];
// L[j] += Y[i] * Y[i] * vecK[k];
// }
// }
// for (i = 0; i < n; ++i) {
// y1[i] /= colSums[i];
// L[i] /= colSums[i];
// }
// tmp = 0.0;
// if (method == CVE_METHOD_WEIGHTED) {
// sum = 0.0;
// for (i = 0; i < n; ++i) {
// tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]);
// sum += colSums[i];
// }
// return tmp / (sum - (double)n); // TODO: check for division by zero!
// } else {
// for (i = 0; i < n; ++i) {
// tmp += (L[i] -= y1[i] * y1[i]);
// }
// return tmp / (double)n;
// }
// }
// void scaling(const unsigned int method,
// const int n,
// const double *Y, const double *y1, const double *L,
// const double *vecD, const double *vecK,
// const double *colSums,
// double *vecS) {
// int i, j, k, nn = (n * (n - 1)) / 2;
// double tmp;
// if (method == CVE_METHOD_WEIGHTED) {
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// tmp = Y[j] - y1[i];
// vecS[k] = (L[i] - (tmp * tmp));
// tmp = Y[i] - y1[j];
// vecS[k] += (L[j] - (tmp * tmp));
// }
// }
// } else {
// for (k = j = 0; j < n; ++j) {
// for (i = j + 1; i < n; ++i, ++k) {
// tmp = Y[j] - y1[i];
// vecS[k] = (L[i] - (tmp * tmp)) / colSums[i];
// tmp = Y[i] - y1[j];
// vecS[k] += (L[j] - (tmp * tmp)) / colSums[j];
// }
// }
// }
// for (k = 0; k < nn; ++k) {
// vecS[k] *= vecK[k] * vecD[k];
// }
// }
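For reference, the removed `cost` routine above reduces to a few lines of R when written against the full kernel matrix; a sketch (names illustrative; `K` is the n x n kernel matrix with unit diagonal, `Y` the response):

cost_simple <- function(K, Y) {
    cs <- colSums(K)
    y1 <- as.vector(K %*% Y) / cs    # weighted first moments of Y
    y2 <- as.vector(K %*% Y^2) / cs  # weighted second moments of Y
    mean(y2 - y1^2)                  # average of the per-sample variances
}
cost_weighted <- function(K, Y) {
    cs <- colSums(K)
    y1 <- as.vector(K %*% Y) / cs
    L  <- as.vector(K %*% Y^2) / cs - y1^2
    sum((cs - 1) * L) / (sum(cs) - length(Y))  # see the division-by-zero TODO
}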

View File

@ -17,8 +17,8 @@ static const R_CallMethodDef CallEntries[] = {
{NULL, NULL, 0} {NULL, NULL, 0}
}; };
/* Restrict C entrypoints to registered routines. */ /* Restrict C entry points to registered routines. */
void R_initCVE(DllInfo *dll) { void R_init_CVE(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE); R_useDynamicSymbols(dll, FALSE);
} }

View File

@ -914,7 +914,7 @@ mat* laplace(mat *A, double *workMem) {
* \_____/ \_____/ * \_____/ \_____/
* IpA C = ImA B * IpA C = ImA B
* \_______/ * \_______/
* IpA C = Y ==> C = IpA^-1 Y * IpA C = Y ==> C = IpA^-1 Y
* *
* @param A Skew-Symmetric matrix of dimension `(n, n)`. * @param A Skew-Symmetric matrix of dimension `(n, n)`.
* @param B Matrix of dimensions `(n, m)` with `m <= n`. * @param B Matrix of dimensions `(n, m)` with `m <= n`.

View File

@ -1,193 +0,0 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @returns a \eqn{n\times p} matrix with samples in its rows.
#'
#' @examples
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
#' Samples from the multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma a \eqn{k\times k} positive definite matrix. If the degrees of
#' freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#' for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @returns a \eqn{n\times p} matrix with samples in its rows.
#'
#' @examples
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
#' rmvt(20, mu = c(3, -1, 2), 3)
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
if (!missing(sigma)) {
p <- nrow(sigma)
} else if (!missing(mu)) {
mu <- matrix(mu, ncol = 1)
p <- nrow(mu)
} else {
stop("At least one of 'mu' or 'sigma' must be supplied.")
}
if (df == Inf) {
Z <- 1
} else {
Z <- sqrt(df / rchisq(n, df))
}
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
#' Generalized Normal Distribution.
#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
if (alpha <= 0 || beta <= 0) {
stop("alpha and beta must be positive.")
}
lambda <- (1 / alpha)^beta
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
#' Laplace distribution
#' see: https://en.wikipedia.org/wiki/Laplace_distribution
rlaplace <- function(n = 1, mu = 0, sigma = 1) {
U <- runif(n, -0.5, 0.5)
scale <- sigma / sqrt(2)
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
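With this parametrization the scale is sigma / sqrt(2), so `sigma` plays the role of the standard deviation; a quick moment check:

set.seed(1)
x <- rlaplace(100000, mu = 2, sigma = 3)
round(c(mean(x), sd(x)), 1)  # approximately c(2, 3)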
#' Generates test datasets.
#'
#' Provides sample datasets. There are seven different datasets named
#' M1 through M7, described in the paper referenced below.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name one of \code{"M1"}, ..., \code{"M7"}.
#' @param n number of samples (with a dataset-specific default if omitted).
#' @param p dimension of the random variable \code{X}.
#' @param sigma noise standard deviation; for some models also the base of
#'  the covariance \eqn{\Sigma_{ij} = \sigma^{|i - j|}}{Sigma_ij = sigma^|i - j|}.
#' @param ... currently unused.
#'
#' @return List with elements
#' \describe{
#'    \item{X}{data}
#'    \item{Y}{response}
#'    \item{B}{used dimension reduction matrix}
#'    \item{name}{name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with
#' \eqn{\Sigma_{ij} = \sigma^{|i - j|}}{Sigma_ij = sigma^|i - j|},
#' \eqn{k = 1} and a default of \eqn{n = 100} data points; the link is
#' \deqn{g(x) = cos(x)} with generalized normal noise of variance \eqn{1/4}.
#' @section M2:
#' \eqn{X} is a standard normal mixture shifted by \eqn{\pm 1} (mixing
#' probability 0.3), \eqn{k = 1}, default \eqn{n = 100}; the link is
#' \deqn{g(x) = cos(x)} with \eqn{N(0, \sigma^2)}{N(0, sigma^2)} noise.
#' @section M3:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}, \eqn{k = 1}, default
#' \eqn{n = 200}; \deqn{g(x) = 1.5 log(2 + |x|)}
#' @section M4:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} as in M1, \eqn{k = 2},
#' default \eqn{n = 200};
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2}}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2)}
#' @section M5:
#' \eqn{X} uniform on \eqn{[0, 1]^p}, \eqn{k = 2}, default \eqn{n = 200};
#' \deqn{g(x) = cos(\pi x_1)(x_2 + 1)^2}{g(x) = cos(pi x_1)(x_2 + 1)^2}
#' @section M6:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}, \eqn{k = 3}, default
#' \eqn{n = 200}; \deqn{g(x) = x_1^2 + x_2^2 + x_3^2}
#' @section M7:
#' \eqn{X} multivariate \eqn{t_3}, \eqn{k = 4}, default \eqn{n = 400};
#' \deqn{g(x) = x_1 x_2^2 + x_3 x_4} with Laplace noise.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) {
name <- toupper(name)
if (nchar(name) == 1) { name <- paste0("M", name) }
if (name == "M1") {
if (missing(n)) { n <- 100 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
beta <- 0.5
Y <- cos(X %*% B) + rgnorm(n, 0,
alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)),
beta = beta
)
} else if (name == "M2") {
if (missing(n)) { n <- 100 }
prob <- 0.3
lambda <- 1 # dispersion
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
Z <- 2 * rbinom(n, 1, prob) - 1
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
Y <- cos(X %*% B) + rnorm(n, 0, sigma)
} else if (name == "M3") {
if (missing(n)) { n <- 200 }
# B ... `p x 1`
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
X <- matrix(rnorm(n * p), n)
Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2)
} else if (name == "M4") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
)
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
XB <- X %*% B
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2)
} else if (name == "M5") {
if (missing(n)) { n <- 200 }
# B ... `p x 2`
B <- cbind(
c(rep(1, 6), rep(0, p - 6)),
c(rep(c(1, -1), 3), rep(0, p - 6))
) / sqrt(6)
X <- matrix(runif(n * p), n)
XB <- X %*% B
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2)
} else if (name == "M6") {
if (missing(n)) { n <- 200 }
# B ... `p x 3`
B <- diag(p)[, -(3:(p - 1))]
X <- matrix(rnorm(n * p), n)
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2)
} else if (name == "M7") {
if (missing(n)) { n <- 400 }
# B ... `p x 4`
B <- diag(p)[, -(4:(p - 1))]
# "R"andom "M"ulti"V"ariate "S"tudent
X <- rmvt(n = n, sigma = diag(p), df = 3)
XB <- X %*% B
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
Y <- Y + rlaplace(n, 0, sigma)
} else {
stop("Got unknown dataset name.")
}
return(list(X = X, Y = Y, B = B, name = name))
}
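A short usage sketch for the generator above (output shapes follow from the code; default p = 20):

ds <- dataset("M4", n = 100)
dim(ds$X)  # 100 x 20
dim(ds$B)  # 20 x 2, i.e. k = 2
ds$name    # "M4"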

View File

@ -1,82 +0,0 @@
#' Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \code{p} times \code{q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
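The defining property is easy to verify numerically: the columns of a draw are orthonormal.

V <- rStiefel(6, 4)
round(crossprod(V), 12)  # t(V) %*% V is the 4 x 4 identity up to rounding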
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
#' @keywords internal
#' @export
retractStiefel <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefel <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
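And a matching check for `null`, assuming the `rStiefel` defined above:

V <- rStiefel(6, 2)
N <- null(V)
dim(N)                      # 6 x 4
round(crossprod(V, N), 12)  # 2 x 4 zero matrix, i.e. N spans the complement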
#' Creates a (numeric) matrix where each column contains
#' an element to element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
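The construction coincides with `utils::combn` over the same elements, which gives a quick cross-check:

all.equal(elem.pairs(seq.int(2, 5)), combn(seq.int(2, 5), 2))
# TRUE: columns (2,3), (2,4), (2,5), (3,4), (3,5), (4,5)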

View File

@ -1,68 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
\item{n}{number of samples}
\item{B}{SDR basis used for dataset creation if supplied.}
\item{p.mix}{Only for \code{"M4"}, see: below.}
\item{lambda}{Only for \code{"M4"}, see: below.}
\item{p}{Dim. of random variable \code{X}.}
}
\value{
List with elements
\describe{
\item{X}{data}
\item{Y}{response}
\item{B}{Used dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper references below.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
}
\section{M2}{
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
}
\section{M3}{
\deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
}
\section{M4}{
\eqn{X} is a two-component standard normal mixture, shifted by
\eqn{\pm\lambda} with mixing probability \code{p.mix}, \eqn{k = 1} and a
default of \eqn{n = 100} data points; the link is the same as for M3.
}
\section{M5}{
\eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)} with \eqn{k = 1} and a default of
\eqn{n = 42} data points. The link function \eqn{g} is given as
\deqn{g(x) = 2 log(|x_1| + 1) + \epsilon / 2}
}

View File

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element to element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element to element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{projTangentStiefel}
\alias{projTangentStiefel}
\title{Orthogonal projection onto the tangent space of the Stiefel manifold.}
\usage{
projTangentStiefel(V, G)
}
\arguments{
\item{V}{Point on the Stiefel manifold.}
\item{G}{matrix to be projected onto the tangent space at `V`.}
}
\value{
`(p, q)` matrix as element of the tangent space at `V`.
}
\description{
Orthogonal projection onto the tangent space of the Stiefel manifold.
}
\keyword{internal}

View File

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{retractStiefel}
\alias{retractStiefel}
\title{Retraction to the manifold.}
\usage{
retractStiefel(A)
}
\arguments{
\item{A}{matrix.}
}
\value{
`(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
}
\description{
Retraction to the manifold.
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{skew}
\alias{skew}
\title{Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.}
\usage{
skew(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Skew-Symmetric matrix of dim `(p, p)`.
}
\description{
Skew-Symmetric matrix computed from `A` as
\eqn{1/2 (A - A^T)}.
}
\keyword{internal}

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{sym}
\alias{sym}
\title{Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.}
\usage{
sym(A)
}
\arguments{
\item{A}{Matrix of dim `(p, q)`}
}
\value{
Symmetric matrix of dim `(p, p)`.
}
\description{
Symmetric matrix computed from `A` as
\eqn{1/2 (A + A^T)}.
}
\keyword{internal}

View File

@ -1,11 +0,0 @@
Package: CVEpureR
Type: Package
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
Version: 0.1
Date: 2019-08-29
Author: Loki
Maintainer: Loki <loki@no.mail>
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in pure R.
License: GPL-3
Encoding: UTF-8
RoxygenNote: 6.1.1

View File

@ -1,23 +0,0 @@
# Generated by roxygen2: do not edit by hand
S3method(plot,cve)
S3method(summary,cve)
export(cve)
export(cve.call)
export(cve.grid.search)
export(cve_linesearch)
export(cve_sgd)
export(cve_simple)
export(dataset)
export(elem.pairs)
export(estimate.bandwidth)
export(grad)
export(null)
export(rStiefl)
import(stats)
importFrom(graphics,lines)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(stats,model.frame)
importFrom(stats,rbinom)
importFrom(stats,rnorm)

View File

@ -1,265 +0,0 @@
#' Conditional Variance Estimator (CVE)
#'
#' Conditional Variance Estimator for Sufficient Dimension
#' Reduction
#'
#' TODO: And some details
#'
#'
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
#'
#' @docType package
#' @author Loki
#' @useDynLib CVE, .registration = TRUE
"_PACKAGE"
#' Implementation of the CVE method.
#'
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
#' reduction (SDR) method assuming a model
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
#' where B'X is a lower dimensional projection of the predictors.
#'
#' @param formula formula for the regression model defining `X`, `Y`.
#' See: \code{\link{formula}}.
#' @param data data.frame holding data for formula.
#' @param method The methods differ only in the optimization used.
#'  All of them are gradient-based optimizations on a Stiefel manifold.
#' \itemize{
#'      \item "simple" simple reduction of step size.
#'      \item "sgd" stochastic gradient descent.
#' \item TODO: further
#' }
#' @param ... Further parameters depending on the used method.
#' @examples
#' library(CVE)
#'
#' # sample dataset
#' ds <- dataset("M5")
#'
#' # call `cve` with default method (aka "simple")
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
#' # plot optimization history (loss via iteration)
#' plot(dr.simple, main = "CVE M5 simple")
#'
#' # call `cve` with method "linesearch" using `data.frame` as data.
#' data <- data.frame(Y = ds$Y, X = ds$X)
#' # Note: `Y, X` are NOT defined, they are extracted from `data`.
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
#' plot(dr.linesearch, main = "CVE M5 linesearch")
#'
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
#'
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
#' @import stats
#' @importFrom stats model.frame
#' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default
if (missing(data)) {
data <- environment(formula)
} else if (!is.data.frame(data)) {
stop("Parameter 'data' must be a 'data.frame' or missing.")
}
# extract `X`, `Y` from `formula` with `data`
model <- stats::model.frame(formula, data)
X <- as.matrix(model[ ,-1L, drop = FALSE])
Y <- as.double(model[ , 1L])
# pass extracted data on to [cve.call()]
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
# overwrite `call` property from [cve.call()]
dr$call <- match.call()
return(dr)
}
#' @param X data matrix.
#' @param Y response vector.
#' @param nObs parameter for the bandwidth estimation, as described in the paper.
#' @param k guess for the SDR dimension.
#' @param ... method-specific parameters.
#' @rdname cve
#' @export
cve.call <- function(X, Y, method = "simple",
nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL,
tau = 1.0, tol = 1e-3,
epochs = 50L, attempts = 10L,
logger = NULL) {
# parameter checking
if (!(is.matrix(X) && is.numeric(X))) {
stop("Parameter 'X' should be a numeric matrices.")
}
if (!is.numeric(Y)) {
stop("Parameter 'Y' must be numeric.")
}
if (is.matrix(Y) || !is.double(Y)) {
Y <- as.double(Y)
}
if (nrow(X) != length(Y)) {
stop("Rows of 'X' and 'Y' elements are not compatible.")
}
if (ncol(X) < 2) {
stop("'X' is one dimensional, no need for dimension reduction.")
}
if (missing(k) || is.null(k)) {
min.dim <- as.integer(min.dim)
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
} else {
min.dim <- as.integer(k)
max.dim <- as.integer(k)
}
if (min.dim > max.dim) {
stop("'min.dim' bigger 'max.dim'.")
}
if (max.dim >= ncol(X)) {
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
}
if (is.function(h)) {
estimate.bandwidth <- h
h <- NULL
}
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
stop("Initial step-width 'tau' must be positive number.")
} else {
tau <- as.double(tau)
}
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
stop("Break condition tolerance 'tol' must be not negative number.")
} else {
tol <- as.double(tol)
}
if (!is.numeric(epochs) || length(epochs) > 1L) {
stop("Parameter 'epochs' must be positive integer.")
} else if (!is.integer(epochs)) {
epochs <- as.integer(epochs)
}
if (epochs < 1L) {
stop("Parameter 'epochs' must be at least 1L.")
}
if (!is.numeric(attempts) || length(attempts) > 1L) {
stop("Parameter 'attempts' must be positive integer.")
} else if (!is.integer(attempts)) {
attempts <- as.integer(attempts)
}
if (attempts < 1L) {
stop("Parameter 'attempts' must be at least 1L.")
}
if (is.function(logger)) {
loggerEnv <- environment(logger)
} else {
loggerEnv <- NULL
}
# Call specified method.
method <- tolower(method)
call <- match.call()
dr <- list()
for (k in min.dim:max.dim) {
if (missing(h) || is.null(h)) {
# Estimate for each `k` separately; do not overwrite `h` itself,
# otherwise later dimensions would silently reuse the first estimate.
h.k <- estimate.bandwidth(X, k, nObs)
} else if (is.numeric(h) && h > 0.0) {
h.k <- as.double(h)
} else {
stop("Bandwidth 'h' must be a positive numeric.")
}
if (method == 'simple') {
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
X, Y, k, h.k,
tau, tol,
epochs, attempts,
logger, loggerEnv)
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
# } else if (method == 'linesearch') {
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rcg') {
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
# } else if (method == 'momentum') {
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
# } else if (method == 'rmsprob') {
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgdrmsprob') {
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
# } else if (method == 'sgd') {
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
} else {
stop('Got unknown method.')
}
dr.k$B <- null(dr.k$V)
dr.k$loss <- mean(dr.k$L)
dr.k$h <- h.k
dr.k$k <- k
class(dr.k) <- "cve.k"
dr[[k]] <- dr.k
}
# augment result information
dr$method <- method
dr$call <- call
class(dr) <- "cve"
return(dr)
}
#' Plotting helper for objects of class \code{cve}.
#'
#' @param x Object of class \code{cve} (result of [cve()]).
#' @param content Specifies what to plot:
#' \itemize{
#' \item "history" Plots the loss history from stiefel optimization
#' (default).
#' \item ... TODO: add (if there are any)
#' }
#' @param ... Pass through parameters to [plot()] and [lines()]
#'
#' @usage ## S3 method for class 'cve'
#' plot(x, content = "history", ...)
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}} for standard plot utility.
#' @importFrom graphics plot lines points
#' @method plot cve
#' @export
plot.cve <- function(x, ...) {
L <- c()
k <- c()
for (dr.k in x) {
if (inherits(dr.k, 'cve.k')) {
k <- c(k, paste0(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k))
boxplot(L, main = "Loss ...",
xlab = "SDR dimension k",
ylab = expression(L(V, X[i])),
names = k)
}
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#' @method summary cve
#' @export
summary.cve <- function(object, ...) {
cat('Summary of CVE result - Method: "', object$method, '"\n',
'\n',
'Dataset size: ', nrow(object$X), '\n',
'Data Dimension: ', ncol(object$X), '\n',
'SDR Dimension: ', object$k, '\n',
'loss: ', object$loss, '\n',
'\n',
'Called via:\n',
' ',
sep='')
print(object$call)
}

View File

@ -1,169 +0,0 @@
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
#' conditions.
#'
#' @keywords internal
#' @export
cve_linesearch <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
rho1 = 0.1,
rho2 = 0.9,
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X)
p <- ncol(X)
q <- p - k
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Matrix of vectorized indices. (vec(index) -> seq)
index <- matrix(seq(n * n), n, n)
lower <- index[lower.tri(index)]
upper <- t(index)[lower]
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Directional derivative of the loss at current position, given
# as `Tr(G^T \cdot A \cdot V)`.
loss.prime <- -0.5 * norm(A, type = 'F')^2
# Linesearch
tau.upper <- Inf
tau.lower <- 0
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
# Apply learning rate `tau`.
A.tau <- (tau / 2) * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
inv <- solve(I_p + A.tau)
V.tau <- inv %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- Inf # aka loss.tau
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
# Armijo condition.
if (loss > loss.last + (rho1 * tau * loss.prime)) {
tau.upper <- tau
tau <- (tau.lower + tau.upper) / 2
next()
}
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
# Wolfe condition.
if (loss.prime.tau < rho2 * loss.prime) {
tau.lower <- tau
if (tau.upper == Inf) {
tau <- 2 * tau.lower
} else {
tau <- (tau.lower + tau.upper) / 2
}
} else {
break()
}
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Final call to the logger before stopping optimization
if (is.function(logger)) {
G <- G.tau
logger(environment())
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
G <- G.tau
# Log after taking current step.
if (is.function(logger)) {
logger(environment())
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}
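The curvilinear step above relies on the Cayley transform: for skew-symmetric `A`, `(I + tau/2 A)^{-1} (I - tau/2 A)` is orthogonal, so `V.tau` stays on the Stiefel manifold. A small standalone check (all names local to this sketch):

p <- 6; q <- 2; tau <- 0.5
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # start on the manifold
G <- matrix(rnorm(p * q), p, q)            # stand-in for a gradient
A <- (G %*% t(V)) - (V %*% t(G))           # skew-symmetric by construction
A.tau <- (tau / 2) * A
V.tau <- solve(diag(p) + A.tau) %*% ((diag(p) - A.tau) %*% V)
round(crossprod(V.tau), 12)                # identity: still semi-orthogonal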

View File

@ -1,139 +0,0 @@
#' Implementation of the CVE method as gradient descent with momentum on the Stiefel manifold.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_momentum <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A <- projTangentStiefl(V, G)
# Momentum update.
M <- A + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- retractStiefl(V - tau * M)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
# Check if step is appropriate, iff not reduce learning rate.
if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2
next() # Keep position and try with smaller `tau`.
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Call the logger one last time before stopping.
if (is.function(logger)) {
logger(environment())
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Call logger after taking a step.
if (is.function(logger)) {
logger(environment())
}
# Compute gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,179 +0,0 @@
#' Implementation of the CVE method as a Riemannian conjugate gradient method.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rcg <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-4,
rho = 1e-4, # For Armijo condition.
slack = 0,
epochs = 50L,
attempts = 10L,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
A.last <- A
W <- -A
Z <- W %*% V
# Compute directional derivative.
loss.prime <- sum(G * Z) # Tr(G^T Z)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
## Start optimization loop.
for (epoch in 1:epochs) {
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset `tau` for step-size selection.
tau <- tau.init
for (iter in 1:max.linesearch.iter) {
V.tau <- retractStiefl(V + tau * Z)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h,
loss.only = TRUE, persistent = TRUE)
# Check Armijo condition.
if (loss <= loss.last + (rho * tau * loss.prime)) {
break() # Iff fulfilled stop linesearch.
}
# Reduce step-size and continue linesearch.
tau <- tau / 2
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step with found step-size
V <- V.tau
loss.last <- loss
# Call logger.
if (is.function(logger)) {
logger(environment())
}
# Check break condition.
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol) {
break()
}
# Compute Gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Store last `A` for `beta` computation.
A.last <- A
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Check 2. break condition.
if (norm(A, type = 'F') < tol) {
break()
}
# New directional derivative.
loss.prime <- sum(G * Z)
# Reset beta if needed.
if (loss.prime < 0) {
# Compute `beta` as described in paper.
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
if (beta.PR < -beta.FR) {
beta <- -beta.FR
} else if (abs(beta.PR) < beta.FR) {
beta <- beta.PR
} else if (beta.PR > beta.FR) {
beta <- beta.FR
} else {
beta <- 0
}
} else {
beta <- 0
}
# Update direction.
W <- -A + beta * W
Z <- W %*% V
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,121 +0,0 @@
#' Implementation of the CVE method using an RMSprop-style adaptive gradient on the Stiefel manifold.
#'
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
#' Transport for Optimization on the Stiefel Manifold
#' @keywords internal
#' @export
cve_rmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.1,
tol = 1e-4,
rho = 0.1, # Momentum update.
slack = 0,
epochs = 50L,
attempts = 10L,
epsilon = 1e-7,
max.linesearch.iter = 20L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
error <- NA
logger(environment())
}
M <- matrix(0, p, q)
## Start optimization loop.
for (epoch in 1:epochs) {
# Compute gradient and loss at current position.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Projected gradient.
A <- projTangentStiefl(V, G)
# Projected element-wise squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Perform step.
V <- V.tau
# Call logger after taking a step.
if (is.function(logger)) {
# Set tau to a step-size estimate (only for logging)
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
logger(environment())
}
# Check break condition.
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol) {
break()
}
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,129 +0,0 @@
#' Stochastic gradient descent implementation of the CVE method, taking
#' gradient steps on random batches of the data.
#'
#' @keywords internal
#' @export
cve_sgd <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.01,
tol = 1e-3,
epochs = 50L,
batch.size = 16L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init a list of data indices (shuffled for batching).
indices <- seq(n)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Keep track of last `V` for computing error after an epoch.
V.last <- V
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
error <- NA
epoch <- 0
logger(environment())
}
# Repeat `epochs` times
for (epoch in 1:epochs) {
# Shuffle batches
batch.shuffle <- sample(indices)
# Make a step for each batch.
for (batch.start in seq(1, n, batch.size)) {
# Select batch data indices.
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
batch <- batch.shuffle[batch.start:batch.end]
# Compute batch gradient.
loss <- NULL
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
# Cayley transform matrix.
A <- (G %*% t(V)) - (V %*% t(G))
# Apply learning rate `tau`.
A.tau <- tau * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
}
# And the error for the history.
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
V.last <- V
if (is.function(logger)) {
# Compute loss at end of epoch for logging.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
logger(environment())
}
# Check break condition.
if (error < tol) {
break()
}
}
# Compute actual loss after finishing for comparing multiple attempts.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
# After each attempt, check if last attempt reached a better result.
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}
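How the inner loop above partitions the shuffled indices, in isolation (n = 10 and batch.size = 4 purely for illustration):

n <- 10; batch.size <- 4
batch.shuffle <- sample(n)
lapply(seq(1, n, batch.size), function(batch.start) {
    batch.end <- min(batch.start + batch.size - 1, n)
    batch.shuffle[batch.start:batch.end]
})
# three batches of sizes 4, 4 and 2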

View File

@ -1,133 +0,0 @@
#' Stochastic gradient descent implementation of the CVE method with
#' RMSprop-style adaptive step sizes.
#'
#' @keywords internal
#' @export
cve_sgdrmsprob <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 0.1,
tol = 1e-4,
rho = 0.1,
epochs = 50L,
batch.size = 16L,
attempts = 10L,
epsilon = 1e-7,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init a list of data indices (shuffled for batching).
indices <- seq(n)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Keep track of last `V` for computing error after an epoch.
V.last <- V
if (is.function(logger)) {
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
error <- NA
epoch <- 0
logger(environment())
}
M <- matrix(0, p, q)
# Repeat `epochs` times
for (epoch in 1:epochs) {
# Shuffle batches
batch.shuffle <- sample(indices)
# Make a step for each batch.
for (batch.start in seq(1, n, batch.size)) {
# Select batch data indices.
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
batch <- batch.shuffle[batch.start:batch.end]
# Compute batch gradient.
loss <- NULL
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
# Projected gradient.
A <- projTangentStiefl(V, G)
# Projected element-wise squared gradient.
Asq <- projTangentStiefl(V, G * G)
# Momentum update.
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
# Parallel transport (on Stiefel manifold) into direction of `G`.
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
}
# And the error for the history.
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
V.last <- V
if (is.function(logger)) {
# Compute loss at end of epoch for logging.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
logger(environment())
}
# Check break condition.
if (error < tol) {
break()
}
}
# Compute actual loss after finishing for comparing multiple attempts.
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
# After each attempt, check if last attempt reached a better result.
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,139 +0,0 @@
#' Simple implementation of the CVE method. 'Simple' means that this method is
#' a classic GD method using no further tricks.
#'
#' @keywords internal
#' @export
cve_simple <- function(X, Y, k,
nObs = sqrt(nrow(X)),
h = NULL,
tau = 1.0,
tol = 1e-3,
slack = 0,
epochs = 50L,
attempts = 10L,
logger = NULL
) {
# Set the environment of `grad` so that it can find, and manipulate,
# this function's local variables.
environment(grad) <- environment()
# Get dimensions.
n <- nrow(X) # Number of samples.
p <- ncol(X) # Data dimensions
q <- p - k # Complement dimension of the SDR space.
# Save initial learning rate `tau`.
tau.init <- tau
# Adapt tolerance for the break condition.
tol <- sqrt(2 * q) * tol
# Estimate bandwidth if not given.
if (missing(h) || !is.numeric(h)) {
h <- estimate.bandwidth(X, k, nObs)
}
# Compute persistent data.
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
# Identity matrix.
I_p <- diag(1, p)
# Init tracking of current best (according multiple attempts).
V.best <- NULL
loss.best <- Inf
# Start loop for multiple attempts.
for (attempt in 1:attempts) {
# Reset learning rate `tau`.
tau <- tau.init
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
# optimization start value.
V <- rStiefl(p, q)
# Initial loss and gradient.
loss <- Inf
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
# Set last loss (aka, loss after applying the step).
loss.last <- loss
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
# Call logger with initial values before starting optimization.
if (is.function(logger)) {
logger(0L, attempt, loss, V, tau)
}
## Start optimization loop.
for (epoch in 1:epochs) {
# Apply learning rate `tau`.
A.tau <- tau * A
# Parallel transport (on Stiefel manifold) into direction of `G`.
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
# Loss at position after a step.
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
# Check if step is appropriate, iff not reduce learning rate.
if ((loss - loss.last) > slack * loss.last) {
tau <- tau / 2
next() # Keep position and try with smaller `tau`.
}
# Compute error.
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
# Check break condition (epoch check to skip ignored gradient calc).
# Note: the division by `sqrt(2 * q)` is included in `tol`.
if (error < tol || epoch >= epochs) {
# take last step and stop optimization.
V <- V.tau
# Call the logger one last time before stopping.
if (is.function(logger)) {
logger(epoch, attempt, loss, V, tau)
}
break()
}
# Perform the step and remember previous loss.
V <- V.tau
loss.last <- loss
# Call logger after taking a step.
if (is.function(logger)) {
logger(epoch, attempt, loss, V, tau)
}
# Compute gradient at new position.
G <- grad(X, Y, V, h, persistent = TRUE)
# Cayley transform matrix `A`
A <- (G %*% t(V)) - (V %*% t(G))
}
# Check if current attempt improved previous ones
if (loss < loss.best) {
loss.best <- loss
V.best <- V
}
}
return(list(
loss = loss.best,
V = V.best,
B = null(V.best),
h = h
))
}

View File

@ -1,109 +0,0 @@
#' Generates test datasets.
#'
#' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5 described in the paper references below.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
#' @param n number of samples
#' @param p Dim. of random variable \code{X}.
#' @param p.mix Only for \code{"M4"}, see: below.
#' @param lambda Only for \code{"M4"}, see: below.
#'
#' @return List with elements
#' \describe{
#' \item{X}{data}
#' \item{Y}{response}
#' \item{B}{Used dim-reduction matrix}
#' \item{name}{Name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
#' @section M2:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
#' @section M3:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 1} and a
#' default of \eqn{n = 100} data points. The link function \eqn{g} is given as
#' \deqn{g(x) = cos(x_1) + 0.5\epsilon}{g(x) = cos(x_1) + 0.5 epsilon}
#' @section M4:
#' \eqn{X} is a two-component standard normal mixture, shifted by
#' \eqn{\pm\lambda} with mixing probability \code{p.mix}, \eqn{k = 1} and a
#' default of \eqn{n = 100} data points; the link is the same as for M3.
#' @section M5:
#' \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)} with \eqn{k = 1} and a default of
#' \eqn{n = 42} data points. The link function \eqn{g} is given as
#' \deqn{g(x) = 2 log(|x_1| + 1) + 0.5\epsilon}{g(x) = 2 log(|x_1| + 1) + 0.5 epsilon}
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
# validate parameters
stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
# set default values if not supplied
if (missing(n)) {
n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
}
if (missing(B)) {
p <- 12
if (name == "M1") {
B <- cbind(
c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
) / sqrt(6)
} else if (name == "M2") {
B <- cbind(
c(c(1, 0), rep(0, 10)),
c(c(0, 1), rep(0, 10))
)
} else {
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
}
} else {
p <- dim(B)[1]
# validate col. nr to match dataset `k = dim(B)[2]`
stopifnot(
(name %in% c("M1", "M2") && dim(B)[2] == 2) ||
(name %in% c("M3", "M4", "M5") && dim(B)[2] == 1)
)
}
# set link function `g` for model `Y ~ g(B'X) + epsilon`
if (name == "M1") {
g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
} else if (name == "M2") {
g <- function(BX) { BX[1] * BX[2]^2 }
} else if (name %in% c("M3", "M4")) {
g <- function(BX) { cos(BX[1]) }
} else { # name == "M5"
g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
}
# compute X
if (name != "M4") {
# compute the root of the covariance matrix according to the dataset
if (name %in% c("M1", "M3")) {
# Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
# `\Sigma_{i, j} = 0.5^{|i - j|}`.
Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
# decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
Sigma.root <- chol(Sigma)
} else { # name %in% c("M2", "M5")
Sigma.root <- diag(rep(1, p)) # d-dim identity
}
# data `X` as multivariate random normal variable with
# variance matrix `Sigma`.
X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
} else { # name == "M4"
X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
}
# response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 0.25)` (sd 0.5)
Y <- apply(X, 1, function(X_i) {
g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
})
return(list(X = X, Y = Y, B = B, name = name))
}

View File

@ -1,27 +0,0 @@
#' Estimated bandwidth for CVE.
#'
#' Estimates a proper bandwidth \code{h} according to
#' \deqn{%
#' h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
#' h = qchisq( (nObs - 1)/(n - 1), k ) 2 tr(Sigma) / p}
#'
#' @param X data matrix of dimension (n x p) with n data points X_i of
#' dimension p; each row represents one data point.
#' @param k Guess for rank(B).
#' @param nObs Either a numeric or a function. If specified as a numeric value
#' it is used directly in the computation of the bandwidth. If it is a
#' function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default if not
#' supplied at all is \code{nObs <- nrow(x)^0.5}.
#'
#' @seealso [\code{\link{qchisq}}]
#' @export
estimate.bandwidth <- function(X, k, nObs) {
n <- nrow(X)
p <- ncol(X)
X_centered <- scale(X, center=TRUE, scale=FALSE)
Sigma <- (1 / n) * t(X_centered) %*% X_centered
quantil <- qchisq((nObs - 1) / (n - 1), k)
return(2 * quantil * sum(diag(Sigma)) / p)
}
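A worked evaluation of the formula above, using the default nObs = sqrt(n) convention of the callers:

set.seed(1)
X <- matrix(rnorm(100 * 5), 100, 5)
estimate.bandwidth(X, k = 2, nObs = sqrt(100))
# the same value, spelled out from the definition:
Xc <- scale(X, center = TRUE, scale = FALSE)
2 * qchisq((sqrt(100) - 1) / (100 - 1), 2) * sum(diag(crossprod(Xc) / 100)) / 5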

View File

@ -1,82 +0,0 @@
#' Compute the gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Response.
#' @param V Position to compute the gradient at, i.e. a point on the Stiefel manifold.
#' @param h Bandwidth
#' @param loss.out Iff \code{TRUE} loss will be written to parent environment.
#' @param loss.only Boolean to only compute the loss; if \code{TRUE} a single
#' loss value is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations shall
#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only
#' intended for internal usage by carefully aligned functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
loss.out = FALSE,
loss.only = FALSE,
persistent = FALSE) {
# Get number of samples and dimension.
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
# Compute lookup indices for symmetry, lower/upper
# triangular parts and vectorization.
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# Index of vectorized matrix, for lower and upper triangular part.
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
# Create all pairwise differences of rows of `X`.
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
# Vectorized distance matrix `D`.
vecD <- colSums(tcrossprod(Q, X_diff)^2)
# Create Kernel matrix (aka. apply kernel to distances)
K <- matrix(1, n, n) # `exp(0) == 1`
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
# Weighted `Y` momentums
colSumsK <- colSums(K)
y1 <- (K %*% Y) / colSumsK
y2 <- (K %*% Y^2) / colSumsK
# Per example loss `L(V, X_i)`
L <- y2 - y1^2
if (loss.only) {
return(mean(L))
}
if (loss.out) {
loss <<- mean(L)
}
# Compute scaling vector `vecS` for `X_diff`.
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
tmp <- as.vector(L) - tmp
tmp <- tmp * K / colSumsK
vecS <- (tmp + t(tmp))[lower] * vecD
# The gradient.
# 1. The `crossprod(A, B)` is equivalent to `t(A) %*% B`,
# 2. `(X_diff %*% V) * vecS` is first a matrix-matrix mult. and then using
# recycling to scale each row with the values of `vecS`.
# Note that `vecS` is a vector and that `R` uses column-major ordering
# of matrices.
# (See: notes for more details)
# TODO: Depending on n, p, q decide which version to take (for current
# datasets "inner" is faster, see: notes).
# inner = crossprod(X_diff, X_diff * vecS) %*% V,
# outer = crossprod(X_diff, (X_diff %*% V) * vecS)
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}
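A finite-difference spot check of the returned `G` (treating `V` as a free matrix, i.e. ignoring the manifold constraint; agreement is approximate and assumes the helpers of this package are loaded):

set.seed(1)
n <- 30; p <- 4; q <- 2
X <- matrix(rnorm(n * p), n)
Y <- rnorm(n)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
G <- grad(X, Y, V, h = 1)
dV <- matrix(rnorm(p * q), p, q)
eps <- 1e-6
fd <- (grad(X, Y, V + eps * dV, h = 1, loss.only = TRUE) -
       grad(X, Y, V - eps * dV, h = 1, loss.only = TRUE)) / (2 * eps)
c(directional = sum(G * dV), finite.diff = fd)  # should roughly agree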

View File

@ -1,43 +0,0 @@
#' Performs a grid search for parameters over a parameter grid.
#' @examples
#' args <- list(
#' h = c(0.05, 0.1, 0.2),
#' method = c("simple", "sgd"),
#' tau = c(0.5, 0.1, 0.01)
#' )
#' cve.grid.search(X, Y, k, args)
#' @export
cve.grid.search <- function(X, Y, k, args) {
args$stringsAsFactors = FALSE
args$KEEP.OUT.ATTRS = FALSE
grid <- do.call(expand.grid, args)
grid.length <- length(grid[[1]])
print(grid)
for (i in 1:grid.length) {
arguments <- as.list(grid[i, ])
# Set required arguments
arguments$X <- X
arguments$Y <- Y
arguments$k <- k
# print(arguments)
dr <- do.call(cve.call, arguments)
print(dr$loss)
}
}
# ds <- dataset()
# X <- ds$X
# Y <- ds$Y
# (k <- ncol(ds$B))
# args <- list(
# h = c(0.05, 0.1, 0.2),
# method = c("simple", "sgd"),
# tau = c(0.5, 0.1, 0.01),
# attempts = c(1L)
# )
# cve.grid.search(X, Y, k, args)

View File

@ -1,82 +0,0 @@
#' Samples uniformly from the Stiefel manifold.
#'
#' @param p row dim.
#' @param q col dim.
#' @return `(p, q)` semi-orthogonal matrix
#' @examples
#' V <- rStiefl(6, 4)
#' @export
rStiefl <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefl manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
return(qr.Q(qr(A)))
}
#' Skew-Symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Skew-Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
0.5 * (A - t(A))
}
#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, q)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
0.5 * (A + t(A))
}
#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
Q <- diag(1, nrow(V)) - V %*% t(V)
return(Q %*% G + V %*% skew(t(V) %*% G))
}
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
}
#' Creates a (numeric) matrix where each column contains
#' an element to element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
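
# A minimal self-check sketch (not from the original file) for the helpers
# above, assuming they are loaded in the current session.
p <- 6; q <- 4
A <- matrix(rnorm(p * p), p, p)
stopifnot(all.equal(skew(A) + sym(A), A))       # A = skew(A) + sym(A)
V <- rStiefel(p, q)
W <- null(V)                                    # basis of the null space
stopifnot(all.equal(crossprod(cbind(V, W)), diag(1, p)))
G <- projTangentStiefel(V, matrix(rnorm(p * q), p, q))
stopifnot(all.equal(t(V) %*% G, -t(G) %*% V))   # t(V) G is skew-symmetric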

View File

@ -1,2 +0,0 @@
runtime_test Runtime comparison of CVE against MAVE for M1 - M5 datasets.
logging Example of a logger function for cve algorithm analysis.

View File

@ -1,43 +0,0 @@
library(CVEpureR)
# Setup histories.
(epochs <- 50)
(attempts <- 10)
loss.history <- matrix(NA, epochs + 1, attempts)
error.history <- matrix(NA, epochs + 1, attempts)
tau.history <- matrix(NA, epochs + 1, attempts)
true.error.history <- matrix(NA, epochs + 1, attempts)
# Create a dataset
ds <- dataset("M1")
X <- ds$X
Y <- ds$Y
B <- ds$B # the true `B`
(k <- ncol(ds$B))
# True projection matrix.
P <- B %*% solve(t(B) %*% B) %*% t(B)
# Define the logger for the `cve()` method.
logger <- function(env) {
    # Note the `<<-` assignment!
loss.history[env$epoch + 1, env$attempt] <<- env$loss
error.history[env$epoch + 1, env$attempt] <<- env$error
tau.history[env$epoch + 1, env$attempt] <<- env$tau
# Compute true error by comparing to the true `B`
B.est <- null(env$V) # Function provided by CVE
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
true.error.history[env$epoch + 1, env$attempt] <<- true.error
}
# Perform SDR for ONE `k`.
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
# Plot histories
par(mfrow = c(2, 2))
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
main = 'loss', ylab = expression(L(V[iter])))
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
main = 'error', ylab = 'error')
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
main = 'tau', ylab = 'tau')
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
main = 'true error', ylab = 'true error')

View File

@ -1,89 +0,0 @@
# Usage:
# ~$ Rscript runtime_test.R
library(CVEpureR) # load CVE
#' Writes progress to console.
tell.user <- function(name, start.time, i, length) {
cat("\rRunning Test (", name, "):",
i, "/", length,
" - elapsed:", format(Sys.time() - start.time), "\033[K")
}
subspace.dist <- function(B1, B2){
P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
return(norm(P1 - P2, type = 'F'))
}
# Number of simulations
SIM.NR <- 50
# maximal number of iterations in curvilinear search algorithm
MAXIT <- 50
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10
# set names of datasets
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
# Setup error and time tracking variables
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
colnames(time) <- colnames(error)
# only for telling user (to stdout)
count <- 0
start.time <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
# Repeat for each dataset.
for (name in dataset.names) {
count <- count + 1
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
# Create a new dataset
ds <- dataset(name)
# Prepare X, Y and combine to data matrix
Y <- ds$Y
X <- ds$X
data <- cbind(Y, X)
# get dimensions
dim <- ncol(X)
truedim <- ncol(ds$B)
for (method in methods) {
dr.time <- system.time(
dr <- cve.call(X, Y,
method = method,
k = truedim,
attempts = ATTEMPTS
)
)
dr <- dr[[truedim]]
key <- paste0(name, '-', method)
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
time[sim, key] <- dr.time["elapsed"]
}
}
}
cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))
at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
ylab = "Error",
las = 2,
at = at
)
boxplot(time,
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
ylab = "Time [sec]",
las = 2,
at = at
)

Binary file not shown.

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}

View File

@ -1,71 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{cve}
\alias{cve}
\alias{cve.call}
\title{Implementation of the CVE method.}
\usage{
cve(formula, data, method = "simple", max.dim = 10, ...)
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
max.dim = 10, k, ...)
}
\arguments{
\item{formula}{Formula for the regression model defining `X`, `Y`.
See: \code{\link{formula}}.}
\item{data}{data.frame holding data for formula.}
\item{method}{The methods differ only in the optimization used.
All of them are Gradient based optimization on a Stiefel manifold.
\itemize{
\item "simple" Simple reduction of stepsize.
\item "sgd" stocastic gradient decent.
\item TODO: further
}}
\item{...}{Further parameters depending on the used method.}
\item{X}{Data}
\item{Y}{Responses}
\item{nObs}{bandwidth tuning parameter as described in the paper.}
\item{k}{guess for the SDR dimension.}
\item{...}{Method specific parameters.}
}
\description{
Conditional Variance Estimator (CVE) is a novel sufficient dimension
reduction (SDR) method assuming a model
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
where B'X is a lower dimensional projection of the predictors.
}
\examples{
library(CVE)
# sample dataset
ds <- dataset("M5")
# call `cve` with default method (aka "simple")
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
# plot optimization history (loss via iteration)
plot(dr.simple, main = "CVE M5 simple")
# call `cve` with method "linesearch" using `data.frame` as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: `Y, X` are NOT defined, they are extracted from `data`.
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
plot(dr.linesearch, main = "CVE M5 linesearch")
}
\references{
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\seealso{
\code{\link{formula}}. For a complete parameters list (dependent on
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
}

View File

@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gridSearch.R
\name{cve.grid.search}
\alias{cve.grid.search}
\title{Performs a grid search for parameters over a parameter grid.}
\usage{
cve.grid.search(X, Y, k, args)
}
\description{
Performs a grid search for parameters over a parameter grid.
}
\examples{
args <- list(
h = c(0.05, 0.1, 0.2),
method = c("simple", "sgd"),
tau = c(0.5, 0.1, 0.01)
)
cve.grid.search(X, Y, k, args)
}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_linesearch.R
\name{cve_linesearch}
\alias{cve_linesearch}
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.}
\usage{
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
}
\description{
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_sgd.R
\name{cve_sgd}
\alias{cve_sgd}
\title{Implementation of the CVE method using stochastic gradient descent
(SGD) with mini-batches.}
\usage{
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
logger = NULL)
}
\description{
Implementation of the CVE method using stochastic gradient descent (SGD)
with mini-batches of size \code{batch.size}.
}
\keyword{internal}

View File

@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_simple.R
\name{cve_simple}
\alias{cve_simple}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.}
\usage{
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.
}
\keyword{internal}

View File

@ -1,64 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
\item{n}{number of samples}
\item{p.mix}{Only for \code{"M4"}, see: below.}
\item{lambda}{Only for \code{"M4"}, see: below.}
\item{p}{Dim. of random variable \code{X}.}
}
\value{
List with elements
\itemize{
\item{X}{data}
\item{Y}{response}
\item{B}{Used dim-reduction matrix}
\item{name}{Name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper references below.
The general model is given by:
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
}
\section{M1}{
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
}
\section{M2}{
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
}
\section{M3}{
TODO:
}
\section{M4}{
TODO:
}
\section{M5}{
TODO:
}
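
# A minimal sketch (not from the original file) of the M1 model above,
# assuming Sigma = I_p and p = 12 (neither is specified here); k = 2 and
# the default n = 200 follow the description.
n <- 200; p <- 12
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))  # orthonormal (p, 2)
X <- matrix(rnorm(n * p), n, p)                          # X ~ N_p(0, I_p)
XB <- X %*% B
Y <- XB[, 1] / (0.5 + (XB[, 2] + 1.5)^2) + 0.5 * rnorm(n)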

View File

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element to element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element to element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}

View File

@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimateBandwidth.R
\name{estimate.bandwidth}
\alias{estimate.bandwidth}
\title{Estimated bandwidth for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
}
\arguments{
\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension
p. Therefore each row represents a data point of dimension p.}
\item{k}{Guess for rank(B).}
\item{nObs}{Either numeric or a function. If specified as a numeric value
it is used in the computation of the bandwidth directly. If it is a function,
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
supplied is to use \code{nObs <- nrow(x)^0.5}.}
}
\description{
Estimates a proper bandwidth \code{h} according to
\deqn{%
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
}
\seealso{
[\code{\link{qchisq}}]
}
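
# A minimal sketch (not from the original file) of the bandwidth formula
# above, assuming Sigma is replaced by the sample covariance of X; the
# function name is hypothetical.
estimate.bandwidth.sketch <- function(X, k, nObs = sqrt(nrow(X))) {
    n <- nrow(X)
    p <- ncol(X)
    qchisq((nObs - 1) / (n - 1), p - k) * 2 * sum(diag(cov(X))) / p
}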

View File

@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gradient.R
\name{grad}
\alias{grad}
\title{Compute the gradient of `L(V)` given a dataset `X`.}
\usage{
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
persistent = FALSE)
}
\arguments{
\item{X}{Data matrix.}
\item{Y}{Response.}
\item{V}{Position to compute the gradient at, i.e. a point on the Stiefel manifold.}
\item{h}{Bandwidth.}
\item{loss.out}{If \code{TRUE} the loss will be written to the parent environment.}
\item{loss.only}{Boolean to only compute the loss, if \code{TRUE} a single
value loss is returned and \code{envir} is ignored.}
\item{persistent}{Determines if data indices and dependent calculations shall
be reused from the parent environment. ATTENTION: Do NOT set this flag, it is
only intended for internal usage by carefully aligned functions!}
}
\description{
Compute the gradient of `L(V)` given a dataset `X`.
}
\keyword{internal}
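
# A minimal usage sketch (not from the original file) on random data; the
# `CVEpureR:::` access and the bandwidth value h = 0.5 are assumptions for
# illustration only.
library(CVEpureR)
n <- 100; p <- 6; q <- 4
X <- matrix(rnorm(n * p), n, p)
Y <- rnorm(n)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
G <- CVEpureR:::grad(X, Y, V, h = 0.5)  # (p, q) gradient of L at V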

View File

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{null}
\alias{null}
\title{Null space basis of given matrix `V`}
\usage{
null(V)
}
\arguments{
\item{V}{`(p, q)` matrix}
}
\value{
Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
}
\description{
Null space basis of given matrix `V`
}
\keyword{internal}

View File

@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{plot.cve}
\alias{plot.cve}
\title{Plotting helper for objects of class \code{cve}.}
\usage{
## S3 method for class 'cve'
plot(x, content = "history", ...)
}
\arguments{
\item{x}{Object of class \code{cve} (result of [cve()]).}
\item{...}{Pass through parameters to [plot()] and [lines()]}
\item{content}{Specifies what to plot:
\itemize{
\item "history" Plots the loss history from stiefel optimization
(default).
\item ... TODO: add (if there are any)
}}
}
\description{
Plotting helper for objects of class \code{cve}.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}} for standard plot utility.
}

View File

@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{rStiefel}
\alias{rStiefel}
\title{Samples uniformly from the Stiefel manifold}
\usage{
rStiefel(p, q)
}
\arguments{
\item{p}{row dim.}
\item{q}{col dim.}
}
\value{
`(p, q)` semi-orthogonal matrix
}
\description{
Samples uniformly from the Stiefel manifold
}
\examples{
V <- rStiefel(6, 4)
}

View File

@ -1,14 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{summary.cve}
\alias{summary.cve}
\title{Prints a summary of a \code{cve} result.}
\usage{
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of 'cve' as return of \code{cve}.}
}
\description{
Prints a summary of a \code{cve} result.
}

View File

@ -1,479 +0,0 @@
library(microbenchmark)
dyn.load("benchmark.so")
## rowSum* .call --------------------------------------------------------------
rowSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
}
colSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_colSums', PACKAGE = 'benchmark', M)
}
rowSquareSums.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
stopifnot(
is.vector(vecA),
is.numeric(vecA),
is.numeric(diag),
nrow * (nrow - 1) == length(vecA) * 2
)
if (!is.double(vecA)) {
vecA <- as.double(vecA)
}
.Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
stopifnot(
is.matrix(A),
is.numeric(v)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.vector(v) || !is.double(v)) {
v <- as.double(v)
}
stopifnot(
nrow(A) == length(v),
op %in% c('+', '-', '*', '/')
)
.Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
}
## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
all.equal(rowSums(M^2), rowSums.c(M^2)),
all.equal(colSums(M), colSums.c(M)),
all.equal(rowSums(M), rowSumsV2.c(M)),
all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
rowSums = rowSums(M),
rowSums.c = rowSums.c(M),
rowSumsV2.c = rowSumsV2.c(M),
rowSumsV3.c = rowSumsV3.c(M)
)
microbenchmark(
rowSums = rowSums(M^2),
rowSums.c = rowSums.c(M^2),
rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
colSums = colSums(M),
colSums.c = colSums.c(M)
)
sum = rowSums(M)
stopifnot(all.equal(
sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # row-normalize
), all.equal(
sweep(M, 1, sum, FUN = `/`),
M / sum
))
microbenchmark(
sweep = sweep(M, 1, sum, FUN = `/`),
M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # row-normalize
)
# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
rowSums(Sym),
rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
rowSums = rowSums(Sym),
rowSums.c = rowSums.c(Sym),
rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)
## Matrix-Matrix operation .call ---------------------------------------------
transpose.c <- function(A) {
stopifnot(
is.matrix(A), is.numeric(A)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow(A), ncol(A))
}
.Call('R_transpose', PACKAGE = 'benchmark', A)
}
sympMV.c <- function(vecA, x) {
stopifnot(
is.vector(vecA), is.numeric(vecA),
is.vector(x), is.numeric(x),
length(x) * (length(x) + 1) == 2 * length(vecA)
)
if (!is.double(vecA)) {
vecA <- as.double(vecA)
}
if (!is.double(x)) {
x <- as.double(x)
}
.Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
}
matrixprod.c <- function(A, B) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
ncol(A) == nrow(B)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
}
crossprod.c <- function(A, B) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
nrow(A) == nrow(B)
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_crossprod', PACKAGE = 'benchmark', A, B)
}
kronecker.c <- function(A, B, op = '*') {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
is.character(op), op %in% c('*', '+', '/', '-')
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_kronecker', PACKAGE = 'benchmark', A, B, op)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
stopifnot(
is.matrix(A), is.numeric(A),
is.matrix(B), is.numeric(B),
all(dim(A) == dim(B)),
is.numeric(alpha), length(alpha) == 1L,
is.numeric(beta), length(beta) == 1L
)
if (!is.double(A)) {
A <- matrix(as.double(A), nrow = nrow(A))
}
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
as.double(alpha), as.double(beta))
}
## Matrix-Matrix operation tests ---------------------------------------------
n <- 200
k <- 100
m <- 300
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
all.equal(t(A), transpose.c(A))
)
microbenchmark(
t(A),
transpose.c(A)
)
Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = T)]
x <- runif(n)
stopifnot(all.equal(
as.double(Sym %*% x),
sympMV.c(vecSym, x)
))
microbenchmark(
Sym %*% x,
sympMV.c = sympMV.c(vecSym, x)
)
stopifnot(
all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
"%*%" = A %*% B,
matrixprod.c = matrixprod.c(A, B)
)
A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
crossprod = crossprod(A, B),
crossprod.c = crossprod.c(A, B)
)
n <- 100L
m <- 12L
p <- 11L
q <- 10L
A <- matrix(runif(n * m), n, m)
B <- matrix(runif(p * q), p, q)
stopifnot(all.equal(
kronecker(A, B),
kronecker.c(A, B)
))
microbenchmark(
kronecker = kronecker(A, B),
kronecker.c = kronecker.c(A, B)
)
n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
A %*% t(B) - B %*% t(A),
skewSymRank2k.c(A, B)
)
## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
stopifnot(
is.matrix(B), is.numeric(B)
)
if (!is.double(B)) {
B <- matrix(as.double(B), nrow = nrow(B))
}
.Call('R_nullProj', PACKAGE = 'benchmark', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
all.equal(Q, nullProj.c(V))
)
microbenchmark(
nullProj = diag(1, p) - tcrossprod(V, V),
nullProj.c = nullProj.c(V)
)
# ## Kronecker optimizations ----------------------------------------------------
# library(microbenchmark)
# dist.1 <- function(X_diff, Q) {
# rowSums((X_diff %*% Q)^2)
# }
# dist.2 <- function(X, Q) {
# ones <- rep(1, nrow(X))
# proj <- X %*% Q
# rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2)
# }
# n <- 400L
# p <- 12L
# k <- 2L
# q <- p - k
# X <- matrix(rnorm(n * p), n, p)
# Q <- diag(1, p) - tcrossprod(rnorm(p))
# ones <- rep(1, n)
# X_diff <- kronecker(X, ones) - kronecker(ones, X)
# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q)))
# microbenchmark(
# dist.1(X_diff, Q),
# dist.2(X, Q),
# times = 10L
# )
# # if (!persistent) {
# # pair.index <- elem.pairs(seq(n))
# # i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
# # j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
# # lower <- ((i - 1) * n) + j
# # upper <- ((j - 1) * n) + i
# # X_diff <- X[i, , drop = F] - X[j, , drop = F]
# # }
# # # Projection matrix onto `span(V)`
# # Q <- diag(1, p) - tcrossprod(V, V)
# # # Vectorized distance matrix `D`.
# # vecD <- rowSums((X_diff %*% Q)^2)
# ## WIP for gradient. ----------------------------------------------------------
grad.c <- function(X, X_diff, Y, V, h) {
stopifnot(
is.matrix(X), is.double(X),
is.matrix(X_diff), is.double(X_diff),
ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
length(Y) == nrow(X),
is.matrix(V), is.double(V),
nrow(V) == ncol(X),
is.vector(h), is.numeric(h), length(h) == 1
)
.Call('R_grad', PACKAGE = 'benchmark',
X, X_diff, as.double(Y), V, as.double(h));
}
elem.pairs <- function(elements) {
# Number of elements to match.
n <- length(elements)
# Create all combinations.
pairs <- rbind(rep(elements, each=n), rep(elements, n))
# Select unique combinations without self interaction.
return(pairs[, pairs[1, ] < pairs[2, ]])
}
grad <- function(X, Y, V, h, persistent = TRUE) {
n <- nrow(X)
p <- ncol(X)
if (!persistent) {
pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ]  # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ]  # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]
}
# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
# Vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2)
# Create Kernel matrix (aka. apply kernel to distances)
K <- matrix(1, n, n) # `exp(0) == 1`
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
# Weighted `Y` momentums
colSumsK <- colSums(K)
y1 <- (K %*% Y) / colSumsK
y2 <- (K %*% Y^2) / colSumsK
# Per example loss `L(V, X_i)`
L <- y2 - y1^2
# Compute scaling vector `vecS` for `X_diff`.
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
tmp <- as.vector(L) - tmp
tmp <- tmp * K / colSumsK
vecS <- (tmp + t(tmp))[lower] * vecD
G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G
return(G)
}
rStiefel <- function(p, q) {
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}
n <- 200
p <- 12
q <- 10
X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefel(p, q)
h <- 0.1
pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]
stopifnot(all.equal(
grad(X, Y, V, h),
grad.c(X, X_diff, Y, V, h)
))
microbenchmark(
grad = grad(X, Y, V, h),
grad.c = grad.c(X, X_diff, Y, V, h)
)

View File

@ -1,510 +0,0 @@
#include <stdlib.h>
#include <string.h> // for `mem*` functions.
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#include <R_ext/Error.h>
// #include <Rmath.h>
#include "benchmark.h"
void rowSums(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > 508) {
block_size = 508;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
if (block_size_i > block_size) {
block_size_i = block_size;
}
        // Copy block's first column.
        for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] = A[j];
sum[j + 1] = A[j + 1];
sum[j + 2] = A[j + 2];
sum[j + 3] = A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] = A[j];
}
// Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] += A[j];
sum[j + 1] += A[j + 1];
sum[j + 2] += A[j + 2];
sum[j + 3] += A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsV2(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > CVE_MEM_CHUNK_SIZE) {
block_size = CVE_MEM_CHUNK_SIZE;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
if (block_size_i > block_size) {
block_size_i = block_size;
}
// Compute first blocks column,
for (j = 0; j < block_size_i; ++j) {
sum[j] = A[j];
}
// and sum the following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
for (j = 0; j < block_size_i; ++j) {
sum[j] += A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsV3(const double *A,
const int nrow, const int ncol,
double *sum) {
    int i;
    double *ones = (double*)malloc(ncol * sizeof(double));
for (i = 0; i < ncol; ++i) {
ones[i] = 1.0;
}
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
free(ones);
}
void colSums(const double *A, const int nrow, const int ncol,
double *sums) {
    int i, j, nrowb = 4 * (nrow / 4); // largest multiple of 4 with nrowb <= nrow.
double sum;
for (j = 0; j < ncol; ++j) {
sum = 0.0;
for (i = 0; i < nrowb; i += 4) {
sum += A[i]
+ A[i + 1]
+ A[i + 2]
+ A[i + 3];
}
for (; i < nrow; ++i) {
sum += A[i];
}
*(sums++) = sum;
A += nrow;
}
}
void rowSquareSums(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > 508) {
block_size = 508;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
        // Take block size of everything left and reduce to max size.
block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }
        // Square and copy block's first column.
        for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] = A[j] * A[j];
sum[j + 1] = A[j + 1] * A[j + 1];
sum[j + 2] = A[j + 2] * A[j + 2];
sum[j + 3] = A[j + 3] * A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] = A[j] * A[j];
}
// Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
            for (j = 0; j + 3 < block_size_i; j += 4) {
sum[j] += A[j] * A[j];
sum[j + 1] += A[j + 1] * A[j + 1];
sum[j + 2] += A[j + 2] * A[j + 2];
sum[j + 3] += A[j + 3] * A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j] * A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsSymVec(const double *Avec, const int nrow,
const double diag,
double *sum) {
int i, j;
if (diag == 0.0) {
memset(sum, 0, nrow * sizeof(double));
} else {
for (i = 0; i < nrow; ++i) {
sum[i] = diag;
}
}
for (j = 0; j < nrow; ++j) {
for (i = j + 1; i < nrow; ++i, ++Avec) {
sum[j] += *Avec;
sum[i] += *Avec;
}
}
}
#define ROW_SWEEP_ALG(op) \
/* Iterate `(block_size_i, ncol)` submatrix blocks. */ \
for (i = 0; i < nrow; i += block_size_i) { \
/* Set `A` and `C` to block beginning. */ \
A = A_block; \
C = C_block; \
/* Get current block's row size. */ \
block_size_i = nrow - i; \
if (block_size_i > block_size) { \
block_size_i = block_size; \
} \
/* Perform element wise operation for block. */ \
for (; A < A_end; A += nrow, C += nrow) { \
for (j = 0; j < block_size_i; ++j) { \
C[j] = (A[j]) op (v[j]); \
} \
} \
/* Step one block forth. */ \
A_block += block_size_i; \
C_block += block_size_i; \
v += block_size_i; \
}
/* C[, j] = A[, j] * v for each j = 1 to ncol */
void rowSweep(const double *A, const int nrow, const int ncol,
const char* op,
const double *v, // vector of length nrow
double *C) {
int i, j, block_size, block_size_i;
const double *A_block = A;
double *C_block = C;
const double *A_end = A + nrow * ncol;
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
block_size = CVE_MEM_CHUNK_SMALL;
} else {
block_size = nrow;
}
if (*op == '+') {
ROW_SWEEP_ALG(+)
} else if (*op == '-') {
ROW_SWEEP_ALG(-)
} else if (*op == '*') {
ROW_SWEEP_ALG(*)
} else if (*op == '/') {
ROW_SWEEP_ALG(/)
} else {
error("Got unknown 'op' (opperation) argument.");
}
}
void transpose(const double *A, const int nrow, const int ncol, double* T) {
int i, j, len = nrow * ncol;
// Filling column-wise and accessing row-wise.
for (i = 0, j = 0; i < len; ++i, j += nrow) {
if (j >= len) {
j -= len - 1;
}
T[i] = A[j];
}
}
// Symmetric Packed matrix vector product.
// Computes
// y <- Ax
// where A is supplied as packed lower triangular part of a symmetric
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
double one = 1.0;
double zero = 0.0;
int onei = 1;
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C) {
const double one = 1.0;
const double zero = 0.0;
// DGEMM with parameterization:
// C <- A %*% B
F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
&one, A, &nrowA, B, &nrowB,
&zero, C, &nrowA);
}
void crossprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C) {
const double one = 1.0;
const double zero = 0.0;
// DGEMM with parameterization:
// C <- t(A) %*% B
F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
&one, A, &nrowA, B, &nrowB,
&zero, C, &ncolA);
}
#define KRONECKER_ALG(op) \
for (j = 0; j < ncolA; ++j) { \
for (l = 0; l < ncolB; ++l) { \
colB = B + (l * nrowB); \
for (i = 0; i < nrowA; ++i) { \
for (k = 0; k < nrowB; ++k) { \
*(C++) = (A[i]) op (colB[k]); \
} \
} \
} \
A += nrowA; \
}
void kronecker(const double * restrict A, const int nrowA, const int ncolA,
const double * restrict B, const int nrowB, const int ncolB,
const char* op,
double * restrict C) {
int i, j, k, l;
const double *colB;
if (*op == '+') {
KRONECKER_ALG(+)
} else if (*op == '-') {
KRONECKER_ALG(-)
} else if (*op == '*') {
KRONECKER_ALG(*)
} else if (*op == '/') {
KRONECKER_ALG(/)
} else {
error("Got unknown 'op' (opperation) argument.");
}
}
void nullProj(const double *B, const int nrowB, const int ncolB,
double *Q) {
const double minusOne = -1.0;
const double one = 1.0;
// Initialize Q as identity matrix.
memset(Q, 0, sizeof(double) * nrowB * nrowB);
double *Q_diag, *Q_end = Q + nrowB * nrowB;
for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
*Q_diag = 1.0;
}
// DGEMM with parameterization:
// Q <- (-1.0 * B %*% t(B)) + Q
F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
&minusOne, B, &nrowB, B, &nrowB,
&one, Q, &nrowB);
}
void rangePairs(const int from, const int to, int *pairs) {
int i, j;
for (i = from; i < to; ++i) {
for (j = i + 1; j < to; ++j) {
pairs[0] = i;
pairs[1] = j;
pairs += 2;
}
}
}
// A dense skew-symmetric rank 2k update.
// Performs the update
//      C := alpha (A * B^T - B * A^T) + beta C
void skewSymRank2k(const int nrow, const int ncol,
double alpha, const double *A, const double *B,
double beta,
double *C) {
F77_NAME(dgemm)("N", "T",
&nrow, &nrow, &ncol,
&alpha, A, &nrow, B, &nrow,
&beta, C, &nrow);
alpha *= -1.0;
beta = 1.0;
F77_NAME(dgemm)("N", "T",
&nrow, &nrow, &ncol,
&alpha, B, &nrow, A, &nrow,
&beta, C, &nrow);
}
// Gaussian kernel evaluated as exp(scale * x^2); `scale` is expected to be -0.5 / h.
static inline double gaussKernel(const double x, const double scale) {
return exp(scale * x * x);
}
// TODO: much potential for optimization!
static void weightedYandLoss(const int n,
const double *Y,
const double *vecD,
const double *vecW,
const double *colSums,
double *y1, double *L, double *vecS,
double *loss) {
int i, j, k, N = n * (n - 1) / 2;
double l;
for (i = 0; i < n; ++i) {
y1[i] = Y[i];
L[i] = Y[i] * Y[i];
}
for (k = j = 0; j < n; ++j) {
for (i = j + 1; i < n; ++i, ++k) {
y1[i] += Y[j] * vecW[k];
y1[j] += Y[i] * vecW[k];
L[i] += Y[j] * Y[j] * vecW[k];
L[j] += Y[i] * Y[i] * vecW[k];
}
}
for (i = 0; i < n; ++i) {
y1[i] /= colSums[i];
L[i] /= colSums[i];
}
l = 0.0;
for (i = 0; i < n; ++i) {
l += (L[i] -= y1[i] * y1[i]);
}
*loss = l / (double)n;
for (k = j = 0; j < n; ++j) {
for (i = j + 1; i < n; ++i, ++k) {
l = Y[j] - y1[i];
vecS[k] = (L[i] - (l * l)) / colSums[i];
l = Y[i] - y1[j];
vecS[k] += (L[j] - (l * l)) / colSums[j];
}
}
for (k = 0; k < N; ++k) {
vecS[k] *= vecW[k] * vecD[k];
}
}
void grad(const int n, const int p, const int q,
const double *X,
const double *X_diff,
const double *Y,
const double *V,
const double h,
double *G, double *loss) {
    // Number of non-trivial X_i to X_j pairs.
int i, N = (n * (n - 1)) / 2;
double scale = -0.5 / h;
if (X_diff == (void*)0) {
// TODO: ...
}
// Allocate and compute projection matrix `Q = I_p - V * V^T`
double *Q = (double*)malloc(p * p * sizeof(double));
nullProj(V, p, q, Q);
// allocate and compute vectorized distance matrix with a temporary
// projection of `X_diff`.
double *vecD = (double*)malloc(N * sizeof(double));
double *X_proj;
if (p < 5) { // TODO: refine that!
X_proj = (double*)malloc(N * 5 * sizeof(double));
} else {
X_proj = (double*)malloc(N * p * sizeof(double));
}
matrixprod(X_diff, N, p, Q, p, p, X_proj);
rowSquareSums(X_proj, N, p, vecD);
    // Apply kernel to the distance vector for weights computation.
double *vecK = X_proj; // reuse memory area, no longer needed.
for (i = 0; i < N; ++i) {
vecK[i] = gaussKernel(vecD[i], scale);
}
double *colSums = X_proj + N; // still allocated!
rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym
    // Compute weighted responses of first and second moments, aka y1, y2.
double *y1 = X_proj + N + n;
double *L = X_proj + N + (2 * n);
    // Allocate X_diff scaling vector `vecS`, not in the `X_proj` mem area
    // because it is used simultaneously with X_proj in the final gradient computation.
double *vecS = (double*)malloc(N * sizeof(double));
weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);
    // Compute the gradient using X_proj for the intermediate scaled X_diff.
rowSweep(X_diff, N, p, "*", vecS, X_proj);
// reuse Q which has the required dim (p, p).
crossprod(X_diff, N, p, X_proj, N, p, Q);
// Product with V
matrixprod(Q, p, p, V, p, q, G);
// And final scaling (TODO: move into matrixprod!)
scale = -2.0 / (((double)n) * h * h);
N = p * q;
for (i = 0; i < N; ++i) {
G[i] *= scale;
}
free(vecS);
free(X_proj);
free(vecD);
free(Q);
}

View File

@ -1,219 +0,0 @@
#ifndef CVE_INCLUDE_GUARD_
#define CVE_INCLUDE_GUARD_
#include <Rinternals.h>
#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032
void rowSums(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSums(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSumsV2(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV2(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSumsV3(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV3(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void colSums(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_colSums(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));
colSums(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));
rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));
UNPROTECT(1);
return result;
}
void rowSumsSymVec(const double *Avec, const int nrow,
const double diag,
double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));
rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));
UNPROTECT(1);
return sum;
}
void rowSweep(const double *A, const int nrow, const int ncol,
const char* op,
const double *v, // vector of length nrow
double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));
rowSweep(REAL(A), nrows(A), ncols(A),
CHAR(STRING_ELT(op, 0)),
REAL(v), REAL(C));
UNPROTECT(1);
return C;
}
void transpose(const double *A, const int nrow, const int ncol, double* T);
SEXP R_transpose(SEXP A) {
SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));
transpose(REAL(A), nrows(A), ncols(A), REAL(T));
UNPROTECT(1); /* T */
return T;
}
void sympMV(const double* vecA, const int nrow, const double* x, double* y);
SEXP R_sympMV(SEXP vecA, SEXP x) {
SEXP y = PROTECT(allocVector(REALSXP, length(x)));
sympMV(REAL(vecA), length(x), REAL(x), REAL(y));
UNPROTECT(1); /* y */
return y;
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));
matrixprod(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
REAL(C));
UNPROTECT(1);
return C;
}
void crossprod(const double* A, const int nrowA, const int ncolA,
               const double* B, const int nrowB, const int ncolB,
double* C);
SEXP R_crossprod(SEXP A, SEXP B) {
SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));
crossprod(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
REAL(C));
UNPROTECT(1);
return C;
}
void kronecker(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB,
const char *op,
double *C);
SEXP R_kronecker(SEXP A, SEXP B, SEXP op) {
SEXP C = PROTECT(allocMatrix(REALSXP,
nrows(A) * nrows(B),
ncols(A) * ncols(B)));
kronecker(REAL(A), nrows(A), ncols(A),
REAL(B), nrows(B), ncols(B),
CHAR(STRING_ELT(op, 0)),
REAL(C));
UNPROTECT(1);
return C;
}
void skewSymRank2k(const int n, const int k,
double alpha, const double *A, const double *B,
double beta,
double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));
skewSymRank2k(nrows(A), ncols(A),
*REAL(alpha), REAL(A), REAL(B),
*REAL(beta), REAL(C));
UNPROTECT(1);
return C;
}
void nullProj(const double* B, const int nrowB, const int ncolB,
double* Q);
SEXP R_nullProj(SEXP B) {
SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));
nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));
UNPROTECT(1);
return Q;
}
void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
int start = asInteger(from);
int end = asInteger(to) + 1;
int n = end - start;
SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
rangePairs(start, end, INTEGER(out));
UNPROTECT(1);
return out;
}
void grad(const int n, const int p, const int q,
const double *X,
const double *X_diff,
const double *Y,
const double *V,
const double h,
double *G, double *const loss);
SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
SEXP loss = PROTECT(allocVector(REALSXP, 1));
grad(nrows(X), ncols(X), ncols(V),
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
REAL(G), REAL(loss));
UNPROTECT(2);
return G;
}
#endif /* CVE_INCLUDE_GUARD_ */