- update: doc
- wip: cleanup
- update: datasets
- remove: old code

parent 300fc11f3f
commit 9edefe994d

@@ -9,15 +9,10 @@ export(cve)
export(cve.call)
export(dataset)
export(directions)
export(elem.pairs)
export(estimate.bandwidth)
export(null)
export(predict_dim)
export(projTangentStiefel)
export(rStiefel)
export(retractStiefel)
export(skew)
export(sym)
import(stats)
importFrom(graphics,boxplot)
importFrom(graphics,lines)
@@ -20,7 +20,7 @@
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
#'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
@@ -36,26 +36,46 @@
#' @inherit CVE-package description
#'
#' @param formula an object of class \code{"formula"} which is a symbolic
#'      description of the model to be fitted.
#'      description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
#'      \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
#'      \eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if
#'      supplied.
#' @param method specifies the CVE method variation as one of
#'      supplied like \code{data <- data.frame(Y, X)} with dimension
#'      \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#'      the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#'      options are
#' \itemize{
#'      \item "simple" exact implementation as described in the paper listed
#'            below.
#'      \item "weighted" variation with addaptive weighting of slices.
#'      \item "simple" implementation as described in the paper.
#'      \item "weighted" variation with adaptive weighting of slices.
#' }
#' see paper.
#' @param max.dim upper bound for \code{k} (ignored if \code{k} is supplied).
#' @param ... Parameters passed on to \code{cve.call}.
#' @param ... optional parameters passed on to \code{cve.call}.
#'
#' @return an S3 object of class \code{cve} with components:
#' \describe{
#'      \item{X}{Original training data,}
#'      \item{Y}{Responce of original training data,}
#'      \item{X}{design matrix of predictor vector used for calculating
#'            cve-estimate,}
#'      \item{Y}{\eqn{n}-dimensional vector of responses used for calculating
#'            cve-estimate,}
#'      \item{method}{Name of used method,}
#'      \item{call}{the matched call,}
#'      \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
#'            each \eqn{k=min.dim,...,max.dim} (dimension).}
#'      \item{res}{list of components \code{V, L, B, loss, h} for
#'            each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
#'            call, \code{min.dim = max.dim = k}.
#'      \itemize{
#'            \item \code{B} is the cve-estimate with dimension
#'                  \eqn{p\times k}{p x k}.
#'            \item \code{V} is the orthogonal complement of \eqn{B}.
#'            \item \code{L} is the loss for each sample separately such that
#'                  its mean is \code{loss}.
#'            \item \code{loss} is the value of the target function that is
#'                  minimized, evaluated at \eqn{V}.
#'            \item \code{h} bandwidth parameter used to calculate
#'                  \code{B, V, loss, L}.
#'      }
#' }
#' }
#'
#' @examples
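To make the new `res` structure concrete, access would look like the following sketch (not part of the commit; the fit `dr` is hypothetical, and indexing `res` by the dimension's name is an assumption consistent with how `names(object$res)` is used in `predict_dim` further down):

    dr <- cve(Y ~ X, max.dim = 5)
    res.k <- dr$res[["2"]]                 # components for k = 2
    B.hat <- res.k$B                       # p x 2 cve-estimate of the basis
    all.equal(mean(res.k$L), res.k$loss)   # L are per-sample losses, loss their mean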
@@ -66,7 +86,7 @@
#' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p)
#' B <- cbind(b1, b2)
#' # samplsize
#' # sample size
#' n <- 200
#' set.seed(21)
#' # creat predictor data x ~ N(0, I_p)
@@ -139,10 +159,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @inherit cve title
#' @inherit cve description
#'
#' @param X Design matrix with dimension \eqn{n\times p}{n x p}.
#' @param Y numeric array of length \eqn{n} of responses.
#' @param h bandwidth or function to estimate bandwidth, defaults to internally
#'      estimated bandwidth.
#' @param nObs parameter for choosing bandwidth \code{h} using
#'      \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows.
#' @param Y Responses (1 dimensional).
#' @param method specifies the CVE method variation as one of
#' \itemize{
#'      \item "simple" exact implementation as described in the paper listed
@@ -156,19 +178,23 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param tau Initial step-size.
#' @param tol Tolerance for break condition.
#' @param max.iter maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points.
#' @param logger a logger function (only for advanced user, significantly slows
#'      down the computation).
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
#'      estimated bandwidth.
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian
#'      gradient update with a momentum term.
#' @param attempts If \code{V.init} not supplied, the optimization is carried
#'      out \code{attempts} times with starting values drawn from the invariant
#'      measure on the Stiefel manifold (see \code{\link{rStiefel}}).
#' @param momentum number in \eqn{[0, 1)} giving the ratio of momentum for the
#'      euclidean gradient update with a momentum term. \code{momentum = 0}
#'      corresponds to normal gradient descent.
#' @param slack Positive scaling to allow small increases of the loss while
#'      optimizing.
#' @param gamma step-size reduction multiple.
#'      optimizing, i.e. \code{slack = 0.1} allows the target function to
#'      increase up to \eqn{10 \%} in one optimization step.
#' @param gamma step-size reduction multiple. If a gradient step with step size
#'      \code{tau} is not accepted, \code{gamma * tau} is set to the next step
#'      size.
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)`
#'      as optimization starting value. (If supplied, \code{attempts} is
#'      set to 1 and \code{k} to match dimension)
#'      used as starting value in the optimization. (If supplied,
#'      \code{attempts} is set to 0 and \code{k} to match dimension).
#' @param logger a logger function (only for advanced user, slows down the
#'      computation).
#'
#' @inherit cve return
#'
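Read together, `tau`, `slack` and `gamma` describe a backtracking step-size control. A minimal sketch of the implied acceptance rule (names like `loss.fun` are hypothetical; this is not the package internals):

    loss.new <- loss.fun(V.new)            # loss after a trial gradient step
    if (loss.new <= (1 + slack) * loss) {
        V <- V.new                         # accept, tolerating a small increase
        loss <- loss.new
    } else {
        tau <- gamma * tau                 # reject: shrink step size and retry
    }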
@@ -253,6 +279,7 @@ cve.call <- function(X, Y, method = "simple",
            stop("Dimension missmatch of 'V.init' and 'X'")
        }
        min.dim <- max.dim <- ncol(X) - ncol(V.init)
        storage.mode(V.init) <- "double"
        attempts <- 0L
    } else if (missing(k) || is.null(k)) {
        min.dim <- as.integer(min.dim)
@@ -320,6 +347,9 @@ cve.call <- function(X, Y, method = "simple",
        }
    }

    # Convert numerical values to "double".
    storage.mode(X) <- storage.mode(Y) <- "double"

    if (is.function(logger)) {
        loggerEnv <- environment(logger)
    } else {
@@ -1,8 +1,10 @@
#' Gets estimated SDR basis.
#'
#' Returns the SDR basis matrix for SDR dimension(s).
#' Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
#' cve-estimate with dimension \eqn{p\times k}{p x k}.
#'
#' @param object instance of \code{cve} as output from \code{\link{cve}} or
#'      \code{\link{cve.call}}
#'      \code{\link{cve.call}}.
#' @param k the SDR dimension.
#' @param ... ignored.
#'
@@ -0,0 +1,279 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' rmvnorm(20, mu = c(3, -1, 2))
#' }
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    # See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}
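A quick sanity check of the Cholesky construction above (a sketch, not part of the file): since `chol(sigma)` returns the upper-triangular `R` with `sigma = R'R`, the sample covariance should approach `sigma`.

    S <- matrix(c(2, 1, 1, 2), 2)
    X <- rmvnorm(5000, sigma = S)
    round(cov(X), 1)       # approximately S
    round(colMeans(X), 1)  # approximately c(0, 0)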
#' Multivariate t distribution.
#'
#' Random generation from the multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degrees
#'      of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
#'      \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#'      for \eqn{\nu > 2}.
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' \dontrun{
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' rmvt(20, mu = c(3, -1, 2), df = 3)
#' }
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    if (df == Inf) {
        Z <- 1
    } else {
        Z <- sqrt(df / rchisq(n, df))
    }

    return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}
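Note the scaling this construction implies: multiplying by `Z = sqrt(df / rchisq(n, df))` inflates the covariance by `nu / (nu - 2)`. A hedged check (using `df = 6` so fourth moments are finite and the sample covariance is stable):

    X <- rmvt(5000, sigma = diag(2), df = 6)
    round(cov(X), 1)   # roughly 1.5 * diag(2), since nu / (nu - 2) = 1.5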
#' Generalized Normal Distribution.
#'
#' Random generation for the generalized Normal Distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param alpha first shape parameter.
#' @param beta second shape parameter.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
    if (alpha <= 0 | beta <= 0) {
        stop("alpha and beta must be positive.")
    }
    lambda <- (1 / alpha)^beta
    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
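The variance of this generalized normal is alpha^2 * gamma(3/beta) / gamma(1/beta) (see the linked article), which is exactly how `dataset()` below calibrates the M1 error term. A quick check, as a sketch:

    beta <- 0.5
    alpha <- sqrt(0.5 * gamma(1 / beta) / gamma(3 / beta))
    var(rgnorm(1e5, 0, alpha, beta))   # approximately 0.5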
#' Laplace distribution
#'
#' Random generation for the Laplace distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
    U <- runif(n, -0.5, 0.5)
    scale <- sd / sqrt(2)

    return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}
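Here `sd` is the standard deviation, not the usual Laplace scale `b`; they relate by `b = sd / sqrt(2)`, hence the `sd / sqrt(2)` above. A one-line check (sketch):

    var(rlaplace(1e5, 0, sd = 2))   # approximately 4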
#' Generates test datasets.
#'
#' Provides sample datasets M1-M7 used in the paper Conditional variance
#' estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
#'      \code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the
#'      dataset number 1-7.
#' @param n number of samples.
#' @param p Dimension of random variable \eqn{X}.
#' @param sd standard deviation for error term \eqn{\epsilon}.
#' @param ... Additional parameters only for "M2" (namely \code{pmix} and
#'      \code{lambda}), see below.
#'
#' @return List with elements
#' \itemize{
#'      \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#'      \item{Y}{response.}
#'      \item{B}{the dim-reduction matrix.}
#'      \item{name}{Name of the dataset (name parameter).}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as a generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.5}.
#' @section M2:
#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
#' \eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
#' points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7:
#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
#' freedom, for a subspace dimension of \eqn{k = 4} with a default of
#' \eqn{n = 200} data points.
#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
#' where \eqn{\epsilon} is distributed as a generalized normal distribution with
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#'      Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
    name <- toupper(name)
    if (nchar(name) == 1) { name <- paste0("M", name) }

    if (name == "M1") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        beta <- 0.5
        Y <- cos(X %*% B) + rgnorm(n, 0,
            alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
            beta = beta
        )
    } else if (name == "M2") {
        if (missing(n)) { n <- 100 }
        params <- list(...)
        pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
        lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        Z <- 2 * rbinom(n, 1, pmix) - 1
        X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
        Y <- cos(X %*% B) + rnorm(n, 0, sd)
    } else if (name == "M3") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- matrix(rnorm(n * p), n)
        Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
    } else if (name == "M4") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
        )
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        XB <- X %*% B
        Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
    } else if (name == "M5") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1, 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3), rep(0, p - 6))
        ) / sqrt(6)
        X <- matrix(runif(n * p), n)
        XB <- X %*% B
        Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
    } else if (name == "M6") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 3`
        B <- diag(p)[, -(3:(p - 1))]
        X <- matrix(rnorm(n * p), n)
        Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
    } else if (name == "M7") {
        if (missing(n)) { n <- 400 }
        # B ... `p x 4`
        B <- diag(p)[, -(4:(p - 1))]
        # "R"andom "M"ulti"V"ariate "S"tudent
        X <- rmvt(n = n, sigma = diag(p), df = 3)
        XB <- X %*% B
        Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
        Y <- Y + rlaplace(n, 0, sd)
    } else {
        stop("Got unknown dataset name.")
    }

    return(list(X = X, Y = Y, B = B, name = name))
}
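A minimal round trip through the new generator (a sketch, not part of the commit):

    ds <- dataset("M1")
    dim(ds$X)   # 100 x 20
    dim(ds$B)   # 20 x 1
    plot(ds$X %*% ds$B, ds$Y)   # Y = cos(b_1'X) + eps along the true direction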
@@ -5,9 +5,15 @@ directions <- function(dr, k) {

#' Computes projected training data \code{X} for given dimension `k`.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' Projects the design matrix \eqn{X} onto the column space of the
#' cve-estimate for the given dimension \eqn{k}.
#'
#' @param dr Instance of \code{'cve'} as returned by \code{\link{cve}}.
#' @param k SDR dimension to use for projection.
#'
#' @return the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
#'      is the cve-estimate for dimension \eqn{k}.
#'
#' @examples
#' # create B for simulation (k = 1)
#' B <- rep(1, 5) / sqrt(5)
@@ -1,27 +1,22 @@
#' Bandwidth estimation for CVE.
#'
#' Estimates a bandwidth \code{h} according
#' If no bandwidth or function for calculating it is supplied, the CVE method
#' defaults to using the following formula (version 1)
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#' with \eqn{n} the sample size, \eqn{p} its dimension
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
#' which is \code{(n-1)/n} times the sample covariance estimate.
#'   h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
#' The alternative version 2, used for dimension prediction, is given by
#' \deqn{%
#'   h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}
#' with \eqn{n} the sample size, \eqn{p} its dimension and the
#' covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
#' covariance estimate.
#'
#' @param X data matrix with samples in its rows.
#' @param X a \eqn{n\times p}{n x p} matrix with samples in its rows.
#' @param k Dimension of lower dimensional projection.
#' @param nObs number of points in a slice, see \eqn{nObs} in CVE paper.
#' @param version either \code{1} or \code{2}, where
#' \itemize{
#'      \item 1: uses the following formula:
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#'   h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#'      \item 2: uses
#' \deqn{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}{%
#'   h = (2 * tr(\Sigma) / p) * \chi_k^{-1}(\frac{nObs - 1}{n - 1})}
#' }
#' @param nObs number of points in a slice, only for version 2.
#' @param version either \code{1} or \code{2}.
#'
#' @return Estimated bandwidth \code{h}.
#'
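Both versions translate directly into R. A hedged sketch of the documented formulas (reading chi_k^{-1} as the quantile function of the chi-squared distribution with `k` degrees of freedom, which is an assumption; `est.bw` is a hypothetical name, not the package function):

    est.bw <- function(X, k, nObs, version = 1L) {
        n <- nrow(X)
        p <- ncol(X)
        Sigma <- ((n - 1) / n) * cov(X)   # (n-1)/n times the sample covariance
        if (version == 1L) {
            (2 * sum(diag(Sigma)) / p) * (1.2 * n^(-1 / (4 + k)))^2
        } else {
            (2 * sum(diag(Sigma)) / p) * qchisq((nObs - 1) / (n - 1), df = k)
        }
    }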
@@ -1,6 +1,9 @@
#' Loss distribution elbow plot.
#'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
#' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
#' \code{min.dim} to \code{max.dim}. For a given \code{k}, \code{L} corresponds
#' to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
#' \eqn{L_n(V)}; for further details see the paper.
#'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and
@@ -31,6 +34,9 @@
#' # elbow plot
#' plot(cve.obj.simple)
#'
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#'      Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#'      as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
@@ -1,6 +1,7 @@
#' Predict method for CVE Fits.
#'
#' Predict responces using reduced data with \code{\link{mars}}.
#' Predict response using projected data where the forward model \eqn{g(B'X)}
#' is estimated using \code{\link{mars}}.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#'      \code{cve.call}).
@@ -36,7 +37,7 @@
#'
#' # plot prediction against y.test
#' plot(yhat, y.test)
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
#'
#' @rdname predict.cve
#'
@@ -36,10 +36,6 @@ predict_dim_elbow <- function(object) {
    # Get dimensions
    n <- nrow(X)
    p <- ncol(X)
    # Compute persistent data.
    i = rep(1:n, n)
    j = rep(1:n, each = n)
    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)

    losses <- vector("double", length(object$res))
    names(losses) <- names(object$res)
@@ -48,16 +44,16 @@ predict_dim_elbow <- function(object) {
        # extract dimension specific estimates and dimensions.
        k <- dr.k$k
        V <- dr.k$V
        q <- ncol(V)
        # estimate bandwidth according alternative formula (see: TODO: see)
        # estimate bandwidth according to the alternative formula.
        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
        # Projected `X`
        XV <- X %*% V
        # Devectorized distance matrix
        # (inefficient in R but fast in C)
        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
        D <- D.eucl - D
        XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
        # Compute distances
        d2 <- tcrossprod(XQ) # XQ XQ'
        d1 <- matrix(diag(d2), n, n)
        D <- d1 - 2 * d2 + t(d1)
        # Apply kernel
        # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2))
        K <- exp((-0.5 / h^2) * D^2)
        # sum columns
        colSumsK <- colSums(K)
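The `d1 - 2 * d2 + t(d1)` step is the standard Gram-matrix identity `||a_i - a_j||^2 = ||a_i||^2 - 2 a_i'a_j + ||a_j||^2` applied to the rows of `XQ`. A tiny check against `dist` (sketch, not part of the commit):

    A <- matrix(rnorm(12), 4)
    G <- tcrossprod(A)
    d1 <- matrix(diag(G), 4, 4)
    max(abs((d1 - 2 * G + t(d1)) - as.matrix(dist(A))^2))   # effectively 0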
@@ -81,11 +77,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
    # Get dimensions
    n <- nrow(X)
    p <- ncol(X)
    # Compute persistent data.
    i = rep(1:n, n)
    j = rep(1:n, each = n)
    D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n)

    L <- matrix(NA, n, length(object$res))
    colnames(L) <- names(object$res)
    # Compute per sample losses with alternative bandwidth for each dimension.
@@ -93,16 +85,16 @@ predict_dim_wilcoxon(object, p.value = 0.05) {
        # extract dimension specific estimates and dimensions.
        k <- dr.k$k
        V <- dr.k$V
        q <- ncol(V)
        # estimate bandwidth according alternative formula (see: TODO: see)
        # estimate bandwidth according to the alternative formula.
        h <- estimate.bandwidth(X, k, sqrt(n), version = 2L)
        # Projected `X`
        XV <- X %*% V
        # Devectorized distance matrix
        # (inefficient in R but fast in C)
        D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n)
        D <- D.eucl - D
        XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V')
        # Compute distances
        d2 <- tcrossprod(XQ) # XQ XQ'
        d1 <- matrix(diag(d2), n, n)
        D <- d1 - 2 * d2 + t(d1)
        # Apply kernel
        # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2))
        K <- exp((-0.5 / h^2) * D^2)
        # sum columns
        colSumsK <- colSums(K)

@@ -130,27 +122,24 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
    ))
}

#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
#' TODO: rewrite!!!
#' \code{"TODO: @Lukas"}
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#'      \code{cve.call}).
#' @param object instance of class \code{cve} (result of \code{\link{cve}},
#'      \code{\link{cve.call}}).
#' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.
#' @param ... ignored.
#'
#' @return list with
#' \itemize{
#'      \item MSE: Mean Square Error,
#'      \item k: predicted dimensions.
#' }
#' @return list with \code{"k"} the predicted dimension and method dependent
#'      information.
#'
#' @section cv:
#' Cross-validation ... TODO:
#' @section Method cv:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @section elbow:
#' Cross-validation ... TODO:
#' @section Method elbow:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @section wilcoxon:
#' Cross-validation ... TODO:
#' @section Method wilcoxon:
#' TODO: \code{"TODO: @Lukas"}.
#'
#' @examples
#' # create B for simulation
@@ -1,5 +1,9 @@
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#'
#' Prints summary statistics of the output \code{L} from \code{cve} for
#' \code{k = min.dim, ..., max.dim}.
#'
#' @param object Instance of \code{"cve"} as returned by \code{\link{cve}}.
#' @param ... ignored.
#'
#' @examples
@@ -0,0 +1,24 @@
#' Draws a sample from the invariant measure on the Stiefel manifold
#' \eqn{S(p, q)}.
#'
#' @param p row dimension
#' @param q col dimension
#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix.
#' @examples
#' V <- rStiefel(6, 4)
#' @export
rStiefel <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of
#'      `t(V)`, i.e. the orthogonal complement of the column space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
    tmp <- qr(V)
    set <- if (tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
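Both helpers can be verified against their defining properties (sketch, not part of the commit):

    V <- rStiefel(6, 4)
    max(abs(crossprod(V) - diag(4)))   # ~ 0: V is semi-orthogonal
    W <- null(V)                       # 6 x 2 basis of the orthogonal complement
    max(abs(crossprod(V, W)))          # ~ 0: columns of W are orthogonal to V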
@@ -26,7 +26,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\references{
@@ -8,7 +8,7 @@
}
\arguments{
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or
\code{\link{cve.call}}}
\code{\link{cve.call}}.}

\item{k}{the SDR dimension.}

@@ -18,7 +18,8 @@
dir the matrix of CS or CMS of given dimension
}
\description{
Returns the SDR basis matrix for SDR dimension(s).
Returns the SDR basis matrix for dimension \code{k}, i.e. returns the
cve-estimate with dimension \eqn{p\times k}{p x k}.
}
\examples{
# set dimensions for simulation model
@@ -8,31 +8,51 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
}
\arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic
description of the model to be fitted.}
description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
\eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
\eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.}

\item{data}{an optional data frame, containing the data for the formula if
supplied.}
supplied like \code{data <- data.frame(Y, X)} with dimension
\eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
the environment from which \code{cve} is called.}

\item{method}{specifies the CVE method variation as one of
\item{method}{This character string specifies the method of fitting. The
options are
\itemize{
\item "simple" exact implementation as described in the paper listed
below.
\item "weighted" variation with addaptive weighting of slices.
}}
\item "simple" implementation as described in the paper.
\item "weighted" variation with adaptive weighting of slices.
}
see paper.}

\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}

\item{...}{Parameters passed on to \code{cve.call}.}
\item{...}{optional parameters passed on to \code{cve.call}.}
}
\value{
an S3 object of class \code{cve} with components:
\describe{
\item{X}{Original training data,}
\item{Y}{Responce of original training data,}
\item{X}{design matrix of predictor vector used for calculating
cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
cve-estimate,}
\item{method}{Name of used method,}
\item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
each \eqn{k=min.dim,...,max.dim} (dimension).}
\item{res}{list of components \code{V, L, B, loss, h} for
each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
}
}
\description{

@@ -56,7 +76,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\examples{

@@ -67,7 +87,7 @@ k <- 2
b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2)
# samplsize
# sample size
n <- 200
set.seed(21)
# creat predictor data x ~ N(0, I_p)
@@ -10,9 +10,9 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
max.iter = 50L, attempts = 10L, logger = NULL)
}
\arguments{
\item{X}{data matrix with samples in its rows.}
\item{X}{Design matrix with dimension \eqn{n\times p}{n x p}.}

\item{Y}{Responses (1 dimensional).}
\item{Y}{numeric array of length \eqn{n} of responses.}

\item{method}{specifies the CVE method variation as one of
\itemize{
@@ -34,38 +34,59 @@ estimated bandwidth.}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.}

\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
gradient update with a momentum term.}
\item{momentum}{number in \eqn{[0, 1)} giving the ratio of momentum for the
euclidean gradient update with a momentum term. \code{momentum = 0}
corresponds to normal gradient descent.}

\item{tau}{Initial step-size.}

\item{tol}{Tolerance for break condition.}

\item{slack}{Positive scaling to allow small increases of the loss while
optimizing.}
optimizing, i.e. \code{slack = 0.1} allows the target function to
increase up to \eqn{10 \%} in one optimization step.}

\item{gamma}{step-size reduction multiple.}
\item{gamma}{step-size reduction multiple. If a gradient step with step size
\code{tau} is not accepted, \code{gamma * tau} is set to the next step
size.}

\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)`
as optimization starting value. (If supplied, \code{attempts} is
set to 1 and \code{k} to match dimension)
used as starting value in the optimization. (If supplied,
\code{attempts} is set to 0 and \code{k} to match dimension).}

\item{max.iter}{maximum number of optimization steps.}

\item{attempts}{number of arbitrary different starting points.}
\item{attempts}{If \code{V.init} not supplied, the optimization is carried
out \code{attempts} times with starting values drawn from the invariant
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}

\item{logger}{a logger function (only for advanced user, significantly slows
down the computation).}
\item{logger}{a logger function (only for advanced user, slows down the
computation).}
}
\value{
an S3 object of class \code{cve} with components:
\describe{
\item{X}{Original training data,}
\item{Y}{Responce of original training data,}
\item{X}{design matrix of predictor vector used for calculating
cve-estimate,}
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
cve-estimate,}
\item{method}{Name of used method,}
\item{call}{the matched call,}
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
each \eqn{k=min.dim,...,max.dim} (dimension).}
\item{res}{list of components \code{V, L, B, loss, h} for
each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
call, \code{min.dim = max.dim = k}.
\itemize{
\item \code{B} is the cve-estimate with dimension
\eqn{p\times k}{p x k}.
\item \code{V} is the orthogonal complement of \eqn{B}.
\item \code{L} is the loss for each sample separately such that
its mean is \code{loss}.
\item \code{loss} is the value of the target function that is
minimized, evaluated at \eqn{V}.
\item \code{h} bandwidth parameter used to calculate
\code{B, V, loss, L}.
}
}
}
}
\description{

@@ -89,7 +110,7 @@ variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
}
\examples{
@@ -0,0 +1,127 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset
number 1-7.}

\item{n}{number of samples.}

\item{p}{Dimension of random variable \eqn{X}.}

\item{sd}{standard deviation for error term \eqn{\epsilon}.}

\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
\code{lambda}), see below.}
}
\value{
List with elements
\itemize{
\item{X}{data, a \eqn{n\times p}{n x p} matrix.}
\item{Y}{response.}
\item{B}{the dim-reduction matrix.}
\item{name}{Name of the dataset (name parameter).}
}
}
\description{
Provides sample datasets M1-M7 used in the paper Conditional variance
estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
The general model is given by:
\deqn{Y = g(B'X) + \epsilon}
}
\section{M1}{

The predictors are distributed as
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
distributed as a generalized normal distribution with location 0,
shape-parameter 0.5, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.5}.
}

\section{M2}{

The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1} where
\eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
}

\section{M3}{

The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
and \eqn{Y} is
\deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
standard normal.
}

\section{M4}{

The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
of \eqn{n = 100} data points. \eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M5}{

The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
where \eqn{U([0, 1]^p)} is the uniform distribution with
independent components on the \eqn{p}-dimensional hypercube for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
\eqn{p = 20},
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M6}{

The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
where \eqn{\epsilon} is standard normal.
}

\section{M7}{

The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
freedom, for a subspace dimension of \eqn{k = 4} with a default of
\eqn{n = 200} data points.
\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
where \eqn{\epsilon} is distributed as a generalized normal distribution with
location 0, shape-parameter 1, and the scale-parameter is chosen such that
\eqn{Var(\epsilon) = 0.25}.
}

\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}

@@ -8,12 +8,17 @@
\method{directions}{cve}(dr, k)
}
\arguments{
\item{dr}{Instance of 'cve' as returned by \code{cve}.}
\item{dr}{Instance of \code{'cve'} as returned by \code{\link{cve}}.}

\item{k}{SDR dimension to use for projection.}
}
\value{
the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
is the cve-estimate for dimension \eqn{k}.
}
\description{
Computes projected training data \code{X} for given dimension `k`.
Projects the design matrix \eqn{X} onto the column space of the
cve-estimate for the given dimension \eqn{k}.
}
\examples{
# create B for simulation (k = 1)

@@ -4,26 +4,33 @@
\alias{estimate.bandwidth}
\title{Bandwidth estimation for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
estimate.bandwidth(X, k, nObs, version = 1L)
}
\arguments{
\item{X}{data matrix with samples in its rows.}
\item{X}{a \eqn{n\times p}{n x p} matrix with samples in its rows.}

\item{k}{Dimension of lower dimensional projection.}

\item{nObs}{number of points in a slice, see \eqn{nObs} in CVE paper.}
\item{nObs}{number of points in a slice, only for version 2.}

\item{version}{either \code{1} or \code{2}.}
}
\value{
Estimated bandwidth \code{h}.
}
\description{
Estimates a bandwidth \code{h} according
If no bandwidth or function for calculating it is supplied, the CVE method
defaults to using the following formula (version 1)
\deqn{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
with \eqn{n} the sample size, \eqn{p} its dimension
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
which is \code{(n-1)/n} times the sample covariance estimate.
h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
The alternative version 2, used for dimension prediction, is given by
\deqn{%
h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}
with \eqn{n} the sample size, \eqn{p} its dimension and the
covariance-matrix \eqn{\Sigma}, which is \code{(n-1)/n} times the sample
covariance estimate.
}
\examples{
# set dimensions for simulation model

@@ -13,7 +13,10 @@
[\code{\link{lines}}]}
}
\description{
Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
\code{min.dim} to \code{max.dim}. For a given \code{k}, \code{L} corresponds
to \eqn{L_n(V, X_i)} where \eqn{V \in S(p, p - k)}{V} is the minimizer of
\eqn{L_n(V)}; for further details see the paper.
}
\examples{
# create B for simulation

@@ -41,6 +44,10 @@ cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
# elbow plot
plot(cve.obj.simple)

}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
@@ -20,7 +20,8 @@
predicted response of data \code{newdata}.
}
\description{
Predict responces using reduced data with \code{\link{mars}}.
Predict response using projected data where the forward model \eqn{g(B'X)}
is estimated using \code{\link{mars}}.
}
\examples{
# create B for simulation

@@ -50,5 +51,5 @@ yhat <- predict(cve.obj.simple, x.test, 1)
plot(yhat, y.test)
}
\seealso{
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
\code{\link{cve}}, \code{\link{cve.call}} and \pkg{\link{mars}}.
}
@@ -2,26 +2,40 @@
% Please edit documentation in R/predict_dim.R
\name{predict_dim}
\alias{predict_dim}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
\title{\code{"TODO: @Lukas"}}
\usage{
predict_dim(object, ...)
predict_dim(object, ..., method = "CV")
}
\arguments{
\item{object}{instance of class \code{cve} (result of \code{cve},
\code{cve.call}).}
\item{object}{instance of class \code{cve} (result of \code{\link{cve}},
\code{\link{cve.call}}).}

\item{...}{ignored.}

\item{method}{one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}.}
}
\value{
list with
\itemize{
\item MSE: Mean Square Error,
\item k: predicted dimensions.
}
list with \code{"k"} the predicted dimension and method dependent
information.
}
\description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
\code{"TODO: @Lukas"}
}
\section{Method cv}{

TODO: \code{"TODO: @Lukas"}.
}

\section{Method elbow}{

TODO: \code{"TODO: @Lukas"}.
}

\section{Method wilcoxon}{

TODO: \code{"TODO: @Lukas"}.
}

\examples{
# create B for simulation
B <- rep(1, 5) / sqrt(5)
@@ -2,7 +2,8 @@
% Please edit documentation in R/util.R
\name{rStiefel}
\alias{rStiefel}
\title{Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.}
\title{Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.}
\usage{
rStiefel(p, q)
}

@@ -12,10 +13,11 @@ rStiefel(p, q)
\item{q}{col dimension}
}
\value{
\code{p} times \code{q} semi-orthogonal matrix.
\eqn{p \times q}{p x q} semi-orthogonal matrix.
}
\description{
Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
Draws a sample from the invariant measure on the Stiefel manifold
\eqn{S(p, q)}.
}
\examples{
V <- rStiefel(6, 4)
@@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rgnorm}
\alias{rgnorm}
\title{Generalized Normal Distribution.}
\usage{
rgnorm(n = 1, mu = 0, alpha = 1, beta = 1)
}
\arguments{
\item{n}{Number of generated samples.}

\item{mu}{mean.}

\item{alpha}{first shape parameter.}

\item{beta}{second shape parameter.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for the generalized Normal Distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Generalized_normal_distribution
}
\keyword{internal}
@@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rlaplace}
\alias{rlaplace}
\title{Laplace distribution}
\usage{
rlaplace(n = 1, mu = 0, sd = 1)
}
\arguments{
\item{n}{Number of generated samples.}

\item{mu}{mean.}

\item{sd}{standard deviation.}
}
\value{
numeric array of length \eqn{n}.
}
\description{
Random generation for the Laplace distribution.
}
\seealso{
https://en.wikipedia.org/wiki/Laplace_distribution
}
\keyword{internal}
@@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvnorm}
\alias{rmvnorm}
\title{Multivariate Normal Distribution.}
\usage{
rmvnorm(n = 1, mu = rep(0, p), sigma = diag(p))
}
\arguments{
\item{n}{number of samples.}

\item{mu}{mean}

\item{sigma}{covariance matrix.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation for the multivariate normal distribution.
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
}
\examples{
\dontrun{
rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
rmvnorm(20, mu = c(3, -1, 2))
}
}
\keyword{internal}
@@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{rmvt}
\alias{rmvt}
\title{Multivariate t distribution.}
\usage{
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
}
\arguments{
\item{n}{number of samples.}

\item{mu}{mean}

\item{sigma}{a \eqn{k\times k}{k x k} positive definite matrix. If the degrees
of freedom \eqn{\nu} are bigger than 2, the resulting covariance is
\deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
for \eqn{\nu > 2}.}

\item{df}{degrees of freedom \eqn{\nu}.}
}
\value{
a \eqn{n\times p}{n x p} matrix with samples in its rows.
}
\description{
Random generation from the multivariate t distribution (student distribution).
}
\examples{
\dontrun{
rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
rmvt(20, mu = c(3, -1, 2), df = 3)
}
}
\keyword{internal}
@@ -7,12 +7,13 @@
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of 'cve' as returned by \code{cve}.}
\item{object}{Instance of \code{"cve"} as returned by \code{\link{cve}}.}

\item{...}{ignored.}
}
\description{
Prints a summary of a \code{cve} result.
Prints summary statistics of the output \code{L} from \code{cve} for
\code{k = min.dim, ..., max.dim}.
}
\examples{
# create B for simulation
@@ -161,97 +161,3 @@ mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1,

    return mat_S;
}

// int getWorkLen(const int n, const int p, const int q) {
//     int mpq; /**< Max of p and q */
//     int nn = ((n - 1) * n) / 2;

//     if (p > q) {
//         mpq = p;
//     } else {
//         mpq = q;
//     }
//     if (nn * p < (mpq + 1) * mpq) {
//         return 2 * (mpq + 1) * mpq;
//     } else {
//         return (nn + mpq) * mpq;
//     }
// }

// double cost(const unsigned int method,
//             const int n,
//             const double *Y,
//             const double *vecK,
//             const double *colSums,
//             double *y1, double *L) {
//     int i, j, k;
//     double tmp, sum;

//     for (i = 0; i < n; ++i) {
//         y1[i] = Y[i];
//         L[i] = Y[i] * Y[i];
//     }

//     for (k = j = 0; j < n; ++j) {
//         for (i = j + 1; i < n; ++i, ++k) {
//             y1[i] += Y[j] * vecK[k];
//             y1[j] += Y[i] * vecK[k];
//             L[i] += Y[j] * Y[j] * vecK[k];
//             L[j] += Y[i] * Y[i] * vecK[k];
//         }
//     }

//     for (i = 0; i < n; ++i) {
//         y1[i] /= colSums[i];
//         L[i] /= colSums[i];
//     }

//     tmp = 0.0;
//     if (method == CVE_METHOD_WEIGHTED) {
//         sum = 0.0;
//         for (i = 0; i < n; ++i) {
//             tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]);
//             sum += colSums[i];
//         }
//         return tmp / (sum - (double)n); // TODO: check for division by zero!
//     } else {
//         for (i = 0; i < n; ++i) {
//             tmp += (L[i] -= y1[i] * y1[i]);
//         }
//         return tmp / (double)n;
//     }
// }

// void scaling(const unsigned int method,
//             const int n,
//             const double *Y, const double *y1, const double *L,
//             const double *vecD, const double *vecK,
//             const double *colSums,
//             double *vecS) {
//     int i, j, k, nn = (n * (n - 1)) / 2;
//     double tmp;

//     if (method == CVE_METHOD_WEIGHTED) {
//         for (k = j = 0; j < n; ++j) {
//             for (i = j + 1; i < n; ++i, ++k) {
//                 tmp = Y[j] - y1[i];
//                 vecS[k] = (L[i] - (tmp * tmp));
//                 tmp = Y[i] - y1[j];
//                 vecS[k] += (L[j] - (tmp * tmp));
//             }
//         }
//     } else {
//         for (k = j = 0; j < n; ++j) {
//             for (i = j + 1; i < n; ++i, ++k) {
//                 tmp = Y[j] - y1[i];
//                 vecS[k] = (L[i] - (tmp * tmp)) / colSums[i];
//                 tmp = Y[i] - y1[j];
//                 vecS[k] += (L[j] - (tmp * tmp)) / colSums[j];
//             }
//         }
//     }

//     for (k = 0; k < nn; ++k) {
//         vecS[k] *= vecK[k] * vecD[k];
//     }
// }
|
|
@ -17,8 +17,8 @@ static const R_CallMethodDef CallEntries[] = {
|
|||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
/* Restrict C entrypoints to registered routines. */
|
||||
void R_initCVE(DllInfo *dll) {
|
||||
/* Restrict C entry points to registered routines. */
|
||||
void R_init_CVE(DllInfo *dll) {
|
||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||
R_useDynamicSymbols(dll, FALSE);
|
||||
}
|
|
@ -914,7 +914,7 @@ mat* laplace(mat *A, double *workMem) {
|
|||
* \_____/ \_____/
|
||||
* IpA C = ImA B
|
||||
* \_______/
|
||||
* IpA C = Y ==> C = IpA^-1 Y
|
||||
* IpA C = Y ==> C = IpA^-1 Y
|
||||
*
|
||||
* @param A Skew-Symmetric matrix of dimension `(n, n)`.
|
||||
* @param B Matrix of dimensions `(n, m)` with `m <= n`.
|
|
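The linear system sketched in the comment above is the same Cayley step used by the pure-R optimizers further down in this diff. A minimal R sketch of one such step, under the assumption that `A` is built from a placeholder gradient `G` at a Stiefel point `V`:

    # Sketch of one Cayley step: solve (I + A.tau) C = (I - A.tau) V for C.
    p <- 6; q <- 2
    V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))  # point on the Stiefel manifold
    G <- matrix(rnorm(p * q), p, q)            # placeholder gradient at V
    A <- G %*% t(V) - V %*% t(G)               # skew-symmetric
    A.tau <- 0.25 * A                          # tau / 2 with tau = 0.5
    V.tau <- solve(diag(p) + A.tau, (diag(p) - A.tau) %*% V)
    round(crossprod(V.tau), 12)                # approx. identity: still semi-orthogonal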
@ -1,193 +0,0 @@
|
|||
#'
|
||||
#' @param n number of samples.
|
||||
#' @param mu mean vector.
|
||||
#' @param sigma covariance matrix.
|
||||
#'
|
||||
#' @returns a \eqn{n\times p} matrix with samples in its rows.
|
||||
#'
|
||||
#' @examples
|
||||
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
||||
#' rmvnorm(20, mu = c(3, -1, 2))
|
||||
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
||||
if (!missing(sigma)) {
|
||||
p <- nrow(sigma)
|
||||
} else if (!missing(mu)) {
|
||||
mu <- matrix(mu, ncol = 1)
|
||||
p <- nrow(mu)
|
||||
} else {
|
||||
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
||||
}
|
||||
|
||||
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
||||
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
|
||||
}
|
||||
|
||||
#' Samples from the multivariate t distribution (Student's t distribution).
|
||||
#'
|
||||
#' @param n number of samples.
|
||||
#' @param mu mean, ... TODO:
|
||||
#' @param sigma a \eqn{k\times k} positive definite matrix. If the degrees
|
||||
#' of freedom \eqn{\nu} are bigger than 2, the covariance of the samples is
|
||||
#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
|
||||
#' for \eqn{\nu > 2}.
|
||||
#' @param df degrees of freedom \eqn{\nu}.
|
||||
#'
|
||||
#' @returns a \eqn{n\times p} matrix with samples in its rows.
|
||||
#'
|
||||
#' @examples
|
||||
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
||||
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
|
||||
#' rmvt(20, mu = c(3, -1, 2), df = 3)
|
||||
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
|
||||
if (!missing(sigma)) {
|
||||
p <- nrow(sigma)
|
||||
} else if (!missing(mu)) {
|
||||
mu <- matrix(mu, ncol = 1)
|
||||
p <- nrow(mu)
|
||||
} else {
|
||||
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
||||
}
|
||||
|
||||
if (df == Inf) {
|
||||
Z <- 1
|
||||
} else {
|
||||
Z <- sqrt(df / rchisq(n, df))
|
||||
}
|
||||
|
||||
return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
|
||||
}
|
||||
|
||||
|
||||
#' Generalized Normal Distribution.
|
||||
#' see: https://en.wikipedia.org/wiki/Generalized_normal_distribution
|
||||
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
|
||||
if (alpha <= 0 || beta <= 0) {
|
||||
stop("alpha and beta must be positive.")
|
||||
}
|
||||
# Draw |x - mu|^beta from a Gamma(1 / beta, scale = alpha^beta) distribution
|
||||
# via the inverse CDF, take the beta-th root, then attach a random sign.
|
||||
lambda <- (1 / alpha)^beta
|
||||
scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
|
||||
return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
|
||||
}
|
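A hedged usage sketch for the sampler above: with shape `beta = 2` and `alpha = sqrt(2)` the generalized normal reduces to the standard normal (and `beta = 1` gives a Laplace distribution).

    # Sketch: beta = 2, alpha = sqrt(2) should reproduce a standard normal.
    x <- rgnorm(1e5, mu = 0, alpha = sqrt(2), beta = 2)
    c(mean(x), sd(x))   # approx. (0, 1)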
||||
|
||||
#' Laplace distribution
|
||||
#' see: https://en.wikipedia.org/wiki/Laplace_distribution
|
||||
rlaplace <- function(n = 1, mu = 0, sigma = 1) {
|
||||
U <- runif(n, -0.5, 0.5)
|
||||
scale <- sigma / sqrt(2)
|
||||
|
||||
return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
|
||||
}
|
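A short sketch of the parameterization used above: since the variance of a Laplace law with scale `b` is `2 * b^2`, the choice `scale = sigma / sqrt(2)` makes `sigma` the standard deviation of the generated samples.

    # Sketch: sd of the samples should match the `sigma` argument.
    x <- rlaplace(1e5, mu = 1, sigma = 2)
    c(mean(x), sd(x))   # approx. (1, 2)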
||||
|
||||
#' Generates test datasets.
|
||||
#'
|
||||
#' Provides sample datasets. There are seven different datasets, named
|
||||
#' M1 through M7; M1 to M5 are described in the paper referenced below.
|
||||
#' The general model is given by:
|
||||
#' \deqn{Y = g(B'X) + \epsilon}
|
||||
#'
|
||||
#' @param name One of \code{"M1"}, \code{"M2"}, ..., \code{"M7"}.
|
||||
#' @param n number of samples.
|
||||
#' @param p dimension of the predictor variable \code{X}.
|
||||
#' @param sigma parameter controlling the noise level and, for some models,
|
||||
#' the covariance structure of \code{X}.
|
||||
#' @param ... ignored.
|
||||
#'
|
||||
#' @return List with elements
|
||||
#' \describe{
|
||||
#' \item{X}{data}
|
||||
#' \item{Y}{response}
|
||||
#' \item{B}{Used dim-reduction matrix}
|
||||
#' \item{name}{Name of the dataset (name parameter)}
|
||||
#' }
|
||||
#'
|
||||
#' @section M1:
|
||||
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
#' g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
#' @section M2:
|
||||
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
#' default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
#' @section M3:
|
||||
#' \deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
#' @section M4:
|
||||
#' TODO:
|
||||
#' @section M5:
|
||||
#' TODO:
|
||||
#'
|
||||
#' @import stats
|
||||
#' @importFrom stats rnorm rbinom
|
||||
#' @export
|
||||
dataset <- function(name = "M1", n = NULL, p = 20, sigma = 0.5, ...) {
|
||||
name <- toupper(name)
|
||||
if (nchar(name) == 1) { name <- paste0("M", name) }
|
||||
|
||||
if (name == "M1") {
|
||||
if (missing(n)) { n <- 100 }
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
|
||||
beta <- 0.5
|
||||
Y <- cos(X %*% B) + rgnorm(n, 0,
|
||||
alpha = sqrt(0.25 * gamma(1 / beta) / gamma(3 / beta)),
|
||||
beta = beta
|
||||
)
|
||||
} else if (name == "M2") {
|
||||
if (missing(n)) { n <- 100 }
|
||||
prob <- 0.3
|
||||
lambda <- 1 # dispersion
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
Z <- 2 * rbinom(n, 1, prob) - 1
|
||||
X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
|
||||
Y <- cos(X %*% B) + rnorm(n, 0, sigma)
|
||||
} else if (name == "M3") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 1`
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
|
||||
X <- matrix(rnorm(n * p), n)
|
||||
Y <- 1.5 * log(2 + abs(X %*% B)) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M4") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 2`
|
||||
B <- cbind(
|
||||
c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
|
||||
c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
|
||||
)
|
||||
X <- rmvnorm(n, sigma = sigma^abs(outer(1:p, 1:p, FUN = `-`)))
|
||||
XB <- X %*% B
|
||||
Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M5") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 2`
|
||||
B <- cbind(
|
||||
c(rep(1, 6), rep(0, p - 6)),
|
||||
c(rep(c(1, -1), 3), rep(0, p - 6))
|
||||
) / sqrt(6)
|
||||
X <- matrix(runif(n * p), n)
|
||||
XB <- X %*% B
|
||||
Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M6") {
|
||||
if (missing(n)) { n <- 200 }
|
||||
# B ... `p x 3`
|
||||
B <- diag(p)[, -(3:(p - 1))]
|
||||
X <- matrix(rnorm(n * p), n)
|
||||
Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sigma^2)
|
||||
} else if (name == "M7") {
|
||||
if (missing(n)) { n <- 400 }
|
||||
# B ... `p x 4`
|
||||
B <- diag(p)[, -(4:(p - 1))]
|
||||
# "R"andom "M"ulti"V"ariate "S"tudent
|
||||
X <- rmvt(n = n, sigma = diag(p), df = 3)
|
||||
XB <- X %*% B
|
||||
Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
|
||||
Y <- Y + rlaplace(n, 0, sigma)
|
||||
} else {
|
||||
stop("Got unknown dataset name.")
|
||||
}
|
||||
|
||||
return(list(X = X, Y = Y, B = B, name = name))
|
||||
}
|
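A minimal usage sketch for the (removed) generator above, assuming the package is attached:

    # Sketch: draw dataset M1 and inspect the returned components.
    ds <- dataset("M1", n = 100, p = 20)
    dim(ds$X)      # 100 x 20
    length(ds$Y)   # 100
    dim(ds$B)      # 20 x 1 (M1 uses a rank-1 basis)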
|
@ -1,82 +0,0 @@
|
|||
#' Draws a sample from the invariant measure on the Stiefel manifold \eqn{S(p, q)}.
|
||||
#'
|
||||
#' @param p row dimension
|
||||
#' @param q col dimension
|
||||
#' @return \code{p} times \code{q} semi-orthogonal matrix.
|
||||
#' @examples
|
||||
#' V <- rStiefel(6, 4)
|
||||
#' @export
|
||||
rStiefel <- function(p, q) {
|
||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
#' Retraction to the manifold.
|
||||
#'
|
||||
#' @param A matrix.
|
||||
#' @return `(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
retractStiefel <- function(A) {
|
||||
return(qr.Q(qr(A)))
|
||||
}
|
||||
|
||||
#' Skew-Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A - A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Skew-Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
skew <- function(A) {
|
||||
0.5 * (A - t(A))
|
||||
}
|
||||
|
||||
#' Symmetric matrix computed from `A` as
|
||||
#' \eqn{1/2 (A + A^T)}.
|
||||
#' @param A Matrix of dim `(p, q)`
|
||||
#' @return Symmetric matrix of dim `(p, p)`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
sym <- function(A) {
|
||||
0.5 * (A + t(A))
|
||||
}
|
||||
|
||||
#' Orthogonal Projection onto the tangent space of the Stiefel manifold.
|
||||
#'
|
||||
#' @param V Point on the Stiefel manifold.
|
||||
#' @param G matrix to be projected onto the tangent space at `V`.
|
||||
#' @return `(p, q)` matrix as element of the tangent space at `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
projTangentStiefel <- function(V, G) {
|
||||
Q <- diag(1, nrow(V)) - V %*% t(V)
|
||||
return(Q %*% G + V %*% skew(t(V) %*% G))
|
||||
}
|
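A brief sketch of the defining property of the projection above: for any `G`, the result `H` satisfies `t(V) %*% H + t(H) %*% V = 0`, i.e. it lies in the tangent space at `V`.

    # Sketch: verify tangency of the projected matrix.
    V <- qr.Q(qr(matrix(rnorm(5 * 2), 5, 2)))
    G <- matrix(rnorm(5 * 2), 5, 2)
    H <- projTangentStiefel(V, G)
    round(t(V) %*% H + t(H) %*% V, 12)   # approx. zero (2 x 2)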
||||
|
||||
#' Null space basis of a given matrix `V`
|
||||
#'
|
||||
#' @param V `(p, q)` matrix
|
||||
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
null <- function(V) {
|
||||
tmp <- qr(V)
|
||||
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
|
||||
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
||||
}
|
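And a sketch of the property promised by the return value above: the columns of `null(V)` are orthogonal to the columns of `V`.

    # Sketch: t(N) %*% V should vanish for N = null(V).
    V <- qr.Q(qr(matrix(rnorm(5 * 2), 5, 2)))
    N <- null(V)
    dim(N)                  # 5 x 3
    round(t(N) %*% V, 12)   # approx. zero (3 x 2)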
||||
|
||||
#' Creates a (numeric) matrix where each column contains
|
||||
#' an element to element matching.
|
||||
#' @param elements numeric vector of elements to match
|
||||
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||
#' @keywords internal
|
||||
#' @examples
|
||||
#' elem.pairs(seq.int(2, 5))
|
||||
#' @export
|
||||
elem.pairs <- function(elements) {
|
||||
# Number of elements to match.
|
||||
n <- length(elements)
|
||||
# Create all combinations.
|
||||
pairs <- rbind(rep(elements, each=n), rep(elements, n))
|
||||
# Select unique combinations without self interaction.
|
||||
return(pairs[, pairs[1, ] < pairs[2, ]])
|
||||
}
|
|
@ -1,68 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/datasets.R
|
||||
\name{dataset}
|
||||
\alias{dataset}
|
||||
\title{Generates test datasets.}
|
||||
\usage{
|
||||
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
|
||||
}
|
||||
\arguments{
|
||||
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
|
||||
|
||||
\item{n}{number of samples}
|
||||
|
||||
\item{B}{SDR basis used for dataset creation if supplied.}
|
||||
|
||||
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{lambda}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{p}{Dim. of random variable \code{X}.}
|
||||
}
|
||||
\value{
|
||||
List with elements
|
||||
\describe{
|
||||
\item{X}{data}
|
||||
\item{Y}{response}
|
||||
\item{B}{Used dim-reduction matrix}
|
||||
\item{name}{Name of the dataset (name parameter)}
|
||||
}
|
||||
}
|
||||
\description{
|
||||
Provides sample datasets. There are 5 different datasets named
|
||||
M1, M2, M3, M4 and M5 described in the paper referenced below.
|
||||
The general model is given by:
|
||||
\deqn{Y = g(B'X) + \epsilon}
|
||||
}
|
||||
\section{M1}{
|
||||
|
||||
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M2}{
|
||||
|
||||
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M3}{
|
||||
|
||||
\deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M4}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
||||
\section{M5}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
|
@ -1,23 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{elem.pairs}
|
||||
\alias{elem.pairs}
|
||||
\title{Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.}
|
||||
\usage{
|
||||
elem.pairs(elements)
|
||||
}
|
||||
\arguments{
|
||||
\item{elements}{numeric vector of elements to match}
|
||||
}
|
||||
\value{
|
||||
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
|
||||
}
|
||||
\description{
|
||||
Creates a (numeric) matrix where each column contains
|
||||
an element to element matching.
|
||||
}
|
||||
\examples{
|
||||
elem.pairs(seq.int(2, 5))
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{projTangentStiefel}
|
||||
\alias{projTangentStiefel}
|
||||
\title{Orthogonal Projection onto the tangent space of the Stiefel manifold.}
|
||||
\usage{
|
||||
projTangentStiefel(V, G)
|
||||
}
|
||||
\arguments{
|
||||
\item{V}{Point on the Stiefel manifold.}
|
||||
|
||||
\item{G}{matrix to be projected onto the tangent space at `V`.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` matrix as element of the tangent space at `V`.
|
||||
}
|
||||
\description{
|
||||
Orthogonal Projection onto the tangent space of the Stiefel manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,18 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{retractStiefel}
|
||||
\alias{retractStiefel}
|
||||
\title{Retraction to the manifold.}
|
||||
\usage{
|
||||
retractStiefel(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{matrix.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` semi-orthogonal matrix, aka element of the Stiefel manifold.
|
||||
}
|
||||
\description{
|
||||
Retraction to the manifold.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{skew}
|
||||
\alias{skew}
|
||||
\title{Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.}
|
||||
\usage{
|
||||
skew(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Skew-Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Skew-Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A - A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{sym}
|
||||
\alias{sym}
|
||||
\title{Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.}
|
||||
\usage{
|
||||
sym(A)
|
||||
}
|
||||
\arguments{
|
||||
\item{A}{Matrix of dim `(p, q)`}
|
||||
}
|
||||
\value{
|
||||
Symmetric matrix of dim `(p, p)`.
|
||||
}
|
||||
\description{
|
||||
Symmetric matrix computed from `A` as
|
||||
\eqn{1/2 (A + A^T)}.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -1,11 +0,0 @@
|
|||
Package: CVEpureR
|
||||
Type: Package
|
||||
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||
Version: 0.1
|
||||
Date: 2019-08-29
|
||||
Author: Loki
|
||||
Maintainer: Loki <loki@no.mail>
|
||||
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in pure R.
|
||||
License: GPL-3
|
||||
Encoding: UTF-8
|
||||
RoxygenNote: 6.1.1
|
|
@ -1,23 +0,0 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method(plot,cve)
|
||||
S3method(summary,cve)
|
||||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
export(cve_linesearch)
|
||||
export(cve_sgd)
|
||||
export(cve_simple)
|
||||
export(dataset)
|
||||
export(elem.pairs)
|
||||
export(estimate.bandwidth)
|
||||
export(grad)
|
||||
export(null)
|
||||
export(rStiefl)
|
||||
import(stats)
|
||||
importFrom(graphics,lines)
|
||||
importFrom(graphics,plot)
|
||||
importFrom(graphics,points)
|
||||
importFrom(stats,model.frame)
|
||||
importFrom(stats,rbinom)
|
||||
importFrom(stats,rnorm)
|
265
CVE_R/R/CVE.R
|
@ -1,265 +0,0 @@
|
|||
#' Conditional Variance Estimator (CVE)
|
||||
#'
|
||||
#' Conditional Variance Estimator for Sufficient Dimension
|
||||
#' Reduction
|
||||
#'
|
||||
#' TODO: And some details
|
||||
#'
|
||||
#'
|
||||
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @docType package
|
||||
#' @author Loki
|
||||
#' @useDynLib CVE, .registration = TRUE
|
||||
"_PACKAGE"
|
||||
|
||||
#' Implementation of the CVE method.
|
||||
#'
|
||||
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
#' reduction (SDR) method assuming a model
|
||||
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
#' where B'X is a lower dimensional projection of the predictors.
|
||||
#'
|
||||
#' @param formula Formula for the regression model defining `X`, `Y`.
|
||||
#' See: \code{\link{formula}}.
|
||||
#' @param data data.frame holding data for formula.
|
||||
#' @param method The methods differ only in the optimization used.
|
||||
#' All of them are gradient-based optimizations on the Stiefel manifold.
|
||||
#' \itemize{
|
||||
#' \item "simple" Simple reduction of stepsize.
|
||||
#' \item "sgd" stocastic gradient decent.
|
||||
#' \item TODO: further
|
||||
#' }
|
||||
#' @param ... Further parameters depending on the used method.
|
||||
#' @examples
|
||||
#' library(CVE)
|
||||
#'
|
||||
#' # sample dataset
|
||||
#' ds <- dataset("M5")
|
||||
#'
|
||||
#' # call `cve` with default method (aka "simple")
|
||||
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
#' # plot optimization history (loss via iteration)
|
||||
#' plot(dr.simple, main = "CVE M5 simple")
|
||||
#'
|
||||
#' # call `cve` with method "linesearch" using `data.frame` as data.
|
||||
#' data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
#' # Note: `Y, X` are NOT defined, they are extracted from `data`.
|
||||
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
#' plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
#'
|
||||
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
|
||||
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
if (missing(data)) {
|
||||
data <- environment(formula)
|
||||
} else if (!is.data.frame(data)) {
|
||||
stop("Parameter 'data' must be a 'data.frame' or missing.")
|
||||
}
|
||||
|
||||
# extract `X`, `Y` from `formula` with `data`
|
||||
model <- stats::model.frame(formula, data)
|
||||
X <- as.matrix(model[ ,-1L, drop = FALSE])
|
||||
Y <- as.double(model[ , 1L])
|
||||
|
||||
# pass extracted data on to [cve.call()]
|
||||
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
||||
|
||||
# overwrite `call` property from [cve.call()]
|
||||
dr$call <- match.call()
|
||||
return(dr)
|
||||
}
|
||||
|
||||
#' @param X data matrix.
|
||||
#' @param Y responses.
|
||||
#' @param nObs parameter for bandwidth estimation, as described in the paper.
|
||||
#' @param k guess for SDR dimension.
|
||||
#' @param ... Method specific parameters.
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple",
|
||||
nObs = sqrt(nrow(X)), h = NULL,
|
||||
min.dim = 1L, max.dim = 10L, k = NULL,
|
||||
tau = 1.0, tol = 1e-3,
|
||||
epochs = 50L, attempts = 10L,
|
||||
logger = NULL) {
|
||||
|
||||
# parameter checking
|
||||
if (!(is.matrix(X) && is.numeric(X))) {
|
||||
stop("Parameter 'X' should be a numeric matrices.")
|
||||
}
|
||||
if (!is.numeric(Y)) {
|
||||
stop("Parameter 'Y' must be numeric.")
|
||||
}
|
||||
if (is.matrix(Y) || !is.double(Y)) {
|
||||
Y <- as.double(Y)
|
||||
}
|
||||
if (nrow(X) != length(Y)) {
|
||||
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
||||
}
|
||||
if (ncol(X) < 2) {
|
||||
stop("'X' is one dimensional, no need for dimension reduction.")
|
||||
}
|
||||
|
||||
if (missing(k) || is.null(k)) {
|
||||
min.dim <- as.integer(min.dim)
|
||||
max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
|
||||
} else {
|
||||
min.dim <- as.integer(k)
|
||||
max.dim <- as.integer(k)
|
||||
}
|
||||
if (min.dim > max.dim) {
|
||||
stop("'min.dim' bigger 'max.dim'.")
|
||||
}
|
||||
if (max.dim >= ncol(X)) {
|
||||
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
|
||||
}
|
||||
|
||||
if (is.function(h)) {
|
||||
estimate.bandwidth <- h
|
||||
h <- NULL
|
||||
}
|
||||
|
||||
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
|
||||
stop("Initial step-width 'tau' must be positive number.")
|
||||
} else {
|
||||
tau <- as.double(tau)
|
||||
}
|
||||
if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
|
||||
stop("Break condition tolerance 'tol' must be not negative number.")
|
||||
} else {
|
||||
tol <- as.double(tol)
|
||||
}
|
||||
|
||||
if (!is.numeric(epochs) || length(epochs) > 1L) {
|
||||
stop("Parameter 'epochs' must be positive integer.")
|
||||
} else if (!is.integer(epochs)) {
|
||||
epochs <- as.integer(epochs)
|
||||
}
|
||||
if (epochs < 1L) {
|
||||
stop("Parameter 'epochs' must be at least 1L.")
|
||||
}
|
||||
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
||||
stop("Parameter 'attempts' must be positive integer.")
|
||||
} else if (!is.integer(attempts)) {
|
||||
attempts <- as.integer(attempts)
|
||||
}
|
||||
if (attempts < 1L) {
|
||||
stop("Parameter 'attempts' must be at least 1L.")
|
||||
}
|
||||
|
||||
if (is.function(logger)) {
|
||||
loggerEnv <- environment(logger)
|
||||
} else {
|
||||
loggerEnv <- NULL
|
||||
}
|
||||
|
||||
# Call specified method.
|
||||
method <- tolower(method)
|
||||
call <- match.call()
|
||||
dr <- list()
|
||||
for (k in min.dim:max.dim) {
|
||||
|
||||
if (missing(h) || is.null(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
} else if (is.numeric(h) && h > 0.0) {
|
||||
h <- as.double(h)
|
||||
} else {
|
||||
stop("Bandwidth 'h' must be positive numeric.")
|
||||
}
|
||||
|
||||
if (method == 'simple') {
|
||||
dr.k <- .Call('cve_simple', PACKAGE = 'CVE',
|
||||
X, Y, k, h,
|
||||
tau, tol,
|
||||
epochs, attempts,
|
||||
logger, loggerEnv)
|
||||
# dr.k <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'linesearch') {
|
||||
# dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rcg') {
|
||||
# dr.k <- cve_rcg(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'momentum') {
|
||||
# dr.k <- cve_momentum(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'rmsprob') {
|
||||
# dr.k <- cve_rmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgdrmsprob') {
|
||||
# dr.k <- cve_sgdrmsprob(X, Y, k, nObs = nObs, ...)
|
||||
# } else if (method == 'sgd') {
|
||||
# dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
}
|
||||
dr.k$B <- null(dr.k$V)
|
||||
dr.k$loss <- mean(dr.k$L)
|
||||
dr.k$h <- h
|
||||
dr.k$k <- k
|
||||
class(dr.k) <- "cve.k"
|
||||
dr[[k]] <- dr.k
|
||||
}
|
||||
|
||||
# augment result information
|
||||
dr$method <- method
|
||||
dr$call <- call
|
||||
class(dr) <- "cve"
|
||||
return(dr)
|
||||
}
|
||||
|
||||
#' Plotting helper for objects of class \code{cve}.
|
||||
#'
|
||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||
#' @param content Specifies what to plot:
|
||||
#' \itemize{
|
||||
#' \item "history" Plots the loss history from stiefel optimization
|
||||
#' (default).
|
||||
#' \item ... TODO: add (if there are any)
|
||||
#' }
|
||||
#' @param ... Pass through parameters to [plot()] and [lines()]
|
||||
#'
|
||||
#' @usage ## S3 method for class 'cve'
|
||||
#' plot(x, content = "history", ...)
|
||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||
#' as well as \code{\link{plot}} for standard plot utility.
|
||||
#' @importFrom graphics plot lines points
|
||||
#' @method plot cve
|
||||
#' @export
|
||||
plot.cve <- function(x, ...) {
|
||||
L <- c()
|
||||
k <- c()
|
||||
for (dr.k in x) {
|
||||
if (inherits(dr.k, 'cve.k')) {
|
||||
k <- c(k, paste0(dr.k$k))
|
||||
L <- c(L, dr.k$L)
|
||||
}
|
||||
}
|
||||
L <- matrix(L, ncol = length(k))
|
||||
boxplot(L, main = "Loss ...",
|
||||
xlab = "SDR dimension k",
|
||||
ylab = expression(L(V, X[i])),
|
||||
names = k)
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
#' @param object Instance of 'cve' as returned by \code{cve}.
|
||||
#' @method summary cve
|
||||
#' @export
|
||||
summary.cve <- function(object, ...) {
|
||||
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
||||
'\n',
|
||||
'Dataset size: ', nrow(object$X), '\n',
|
||||
'Data Dimension: ', ncol(object$X), '\n',
|
||||
'SDR Dimension: ', object$k, '\n',
|
||||
'loss: ', object$loss, '\n',
|
||||
'\n',
|
||||
'Called via:\n',
|
||||
' ',
|
||||
sep='')
|
||||
print(object$call)
|
||||
}
|
|
@ -1,169 +0,0 @@
|
|||
#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
|
||||
#' conditions.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_linesearch <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
rho1 = 0.1,
|
||||
rho2 = 0.9,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Directional derivative of the loss at current position, given
|
||||
# as `Tr(G^T \cdot A \cdot V)`.
|
||||
loss.prime <- -0.5 * norm(A, type = 'F')^2
|
||||
|
||||
# Linesearch
|
||||
tau.upper <- Inf
|
||||
tau.lower <- 0
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- (tau / 2) * A
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
inv <- solve(I_p + A.tau)
|
||||
V.tau <- inv %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- Inf # aka loss.tau
|
||||
G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE)
|
||||
|
||||
# Armijo condition.
|
||||
if (loss > loss.last + (rho1 * tau * loss.prime)) {
|
||||
tau.upper <- tau
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
next()
|
||||
}
|
||||
|
||||
V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau)
|
||||
loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau))
|
||||
|
||||
# Wolfe condition.
|
||||
if (loss.prime.tau < rho2 * loss.prime) {
|
||||
tau.lower <- tau
|
||||
if (tau.upper == Inf) {
|
||||
tau <- 2 * tau.lower
|
||||
} else {
|
||||
tau <- (tau.lower + tau.upper) / 2
|
||||
}
|
||||
} else {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Final call to the logger before stopping optimization
|
||||
if (is.function(logger)) {
|
||||
G <- G.tau
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
G <- G.tau
|
||||
|
||||
# Log after taking current step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_momentum <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Momentum update.
|
||||
M <- A + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V.tau <- retractStiefl(V - tau * M)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if the step is appropriate; if not, reduce the learning rate.
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
tau <- tau / 2
|
||||
next() # Keep position and try with smaller `tau`.
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Call the logger a last time before stopping.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,179 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rcg <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-4,
|
||||
rho = 1e-4, # For Armijo condition.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
A.last <- A
|
||||
|
||||
W <- -A
|
||||
Z <- W %*% V
|
||||
|
||||
# Compute directional derivative.
|
||||
loss.prime <- sum(G * Z) # Tr(G^T Z)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset `tau` for step-size selection.
|
||||
tau <- tau.init
|
||||
for (iter in 1:max.linesearch.iter) {
|
||||
V.tau <- retractStiefl(V + tau * Z)
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h,
|
||||
loss.only = TRUE, persistent = TRUE)
|
||||
# Check Armijo condition.
|
||||
if (loss <= loss.last + (rho * tau * loss.prime)) {
|
||||
break() # If fulfilled, stop the linesearch.
|
||||
}
|
||||
# Reduce step-size and continue linesearch.
|
||||
tau <- tau / 2
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step with found step-size
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger.
|
||||
if (is.function(logger)) {
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# Compute Gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
# Store last `A` for `beta` computation.
|
||||
A.last <- A
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Check 2. break condition.
|
||||
if (norm(A, type = 'F') < tol) {
|
||||
break()
|
||||
}
|
||||
|
||||
# New directional derivative.
|
||||
loss.prime <- sum(G * Z)
|
||||
|
||||
# Reset beta if needed.
|
||||
if (loss.prime < 0) {
|
||||
# Compute `beta` as described in paper.
|
||||
beta.FR <- (norm(A, type = 'F') / norm(A.last, type = 'F'))^2
|
||||
beta.PR <- sum(A * (A - A.last)) / norm(A.last, type = 'F')^2
|
||||
if (beta.PR < -beta.FR) {
|
||||
beta <- -beta.FR
|
||||
} else if (abs(beta.PR) < beta.FR) {
|
||||
beta <- beta.PR
|
||||
} else if (beta.PR > beta.FR) {
|
||||
beta <- beta.FR
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
} else {
|
||||
beta <- 0
|
||||
}
|
||||
|
||||
# Update direction.
|
||||
W <- -A + beta * W
|
||||
Z <- W %*% V
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,121 +0,0 @@
|
|||
#' Implementation of the CVE method as a Riemannian Conjugate Gradient method.
|
||||
#'
|
||||
#' @references A Riemannian Conjugate Gradient Algorithm with Implicit Vector
|
||||
#' Transport for Optimization on the Stiefel Manifold
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_rmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1, # Momentum update.
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
max.linesearch.iter = 20L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
epoch <- 0 # Set epoch count to 0 (only relevant for logging).
|
||||
error <- NA
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Compute gradient and loss at current position.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Projected gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element-wise squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V.tau <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Perform step.
|
||||
V <- V.tau
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
# Set tau to a step-size estimate (only for logging).
|
||||
tau <- tau.init / mean(sqrt(abs(M)) + epsilon)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
# Note: the division by `sqrt(2 * q)` is included in `tol`.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,129 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgd <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.01,
|
||||
tol = 1e-3,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
error <- NA
|
||||
epoch <- 0
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (batch.start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||
batch <- batch.shuffle[batch.start:batch.end]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Cayley transform matrix.
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
}
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
# Compute loss at end of epoch for logging.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
# Compute actual loss after finishing for comparing multiple attempts.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,133 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgdrmsprob <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.1,
|
||||
tol = 1e-4,
|
||||
rho = 0.1,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L,
|
||||
epsilon = 1e-7,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set the environment of `grad` to this environment so that `grad` can
|
||||
# find, and also modify, the local variables defined below.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Adapt tolerance for the break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estimate the bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
# Keep track of last `V` for computing error after an epoch.
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
error <- NA
|
||||
epoch <- 0
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
M <- matrix(0, p, q)
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (batch.start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle))
|
||||
batch <- batch.shuffle[batch.start:batch.end]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE)
|
||||
|
||||
# Projected gradient.
|
||||
A <- projTangentStiefl(V, G)
|
||||
# Projected element-wise squared gradient.
|
||||
Asq <- projTangentStiefl(V, G * G)
|
||||
# Momentum update.
|
||||
M <- (1 - rho) * Asq + rho * projTangentStiefl(V, M)
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V <- retractStiefl(V - tau.init * A / (sqrt(abs(M)) + epsilon))
|
||||
}
|
||||
# And the error for the history.
|
||||
error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F")
|
||||
V.last <- V
|
||||
|
||||
if (is.function(logger)) {
|
||||
# Compute loss at end of epoch for logging.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
logger(environment())
|
||||
}
|
||||
|
||||
# Check break condition.
|
||||
if (error < tol) {
|
||||
break()
|
||||
}
|
||||
}
|
||||
# Compute actual loss after finishing for comparing multiple attempts.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
@ -1,139 +0,0 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_simple <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L,
|
||||
logger = NULL
|
||||
) {
|
||||
# Set `grad` functions environment to enable if to find this environments
|
||||
# local variabels, needed to enable the manipulation of this local variables
|
||||
# from within `grad`.
|
||||
environment(grad) <- environment()
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
# Addapt tolearance for break condition.
|
||||
tol <- sqrt(2 * q) * tol
|
||||
|
||||
# Estaimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute persistent data.
|
||||
# Compute lookup indexes for symmetrie, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
|
||||
# Index of vectorized matrix, for lower and upper triangular part.
|
||||
lower <- ((i - 1) * n) + j
|
||||
upper <- ((j - 1) * n) + i
|
||||
|
||||
# Create all pairewise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
||||
# Identity matrix.
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of current best (according multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Initial loss and gradient.
|
||||
loss <- Inf
|
||||
G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE)
|
||||
# Set last loss (aka, loss after applying the step).
|
||||
loss.last <- loss
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Call logger with initial values before starting optimization.
|
||||
if (is.function(logger)) {
|
||||
logger(0L, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
## Start optimization loop.
|
||||
for (epoch in 1:epochs) {
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Parallet transport (on Stiefl manifold) into direction of `G`.
|
||||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Loss at position after a step.
|
||||
loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE)
|
||||
|
||||
# Check if step is appropriate, iff not reduce learning rate.
|
||||
if ((loss - loss.last) > slack * loss.last) {
|
||||
tau <- tau / 2
|
||||
next() # Keep position and try with smaller `tau`.
|
||||
}
|
||||
|
||||
# Compute error.
|
||||
error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F")
|
||||
|
||||
# Check break condition (epoch check to skip ignored gradient calc).
|
||||
# Note: the devision by `sqrt(2 * k)` is included in `tol`.
|
||||
if (error < tol || epoch >= epochs) {
|
||||
# take last step and stop optimization.
|
||||
V <- V.tau
|
||||
# Call logger last time befor stoping.
|
||||
if (is.function(logger)) {
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
break()
|
||||
}
|
||||
|
||||
# Perform the step and remember previous loss.
|
||||
V <- V.tau
|
||||
loss.last <- loss
|
||||
|
||||
# Call logger after taking a step.
|
||||
if (is.function(logger)) {
|
||||
logger(epoch, attempt, loss, V, tau)
|
||||
}
|
||||
|
||||
# Compute gradient at new position.
|
||||
G <- grad(X, Y, V, h, persistent = TRUE)
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
|
|
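The Cayley step used above maps the Stiefel manifold to itself exactly; a standalone check (a sketch, independent of the package):

p <- 6; q <- 2; tau <- 0.1
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))    # point on the manifold
G <- matrix(rnorm(p * q), p, q)              # arbitrary "gradient"
A <- G %*% t(V) - V %*% t(G)                 # skew-symmetric by construction
V.tau <- solve(diag(p) + tau * A) %*% ((diag(p) - tau * A) %*% V)
stopifnot(all.equal(t(V.tau) %*% V.tau, diag(q)))  # still semi-orthogonal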
@ -1,109 +0,0 @@
#' Generates test datasets.
#'
#' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5 described in the paper referenced below.
#' The general model is given by:
#' \deqn{Y ~ g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}.
#' @param n number of samples.
#' @param B optional dimension-reduction matrix; if missing, a dataset-specific
#'  default with \code{p = 12} rows is used.
#' @param p.mix Only for \code{"M4"}, see below.
#' @param lambda Only for \code{"M4"}, see below.
#'
#' @return List with elements
#' \itemize{
#'      \item{X}{data}
#'      \item{Y}{response}
#'      \item{B}{used dimension-reduction matrix}
#'      \item{name}{name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
#' @section M2:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
#' @section M3:
#' TODO:
#' @section M4:
#' TODO:
#' @section M5:
#' TODO:
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
    # validate parameters
    stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))

    # set default values if not supplied
    if (missing(n)) {
        n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
    }
    if (missing(B)) {
        p <- 12
        if (name == "M1") {
            B <- cbind(
                c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
                c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
            ) / sqrt(6)
        } else if (name == "M2") {
            B <- cbind(
                c(c(1, 0), rep(0, 10)),
                c(c(0, 1), rep(0, 10))
            )
        } else {
            B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
        }
    } else {
        p <- dim(B)[1]
        # validate column count to match the dataset's `k = dim(B)[2]`
        stopifnot(
            (name %in% c("M1", "M2") && dim(B)[2] == 2) ||
            (name %in% c("M3", "M4", "M5") && dim(B)[2] == 1)
        )
    }

    # set link function `g` for the model `Y ~ g(B'X) + epsilon`
    if (name == "M1") {
        g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
    } else if (name == "M2") {
        g <- function(BX) { BX[1] * BX[2]^2 }
    } else if (name %in% c("M3", "M4")) {
        g <- function(BX) { cos(BX[1]) }
    } else { # name == "M5"
        g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
    }

    # compute X
    if (name != "M4") {
        # compute a root of the covariance matrix according to the dataset
        if (name %in% c("M1", "M3")) {
            # Variance-covariance structure for `X ~ N_p(0, \Sigma)` with
            # `\Sigma_{i, j} = 0.5^{|i - j|}`.
            Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
            # decompose Sigma into Sigma.root^T Sigma.root = Sigma for use
            # in the creation of `X`
            Sigma.root <- chol(Sigma)
        } else { # name %in% c("M2", "M5")
            Sigma.root <- diag(rep(1, p)) # p-dim identity
        }
        # data `X` as a multivariate random normal variable with
        # covariance matrix `Sigma`.
        X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
    } else { # name == "M4"
        X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
    }

    # response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 0.5^2)`
    Y <- apply(X, 1, function(X_i) {
        g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
    })

    return(list(X = X, Y = Y, B = B, name = name))
}
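A quick sanity check of the generator above (a sketch; the dimensions follow from the defaults for "M2"):

ds <- dataset("M2", n = 100)
stopifnot(
    nrow(ds$X) == 100, ncol(ds$X) == 12,
    length(ds$Y) == 100,
    ncol(ds$B) == 2
)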
@ -1,27 +0,0 @@
#' Estimated bandwidth for CVE.
#'
#' Estimates a proper bandwidth \code{h} according to
#' \deqn{%
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
#'
#' @param X data matrix of dimension (n x p) with n data points X_i of
#'  dimension p. Therefore each row represents a data point of dimension p.
#' @param k Guess for rank(B).
#' @param nObs Either numeric or a function. If specified as a numeric value
#'  it is used in the computation of the bandwidth directly. If it is a
#'  function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default
#'  behaviour if not supplied at all is to use \code{nObs <- nrow(x)^0.5}.
#'
#' @seealso [\code{\link{qchisq}}]
#' @export
estimate.bandwidth <- function(X, k, nObs) {
    n <- nrow(X)
    p <- ncol(X)

    # Evaluate `nObs` if given as a function (as documented above).
    if (is.function(nObs)) {
        nObs <- nObs(n)
    }

    X_centered <- scale(X, center = TRUE, scale = FALSE)
    Sigma <- (1 / n) * t(X_centered) %*% X_centered

    # Note: `k` equals the `p - q` degrees of freedom in the formula above.
    quantil <- qchisq((nObs - 1) / (n - 1), k)
    return(2 * quantil * sum(diag(Sigma)) / p)
}
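For a feel of the scale of the estimate (a sketch; with standard normal predictors the trace term is close to p, so h reduces to roughly twice the chi-squared quantile):

set.seed(42)
X <- matrix(rnorm(200 * 12), 200, 12)
h <- estimate.bandwidth(X, k = 2, nObs = sqrt(nrow(X)))
h # approximately 2 * qchisq((sqrt(200) - 1) / 199, 2)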
@ -1,82 +0,0 @@
#' Compute the gradient of `L(V)` given a dataset `X`.
#'
#' @param X Data matrix.
#' @param Y Response.
#' @param V Position to compute the gradient at, aka point on the Stiefel
#'  manifold.
#' @param h Bandwidth.
#' @param loss.out If \code{TRUE} the loss will be written to the parent
#'  environment.
#' @param loss.only Boolean to only compute the loss, if \code{TRUE} a single
#'  loss value is returned and \code{envir} is ignored.
#' @param persistent Determines if data indices and dependent calculations
#'  shall be reused from the parent environment. ATTENTION: Do NOT set this
#'  flag, it is only intended for internal usage by carefully aligned
#'  functions!
#' @keywords internal
#' @export
grad <- function(X, Y, V, h,
                 loss.out = FALSE,
                 loss.only = FALSE,
                 persistent = FALSE) {
    # Get number of samples and dimension.
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        # Compute lookup indexes for symmetry, lower/upper
        # triangular parts and vectorization.
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        # Index of vectorized matrix, for lower and upper triangular part.
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i

        # Create all pairwise differences of rows of `X`.
        X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`.
    Q <- diag(1, p) - tcrossprod(V, V)

    # Vectorized distance matrix `D`.
    vecD <- colSums(tcrossprod(Q, X_diff)^2)

    # Create kernel matrix (aka apply the kernel to the distances).
    K <- matrix(1, n, n)                 # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower triangular part.
    K[upper] <- t(K)[upper]              # Mirror lower triangle to upper.

    # Weighted `Y` moments.
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK

    # Per-example loss `L(V, X_i)`.
    L <- y2 - y1^2
    if (loss.only) {
        return(mean(L))
    }
    if (loss.out) {
        loss <<- mean(L)
    }

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    # The gradient.
    # 1. `crossprod(A, B)` is equivalent to `t(A) %*% B`.
    # 2. `(X_diff %*% V) * vecS` is first a matrix-matrix multiplication and
    #    then uses recycling to scale each row `i` by `vecS[i]`.
    #    Note that `vecS` is a vector and that `R` uses column-major ordering
    #    of matrices.
    # (See: notes for more details)
    # TODO: Depending on n, p, q decide which version to take (for current
    # datasets "inner" is faster, see: notes).
    #   inner = crossprod(X_diff, X_diff * vecS) %*% V,
    #   outer = crossprod(X_diff, (X_diff %*% V) * vecS)
    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
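The "inner" and "outer" formulations noted in the TODO above are algebraically identical, since row-scaling of `X_diff` commutes with the multiplication by `V`; a standalone check (a sketch with random inputs):

n.pairs <- 100; p <- 6; q <- 4
X_diff <- matrix(rnorm(n.pairs * p), n.pairs, p)
vecS <- rnorm(n.pairs)
V <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
inner <- crossprod(X_diff, X_diff * vecS) %*% V
outer <- crossprod(X_diff, (X_diff %*% V) * vecS)
stopifnot(all.equal(inner, outer))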
@ -1,43 +0,0 @@

#' Performs a grid search for parameters over a parameter grid.
#' @examples
#' ds <- dataset()
#' args <- list(
#'     h = c(0.05, 0.1, 0.2),
#'     method = c("simple", "sgd"),
#'     tau = c(0.5, 0.1, 0.01)
#' )
#' cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)
#' @export
cve.grid.search <- function(X, Y, k, args) {

    args$stringsAsFactors <- FALSE
    args$KEEP.OUT.ATTRS <- FALSE
    grid <- do.call(expand.grid, args)
    grid.length <- length(grid[[1]])

    print(grid)

    for (i in 1:grid.length) {
        arguments <- as.list(grid[i, ])
        # Set required arguments.
        arguments$X <- X
        arguments$Y <- Y
        arguments$k <- k
        # print(arguments)
        dr <- do.call(cve.call, arguments)
        print(dr$loss)
    }
}

# ds <- dataset()
# X <- ds$X
# Y <- ds$Y
# (k <- ncol(ds$B))
# args <- list(
#     h = c(0.05, 0.1, 0.2),
#     method = c("simple", "sgd"),
#     tau = c(0.5, 0.1, 0.01),
#     attempts = c(1L)
# )

# cve.grid.search(X, Y, k, args)
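For reference, `expand.grid` crosses all parameter values, so the example grid above yields 3 * 2 * 3 = 18 configurations (a sketch):

args <- list(h = c(0.05, 0.1, 0.2),
             method = c("simple", "sgd"),
             tau = c(0.5, 0.1, 0.01))
grid <- do.call(expand.grid, c(args, stringsAsFactors = FALSE))
nrow(grid) # 18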
@ -1,82 +0,0 @@
#' Samples uniformly from the Stiefel manifold.
#'
#' @param p row dimension.
#' @param q column dimension.
#' @return `(p, q)` semi-orthogonal matrix.
#' @examples
#' V <- rStiefl(6, 4)
#' @export
rStiefl <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

#' Retraction to the manifold.
#'
#' @param A matrix.
#' @return `(p, q)` semi-orthogonal matrix, aka an element of the Stiefel
#'  manifold.
#' @keywords internal
#' @export
retractStiefl <- function(A) {
    return(qr.Q(qr(A)))
}

#' Skew-symmetric matrix computed from `A` as
#' \eqn{1/2 (A - A^T)}.
#' @param A Matrix of dim `(p, p)`
#' @return Skew-symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
skew <- function(A) {
    0.5 * (A - t(A))
}

#' Symmetric matrix computed from `A` as
#' \eqn{1/2 (A + A^T)}.
#' @param A Matrix of dim `(p, p)`
#' @return Symmetric matrix of dim `(p, p)`.
#' @keywords internal
#' @export
sym <- function(A) {
    0.5 * (A + t(A))
}

#' Orthogonal projection onto the tangent space of the Stiefel manifold.
#'
#' @param V Point on the Stiefel manifold.
#' @param G matrix to be projected onto the tangent space at `V`.
#' @return `(p, q)` matrix as an element of the tangent space at `V`.
#' @keywords internal
#' @export
projTangentStiefl <- function(V, G) {
    Q <- diag(1, nrow(V)) - V %*% t(V)
    return(Q %*% G + V %*% skew(t(V) %*% G))
}

#' Null space basis of a given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal
#'  complement of `span(V)`.
#' @keywords internal
#' @export
null <- function(V) {
    tmp <- qr(V)
    set <- if (tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}

#' Creates a (numeric) matrix where each column contains
#' an element-to-element matching.
#' @param elements numeric vector of elements to match
#' @return matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
#' @keywords internal
#' @examples
#' elem.pairs(seq.int(2, 5))
#' @export
elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
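A standalone sanity check for `rStiefl` and `null` above (a sketch): the returned complement basis is semi-orthogonal and orthogonal to `span(V)`.

V <- rStiefl(6, 4)
B <- null(V)
stopifnot(
    all.equal(crossprod(B), diag(2)),           # semi-orthogonal
    all.equal(crossprod(V, B), matrix(0, 4, 2)) # orthogonal to span(V)
)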
@ -1,2 +0,0 @@
runtime_test    Runtime comparison of CVE against MAVE for the M1 - M5 datasets.
logging         Example of a logger function for cve algorithm analysis.
@ -1,43 +0,0 @@
library(CVEpureR)

# Setup histories.
(epochs <- 50)
(attempts <- 10)
loss.history <- matrix(NA, epochs + 1, attempts)
error.history <- matrix(NA, epochs + 1, attempts)
tau.history <- matrix(NA, epochs + 1, attempts)
true.error.history <- matrix(NA, epochs + 1, attempts)

# Create a dataset.
ds <- dataset("M1")
X <- ds$X
Y <- ds$Y
B <- ds$B # the true `B`
(k <- ncol(ds$B))

# True projection matrix.
P <- B %*% solve(t(B) %*% B) %*% t(B)
# Define the logger for the `cve()` method.
logger <- function(env) {
    # Note the `<<-` assignment!
    loss.history[env$epoch + 1, env$attempt] <<- env$loss
    error.history[env$epoch + 1, env$attempt] <<- env$error
    tau.history[env$epoch + 1, env$attempt] <<- env$tau
    # Compute the true error by comparing to the true `B`.
    B.est <- null(env$V) # Function provided by CVE.
    P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
    true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
    true.error.history[env$epoch + 1, env$attempt] <<- true.error
}
# Perform SDR for ONE `k`.
dr <- cve(Y ~ X, k = k, logger = logger, epochs = epochs, attempts = attempts)
# Plot histories.
par(mfrow = c(2, 2))
matplot(loss.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'loss', ylab = expression(L(V[iter])))
matplot(error.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'error', ylab = 'error')
matplot(tau.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'tau', ylab = 'tau')
matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
        main = 'true error', ylab = 'true error')
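Once the run finishes, the recorded histories also allow a quick per-attempt summary (a sketch; `na.rm` skips epochs cut short by the break condition):

apply(loss.history, 2, min, na.rm = TRUE)       # best loss per attempt
apply(true.error.history, 2, min, na.rm = TRUE) # best true error per attempt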
@ -1,89 +0,0 @@
# Usage:
# ~$ Rscript runtime_test.R

library(CVEpureR) # load CVE

#' Writes progress to console.
tell.user <- function(name, start.time, i, length) {
    cat("\rRunning Test (", name, "):",
        i, "/", length,
        " - elapsed:", format(Sys.time() - start.time), "\033[K")
}
subspace.dist <- function(B1, B2) {
    P1 <- B1 %*% solve(t(B1) %*% B1) %*% t(B1)
    P2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
    return(norm(P1 - P2, type = 'F'))
}

# Number of simulations.
SIM.NR <- 50
# Maximal number of iterations in the curvilinear search algorithm.
MAXIT <- 50
# Number of arbitrary starting values for curvilinear optimization.
ATTEMPTS <- 10
# Set names of datasets.
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method.
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")

# Setup error and time tracking variables.
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
colnames(time) <- colnames(error)

# Only for telling the user (to stdout).
count <- 0
start.time <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
    # Repeat for each dataset.
    for (name in dataset.names) {
        count <- count + 1
        tell.user(name, start.time, count, SIM.NR * length(dataset.names))

        # Create a new dataset.
        ds <- dataset(name)
        # Prepare X, Y and combine them to a data matrix.
        Y <- ds$Y
        X <- ds$X
        data <- cbind(Y, X)
        # Get dimensions.
        dim <- ncol(X)
        truedim <- ncol(ds$B)

        for (method in methods) {
            dr.time <- system.time(
                dr <- cve.call(X, Y,
                               method = method,
                               k = truedim,
                               attempts = ATTEMPTS
                )
            )
            dr <- dr[[truedim]]

            key <- paste0(name, '-', method)
            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * truedim)
            time[sim, key] <- dr.time["elapsed"]
        }
    }
}

cat("\n\n## Time [sec] Means:\n")
print(colMeans(time))
cat("\n## Error Means:\n")
print(colMeans(error))

at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
boxplot(error,
        main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
        ylab = "Error",
        las = 2,
        at = at
)
boxplot(time,
        main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
        ylab = "Time [sec]",
        las = 2,
        at = at
)
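For reference, the `at` positions computed above interleave the method boxes per dataset and leave a one-slot gap between dataset groups; with the single method and five datasets used here this evaluates to (a sketch):

methods <- c("simple")
datasets <- c("M1", "M2", "M3", "M4", "M5")
ncols <- length(methods) * length(datasets)
at <- seq(ncols) + rep(seq(ncols / length(methods)) - 1, each = length(methods))
at # 1 3 5 7 9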
Binary file not shown.
@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\docType{package}
\name{CVEpureR-package}
\alias{CVEpureR}
\alias{CVEpureR-package}
\title{Conditional Variance Estimator (CVE)}
\description{
Conditional Variance Estimator for Sufficient Dimension
Reduction
}
\details{
TODO: And some details
}
\references{
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\author{
Loki
}
@ -1,71 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{cve}
\alias{cve}
\alias{cve.call}
\title{Implementation of the CVE method.}
\usage{
cve(formula, data, method = "simple", max.dim = 10, ...)

cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1,
  max.dim = 10, k, ...)
}
\arguments{
\item{formula}{Formula for the regression model defining `X`, `Y`.
See: \code{\link{formula}}.}

\item{data}{data.frame holding the data for the formula.}

\item{method}{The methods only differ in the optimization used.
All of them are gradient-based optimizations on a Stiefel manifold.
\itemize{
    \item "simple" Simple reduction of the step size.
    \item "sgd" stochastic gradient descent.
    \item TODO: further
}}

\item{X}{Data matrix.}

\item{Y}{Responses.}

\item{nObs}{As described in the paper.}

\item{k}{Guess for the SDR dimension.}

\item{...}{Further, method-specific parameters.}
}
\description{
Conditional Variance Estimator (CVE) is a novel sufficient dimension
reduction (SDR) method assuming a model
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
where B'X is a lower dimensional projection of the predictors.
}
\examples{
library(CVE)

# sample dataset
ds <- dataset("M5")

# call `cve` with default method (aka "simple")
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
# plot optimization history (loss via iteration)
plot(dr.simple, main = "CVE M5 simple")

# call `cve` with method "linesearch" using a `data.frame` as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: `Y, X` are NOT defined, they are extracted from `data`.
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
plot(dr.linesearch, main = "CVE M5 linesearch")

}
\references{
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
}
\seealso{
\code{\link{formula}}. For a complete parameters list (dependent on
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
}
@ -1,19 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gridSearch.R
\name{cve.grid.search}
\alias{cve.grid.search}
\title{Performs a grid search for parameters over a parameter grid.}
\usage{
cve.grid.search(X, Y, k, args)
}
\description{
Performs a grid search for parameters over a parameter grid.
}
\examples{
ds <- dataset()
args <- list(
    h = c(0.05, 0.1, 0.2),
    method = c("simple", "sgd"),
    tau = c(0.5, 0.1, 0.01)
)
cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)
}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_linesearch.R
\name{cve_linesearch}
\alias{cve_linesearch}
\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.}
\usage{
cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
  tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L,
  attempts = 10L, max.linesearch.iter = 10L, logger = NULL)
}
\description{
Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe
conditions.
}
\keyword{internal}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_sgd.R
\name{cve_sgd}
\alias{cve_sgd}
\title{Implementation of the CVE method using stochastic gradient descent
(batch-wise optimization).}
\usage{
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
  tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L,
  logger = NULL)
}
\description{
Implementation of the CVE method using stochastic gradient descent
(batch-wise optimization).
}
\keyword{internal}
@ -1,16 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cve_simple.R
\name{cve_simple}
\alias{cve_simple}
\title{Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.}
\usage{
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
  tol = 0.001, slack = 0, epochs = 50L, attempts = 10L,
  logger = NULL)
}
\description{
Simple implementation of the CVE method. 'Simple' means that this method is
a classic GD method using no further tricks.
}
\keyword{internal}
@ -1,64 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/datasets.R
\name{dataset}
\alias{dataset}
\title{Generates test datasets.}
\usage{
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
}
\arguments{
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}.}

\item{n}{number of samples.}

\item{B}{optional dimension-reduction matrix; if missing, a dataset-specific
default with \code{p = 12} rows is used.}

\item{p.mix}{Only for \code{"M4"}, see below.}

\item{lambda}{Only for \code{"M4"}, see below.}
}
\value{
List with elements
\itemize{
    \item{X}{data}
    \item{Y}{response}
    \item{B}{used dimension-reduction matrix}
    \item{name}{name of the dataset (name parameter)}
}
}
\description{
Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper referenced below.
The general model is given by:
\deqn{Y ~ g(B'X) + \epsilon}
}
\section{M1}{

The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
}

\section{M2}{

\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
}

\section{M3}{

TODO:
}

\section{M4}{

TODO:
}

\section{M5}{

TODO:
}
@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{elem.pairs}
\alias{elem.pairs}
\title{Creates a (numeric) matrix where each column contains
an element-to-element matching.}
\usage{
elem.pairs(elements)
}
\arguments{
\item{elements}{numeric vector of elements to match}
}
\value{
matrix of size `(2, n * (n - 1) / 2)` for an argument of length `n`.
}
\description{
Creates a (numeric) matrix where each column contains
an element-to-element matching.
}
\examples{
elem.pairs(seq.int(2, 5))
}
\keyword{internal}
@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/estimateBandwidth.R
\name{estimate.bandwidth}
\alias{estimate.bandwidth}
\title{Estimated bandwidth for CVE.}
\usage{
estimate.bandwidth(X, k, nObs)
}
\arguments{
\item{X}{data matrix of dimension (n x p) with n data points X_i of
dimension p. Therefore each row represents a data point of dimension p.}

\item{k}{Guess for rank(B).}

\item{nObs}{Either numeric or a function. If specified as a numeric value
it is used in the computation of the bandwidth directly. If it is a
function, `nObs` is evaluated as \code{nObs(nrow(x))}. The default
behaviour if not supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
}
\description{
Estimates a proper bandwidth \code{h} according to
\deqn{%
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
}
\seealso{
[\code{\link{qchisq}}]
}
@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gradient.R
\name{grad}
\alias{grad}
\title{Compute the gradient of `L(V)` given a dataset `X`.}
\usage{
grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE,
  persistent = FALSE)
}
\arguments{
\item{X}{Data matrix.}

\item{Y}{Response.}

\item{V}{Position to compute the gradient at, aka point on the Stiefel manifold.}

\item{h}{Bandwidth.}

\item{loss.out}{If \code{TRUE} the loss will be written to the parent environment.}

\item{loss.only}{Boolean to only compute the loss, if \code{TRUE} a single
loss value is returned and \code{envir} is ignored.}

\item{persistent}{Determines if data indices and dependent calculations shall
be reused from the parent environment. ATTENTION: Do NOT set this flag, it is
only intended for internal usage by carefully aligned functions!}
}
\description{
Compute the gradient of `L(V)` given a dataset `X`.
}
\keyword{internal}
@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{null}
\alias{null}
\title{Null space basis of a given matrix `V`}
\usage{
null(V)
}
\arguments{
\item{V}{`(p, q)` matrix}
}
\value{
Semi-orthogonal `(p, p - q)` matrix spanning the orthogonal complement of
`span(V)`.
}
\description{
Null space basis of a given matrix `V`
}
\keyword{internal}
@ -1,28 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{plot.cve}
\alias{plot.cve}
\title{Plotting helper for objects of class \code{cve}.}
\usage{
## S3 method for class 'cve'
plot(x, content = "history", ...)
}
\arguments{
\item{x}{Object of class \code{cve} (result of [cve()]).}

\item{...}{Pass-through parameters to [plot()] and [lines()].}

\item{content}{Specifies what to plot:
\itemize{
    \item "history" Plots the loss history from the Stiefel optimization
        (default).
    \item ... TODO: add (if there are any)
}}
}
\description{
Plotting helper for objects of class \code{cve}.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}} for the standard plot utility.
}
@ -1,22 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/util.R
\name{rStiefl}
\alias{rStiefl}
\title{Samples uniformly from the Stiefel manifold}
\usage{
rStiefl(p, q)
}
\arguments{
\item{p}{row dimension.}

\item{q}{column dimension.}
}
\value{
`(p, q)` semi-orthogonal matrix
}
\description{
Samples uniformly from the Stiefel manifold
}
\examples{
V <- rStiefl(6, 4)
}
@ -1,14 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{summary.cve}
\alias{summary.cve}
\title{Prints a summary of a \code{cve} result.}
\usage{
\method{summary}{cve}(object, ...)
}
\arguments{
\item{object}{Instance of class \code{cve} as returned by \code{cve}.}
}
\description{
Prints a summary of a \code{cve} result.
}
@ -1,479 +0,0 @@
library(microbenchmark)

dyn.load("benchmark.so")


## rowSum* .Call --------------------------------------------------------------
rowSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
}
colSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_colSums', PACKAGE = 'benchmark', M)
}
rowSquareSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
    stopifnot(
        is.vector(vecA),
        is.numeric(vecA),
        is.numeric(diag),
        nrow * (nrow - 1) == length(vecA) * 2
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    .Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
          vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A),
        is.numeric(v)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.vector(v) || !is.double(v)) {
        v <- as.double(v)
    }
    stopifnot(
        nrow(A) == length(v),
        op %in% c('+', '-', '*', '/')
    )

    .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
}

## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
    all.equal(rowSums(M^2), rowSums.c(M^2)),
    all.equal(colSums(M), colSums.c(M)),
    all.equal(rowSums(M), rowSumsV2.c(M)),
    all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
    rowSums     = rowSums(M),
    rowSums.c   = rowSums.c(M),
    rowSumsV2.c = rowSumsV2.c(M),
    rowSumsV3.c = rowSumsV3.c(M)
)
microbenchmark(
    rowSums     = rowSums(M^2),
    rowSums.c   = rowSums.c(M^2),
    rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
    colSums   = colSums(M),
    colSums.c = colSums.c(M)
)

sum <- rowSums(M)
stopifnot(all.equal(
    sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # Row-normalize
), all.equal(
    sweep(M, 1, sum, FUN = `/`),
    M / sum
))
microbenchmark(
    sweep = sweep(M, 1, sum, FUN = `/`),
    M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # Row-normalize
)

# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of the `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
    rowSums(Sym),
    rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
    rowSums         = rowSums(Sym),
    rowSums.c       = rowSums.c(Sym),
    rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)

## Matrix-Matrix operation .Call ----------------------------------------------
transpose.c <- function(A) {
    stopifnot(
        is.matrix(A), is.numeric(A)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow(A), ncol(A))
    }

    .Call('R_transpose', PACKAGE = 'benchmark', A)
}

sympMV.c <- function(vecA, x) {
    stopifnot(
        is.vector(vecA), is.numeric(vecA),
        is.vector(x), is.numeric(x),
        length(x) * (length(x) + 1) == 2 * length(vecA)
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    if (!is.double(x)) {
        x <- as.double(x)
    }

    .Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
}

matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        ncol(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
}
crossprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        nrow(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_crossprod', PACKAGE = 'benchmark', A, B)
}
kronecker.c <- function(A, B, op = '*') {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        is.character(op), op %in% c('*', '+', '/', '-')
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_kronecker', PACKAGE = 'benchmark', A, B, op)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        all(dim(A) == dim(B)),
        is.numeric(alpha), length(alpha) == 1L,
        is.numeric(beta), length(beta) == 1L
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
          as.double(alpha), as.double(beta))
}

## Matrix-Matrix operation tests ----------------------------------------------
n <- 200
k <- 100
m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(t(A), transpose.c(A))
)
microbenchmark(
    t(A),
    transpose.c(A)
)

Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = TRUE)]
x <- runif(n)
stopifnot(all.equal(
    as.double(Sym %*% x),
    sympMV.c(vecSym, x)
))
microbenchmark(
    Sym %*% x,
    sympMV.c = sympMV.c(vecSym, x)
)
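# Side note on the packed storage used by sympMV.c above (a sketch): the
# vector holds the lower triangle, diagonal included, in column-major
# order, which is what BLAS dspmv expects and what lower.tri extraction
# yields.
S <- tcrossprod(1:3)                  # 3 x 3 symmetric matrix
vecS.packed <- S[lower.tri(S, diag = TRUE)]
stopifnot(length(vecS.packed) == 3 * (3 + 1) / 2)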
stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
    "%*%"        = A %*% B,
    matrixprod.c = matrixprod.c(A, B)
)

A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
    crossprod   = crossprod(A, B),
    crossprod.c = crossprod.c(A, B)
)

n <- 100L
m <- 12L
p <- 11L
q <- 10L
A <- matrix(runif(n * m), n, m)
B <- matrix(runif(p * q), p, q)

stopifnot(all.equal(
    kronecker(A, B),
    kronecker.c(A, B)
))
microbenchmark(
    kronecker   = kronecker(A, B),
    kronecker.c = kronecker.c(A, B)
)

n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
    A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
    A %*% t(B) - B %*% t(A),
    skewSymRank2k.c(A, B)
)

## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
    stopifnot(
        is.matrix(B), is.numeric(B)
    )
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_nullProj', PACKAGE = 'benchmark', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))

# Projection matrix onto the orthogonal complement of `span(V)`.
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
    all.equal(Q, nullProj.c(V))
)
microbenchmark(
    nullProj   = diag(1, p) - tcrossprod(V, V),
    nullProj.c = nullProj.c(V)
)

# ## Kronecker optimizations ----------------------------------------------------
# library(microbenchmark)

# dist.1 <- function(X_diff, Q) {
#     rowSums((X_diff %*% Q)^2)
# }
# dist.2 <- function(X, Q) {
#     ones <- rep(1, nrow(X))
#     proj <- X %*% Q
#     rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2)
# }

# n <- 400L
# p <- 12L
# k <- 2L
# q <- p - k

# X <- matrix(rnorm(n * p), n, p)
# Q <- diag(1, p) - tcrossprod(rnorm(p))
# ones <- rep(1, n)
# X_diff <- kronecker(X, ones) - kronecker(ones, X)

# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q)))

# microbenchmark(
#     dist.1(X_diff, Q),
#     dist.2(X, Q),
#     times = 10L
# )
# # if (!persistent) {
# #     pair.index <- elem.pairs(seq(n))
# #     i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
# #     j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
# #     lower <- ((i - 1) * n) + j
# #     upper <- ((j - 1) * n) + i
# #     X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
# # }

# # # Projection matrix onto the orthogonal complement of `span(V)`
# # Q <- diag(1, p) - tcrossprod(V, V)
# # # Vectorized distance matrix `D`.
# # vecD <- rowSums((X_diff %*% Q)^2)



# ## WIP for gradient. ----------------------------------------------------------

grad.c <- function(X, X_diff, Y, V, h) {
    stopifnot(
        is.matrix(X), is.double(X),
        is.matrix(X_diff), is.double(X_diff),
        ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && min(dim(Y)) == 1L), is.double(Y),
        length(Y) == nrow(X),
        is.matrix(V), is.double(V),
        nrow(V) == ncol(X),
        is.vector(h), is.numeric(h), length(h) == 1
    )

    .Call('R_grad', PACKAGE = 'benchmark',
          X, X_diff, as.double(Y), V, as.double(h))
}

elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each = n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
grad <- function(X, Y, V, h, persistent = TRUE) {
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
        j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i
        X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]
    }

    # Projection matrix onto the orthogonal complement of `span(V)`.
    Q <- diag(1, p) - tcrossprod(V, V)
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Create kernel matrix (aka apply the kernel to the distances).
    K <- matrix(1, n, n)                 # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower triangular part.
    K[upper] <- t(K)[upper]              # Mirror lower triangle to upper.

    # Weighted `Y` moments.
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK
    # Per-example loss `L(V, X_i)`.
    L <- y2 - y1^2

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
rStiefel <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

n <- 200
p <- 12
q <- 10

X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefel(p, q)
h <- 0.1

pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = FALSE] - X[j, , drop = FALSE]

stopifnot(all.equal(
    grad(X, Y, V, h),
    grad.c(X, X_diff, Y, V, h)
))
microbenchmark(
    grad   = grad(X, Y, V, h),
    grad.c = grad.c(X, X_diff, Y, V, h)
)
@ -1,510 +0,0 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h> // for `mem*` functions.
|
||||
|
||||
#include <R_ext/BLAS.h>
|
||||
#include <R_ext/Lapack.h>
|
||||
#include <R_ext/Error.h>
|
||||
// #include <Rmath.h>
|
||||
|
||||
#include "benchmark.h"
|
||||
|
||||
void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j];
|
||||
sum[j + 1] = A[j + 1];
|
||||
sum[j + 2] = A[j + 2];
|
||||
sum[j + 3] = A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j];
|
||||
sum[j + 1] += A[j + 1];
|
||||
sum[j + 2] += A[j + 2];
|
||||
sum[j + 3] += A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSumsV2(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SIZE) {
|
||||
block_size = CVE_MEM_CHUNK_SIZE;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Compute first blocks column,
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// and sum the following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
void rowSumsV3(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, onei = 1;
|
||||
double* ones = (double*)malloc(ncol * sizeof(double));
|
||||
const double one = 1.0;
|
||||
const double zero = 0.0;
|
||||
|
||||
for (i = 0; i < ncol; ++i) {
|
||||
ones[i] = 1.0;
|
||||
}
|
||||
|
||||
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
|
||||
free(ones);
|
||||
}
|
||||
|
||||
void colSums(const double *A, const int nrow, const int ncol,
|
||||
double *sums) {
|
||||
int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow.
|
||||
double sum;
|
||||
|
||||
for (j = 0; j < ncol; ++j) {
|
||||
sum = 0.0;
|
||||
for (i = 0; i < nrowb; i += 4) {
|
||||
sum += A[i]
|
||||
+ A[i + 1]
|
||||
+ A[i + 2]
|
||||
+ A[i + 3];
|
||||
}
|
||||
for (; i < nrow; ++i) {
|
||||
sum += A[i];
|
||||
}
|
||||
*(sums++) = sum;
|
||||
A += nrow;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSquareSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!!
|
||||
block_size_i = block_size;
|
||||
} /// ...
|
||||
// TODO:
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j] * A[j];
|
||||
sum[j + 1] = A[j + 1] * A[j + 1];
|
||||
sum[j + 2] = A[j + 2] * A[j + 2];
|
||||
sum[j + 3] = A[j + 3] * A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j] * A[j];
|
||||
}
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j] * A[j];
|
||||
sum[j + 1] += A[j + 1] * A[j + 1];
|
||||
sum[j + 2] += A[j + 2] * A[j + 2];
|
||||
sum[j + 3] += A[j + 3] * A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j] * A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double diag,
|
||||
double *sum) {
|
||||
int i, j;
|
||||
|
||||
if (diag == 0.0) {
|
||||
memset(sum, 0, nrow * sizeof(double));
|
||||
} else {
|
||||
for (i = 0; i < nrow; ++i) {
|
||||
sum[i] = diag;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < nrow; ++j) {
|
||||
for (i = j + 1; i < nrow; ++i, ++Avec) {
|
||||
sum[j] += *Avec;
|
||||
sum[i] += *Avec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define ROW_SWEEP_ALG(op) \
|
||||
/* Iterate `(block_size_i, ncol)` submatrix blocks. */ \
|
||||
for (i = 0; i < nrow; i += block_size_i) { \
|
||||
/* Set `A` and `C` to block beginning. */ \
|
||||
A = A_block; \
|
||||
C = C_block; \
|
||||
/* Get current block's row size. */ \
|
||||
block_size_i = nrow - i; \
|
||||
if (block_size_i > block_size) { \
|
||||
block_size_i = block_size; \
|
||||
} \
|
||||
/* Perform element wise operation for block. */ \
|
||||
for (; A < A_end; A += nrow, C += nrow) { \
|
||||
for (j = 0; j < block_size_i; ++j) { \
|
||||
C[j] = (A[j]) op (v[j]); \
|
||||
} \
|
||||
} \
|
||||
/* Step one block forth. */ \
|
||||
A_block += block_size_i; \
|
||||
C_block += block_size_i; \
|
||||
v += block_size_i; \
|
||||
}
|
||||
|
||||
/* C[, j] = A[, j] * v for each j = 1 to ncol */
|
||||
void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
double *C_block = C;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
|
||||
block_size = CVE_MEM_CHUNK_SMALL;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
if (*op == '+') {
|
||||
ROW_SWEEP_ALG(+)
|
||||
} else if (*op == '-') {
|
||||
ROW_SWEEP_ALG(-)
|
||||
} else if (*op == '*') {
|
||||
ROW_SWEEP_ALG(*)
|
||||
} else if (*op == '/') {
|
||||
ROW_SWEEP_ALG(/)
|
||||
} else {
|
||||
error("Got unknown 'op' (opperation) argument.");
|
||||
}
|
||||
}
|
||||
|
||||
void transpose(const double *A, const int nrow, const int ncol, double* T) {
|
||||
int i, j, len = nrow * ncol;
|
||||
|
||||
// Filling column-wise and accessing row-wise.
|
||||
for (i = 0, j = 0; i < len; ++i, j += nrow) {
|
||||
if (j >= len) {
|
||||
j -= len - 1;
|
||||
}
|
||||
T[i] = A[j];
|
||||
}
|
||||
}
|
||||
|
||||
// Symmetric Packed matrix vector product.
|
||||
// Computes
|
||||
// y <- Ax
|
||||
// where A is supplied as packed lower triangular part of a symmetric
|
||||
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
|
||||
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
|
||||
double one = 1.0;
|
||||
double zero = 0.0;
|
||||
int onei = 1;
|
||||
|
||||
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
|
||||
}

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- A %*% B
    F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &nrowA);
}

void crossprod(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               double *C) {
    const double one = 1.0;
    const double zero = 0.0;

    // DGEMM with parameterization:
    //      C <- t(A) %*% B
    F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
                    &one, A, &nrowA, B, &nrowB,
                    &zero, C, &ncolA);
}
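
// Both wrappers are thin DGEMM calls matching R's semantics; e.g. for
// A and B of dimension n x p, crossprod(A, n, p, B, n, p, C) fills the
// p x p matrix C = t(A) %*% B, just like R's crossprod(A, B).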

#define KRONECKER_ALG(op)                                                  \
    for (j = 0; j < ncolA; ++j) {                                          \
        for (l = 0; l < ncolB; ++l) {                                      \
            colB = B + (l * nrowB);                                        \
            for (i = 0; i < nrowA; ++i) {                                  \
                for (k = 0; k < nrowB; ++k) {                              \
                    *(C++) = (A[i]) op (colB[k]);                          \
                }                                                          \
            }                                                              \
        }                                                                  \
        A += nrowA;                                                        \
    }

// Element-wise generalized Kronecker product; with op = '*' this is the
// usual Kronecker product C = A %x% B.
void kronecker(const double * restrict A, const int nrowA, const int ncolA,
               const double * restrict B, const int nrowB, const int ncolB,
               const char* op,
               double * restrict C) {
    int i, j, k, l;
    const double *colB;

    if (*op == '+') {
        KRONECKER_ALG(+)
    } else if (*op == '-') {
        KRONECKER_ALG(-)
    } else if (*op == '*') {
        KRONECKER_ALG(*)
    } else if (*op == '/') {
        KRONECKER_ALG(/)
    } else {
        error("Got unknown 'op' (operation) argument.");
    }
}
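
// Example (illustrative): for 2 x 2 matrices A and B with op = '*', the
// result is the 4 x 4 block matrix
//      C = [[a11 * B, a12 * B],
//           [a21 * B, a22 * B]].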

// Computes the projection matrix onto the orthogonal complement of
// span(B), i.e. Q = I - B %*% t(B) for (column-)orthonormal B.
void nullProj(const double *B, const int nrowB, const int ncolB,
              double *Q) {
    const double minusOne = -1.0;
    const double one = 1.0;

    // Initialize Q as identity matrix.
    memset(Q, 0, sizeof(double) * nrowB * nrowB);
    double *Q_diag, *Q_end = Q + nrowB * nrowB;
    for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
        *Q_diag = 1.0;
    }

    // DGEMM with parameterization:
    //      Q <- (-1.0 * B %*% t(B)) + Q
    F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
                    &minusOne, B, &nrowB, B, &nrowB,
                    &one, Q, &nrowB);
}
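
// For orthonormal B (t(B) %*% B = I) the result is idempotent and
// annihilates B:
//      Q %*% Q = I - 2 B t(B) + B (t(B) B) t(B) = Q,    Q %*% B = 0.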

// Enumerates all index pairs (i, j) with from <= i < j < to, writing them
// column-wise into the 2 x (n * (n - 1) / 2) matrix `pairs`, n = to - from.
void rangePairs(const int from, const int to, int *pairs) {
    int i, j;
    for (i = from; i < to; ++i) {
        for (j = i + 1; j < to; ++j) {
            pairs[0] = i;
            pairs[1] = j;
            pairs += 2;
        }
    }
}
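
// Example (illustrative): rangePairs(0, 4, pairs) fills `pairs` with the
// 6 pairs (0,1), (0,2), (0,3), (1,2), (1,3), (2,3), in this order.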

// A dense skew-symmetric rank 2k update.
// Performs the update
//      C := alpha (A %*% t(B) - B %*% t(A)) + beta C
void skewSymRank2k(const int nrow, const int ncol,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C) {
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, A, &nrow, B, &nrow,
                    &beta, C, &nrow);
    alpha *= -1.0;
    beta = 1.0;
    F77_NAME(dgemm)("N", "T",
                    &nrow, &nrow, &ncol,
                    &alpha, B, &nrow, A, &nrow,
                    &beta, C, &nrow);
}
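
// Since t(A %*% t(B) - B %*% t(A)) = -(A %*% t(B) - B %*% t(A)), the
// update preserves skew-symmetry of C whenever beta * C is
// skew-symmetric (e.g. beta = 0).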

// TODO: clarify
static inline double gaussKernel(const double x, const double scale) {
    return exp(scale * x * x);
}
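
// As called from grad() below with scale = -0.5 / h and x = vecD[i], this
// evaluates the (unnormalized) Gaussian weight exp(-vecD[i]^2 / (2 h)).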

// TODO: much potential for optimization!!!
static void weightedYandLoss(const int n,
                             const double *Y,
                             const double *vecD,
                             const double *vecW,
                             const double *colSums,
                             double *y1, double *L, double *vecS,
                             double *loss) {
    int i, j, k, N = n * (n - 1) / 2;
    double l;

    for (i = 0; i < n; ++i) {
        y1[i] = Y[i];
        L[i] = Y[i] * Y[i];
    }

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            y1[i] += Y[j] * vecW[k];
            y1[j] += Y[i] * vecW[k];
            L[i] += Y[j] * Y[j] * vecW[k];
            L[j] += Y[i] * Y[i] * vecW[k];
        }
    }

    for (i = 0; i < n; ++i) {
        y1[i] /= colSums[i];
        L[i] /= colSums[i];
    }

    l = 0.0;
    for (i = 0; i < n; ++i) {
        l += (L[i] -= y1[i] * y1[i]);
    }
    *loss = l / (double)n;

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            l = Y[j] - y1[i];
            vecS[k] = (L[i] - (l * l)) / colSums[i];
            l = Y[i] - y1[j];
            vecS[k] += (L[j] - (l * l)) / colSums[j];
        }
    }

    for (k = 0; k < N; ++k) {
        vecS[k] *= vecW[k] * vecD[k];
    }
}
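
// In matrix notation, with W the symmetric weight matrix built from
// `vecW` (unit diagonal) and colSums_i = sum_j W_ij, the above computes
//      y1_i = sum_j W_ij Y_j / colSums_i              (weighted 1st moment)
//      L_i  = sum_j W_ij Y_j^2 / colSums_i - y1_i^2   (weighted variance)
//      loss = (1 / n) sum_i L_i
// and the per-pair scaling factors
//      vecS_k = W_k D_k [ (L_i - (Y_j - y1_i)^2) / colSums_i
//                       + (L_j - (Y_i - y1_j)^2) / colSums_j ].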

void grad(const int n, const int p, const int q,
          const double *X,
          const double *X_diff,
          const double *Y,
          const double *V,
          const double h,
          double *G, double *loss) {
    // Number of non-trivial pairs X_i, X_j.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;

    if (X_diff == (void*)0) {
        // TODO: ...
    }

    // Allocate and compute projection matrix `Q = I_p - V %*% t(V)`.
    double *Q = (double*)malloc(p * p * sizeof(double));
    nullProj(V, p, q, Q);

    // Allocate and compute the vectorized distance matrix with a temporary
    // projection of `X_diff`.
    double *vecD = (double*)malloc(N * sizeof(double));
    double *X_proj;
    if (p < 5) { // TODO: refine that!
        X_proj = (double*)malloc(N * 5 * sizeof(double));
    } else {
        X_proj = (double*)malloc(N * p * sizeof(double));
    }
    matrixprod(X_diff, N, p, Q, p, p, X_proj);
    rowSquareSums(X_proj, N, p, vecD);

    // Apply kernel to the distance vector for the weights computation.
    double *vecK = X_proj; // reuse memory area, no longer needed.
    for (i = 0; i < N; ++i) {
        vecK[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
    rowSumsSymVec(vecK, n, 1.0, colSums); // row sums = column sums by symmetry.

    // Compute weighted responses of first and second moment, aka y1 and L.
    double *y1 = X_proj + N + n;
    double *L = X_proj + N + (2 * n);
    // Allocate the X_diff scaling vector `vecS` outside the `X_proj` memory
    // area because it is used simultaneously with X_proj in the final
    // gradient computation.
    double *vecS = (double*)malloc(N * sizeof(double));
    weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);

    // Compute the gradient using X_proj for the intermediate scaled X_diff.
    rowSweep(X_diff, N, p, "*", vecS, X_proj);
    // Reuse Q which has the required dimension (p, p).
    crossprod(X_diff, N, p, X_proj, N, p, Q);
    // Product with V.
    matrixprod(Q, p, p, V, p, q, G);
    // And final scaling (TODO: move into matrixprod!).
    scale = -2.0 / (((double)n) * h * h);
    N = p * q;
    for (i = 0; i < N; ++i) {
        G[i] *= scale;
    }

    free(vecS);
    free(X_proj);
    free(vecD);
    free(Q);
}
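
// Putting the pieces together, the returned gradient is
//      G = -2 / (n h^2) * t(X_diff) %*% (vecS * X_diff) %*% V
// where `vecS * X_diff` scales each row (pair) of X_diff by the factor
// computed in weightedYandLoss().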

@ -1,219 +0,0 @@
#ifndef CVE_INCLUDE_GUARD_
#define CVE_INCLUDE_GUARD_

#include <Rinternals.h>

#define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032

void rowSums(const double *A,
             const int nrow, const int ncol,
             double *sum);
SEXP R_rowSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}
void rowSumsV2(const double *A,
               const int nrow, const int ncol,
               double *sum);
SEXP R_rowSumsV2(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}
void rowSumsV3(const double *A,
               const int nrow, const int ncol,
               double *sum);
SEXP R_rowSumsV3(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

void colSums(const double *A,
             const int nrow, const int ncol,
             double *sum);
SEXP R_colSums(SEXP A) {
    SEXP sums = PROTECT(allocVector(REALSXP, ncols(A)));

    colSums(REAL(A), nrows(A), ncols(A), REAL(sums));

    UNPROTECT(1);
    return sums;
}

void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) {
    SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));

    rowSquareSums(REAL(A), nrows(A), ncols(A), REAL(result));

    UNPROTECT(1);
    return result;
}

void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
    SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));

    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));

    UNPROTECT(1);
    return sum;
}

void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C);
SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(A)));

    rowSweep(REAL(A), nrows(A), ncols(A),
             CHAR(STRING_ELT(op, 0)),
             REAL(v), REAL(C));

    UNPROTECT(1);
    return C;
}

void transpose(const double *A, const int nrow, const int ncol, double* T);
SEXP R_transpose(SEXP A) {
    SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));

    transpose(REAL(A), nrows(A), ncols(A), REAL(T));

    UNPROTECT(1); /* T */
    return T;
}

void sympMV(const double* vecA, const int nrow, const double* x, double* y);
SEXP R_sympMV(SEXP vecA, SEXP x) {
    SEXP y = PROTECT(allocVector(REALSXP, length(x)));

    sympMV(REAL(vecA), length(x), REAL(x), REAL(y));

    UNPROTECT(1); /* y */
    return y;
}

void matrixprod(const double *A, const int nrowA, const int ncolA,
                const double *B, const int nrowB, const int ncolB,
                double *C);
SEXP R_matrixprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), ncols(B)));

    matrixprod(REAL(A), nrows(A), ncols(A),
               REAL(B), nrows(B), ncols(B),
               REAL(C));

    UNPROTECT(1);
    return C;
}

void crossprod(const double* A, const int nrowA, const int ncolA,
               const double* B, const int nrowB, const int ncolB,
               double* C);
SEXP R_crossprod(SEXP A, SEXP B) {
    SEXP C = PROTECT(allocMatrix(REALSXP, ncols(A), ncols(B)));

    crossprod(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              REAL(C));

    UNPROTECT(1);
    return C;
}

void kronecker(const double *A, const int nrowA, const int ncolA,
               const double *B, const int nrowB, const int ncolB,
               const char *op,
               double *C);
SEXP R_kronecker(SEXP A, SEXP B, SEXP op) {
    SEXP C = PROTECT(allocMatrix(REALSXP,
                                 nrows(A) * nrows(B),
                                 ncols(A) * ncols(B)));

    kronecker(REAL(A), nrows(A), ncols(A),
              REAL(B), nrows(B), ncols(B),
              CHAR(STRING_ELT(op, 0)),
              REAL(C));

    UNPROTECT(1);
    return C;
}

void skewSymRank2k(const int n, const int k,
                   double alpha, const double *A, const double *B,
                   double beta,
                   double *C);
SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
    SEXP C = PROTECT(allocMatrix(REALSXP, nrows(A), nrows(A)));
    memset(REAL(C), 0, nrows(A) * nrows(A) * sizeof(double));

    skewSymRank2k(nrows(A), ncols(A),
                  *REAL(alpha), REAL(A), REAL(B),
                  *REAL(beta), REAL(C));

    UNPROTECT(1);
    return C;
}

void nullProj(const double* B, const int nrowB, const int ncolB,
              double* Q);
SEXP R_nullProj(SEXP B) {
    SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));

    nullProj(REAL(B), nrows(B), ncols(B), REAL(Q));

    UNPROTECT(1);
    return Q;
}

void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) {
    int start = asInteger(from);
    int end = asInteger(to) + 1;
    int n = end - start;

    SEXP out = PROTECT(allocMatrix(INTSXP, 2, n * (n - 1) / 2));
    rangePairs(start, end, INTEGER(out));

    UNPROTECT(1);
    return out;
}

void grad(const int n, const int p, const int q,
          const double *X,
          const double *X_diff,
          const double *Y,
          const double *V,
          const double h,
          double *G, double *const loss);
SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
    int N = (nrows(X) * (nrows(X) - 1)) / 2;

    SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
    SEXP loss = PROTECT(allocVector(REALSXP, 1));

    grad(nrows(X), ncols(X), ncols(V),
         REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
         REAL(G), REAL(loss));

    UNPROTECT(2);
    return G;
}

#endif /* CVE_INCLUDE_GUARD_ */