2
0
Fork 0

change: logger parameters,

fix: gKscale in cve.c to use h^2,
add: user interrupt to cve.c,
updated test und runtime_test to new version
This commit is contained in:
Daniel Kapla 2019-11-25 20:49:43 +01:00
parent 875982a010
commit 6fbffe6d74
20 changed files with 202 additions and 226 deletions

View File

@ -13,6 +13,7 @@ export(directions)
export(elem.pairs) export(elem.pairs)
export(estimate.bandwidth) export(estimate.bandwidth)
export(null) export(null)
export(predict.dim)
export(projTangentStiefel) export(projTangentStiefel)
export(rStiefel) export(rStiefel)
export(retractStiefel) export(retractStiefel)

View File

@ -11,7 +11,6 @@
#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation #' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
#' studies. CVE is shown to outperform MAVE in some model set-ups, while it #' studies. CVE is shown to outperform MAVE in some model set-ups, while it
#' remains largely on par under most others. #' remains largely on par under most others.
#'
#' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real #' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of #' \eqn{p}-dimensional covariate vector. We assume that the dependence of
#' \eqn{Y} and \eqn{X} is modelled by #' \eqn{Y} and \eqn{X} is modelled by
@ -25,17 +24,16 @@
#' Without loss of generality \eqn{B} is assumed to be orthonormal. #' Without loss of generality \eqn{B} is assumed to be orthonormal.
#' #'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia #' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for #' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Sufficient Dimension Reduction, 2019 #' Estimation for Sufficient Dimension Reduction. Working Paper.
#' #'
#' @importFrom stats model.frame
#' @docType package #' @docType package
#' @useDynLib CVE, .registration = TRUE #' @useDynLib CVE, .registration = TRUE
"_PACKAGE" "_PACKAGE"
#' Conditional Variance Estimator (CVE). #' Conditional Variance Estimator (CVE).
#' #'
#' TODO: reuse of package description and details!!!! #' @inherit CVE-package description
#' #'
#' @param formula an object of class \code{"formula"} which is a symbolic #' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted. #' description of the model to be fitted.
@ -49,28 +47,37 @@
#' } #' }
#' @param ... Parameters passed on to \code{cve.call}. #' @param ... Parameters passed on to \code{cve.call}.
#' #'
#' @return dr is a S3 object of class \code{cve} with named properties: #' @return an S3 object of class \code{cve} with components:
#' \itemize{ #' \describe{
#' \item X: Original training data, #' \item{X}{Original training data,}
#' \item Y: Responce of original training data, #' \item{Y}{Responce of original training data,}
#' \item method: Name of used method, #' \item{method}{Name of used method,}
#' \item call: The method call #' \item{call}{the matched call,}
#' \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
#' each \eqn{k=min.dim,...,max.dim} (dimension).}
#' } #' }
#' as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
#' projection matrices.
#' #'
#' @examples #' @examples
#' # create dataset #' # create dataset
#' x <- matrix(rnorm(400), 100, 4) #' x <- matrix(rnorm(400), 100, 4)
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) #' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
#' #'
#' # Call CVE using momentum. #' # Call CVE.
#' dr.momentum <- cve(y ~ x, momentum = 0.2) #' dr <- cve(y ~ x)
#' # Call weighted CVE. #' # Call weighted CVE.
#' dr.weighted <- cve(y ~ x, method = "weighted") #' dr.weighted <- cve(y ~ x, method = "weighted")
#' #'
#' # Training data responces of reduced data.
#' y.est <- directions(dr, 1)
#' # Extract SDR subspace basis of dimension 1.
#' B <- coef(dr.momentum, 1)
#'
#' @seealso For a detailed description of \code{formula} see #' @seealso For a detailed description of \code{formula} see
#' \code{\link{formula}}. #' \code{\link{formula}}.
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @importFrom stats model.frame
#' @export #' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default # check for type of `data` if supplied and set default
@ -93,6 +100,9 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
return(dr) return(dr)
} }
#' @inherit cve title
#' @inherit cve description
#'
#' @param nObs parameter for choosing bandwidth \code{h} using #' @param nObs parameter for choosing bandwidth \code{h} using
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied). #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows. #' @param X data matrix with samples in its rows.
@ -117,18 +127,25 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is #' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
#' set to 1 and \code{k} to match dimension) #' set to 1 and \code{k} to match dimension)
#' #'
#' @return dr is a list which contains: #' @inherit cve return
#' \itemize{ #'
#' \item dir: dir[[d]] is the central space with d-dimension #' @examples
#' d = 1, 2, ..., p reduced direction of different dimensions #' # Create a dataset (n samples):
#' \item y: the value of response #' n <- 100
#' \item idx: the index of variables which survives after screening #' X <- matrix(rnorm(4 * n), n)
#' \item max.dim: the largest dimensions of CS or CMS which have been calculated in mave function #' Y <- matrix(X[, 1] + cos(X[, 2]) + rnorm(n, 0, .1), n)
#' \item ky: parameter used for DIM for selection #'
#' \item x: the original training data #' # Create logger function:
#' logger <- function(attempt, iter, data) {
#' if (iter == 0) {
#' cat("Starting attempt nr:", attempt, "\n")
#' }
#' cat(" iter:", iter, "loss:", data$loss, "\n")
#' } #' }
#' #'
#' @rdname cve #' Call 'cve' with logger:
#' cve(Y ~ X, logger = logger)
#'
#' @export #' @export
cve.call <- function(X, Y, method = "simple", cve.call <- function(X, Y, method = "simple",
nObs = sqrt(nrow(X)), h = NULL, nObs = sqrt(nrow(X)), h = NULL,
@ -138,16 +155,16 @@ cve.call <- function(X, Y, method = "simple",
V.init = NULL, V.init = NULL,
max.iter = 50L, attempts = 10L, max.iter = 50L, attempts = 10L,
logger = NULL) { logger = NULL) {
# get method bitmask # Determine method with partial matching (shortcuts: "Weight" -> "weighted")
methods <- list( methods <- list(
"simple" = 0L, "simple" = 0L,
"weighted" = 1L "weighted" = 1L
) )
method <- tolower(method) method_nr <- methods[[tolower(method), exact = FALSE]]
if (!(method %in% names(methods))) { if (!is.integer(method_nr)) {
stop('Got unknown method.') stop('Unable to determine method.')
} }
method_bitmask <- methods[[method]] method <- names(which(method_nr == methods))
# parameter checking # parameter checking
if (!is.numeric(momentum) || length(momentum) > 1L) { if (!is.numeric(momentum) || length(momentum) > 1L) {
@ -273,7 +290,7 @@ cve.call <- function(X, Y, method = "simple",
dr.k <- .Call('cve', PACKAGE = 'CVE', dr.k <- .Call('cve', PACKAGE = 'CVE',
X, Y, k, h, X, Y, k, h,
method_bitmask, method_nr,
V.init, V.init,
momentum, tau, tol, momentum, tau, tol,
slack, gamma, slack, gamma,

View File

@ -3,7 +3,7 @@
#' Provides sample datasets. There are 5 different datasets named #' Provides sample datasets. There are 5 different datasets named
#' M1, M2, M3, M4 and M5 described in the paper references below. #' M1, M2, M3, M4 and M5 described in the paper references below.
#' The general model is given by: #' The general model is given by:
#' \deqn{Y ~ g(B'X) + \epsilon} #' \deqn{Y = g(B'X) + \epsilon}
#' #'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} #' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
#' @param n nr samples #' @param n nr samples

View File

@ -3,7 +3,7 @@
#' Estimates a bandwidth \code{h} according #' Estimates a bandwidth \code{h} according
#' \deqn{% #' \deqn{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% #' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
#' h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2} #' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
#' with \eqn{n} the sample size, \eqn{p} its dimension #' with \eqn{n} the sample size, \eqn{p} its dimension
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} #' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
#' which is \code{(n-1)/n} times the sample covariance estimate. #' which is \code{(n-1)/n} times the sample covariance estimate.

View File

@ -1,16 +1,21 @@
#' @rdname predict.dim.cve
#' @method predict.dim cve
#' @alias predict.dim.cve
#' @export
predict.dim <- function(object, ...) {
UseMethod("predict.dim")
}
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. #' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
#' #'
#' @param object instance of class \code{cve} (result of \code{cve}, #' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}). #' \code{cve.call}).
#' @param ... ignored. #' @param ... ignored.
#'
#' @return list with
#' \itemize{
#' \item MSE: Mean Square Error,
#' \item k: predicted dimensions.
#' }
#'
#' @export
predict.dim <- function(object, ...) {
UseMethod("predict.dim")
}
#' @aliases predict.dim
#' @method predict.dim cve #' @method predict.dim cve
#' @export #' @export
predict.dim.cve <- function(object, ...) { predict.dim.cve <- function(object, ...) {

View File

@ -17,8 +17,6 @@ continuous predictors and link function. The effectiveness and accuracy of
CVE compared to MAVE and other SDR techniques is demonstrated in simulation CVE compared to MAVE and other SDR techniques is demonstrated in simulation
studies. CVE is shown to outperform MAVE in some model set-ups, while it studies. CVE is shown to outperform MAVE in some model set-ups, while it
remains largely on par under most others. remains largely on par under most others.
}
\details{
Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
\eqn{p}-dimensional covariate vector. We assume that the dependence of \eqn{p}-dimensional covariate vector. We assume that the dependence of
\eqn{Y} and \eqn{X} is modelled by \eqn{Y} and \eqn{X} is modelled by
@ -32,8 +30,8 @@ a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
Without loss of generality \eqn{B} is assumed to be orthonormal. Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\references{ \references{
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Sufficient Dimension Reduction, 2019 Estimation for Sufficient Dimension Reduction. Working Paper.
} }
\author{ \author{
Daniel Kapla, Lukas Fertl, Bura Efstathia Daniel Kapla, Lukas Fertl, Bura Efstathia

View File

@ -2,15 +2,9 @@
% Please edit documentation in R/CVE.R % Please edit documentation in R/CVE.R
\name{cve} \name{cve}
\alias{cve} \alias{cve}
\alias{cve.call}
\title{Conditional Variance Estimator (CVE).} \title{Conditional Variance Estimator (CVE).}
\usage{ \usage{
cve(formula, data, method = "simple", max.dim = 10L, ...) cve(formula, data, method = "simple", max.dim = 10L, ...)
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
max.iter = 50L, attempts = 10L, logger = NULL)
} }
\arguments{ \arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic \item{formula}{an object of class \code{"formula"} which is a symbolic
@ -26,82 +20,62 @@ supplied.}
\item "weighted" variation with addaptive weighting of slices. \item "weighted" variation with addaptive weighting of slices.
}} }}
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{...}{Parameters passed on to \code{cve.call}.} \item{...}{Parameters passed on to \code{cve.call}.}
\item{X}{data matrix with samples in its rows.}
\item{Y}{Responses (1 dimensional).}
\item{nObs}{parameter for choosing bandwidth \code{h} using
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
\item{h}{bandwidth or function to estimate bandwidth, defaults to internaly
estimated bandwidth.}
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.}
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
gradient update with a momentum term.}
\item{tau}{Initial step-size.}
\item{tol}{Tolerance for break condition.}
\item{slack}{Positive scaling to allow small increases of the loss while
optimizing.}
\item{gamma}{step-size reduction multiple.}
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
set to 1 and \code{k} to match dimension)}
\item{max.iter}{maximum number of optimization steps.}
\item{attempts}{number of arbitrary different starting points.}
\item{logger}{a logger function (only for advanced user, significantly slows
down the computation).}
} }
\value{ \value{
dr is a S3 object of class \code{cve} with named properties: an S3 object of class \code{cve} with components:
\itemize{ \describe{
\item X: Original training data, \item{X}{Original training data,}
\item Y: Responce of original training data, \item{Y}{Responce of original training data,}
\item method: Name of used method, \item{method}{Name of used method,}
\item call: The method call \item{call}{the matched call,}
} \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR each \eqn{k=min.dim,...,max.dim} (dimension).}
projection matrices.
dr is a list which contains:
\itemize{
\item dir: dir[[d]] is the central space with d-dimension
d = 1, 2, ..., p reduced direction of different dimensions
\item y: the value of response
\item idx: the index of variables which survives after screening
\item max.dim: the largest dimensions of CS or CMS which have been calculated in mave function
\item ky: parameter used for DIM for selection
\item x: the original training data
} }
} }
\description{ \description{
TODO: reuse of package description and details!!!! Conditional Variance Estimation (CVE) is a novel sufficient dimension
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
similarly to its main competitor, the mean average variance estimation
(MAVE), is not based on inverse regression, and does not require the
restrictive linearity and constant variance conditions of moment based SDR
methods. CVE is data-driven and applies to additive error regressions with
continuous predictors and link function. The effectiveness and accuracy of
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
studies. CVE is shown to outperform MAVE in some model set-ups, while it
remains largely on par under most others.
Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
\eqn{p}-dimensional covariate vector. We assume that the dependence of
\eqn{Y} and \eqn{X} is modelled by
\deqn{Y = g(B'X) + \epsilon}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\examples{ \examples{
# create dataset # create dataset
x <- matrix(rnorm(400), 100, 4) x <- matrix(rnorm(400), 100, 4)
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
# Call CVE using momentum. # Call CVE.
dr.momentum <- cve(y ~ x, momentum = 0.2) dr <- cve(y ~ x)
# Call weighted CVE. # Call weighted CVE.
dr.weighted <- cve(y ~ x, method = "weighted") dr.weighted <- cve(y ~ x, method = "weighted")
# Training data responces of reduced data.
y.est <- directions(dr, 1)
# Extract SDR subspace basis of dimension 1.
B <- coef(dr.momentum, 1)
}
\references{
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
} }
\seealso{ \seealso{
For a detailed description of \code{formula} see For a detailed description of \code{formula} see

View File

@ -32,7 +32,7 @@ List with elements
Provides sample datasets. There are 5 different datasets named Provides sample datasets. There are 5 different datasets named
M1, M2, M3, M4 and M5 described in the paper references below. M1, M2, M3, M4 and M5 described in the paper references below.
The general model is given by: The general model is given by:
\deqn{Y ~ g(B'X) + \epsilon} \deqn{Y = g(B'X) + \epsilon}
} }
\section{M1}{ \section{M1}{

View File

@ -20,7 +20,7 @@ Estimated bandwidth \code{h}.
Estimates a bandwidth \code{h} according Estimates a bandwidth \code{h} according
\deqn{% \deqn{%
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{% h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2} h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
with \eqn{n} the sample size, \eqn{p} its dimension with \eqn{n} the sample size, \eqn{p} its dimension
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
which is \code{(n-1)/n} times the sample covariance estimate. which is \code{(n-1)/n} times the sample covariance estimate.

View File

@ -1,20 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict_dim.R
\name{predict.dim}
\alias{predict.dim}
\alias{predict.dim.cve}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
\usage{
\method{predict.dim}{cve}(object, ...)
\method{predict.dim}{cve}(object, ...)
}
\arguments{
\item{object}{instance of class \code{cve} (result of \code{cve},
\code{cve.call}).}
\item{...}{ignored.}
}
\description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
}

View File

@ -5,41 +5,68 @@
* optimization values for logging the optimization progress. * optimization values for logging the optimization progress.
* The supplied parameters to the logger functions are as follows: * The supplied parameters to the logger functions are as follows:
* - attempt: Attempts counter. * - attempt: Attempts counter.
* - epoch: Current epoch staring with 0 as initial epoch. * - iter: Current iter staring with 0 as initial iter.
* - L: Per X_i to X_j pair loss. * - L: Per X_i to X_j pair loss.
* - V: Current estimated SDR null space basis. * - V: Current estimated SDR null space basis.
* - tau: Step-size. * - tau: Step-size.
* - err: Error \eqn{|| V V^T - V_{tau} V_{tau}^T ||}. * - err: Error \eqn{|| V V^T - V_{tau} V_{tau}^T ||}.
* *
* @param logger Pointer to a SEXP R object representing an R function. * @param logger Pointer to a SEXP R object representing an R function.
* @param loggerEnv Pointer to a SEXP R object representing an R environment. * @param env Pointer to a SEXP R object representing an R environment.
* @param L Pointer to a SEXP R object representing an R environment.
* @param V Pointer memory area of size `nrowV * ncolV` storing `V`.
* @param nrowV Nr. of rows of `V`.
* @param ncolV Nr. of columns of `V`.
* @param G Pointer memory area of size `nrowG * ncolG` storing `G`.
* @param nrowG Nr. of rows of `G`.
* @param ncolG Nr. of columns of `G`.
* @param loss Current loss L(V).
* @param err Errof for break condition (0.0 befor first iteration).
* @param tau Current step-size.
*/ */
void callLogger(SEXP logger, SEXP env, void callLogger(SEXP logger, SEXP env,
const int attempt, const int epoch, const int attempt, const int iter,
const double* L, const int lenL, const double* L, const int lenL,
const double* V, const int nrowV, const int ncolV, const double* V, const int nrowV, const int ncolV,
const double tau) { const double* G, const int nrowG, const int ncolG,
const double loss, const double err, const double tau) {
/* Create R objects to be passed to R logger function. */ /* Create R objects to be passed to R logger function. */
// Attempt is converted from 0-indexed to 1-indexed as R index. // Attempt is converted from 0-indexed to 1-indexed as R index.
SEXP R_attempt = PROTECT(ScalarInteger(attempt + 1)); SEXP r_attempt = PROTECT(ScalarInteger(attempt + 1));
// No index shift for the epoch because the 0 epoch is before the first SEXP r_iter = PROTECT(ScalarInteger(iter + 1));
// optimization step.
SEXP R_epoch = PROTECT(ScalarInteger(epoch));
SEXP R_L = PROTECT(allocVector(REALSXP, lenL));
SEXP R_V = PROTECT(allocMatrix(REALSXP, nrowV, ncolV));
SEXP R_tau = PROTECT(ScalarReal(tau));
/* Copy data to created R objects. */ /* Create R representations of L, V and G */
memcpy(REAL(R_L), L, lenL * sizeof(double)); SEXP r_L = PROTECT(allocVector(REALSXP, lenL));
memcpy(REAL(R_V), V, nrowV * ncolV * sizeof(double)); SEXP r_V = PROTECT(allocMatrix(REALSXP, nrowV, ncolV));
SEXP r_G = PROTECT(allocMatrix(REALSXP, nrowG, ncolG));
/* Copy data to R objects */
memcpy(REAL(r_L), L, lenL * sizeof(double));
memcpy(REAL(r_V), V, nrowV * ncolV * sizeof(double));
memcpy(REAL(r_G), G, nrowG * ncolG * sizeof(double));
/* Build data list passed to logger */
SEXP data = PROTECT(allocVector(VECSXP, 6));
SET_VECTOR_ELT(data, 0, r_L);
SET_VECTOR_ELT(data, 1, r_V);
SET_VECTOR_ELT(data, 2, r_G);
SET_VECTOR_ELT(data, 3, PROTECT(ScalarReal(loss)));
SET_VECTOR_ELT(data, 4, PROTECT(ScalarReal(err < 0.0 ? NA_REAL : err)));
SET_VECTOR_ELT(data, 5, PROTECT(ScalarReal(tau)));
SEXP names = PROTECT(allocVector(STRSXP, 6));
SET_STRING_ELT(names, 0, mkChar("L"));
SET_STRING_ELT(names, 1, mkChar("V"));
SET_STRING_ELT(names, 2, mkChar("G"));
SET_STRING_ELT(names, 3, mkChar("loss"));
SET_STRING_ELT(names, 4, mkChar("err"));
SET_STRING_ELT(names, 5, mkChar("tau"));
setAttrib(data, R_NamesSymbol, names);
/* Create logger function call as R language expression. */ /* Create logger function call as R language expression. */
SEXP R_exp = PROTECT(lang6(logger, R_epoch, R_attempt, SEXP loggerCall = PROTECT(lang4(logger, r_attempt, r_iter, data));
R_L, R_V, R_tau));
/* Evaluate the logger function call expression. */ /* Evaluate the logger function call expression. */
eval(R_exp, env); eval(loggerCall, env);
/* Unprotext created R objects. */ /* Unprotect created R objects. */
UNPROTECT(6); UNPROTECT(11);
} }

View File

@ -1,3 +1,5 @@
#include <R_ext/Utils.h> // for R_CheckUserInterrupt
#include "cve.h" #include "cve.h"
// TODO: clarify // TODO: clarify
@ -18,7 +20,7 @@ void cve_sub(const int n, const int p, const int q,
int attempt = 0, iter, i, nn = (n * (n - 1)) / 2; int attempt = 0, iter, i, nn = (n * (n - 1)) / 2;
double loss, loss_last, loss_best, err, tau; double loss, loss_last, loss_best, err, tau;
double tol = tol_init * sqrt((double)(2 * q)); double tol = tol_init * sqrt((double)(2 * q));
double gKscale = -0.5 / h; double gKscale = -0.5 / (h * h);
double agility = -2.0 * (1.0 - momentum) / (h * h); double agility = -2.0 * (1.0 - momentum) / (h * h);
double c = agility / (double)n; double c = agility / (double)n;
@ -84,12 +86,6 @@ void cve_sub(const int n, const int p, const int q,
* the working memory (only intermidiate result, needed for `vecS`). */ * the working memory (only intermidiate result, needed for `vecS`). */
loss_last = cost(method, n, Y, vecK, colSums, y1, L); loss_last = cost(method, n, Y, vecK, colSums, y1, L);
if (logger) {
callLogger(logger, loggerEnv,
attempt, 0,
L, n, V, p, q, tau);
}
/* Calc the scaling vector used for final computation of grad. */ /* Calc the scaling vector used for final computation of grad. */
scaling(method, n, Y, y1, L, vecD, vecK, colSums, vecS); scaling(method, n, Y, y1, L, vecD, vecK, colSums, vecS);
@ -109,12 +105,27 @@ void cve_sub(const int n, const int p, const int q,
} }
scale(c, G, p * q); // in-place scale(c, G, p * q); // in-place
if (logger) {
callLogger(logger, loggerEnv,
attempt, /* iter <- 0L */ -1,
L, n,
V, p, q,
G, p, q,
loss_last, /* err <- NA */ -1.0, tau);
}
/* Compute Skew-Symmetric matrix `A` used in Cayley transform. /* Compute Skew-Symmetric matrix `A` used in Cayley transform.
* `A <- tau * (G V^T - V G^T) + 0 * A`*/ * `A <- tau * (G V^T - V G^T) + 0 * A`*/
skew(p, q, tau, G, V, 0.0, A); skew(p, q, tau, G, V, 0.0, A);
for (iter = 0; iter < maxIter; ++iter) { for (iter = 0; iter < maxIter; ++iter) {
/* Move V allong A */ /* Before Starting next iteration check if the Uer has requested an
* interupt (aka. ^C, or "Stop" button).
* If interrupted the algorithm will be exited here and everything
* will be discharted! */
R_CheckUserInterrupt();
/* Move `V` along the gradient direction. */
cayleyTransform(p, q, A, V, V_tau, workMem); cayleyTransform(p, q, A, V, V_tau, workMem);
/* Create projection matrix for `V_tau`. */ /* Create projection matrix for `V_tau`. */
@ -145,21 +156,24 @@ void cve_sub(const int n, const int p, const int q,
continue; continue;
} }
// Compute error, use workMem (keep first `n`, they store `y1`). /* Compute error, use workMem (keep first `n`, they store `y1`). */
skew(p, q, 1.0, V, V_tau, 0.0, workMem); skew(p, q, 1.0, V, V_tau, 0.0, workMem);
err = norm(workMem, p, p, "F"); err = norm(workMem, p, p, "F");
// Shift next step to current step and store loss to last. /* Shift next step to current step and store loss to last. */
memcpy(V, V_tau, p * q * sizeof(double)); memcpy(V, V_tau, p * q * sizeof(double));
loss_last = loss; loss_last = loss;
if (logger) { if (logger) {
callLogger(logger, loggerEnv, callLogger(logger, loggerEnv,
attempt, iter + 1, attempt, iter,
L, n, V, p, q, tau); L, n,
V, p, q,
G, p, q,
loss, err, tau);
} }
// Check Break condition. /* Check Break condition. */
if (err < tol || iter + 1 >= maxIter) { if (err < tol || iter + 1 >= maxIter) {
break; break;
} }

View File

@ -39,7 +39,8 @@ void callLogger(SEXP logger, SEXP env,
const int attempt, const int epoch, const int attempt, const int epoch,
const double* L, const int lenL, const double* L, const int lenL,
const double* V, const int nrowV, const int ncolV, const double* V, const int nrowV, const int ncolV,
const double tau); const double* G, const int nrowG, const int ncolG,
const double loss, const double err, const double tau);
/* CVE sub-routines */ /* CVE sub-routines */
int getWorkLen(const int n, const int p, const int q); int getWorkLen(const int n, const int p, const int q);

View File

@ -11,6 +11,10 @@ Doc:
- [ ] Data set descriptions and augmentations. - [ ] Data set descriptions and augmentations.
- [x] Demonstration of the `Logger` function usage (Demo file or so, ...) - [x] Demonstration of the `Logger` function usage (Demo file or so, ...)
- [ ] Update Paper (to new version / version consistent with current code!) - [ ] Update Paper (to new version / version consistent with current code!)
- [ ] Reference Paper in DESCRIPTION file (in Description or specific tag)
- [ ] Split `cve` and `cve.call` docs.
- [ ] "Copy" form `dr` package (specifically `dr.directions` -> description)
- [ ] Document `C` code.
Methods to be implemented: Methods to be implemented:
- [x] simple - [x] simple
@ -33,12 +37,18 @@ Features (functions):
- [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation - [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
- [x] `plot.elbow` - [x] `plot.elbow`
- [x] `summary` - [x] `summary`
- [ ] Consider `cor.test` for dimension selection
- [x] Check for user interrupt (`R_CheckUserInterrupt`)
Changes: Changes:
- [x] New `estimate.bandwidth` implementation. - [x] New `estimate.bandwidth` implementation.
(h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2, (h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2,
\Sigma = 1/n * (X-mean)'(X-mean)) \Sigma = 1/n * (X-mean)'(X-mean))
Errors:
- [x] `CVE_C` compare to `CVE_legacy`.
- [x] fix: `predict.dim` not found.
# Development # Development
## Build and install. ## Build and install.
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file. To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file.

View File

@ -1,43 +0,0 @@
## Installing CVE (C implementation)
(setwd('~/Projects/CVE/CVE_C'))
# equiv to Rcpp::compileAttributes().
library(devtools)
pkgbuild::compile_dll()
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
pkgbuild::clean_dll()
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
library(CVE)
## Installing CVEpureR
(setwd('~/Projects/CVE/CVE_R'))
library(devtools)
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
library(CVEpureR)
ds <- dataset("M1")
gc()
path <- '~/Projects/CVE/tmp/R.prof'
Rprof(path, append = F, line.profiling = T)
cve.res <- cve.call(ds$X, ds$Y, k = ncol(ds$B)) # , method = "linesearch"
Rprof(NULL)
(prof <- summaryRprof(path)) # , lines = "both"))
cve.res[[ncol(ds$B)]]$loss
X <- ds$X
Y <- ds$Y
k <- ncol(ds$B)
system.time(
cve.res <- cve(Y ~ X, k = k)
)
system.time(
cve.res <- cve(Y ~ X, k = k, method = "sgd", tau = 0.01, batch.size = 32L)
)
system.time(
cve.res <- cve(Y ~ X, k = k, method = "linesearch")
)

View File

@ -95,7 +95,7 @@ for (sim in 1:SIM.NR) {
attempts = ATTEMPTS attempts = ATTEMPTS
) )
) )
dr$B <- basis(dr, truedim) dr$B <- coef(dr, truedim)
} }
key <- paste0(name, '-', method) key <- paste0(name, '-', method)

22
test.R
View File

@ -13,28 +13,20 @@ if (length(args) > 1L) {
max.iter <- 50L max.iter <- 50L
attempts <- 25L attempts <- 25L
# library(CVEpureR)
# path <- paste0('~/Projects/CVE/tmp/logger_', method, '.R.pdf')
library(CVE) library(CVE)
path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf') path <- paste0('~/Projects/CVE/tmp/logger_', method, '_', momentum, '.C.pdf')
# Define logger for `cve()` method. # Define logger for `cve()` method.
logger <- function(epoch, attempt, L, V, tau) { logger <- function(iter, attempt, data) {
# Note the `<<-` assignement! # Note the `<<-` assignement!
loss.history[epoch + 1, attempt] <<- mean(L) loss.history[iter + 1, attempt] <<- data$loss
if (epoch == 0) { error.history[iter + 1, attempt] <<- if (data$err > 0) data$err else NA
error <- NA tau.history[iter + 1, attempt] <<- data$tau
} else {
error <- norm(V %*% t(V) - V_last %*% t(V_last), type = 'F')
}
V_last <<- V
error.history[epoch + 1, attempt] <<- error
tau.history[epoch + 1, attempt] <<- tau
# Compute true error by comparing to the true `B` # Compute true error by comparing to the true `B`
B.est <- null(V) # Function provided by CVE B.est <- null(data$V) # Function provided by CVE
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est) P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
true.error <- norm(P - P.est, 'F') / sqrt(2 * k) true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
true.error.history[epoch + 1, attempt] <<- true.error true.error.history[iter + 1, attempt] <<- true.error
} }
pdf(path) pdf(path)