change: logger parameters,
fix: gKscale in cve.c to use h^2, add: user interrupt to cve.c, updated test und runtime_test to new version
This commit is contained in:
parent
875982a010
commit
6fbffe6d74
|
@ -13,6 +13,7 @@ export(directions)
|
|||
export(elem.pairs)
|
||||
export(estimate.bandwidth)
|
||||
export(null)
|
||||
export(predict.dim)
|
||||
export(projTangentStiefel)
|
||||
export(rStiefel)
|
||||
export(retractStiefel)
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
||||
#' studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
||||
#' remains largely on par under most others.
|
||||
#'
|
||||
#' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
|
||||
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||
#' \eqn{Y} and \eqn{X} is modelled by
|
||||
|
@ -25,17 +24,16 @@
|
|||
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||
#'
|
||||
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
|
||||
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
|
||||
#' Sufficient Dimension Reduction, 2019
|
||||
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
|
||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
||||
#'
|
||||
#' @importFrom stats model.frame
|
||||
#' @docType package
|
||||
#' @useDynLib CVE, .registration = TRUE
|
||||
"_PACKAGE"
|
||||
|
||||
#' Conditional Variance Estimator (CVE).
|
||||
#'
|
||||
#' TODO: reuse of package description and details!!!!
|
||||
#' @inherit CVE-package description
|
||||
#'
|
||||
#' @param formula an object of class \code{"formula"} which is a symbolic
|
||||
#' description of the model to be fitted.
|
||||
|
@ -49,28 +47,37 @@
|
|||
#' }
|
||||
#' @param ... Parameters passed on to \code{cve.call}.
|
||||
#'
|
||||
#' @return dr is a S3 object of class \code{cve} with named properties:
|
||||
#' \itemize{
|
||||
#' \item X: Original training data,
|
||||
#' \item Y: Responce of original training data,
|
||||
#' \item method: Name of used method,
|
||||
#' \item call: The method call
|
||||
#' @return an S3 object of class \code{cve} with components:
|
||||
#' \describe{
|
||||
#' \item{X}{Original training data,}
|
||||
#' \item{Y}{Responce of original training data,}
|
||||
#' \item{method}{Name of used method,}
|
||||
#' \item{call}{the matched call,}
|
||||
#' \item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
|
||||
#' each \eqn{k=min.dim,...,max.dim} (dimension).}
|
||||
#' }
|
||||
#' as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
|
||||
#' projection matrices.
|
||||
#'
|
||||
#' @examples
|
||||
#' # create dataset
|
||||
#' x <- matrix(rnorm(400), 100, 4)
|
||||
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||
#'
|
||||
#' # Call CVE using momentum.
|
||||
#' dr.momentum <- cve(y ~ x, momentum = 0.2)
|
||||
#' # Call CVE.
|
||||
#' dr <- cve(y ~ x)
|
||||
#' # Call weighted CVE.
|
||||
#' dr.weighted <- cve(y ~ x, method = "weighted")
|
||||
#'
|
||||
#' # Training data responces of reduced data.
|
||||
#' y.est <- directions(dr, 1)
|
||||
#' # Extract SDR subspace basis of dimension 1.
|
||||
#' B <- coef(dr.momentum, 1)
|
||||
#'
|
||||
#' @seealso For a detailed description of \code{formula} see
|
||||
#' \code{\link{formula}}.
|
||||
#' @references Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
|
||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
||||
#'
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
|
@ -93,6 +100,9 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
|||
return(dr)
|
||||
}
|
||||
|
||||
#' @inherit cve title
|
||||
#' @inherit cve description
|
||||
#'
|
||||
#' @param nObs parameter for choosing bandwidth \code{h} using
|
||||
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
|
||||
#' @param X data matrix with samples in its rows.
|
||||
|
@ -117,18 +127,25 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
|||
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
|
||||
#' set to 1 and \code{k} to match dimension)
|
||||
#'
|
||||
#' @return dr is a list which contains:
|
||||
#' \itemize{
|
||||
#' \item dir: dir[[d]] is the central space with d-dimension
|
||||
#' d = 1, 2, ..., p reduced direction of different dimensions
|
||||
#' \item y: the value of response
|
||||
#' \item idx: the index of variables which survives after screening
|
||||
#' \item max.dim: the largest dimensions of CS or CMS which have been calculated in mave function
|
||||
#' \item ky: parameter used for DIM for selection
|
||||
#' \item x: the original training data
|
||||
#' @inherit cve return
|
||||
#'
|
||||
#' @examples
|
||||
#' # Create a dataset (n samples):
|
||||
#' n <- 100
|
||||
#' X <- matrix(rnorm(4 * n), n)
|
||||
#' Y <- matrix(X[, 1] + cos(X[, 2]) + rnorm(n, 0, .1), n)
|
||||
#'
|
||||
#' # Create logger function:
|
||||
#' logger <- function(attempt, iter, data) {
|
||||
#' if (iter == 0) {
|
||||
#' cat("Starting attempt nr:", attempt, "\n")
|
||||
#' }
|
||||
#' cat(" iter:", iter, "loss:", data$loss, "\n")
|
||||
#' }
|
||||
#'
|
||||
#' @rdname cve
|
||||
#' Call 'cve' with logger:
|
||||
#' cve(Y ~ X, logger = logger)
|
||||
#'
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple",
|
||||
nObs = sqrt(nrow(X)), h = NULL,
|
||||
|
@ -138,16 +155,16 @@ cve.call <- function(X, Y, method = "simple",
|
|||
V.init = NULL,
|
||||
max.iter = 50L, attempts = 10L,
|
||||
logger = NULL) {
|
||||
# get method bitmask
|
||||
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")
|
||||
methods <- list(
|
||||
"simple" = 0L,
|
||||
"weighted" = 1L
|
||||
)
|
||||
method <- tolower(method)
|
||||
if (!(method %in% names(methods))) {
|
||||
stop('Got unknown method.')
|
||||
method_nr <- methods[[tolower(method), exact = FALSE]]
|
||||
if (!is.integer(method_nr)) {
|
||||
stop('Unable to determine method.')
|
||||
}
|
||||
method_bitmask <- methods[[method]]
|
||||
method <- names(which(method_nr == methods))
|
||||
|
||||
# parameter checking
|
||||
if (!is.numeric(momentum) || length(momentum) > 1L) {
|
||||
|
@ -273,7 +290,7 @@ cve.call <- function(X, Y, method = "simple",
|
|||
|
||||
dr.k <- .Call('cve', PACKAGE = 'CVE',
|
||||
X, Y, k, h,
|
||||
method_bitmask,
|
||||
method_nr,
|
||||
V.init,
|
||||
momentum, tau, tol,
|
||||
slack, gamma,
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#' Provides sample datasets. There are 5 different datasets named
|
||||
#' M1, M2, M3, M4 and M5 described in the paper references below.
|
||||
#' The general model is given by:
|
||||
#' \deqn{Y ~ g(B'X) + \epsilon}
|
||||
#' \deqn{Y = g(B'X) + \epsilon}
|
||||
#'
|
||||
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
||||
#' @param n nr samples
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#' Estimates a bandwidth \code{h} according
|
||||
#' \deqn{%
|
||||
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
|
||||
#' h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
|
||||
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
|
||||
#' with \eqn{n} the sample size, \eqn{p} its dimension
|
||||
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
|
||||
#' which is \code{(n-1)/n} times the sample covariance estimate.
|
||||
|
|
|
@ -1,16 +1,21 @@
|
|||
#' @rdname predict.dim.cve
|
||||
#' @method predict.dim cve
|
||||
#' @alias predict.dim.cve
|
||||
#' @export
|
||||
predict.dim <- function(object, ...) {
|
||||
UseMethod("predict.dim")
|
||||
}
|
||||
|
||||
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
|
||||
#'
|
||||
#' @param object instance of class \code{cve} (result of \code{cve},
|
||||
#' \code{cve.call}).
|
||||
#' @param ... ignored.
|
||||
#'
|
||||
#' @return list with
|
||||
#' \itemize{
|
||||
#' \item MSE: Mean Square Error,
|
||||
#' \item k: predicted dimensions.
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
predict.dim <- function(object, ...) {
|
||||
UseMethod("predict.dim")
|
||||
}
|
||||
|
||||
#' @aliases predict.dim
|
||||
#' @method predict.dim cve
|
||||
#' @export
|
||||
predict.dim.cve <- function(object, ...) {
|
||||
|
|
|
@ -17,8 +17,6 @@ continuous predictors and link function. The effectiveness and accuracy of
|
|||
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
||||
studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
||||
remains largely on par under most others.
|
||||
}
|
||||
\details{
|
||||
Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
|
||||
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||
\eqn{Y} and \eqn{X} is modelled by
|
||||
|
@ -32,8 +30,8 @@ a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
|
|||
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||
}
|
||||
\references{
|
||||
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
|
||||
Sufficient Dimension Reduction, 2019
|
||||
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
|
||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
||||
}
|
||||
\author{
|
||||
Daniel Kapla, Lukas Fertl, Bura Efstathia
|
||||
|
|
108
CVE_C/man/cve.Rd
108
CVE_C/man/cve.Rd
|
@ -2,15 +2,9 @@
|
|||
% Please edit documentation in R/CVE.R
|
||||
\name{cve}
|
||||
\alias{cve}
|
||||
\alias{cve.call}
|
||||
\title{Conditional Variance Estimator (CVE).}
|
||||
\usage{
|
||||
cve(formula, data, method = "simple", max.dim = 10L, ...)
|
||||
|
||||
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
||||
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
|
||||
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
|
||||
max.iter = 50L, attempts = 10L, logger = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
||||
|
@ -26,82 +20,62 @@ supplied.}
|
|||
\item "weighted" variation with addaptive weighting of slices.
|
||||
}}
|
||||
|
||||
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||
|
||||
\item{...}{Parameters passed on to \code{cve.call}.}
|
||||
|
||||
\item{X}{data matrix with samples in its rows.}
|
||||
|
||||
\item{Y}{Responses (1 dimensional).}
|
||||
|
||||
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
||||
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
||||
|
||||
\item{h}{bandwidth or function to estimate bandwidth, defaults to internaly
|
||||
estimated bandwidth.}
|
||||
|
||||
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||
|
||||
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
|
||||
only the specified dimension \code{B} matrix is estimated.}
|
||||
|
||||
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
|
||||
gradient update with a momentum term.}
|
||||
|
||||
\item{tau}{Initial step-size.}
|
||||
|
||||
\item{tol}{Tolerance for break condition.}
|
||||
|
||||
\item{slack}{Positive scaling to allow small increases of the loss while
|
||||
optimizing.}
|
||||
|
||||
\item{gamma}{step-size reduction multiple.}
|
||||
|
||||
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
|
||||
set to 1 and \code{k} to match dimension)}
|
||||
|
||||
\item{max.iter}{maximum number of optimization steps.}
|
||||
|
||||
\item{attempts}{number of arbitrary different starting points.}
|
||||
|
||||
\item{logger}{a logger function (only for advanced user, significantly slows
|
||||
down the computation).}
|
||||
}
|
||||
\value{
|
||||
dr is a S3 object of class \code{cve} with named properties:
|
||||
\itemize{
|
||||
\item X: Original training data,
|
||||
\item Y: Responce of original training data,
|
||||
\item method: Name of used method,
|
||||
\item call: The method call
|
||||
}
|
||||
as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
|
||||
projection matrices.
|
||||
|
||||
dr is a list which contains:
|
||||
\itemize{
|
||||
\item dir: dir[[d]] is the central space with d-dimension
|
||||
d = 1, 2, ..., p reduced direction of different dimensions
|
||||
\item y: the value of response
|
||||
\item idx: the index of variables which survives after screening
|
||||
\item max.dim: the largest dimensions of CS or CMS which have been calculated in mave function
|
||||
\item ky: parameter used for DIM for selection
|
||||
\item x: the original training data
|
||||
an S3 object of class \code{cve} with components:
|
||||
\describe{
|
||||
\item{X}{Original training data,}
|
||||
\item{Y}{Responce of original training data,}
|
||||
\item{method}{Name of used method,}
|
||||
\item{call}{the matched call,}
|
||||
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
|
||||
each \eqn{k=min.dim,...,max.dim} (dimension).}
|
||||
}
|
||||
}
|
||||
\description{
|
||||
TODO: reuse of package description and details!!!!
|
||||
Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
||||
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
||||
where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
|
||||
similarly to its main competitor, the mean average variance estimation
|
||||
(MAVE), is not based on inverse regression, and does not require the
|
||||
restrictive linearity and constant variance conditions of moment based SDR
|
||||
methods. CVE is data-driven and applies to additive error regressions with
|
||||
continuous predictors and link function. The effectiveness and accuracy of
|
||||
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
||||
studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
||||
remains largely on par under most others.
|
||||
Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
|
||||
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||
\eqn{Y} and \eqn{X} is modelled by
|
||||
\deqn{Y = g(B'X) + \epsilon}
|
||||
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||
is an unknown, continuous non-constant function,
|
||||
and \eqn{B = (b_1, ..., b_k)} is
|
||||
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
|
||||
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||
}
|
||||
\examples{
|
||||
# create dataset
|
||||
x <- matrix(rnorm(400), 100, 4)
|
||||
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||
|
||||
# Call CVE using momentum.
|
||||
dr.momentum <- cve(y ~ x, momentum = 0.2)
|
||||
# Call CVE.
|
||||
dr <- cve(y ~ x)
|
||||
# Call weighted CVE.
|
||||
dr.weighted <- cve(y ~ x, method = "weighted")
|
||||
|
||||
# Training data responces of reduced data.
|
||||
y.est <- directions(dr, 1)
|
||||
# Extract SDR subspace basis of dimension 1.
|
||||
B <- coef(dr.momentum, 1)
|
||||
|
||||
}
|
||||
\references{
|
||||
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
|
||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
||||
}
|
||||
\seealso{
|
||||
For a detailed description of \code{formula} see
|
||||
|
|
|
@ -32,7 +32,7 @@ List with elements
|
|||
Provides sample datasets. There are 5 different datasets named
|
||||
M1, M2, M3, M4 and M5 described in the paper references below.
|
||||
The general model is given by:
|
||||
\deqn{Y ~ g(B'X) + \epsilon}
|
||||
\deqn{Y = g(B'X) + \epsilon}
|
||||
}
|
||||
\section{M1}{
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ Estimated bandwidth \code{h}.
|
|||
Estimates a bandwidth \code{h} according
|
||||
\deqn{%
|
||||
h = (2 * tr(\Sigma) / p) * (1.2 * n^{-1 / (4 + k)})^2}{%
|
||||
h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
|
||||
h = (2 * tr(\Sigma) / p) * (1.2 * n^(\frac{-1}{4 + k}))^2}
|
||||
with \eqn{n} the sample size, \eqn{p} its dimension
|
||||
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
|
||||
which is \code{(n-1)/n} times the sample covariance estimate.
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/predict_dim.R
|
||||
\name{predict.dim}
|
||||
\alias{predict.dim}
|
||||
\alias{predict.dim.cve}
|
||||
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
|
||||
\usage{
|
||||
\method{predict.dim}{cve}(object, ...)
|
||||
|
||||
\method{predict.dim}{cve}(object, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{instance of class \code{cve} (result of \code{cve},
|
||||
\code{cve.call}).}
|
||||
|
||||
\item{...}{ignored.}
|
||||
}
|
||||
\description{
|
||||
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
|
||||
}
|
|
@ -5,41 +5,68 @@
|
|||
* optimization values for logging the optimization progress.
|
||||
* The supplied parameters to the logger functions are as follows:
|
||||
* - attempt: Attempts counter.
|
||||
* - epoch: Current epoch staring with 0 as initial epoch.
|
||||
* - iter: Current iter staring with 0 as initial iter.
|
||||
* - L: Per X_i to X_j pair loss.
|
||||
* - V: Current estimated SDR null space basis.
|
||||
* - tau: Step-size.
|
||||
* - err: Error \eqn{|| V V^T - V_{tau} V_{tau}^T ||}.
|
||||
*
|
||||
* @param logger Pointer to a SEXP R object representing an R function.
|
||||
* @param loggerEnv Pointer to a SEXP R object representing an R environment.
|
||||
* @param env Pointer to a SEXP R object representing an R environment.
|
||||
* @param L Pointer to a SEXP R object representing an R environment.
|
||||
* @param V Pointer memory area of size `nrowV * ncolV` storing `V`.
|
||||
* @param nrowV Nr. of rows of `V`.
|
||||
* @param ncolV Nr. of columns of `V`.
|
||||
* @param G Pointer memory area of size `nrowG * ncolG` storing `G`.
|
||||
* @param nrowG Nr. of rows of `G`.
|
||||
* @param ncolG Nr. of columns of `G`.
|
||||
* @param loss Current loss L(V).
|
||||
* @param err Errof for break condition (0.0 befor first iteration).
|
||||
* @param tau Current step-size.
|
||||
*/
|
||||
void callLogger(SEXP logger, SEXP env,
|
||||
const int attempt, const int epoch,
|
||||
const int attempt, const int iter,
|
||||
const double* L, const int lenL,
|
||||
const double* V, const int nrowV, const int ncolV,
|
||||
const double tau) {
|
||||
const double* G, const int nrowG, const int ncolG,
|
||||
const double loss, const double err, const double tau) {
|
||||
/* Create R objects to be passed to R logger function. */
|
||||
// Attempt is converted from 0-indexed to 1-indexed as R index.
|
||||
SEXP R_attempt = PROTECT(ScalarInteger(attempt + 1));
|
||||
// No index shift for the epoch because the 0 epoch is before the first
|
||||
// optimization step.
|
||||
SEXP R_epoch = PROTECT(ScalarInteger(epoch));
|
||||
SEXP R_L = PROTECT(allocVector(REALSXP, lenL));
|
||||
SEXP R_V = PROTECT(allocMatrix(REALSXP, nrowV, ncolV));
|
||||
SEXP R_tau = PROTECT(ScalarReal(tau));
|
||||
SEXP r_attempt = PROTECT(ScalarInteger(attempt + 1));
|
||||
SEXP r_iter = PROTECT(ScalarInteger(iter + 1));
|
||||
|
||||
/* Copy data to created R objects. */
|
||||
memcpy(REAL(R_L), L, lenL * sizeof(double));
|
||||
memcpy(REAL(R_V), V, nrowV * ncolV * sizeof(double));
|
||||
/* Create R representations of L, V and G */
|
||||
SEXP r_L = PROTECT(allocVector(REALSXP, lenL));
|
||||
SEXP r_V = PROTECT(allocMatrix(REALSXP, nrowV, ncolV));
|
||||
SEXP r_G = PROTECT(allocMatrix(REALSXP, nrowG, ncolG));
|
||||
/* Copy data to R objects */
|
||||
memcpy(REAL(r_L), L, lenL * sizeof(double));
|
||||
memcpy(REAL(r_V), V, nrowV * ncolV * sizeof(double));
|
||||
memcpy(REAL(r_G), G, nrowG * ncolG * sizeof(double));
|
||||
|
||||
/* Build data list passed to logger */
|
||||
SEXP data = PROTECT(allocVector(VECSXP, 6));
|
||||
SET_VECTOR_ELT(data, 0, r_L);
|
||||
SET_VECTOR_ELT(data, 1, r_V);
|
||||
SET_VECTOR_ELT(data, 2, r_G);
|
||||
SET_VECTOR_ELT(data, 3, PROTECT(ScalarReal(loss)));
|
||||
SET_VECTOR_ELT(data, 4, PROTECT(ScalarReal(err < 0.0 ? NA_REAL : err)));
|
||||
SET_VECTOR_ELT(data, 5, PROTECT(ScalarReal(tau)));
|
||||
SEXP names = PROTECT(allocVector(STRSXP, 6));
|
||||
SET_STRING_ELT(names, 0, mkChar("L"));
|
||||
SET_STRING_ELT(names, 1, mkChar("V"));
|
||||
SET_STRING_ELT(names, 2, mkChar("G"));
|
||||
SET_STRING_ELT(names, 3, mkChar("loss"));
|
||||
SET_STRING_ELT(names, 4, mkChar("err"));
|
||||
SET_STRING_ELT(names, 5, mkChar("tau"));
|
||||
setAttrib(data, R_NamesSymbol, names);
|
||||
|
||||
/* Create logger function call as R language expression. */
|
||||
SEXP R_exp = PROTECT(lang6(logger, R_epoch, R_attempt,
|
||||
R_L, R_V, R_tau));
|
||||
SEXP loggerCall = PROTECT(lang4(logger, r_attempt, r_iter, data));
|
||||
|
||||
/* Evaluate the logger function call expression. */
|
||||
eval(R_exp, env);
|
||||
eval(loggerCall, env);
|
||||
|
||||
/* Unprotext created R objects. */
|
||||
UNPROTECT(6);
|
||||
/* Unprotect created R objects. */
|
||||
UNPROTECT(11);
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
#include <R_ext/Utils.h> // for R_CheckUserInterrupt
|
||||
|
||||
#include "cve.h"
|
||||
|
||||
// TODO: clarify
|
||||
|
@ -18,7 +20,7 @@ void cve_sub(const int n, const int p, const int q,
|
|||
int attempt = 0, iter, i, nn = (n * (n - 1)) / 2;
|
||||
double loss, loss_last, loss_best, err, tau;
|
||||
double tol = tol_init * sqrt((double)(2 * q));
|
||||
double gKscale = -0.5 / h;
|
||||
double gKscale = -0.5 / (h * h);
|
||||
double agility = -2.0 * (1.0 - momentum) / (h * h);
|
||||
double c = agility / (double)n;
|
||||
|
||||
|
@ -84,12 +86,6 @@ void cve_sub(const int n, const int p, const int q,
|
|||
* the working memory (only intermidiate result, needed for `vecS`). */
|
||||
loss_last = cost(method, n, Y, vecK, colSums, y1, L);
|
||||
|
||||
if (logger) {
|
||||
callLogger(logger, loggerEnv,
|
||||
attempt, 0,
|
||||
L, n, V, p, q, tau);
|
||||
}
|
||||
|
||||
/* Calc the scaling vector used for final computation of grad. */
|
||||
scaling(method, n, Y, y1, L, vecD, vecK, colSums, vecS);
|
||||
|
||||
|
@ -109,12 +105,27 @@ void cve_sub(const int n, const int p, const int q,
|
|||
}
|
||||
scale(c, G, p * q); // in-place
|
||||
|
||||
if (logger) {
|
||||
callLogger(logger, loggerEnv,
|
||||
attempt, /* iter <- 0L */ -1,
|
||||
L, n,
|
||||
V, p, q,
|
||||
G, p, q,
|
||||
loss_last, /* err <- NA */ -1.0, tau);
|
||||
}
|
||||
|
||||
/* Compute Skew-Symmetric matrix `A` used in Cayley transform.
|
||||
* `A <- tau * (G V^T - V G^T) + 0 * A`*/
|
||||
skew(p, q, tau, G, V, 0.0, A);
|
||||
|
||||
for (iter = 0; iter < maxIter; ++iter) {
|
||||
/* Move V allong A */
|
||||
/* Before Starting next iteration check if the Uer has requested an
|
||||
* interupt (aka. ^C, or "Stop" button).
|
||||
* If interrupted the algorithm will be exited here and everything
|
||||
* will be discharted! */
|
||||
R_CheckUserInterrupt();
|
||||
|
||||
/* Move `V` along the gradient direction. */
|
||||
cayleyTransform(p, q, A, V, V_tau, workMem);
|
||||
|
||||
/* Create projection matrix for `V_tau`. */
|
||||
|
@ -145,21 +156,24 @@ void cve_sub(const int n, const int p, const int q,
|
|||
continue;
|
||||
}
|
||||
|
||||
// Compute error, use workMem (keep first `n`, they store `y1`).
|
||||
/* Compute error, use workMem (keep first `n`, they store `y1`). */
|
||||
skew(p, q, 1.0, V, V_tau, 0.0, workMem);
|
||||
err = norm(workMem, p, p, "F");
|
||||
|
||||
// Shift next step to current step and store loss to last.
|
||||
/* Shift next step to current step and store loss to last. */
|
||||
memcpy(V, V_tau, p * q * sizeof(double));
|
||||
loss_last = loss;
|
||||
|
||||
if (logger) {
|
||||
callLogger(logger, loggerEnv,
|
||||
attempt, iter + 1,
|
||||
L, n, V, p, q, tau);
|
||||
attempt, iter,
|
||||
L, n,
|
||||
V, p, q,
|
||||
G, p, q,
|
||||
loss, err, tau);
|
||||
}
|
||||
|
||||
// Check Break condition.
|
||||
/* Check Break condition. */
|
||||
if (err < tol || iter + 1 >= maxIter) {
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,8 @@ void callLogger(SEXP logger, SEXP env,
|
|||
const int attempt, const int epoch,
|
||||
const double* L, const int lenL,
|
||||
const double* V, const int nrowV, const int ncolV,
|
||||
const double tau);
|
||||
const double* G, const int nrowG, const int ncolG,
|
||||
const double loss, const double err, const double tau);
|
||||
|
||||
/* CVE sub-routines */
|
||||
int getWorkLen(const int n, const int p, const int q);
|
||||
|
|
10
README.md
10
README.md
|
@ -11,6 +11,10 @@ Doc:
|
|||
- [ ] Data set descriptions and augmentations.
|
||||
- [x] Demonstration of the `Logger` function usage (Demo file or so, ...)
|
||||
- [ ] Update Paper (to new version / version consistent with current code!)
|
||||
- [ ] Reference Paper in DESCRIPTION file (in Description or specific tag)
|
||||
- [ ] Split `cve` and `cve.call` docs.
|
||||
- [ ] "Copy" form `dr` package (specifically `dr.directions` -> description)
|
||||
- [ ] Document `C` code.
|
||||
|
||||
Methods to be implemented:
|
||||
- [x] simple
|
||||
|
@ -33,12 +37,18 @@ Features (functions):
|
|||
- [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
|
||||
- [x] `plot.elbow`
|
||||
- [x] `summary`
|
||||
- [ ] Consider `cor.test` for dimension selection
|
||||
- [x] Check for user interrupt (`R_CheckUserInterrupt`)
|
||||
|
||||
Changes:
|
||||
- [x] New `estimate.bandwidth` implementation.
|
||||
(h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2,
|
||||
\Sigma = 1/n * (X-mean)'(X-mean))
|
||||
|
||||
Errors:
|
||||
- [x] `CVE_C` compare to `CVE_legacy`.
|
||||
- [x] fix: `predict.dim` not found.
|
||||
|
||||
# Development
|
||||
## Build and install.
|
||||
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file.
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
## Installing CVE (C implementation)
|
||||
(setwd('~/Projects/CVE/CVE_C'))
|
||||
# equiv to Rcpp::compileAttributes().
|
||||
library(devtools)
|
||||
pkgbuild::compile_dll()
|
||||
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
|
||||
pkgbuild::clean_dll()
|
||||
(path <- build(vignettes = FALSE))
|
||||
install.packages(path, repos = NULL, type = "source")
|
||||
library(CVE)
|
||||
|
||||
## Installing CVEpureR
|
||||
(setwd('~/Projects/CVE/CVE_R'))
|
||||
library(devtools)
|
||||
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
|
||||
(path <- build(vignettes = FALSE))
|
||||
install.packages(path, repos = NULL, type = "source")
|
||||
library(CVEpureR)
|
||||
|
||||
|
||||
ds <- dataset("M1")
|
||||
gc()
|
||||
path <- '~/Projects/CVE/tmp/R.prof'
|
||||
Rprof(path, append = F, line.profiling = T)
|
||||
cve.res <- cve.call(ds$X, ds$Y, k = ncol(ds$B)) # , method = "linesearch"
|
||||
Rprof(NULL)
|
||||
(prof <- summaryRprof(path)) # , lines = "both"))
|
||||
cve.res[[ncol(ds$B)]]$loss
|
||||
|
||||
X <- ds$X
|
||||
Y <- ds$Y
|
||||
k <- ncol(ds$B)
|
||||
system.time(
|
||||
cve.res <- cve(Y ~ X, k = k)
|
||||
)
|
||||
|
||||
system.time(
|
||||
cve.res <- cve(Y ~ X, k = k, method = "sgd", tau = 0.01, batch.size = 32L)
|
||||
)
|
||||
|
||||
system.time(
|
||||
cve.res <- cve(Y ~ X, k = k, method = "linesearch")
|
||||
)
|
|
@ -95,7 +95,7 @@ for (sim in 1:SIM.NR) {
|
|||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
dr$B <- basis(dr, truedim)
|
||||
dr$B <- coef(dr, truedim)
|
||||
}
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
|
|
22
test.R
22
test.R
|
@ -13,28 +13,20 @@ if (length(args) > 1L) {
|
|||
max.iter <- 50L
|
||||
attempts <- 25L
|
||||
|
||||
# library(CVEpureR)
|
||||
# path <- paste0('~/Projects/CVE/tmp/logger_', method, '.R.pdf')
|
||||
library(CVE)
|
||||
path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf')
|
||||
path <- paste0('~/Projects/CVE/tmp/logger_', method, '_', momentum, '.C.pdf')
|
||||
|
||||
# Define logger for `cve()` method.
|
||||
logger <- function(epoch, attempt, L, V, tau) {
|
||||
logger <- function(iter, attempt, data) {
|
||||
# Note the `<<-` assignement!
|
||||
loss.history[epoch + 1, attempt] <<- mean(L)
|
||||
if (epoch == 0) {
|
||||
error <- NA
|
||||
} else {
|
||||
error <- norm(V %*% t(V) - V_last %*% t(V_last), type = 'F')
|
||||
}
|
||||
V_last <<- V
|
||||
error.history[epoch + 1, attempt] <<- error
|
||||
tau.history[epoch + 1, attempt] <<- tau
|
||||
loss.history[iter + 1, attempt] <<- data$loss
|
||||
error.history[iter + 1, attempt] <<- if (data$err > 0) data$err else NA
|
||||
tau.history[iter + 1, attempt] <<- data$tau
|
||||
# Compute true error by comparing to the true `B`
|
||||
B.est <- null(V) # Function provided by CVE
|
||||
B.est <- null(data$V) # Function provided by CVE
|
||||
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
|
||||
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
|
||||
true.error.history[epoch + 1, attempt] <<- true.error
|
||||
true.error.history[iter + 1, attempt] <<- true.error
|
||||
}
|
||||
|
||||
pdf(path)
|
||||
|
|
Loading…
Reference in New Issue