diff --git a/CVE/NAMESPACE b/CVE/NAMESPACE index 96e9b07..0644f17 100644 --- a/CVE/NAMESPACE +++ b/CVE/NAMESPACE @@ -1,10 +1,18 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,cve) export(cve) -export(cve_cpp) +export(cve.call) export(dataset) export(estimateBandwidth) export(rStiefel) import(Rcpp) +import(stats) importFrom(Rcpp,evalCpp) +importFrom(graphics,lines) +importFrom(graphics,plot) +importFrom(graphics,points) +importFrom(stats,model.frame) +importFrom(stats,rbinom) +importFrom(stats,rnorm) useDynLib(CVE) diff --git a/CVE/R/CVE.R b/CVE/R/CVE.R index c5bd402..0132e42 100644 --- a/CVE/R/CVE.R +++ b/CVE/R/CVE.R @@ -1,44 +1,145 @@ -#' Conditional Variance Estimator +#' Implementation of the CVE method. #' #' Conditional Variance Estimator (CVE) is a novel sufficient dimension -#' reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X), -#' where B'X is a lower dimensional projection of the predictors. +#' reduction (SDR) method assuming a model +#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +#' where B'X is a lower dimensional projection of the predictors. #' -#' @param X A matrix of type numeric of dimensions N times dim where N is the number -#' of entries with dim as data dimension. -#' @param Y A vector of type numeric of length N (coresponds to \code{x}). -#' @param k Guess for rank(B). -#' @param nObs As describet in the paper. -#' -#' @param tol Tolerance for optimization stopping creteria. +#' @param formula Formel for the regression model defining `X`, `Y`. +#' See: \code{\link{formula}}. +#' @param data data.frame holding data for formula. +#' @param method The different only differe in the used optimization. +#' All of them are Gradient based optimization on a Stiefel manifold. +#' \itemize{ +#' \item "simple" Simple reduction of stepsize. +#' \item "linesearch" determines stepsize with backtracking linesearch +#' using Armijo-Wolf conditions. +#' \item TODO: further +#' } +#' @param ... Further parameters depending on the used method. +#' TODO: See ... +#' @examples +#' library(CVE) +#' ds <- dataset("M5") +#' X <- ds$X +#' Y <- ds$Y +#' dr <- cve(Y ~ X, k = 1) +#' +#' @references Fertl L, Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' +#' @import stats +#' @importFrom stats model.frame #' @export +cve <- function(formula, data, method = "simple", ...) { + # check for type of `data` if supplied and set default + if (missing(data)) { + data <- environment(formula) + } else if (!is.data.frame(data)) { + stop('Parameter `data` must be a `data.frame` or missing.') + } + + # extract `X`, `Y` from `formula` with `data` + model <- stats::model.frame(formula, data) + X <- as.matrix(model[,-1, drop=FALSE]) + Y <- as.matrix(model[, 1, drop=FALSE]) + + # pass extracted data on to [cve.call()] + dr <- cve.call(X, Y, method = method, ...) + + # overwrite `call` property from [cve.call()] + dr$call <- match.call() + return(dr) +} + +#' @rdname cve +#' @export +cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) { + + # TODO: replace default value of `k` by `max.dim` when fast enough + if (missing(k)) { + stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.") + } + + # parameter checking + if (!(is.matrix(X) && is.matrix(Y))) { + stop('X and Y should be matrices.') + } + if (nrow(X) != nrow(Y)) { + stop('Rows of X and Y are not compatible.') + } + if (ncol(X) < 2) { + stop('X is one dimensional, no need for dimension reduction.') + } + if (ncol(Y) > 1) { + stop('Only one dimensional responces Y are supported.') + } + + # call C++ CVE implementation + # dr ... Dimension Reduction + dr <- cve_cpp(X, Y, tolower(method), k = k, nObs = nObs, ...) + + # augment result information + dr$method <- method + dr$call <- match.call() + class(dr) <- "cve" + return(dr) +} + +# TODO: write summary +# summary.cve <- function() { +# # code # +# } + +#' Ploting helper for objects of class \code{cve}. #' -#' @seealso TODO: \code{vignette("CVE_paper", package="CVE")}. +#' @param x Object of class \code{cve} (result of [cve()]). +#' @param content Specifies what to plot: +#' \itemize{ +#' \item "history" Plots the loss history from stiefel optimization +#' (default). +#' \item ... TODO: add (if there are any) +#' } +#' @param ... Pass through parameters to [plot()] and [lines()] #' -#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -cve <- function(X, Y, k, - nObs = sqrt(nrow(X)), - tauInitial = 1.0, - tol = 1e-3, - slack = -1e-10, - maxIter = 50L, - attempts = 10L -) { - # check data parameter types - stopifnot( - is.matrix(X), - is.vector(Y), - typeof(X) == 'double', - typeof(Y) == 'double' +#' @seealso see \code{\link{par}} for graphical parameters to pass through. +#' @importFrom graphics plot lines points +#' @method plot cve +#' @export +plot.cve <- function(x, ...) { + + H <- x$history + H_1 <- H[H[, 1] > 0, 1] + + defaults <- list( + main = "History", + xlab = "Iterations i", + ylab = expression(loss == L[n](V^{(i)})), + xlim = c(1, nrow(H)), + ylim = c(0, max(H)), + type = "l" ) - # call CVE C++ implementation - return(cve_cpp(X, Y, k, - nObs, - tauInitial, - tol, - slack, - maxIter, - attempts - )) + call.plot <- match.call() + keys <- names(defaults) + keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0] + + for (key in keys) { + call.plot[[key]] <- defaults[[key]] + } + + call.plot[[1L]] <- quote(plot) + call.plot$x <- quote(1:length(H_1)) + call.plot$y <- quote(H_1) + + eval(call.plot) + + if (ncol(H) > 1) { + for (i in 2:ncol(H)) { + H_i <- H[H[, i] > 0, i] + lines(1:length(H_i), H_i) + } + } + x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) + y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) + points(x.ends, y.ends) } diff --git a/CVE/R/RcppExports.R b/CVE/R/RcppExports.R index 870ed91..7efc872 100644 --- a/CVE/R/RcppExports.R +++ b/CVE/R/RcppExports.R @@ -1,6 +1,18 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +#' Gradient computation of the loss `L_n(V)`. +#' +#' The loss is defined as +#' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )} +#' with +#' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)} +#' +#' @rdname optStiefel +#' @keywords internal +#' @name gradient +NULL + #' Stiefel Optimization. #' #' Stiefel Optimization for \code{V} given a dataset \code{X} and responces @@ -24,8 +36,13 @@ #' orthogonal space spaned by \code{V}. #' #' @rdname optStiefel -#' @name optStiefel #' @keywords internal +#' @name optStiefel_simple +NULL + +#' @rdname optStiefel +#' @keywords internal +#' @name optStiefel_linesearch NULL #' Estimated bandwidth for CVE. @@ -87,9 +104,8 @@ rStiefel <- function(p, q) { #' and the matrix \code{B} estimated for the model as a orthogonal basis of the #' orthogonal space spaned by \code{V}. #' -#' @rdname cve_cpp_V1 -#' @export -cve_cpp <- function(X, Y, k, nObs, tauInitial = 1., tol = 1e-5, slack = -1e-10, maxIter = 50L, attempts = 10L) { - .Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts) +#' @keywords internal +cve_cpp <- function(X, Y, method, k, nObs, tauInitial = 1., rho1 = 0.1, rho2 = 0.9, tol = 1e-5, maxIter = 50L, maxLineSearchIter = 10L, attempts = 10L) { + .Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts) } diff --git a/CVE/R/datasets.R b/CVE/R/datasets.R index 237ae8f..68af0f4 100644 --- a/CVE/R/datasets.R +++ b/CVE/R/datasets.R @@ -12,10 +12,12 @@ #' @param lambda Only for \code{"M4"}, see: below. #' #' @return List with elements -#' \item{X}{data} -#' \item{Y}{response} -#' \item{B}{Used dim-reduction matrix} -#' \item{name}{Name of the dataset (name parameter)} +#' \itemize{ +#' \item{X}{data} +#' \item{Y}{response} +#' \item{B}{Used dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } #' #' @section M1: #' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace @@ -33,9 +35,9 @@ #' @section M5: #' TODO: #' +#' @import stats +#' @importFrom stats rnorm rbinom #' @export -#' -#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { # validate parameters stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) diff --git a/CVE/R/package.R b/CVE/R/package.R index 5ec658f..5db98b1 100644 --- a/CVE/R/package.R +++ b/CVE/R/package.R @@ -5,6 +5,9 @@ #' #' TODO: And some details #' +#' +#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' #' @docType package #' @author Loki #' @import Rcpp diff --git a/CVE/man/CVE-package.Rd b/CVE/man/CVE-package.Rd index aed7898..96c7dc3 100644 --- a/CVE/man/CVE-package.Rd +++ b/CVE/man/CVE-package.Rd @@ -12,6 +12,9 @@ Conditional Variance Estimator for Sufficient Dimension \details{ TODO: And some details } +\references{ +Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +} \author{ Loki } diff --git a/CVE/man/cve.Rd b/CVE/man/cve.Rd index 0874071..3d391d7 100644 --- a/CVE/man/cve.Rd +++ b/CVE/man/cve.Rd @@ -2,31 +2,45 @@ % Please edit documentation in R/CVE.R \name{cve} \alias{cve} -\title{Conditional Variance Estimator} +\alias{cve.call} +\title{Implementation of the CVE method.} \usage{ -cve(X, Y, k, nObs = sqrt(nrow(X)), tauInitial = 1, tol = 0.001, - slack = -1e-10, maxIter = 50L, attempts = 10L) +cve(formula, data, method = "simple", ...) + +cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...) } \arguments{ -\item{X}{A matrix of type numeric of dimensions N times dim where N is the number -of entries with dim as data dimension.} +\item{formula}{Formel for the regression model defining `X`, `Y`. +See: \code{\link{formula}}.} -\item{Y}{A vector of type numeric of length N (coresponds to \code{x}).} +\item{data}{data.frame holding data for formula.} -\item{k}{Guess for rank(B).} +\item{method}{The different only differe in the used optimization. + All of them are Gradient based optimization on a Stiefel manifold. +\itemize{ + \item "simple" Simple reduction of stepsize. + \item "linesearch" determines stepsize with backtracking linesearch + using Armijo-Wolf conditions. + \item TODO: further +}} -\item{nObs}{As describet in the paper.} - -\item{tol}{Tolerance for optimization stopping creteria.} +\item{...}{Further parameters depending on the used method. +TODO: See ...} } \description{ Conditional Variance Estimator (CVE) is a novel sufficient dimension - reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X), - where B'X is a lower dimensional projection of the predictors. +reduction (SDR) method assuming a model +\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +where B'X is a lower dimensional projection of the predictors. +} +\examples{ +library(CVE) +ds <- dataset("M5") +X <- ds$X +Y <- ds$Y +dr <- cve(Y ~ X, k = 1) + } \references{ -Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -} -\seealso{ -TODO: \code{vignette("CVE_paper", package="CVE")}. +Fertl L, Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 } diff --git a/CVE/man/cve_cpp_V1.Rd b/CVE/man/cve_cpp.Rd similarity index 86% rename from CVE/man/cve_cpp_V1.Rd rename to CVE/man/cve_cpp.Rd index 98fa864..5b9e746 100644 --- a/CVE/man/cve_cpp_V1.Rd +++ b/CVE/man/cve_cpp.Rd @@ -4,8 +4,9 @@ \alias{cve_cpp} \title{Conditional Variance Estimation (CVE) method.} \usage{ -cve_cpp(X, Y, k, nObs, tauInitial = 1, tol = 1e-05, slack = -1e-10, - maxIter = 50L, attempts = 10L) +cve_cpp(X, Y, method, k, nObs, tauInitial = 1, rho1 = 0.1, + rho2 = 0.9, tol = 1e-05, maxIter = 50L, maxLineSearchIter = 10L, + attempts = 10L) } \arguments{ \item{X}{data points} @@ -19,13 +20,13 @@ cve_cpp(X, Y, k, nObs, tauInitial = 1, tol = 1e-05, slack = -1e-10, \item{tol}{Tolerance for update error used for stopping criterion (default 1e-5)} -\item{slack}{Ratio of small negative error allowed in loss optimization (default -1e-10)} - \item{maxIter}{Upper bound of optimization iterations (default 50)} \item{attempts}{Number of tryes with new random optimization starting points (default 10)} \item{tau}{Initial step size (default 1)} + +\item{slack}{Ratio of small negative error allowed in loss optimization (default -1e-10)} } \value{ List containing the bandwidth \code{h}, optimization objective \code{V} @@ -35,3 +36,4 @@ List containing the bandwidth \code{h}, optimization objective \code{V} \description{ This version uses a "simple" stiefel optimization schema. } +\keyword{internal} diff --git a/CVE/man/dataset.Rd b/CVE/man/dataset.Rd index 1cce988..da7be5b 100644 --- a/CVE/man/dataset.Rd +++ b/CVE/man/dataset.Rd @@ -19,10 +19,12 @@ dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) } \value{ List with elements -\item{X}{data} -\item{Y}{response} -\item{B}{Used dim-reduction matrix} -\item{name}{Name of the dataset (name parameter)} +\itemize{ + \item{X}{data} + \item{Y}{response} + \item{B}{Used dim-reduction matrix} + \item{name}{Name of the dataset (name parameter)} +} } \description{ Provides sample datasets. There are 5 different datasets named @@ -60,6 +62,3 @@ TODO: TODO: } -\references{ -Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 -} diff --git a/CVE/man/optStiefel.Rd b/CVE/man/optStiefel.Rd index 79af707..d955e00 100644 --- a/CVE/man/optStiefel.Rd +++ b/CVE/man/optStiefel.Rd @@ -1,8 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/RcppExports.R -\name{optStiefel} -\alias{optStiefel} -\title{Stiefel Optimization.} +\name{gradient} +\alias{gradient} +\alias{optStiefel_simple} +\alias{optStiefel_linesearch} +\title{Gradient computation of the loss `L_n(V)`.} \arguments{ \item{X}{data points} @@ -27,6 +29,11 @@ List containing the bandwidth \code{h}, optimization objective \code{V} orthogonal space spaned by \code{V}. } \description{ +The loss is defined as +\deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )} +with +\deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)} + Stiefel Optimization for \code{V} given a dataset \code{X} and responces \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% diff --git a/CVE/man/plot.cve.Rd b/CVE/man/plot.cve.Rd new file mode 100644 index 0000000..3262013 --- /dev/null +++ b/CVE/man/plot.cve.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{plot.cve} +\alias{plot.cve} +\title{Ploting helper for objects of class \code{"cve"}.} +\usage{ +\method{plot}{cve}(x, ...) +} +\arguments{ +\item{x}{Object of class \code{"cve"} (result of [cve()]).} + +\item{...}{Pass through parameters to [plot()] and [lines()]} + +\item{content}{Specifies what to plot: +\itemize{ + \item "history" Plots the loss history from stiefel optimization. + \item ... TODO: add (if there are any) +}} +} +\description{ +Ploting helper for objects of class \code{"cve"}. +} +\seealso{ +see \code{\link{par}} for graphical parameters to pass through. +} diff --git a/CVE/src/CVE.cpp b/CVE/src/CVE.cpp index ab48a2e..8381110 100644 --- a/CVE/src/CVE.cpp +++ b/CVE/src/CVE.cpp @@ -1,8 +1,3 @@ -// -// Usage: -// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')" -// - // only `RcppArmadillo.h` which includes `Rcpp.h` #include @@ -14,6 +9,9 @@ // required for `R::qchisq()` used in `estimateBandwidth()` #include +// for proper error handling +#include + //' Estimated bandwidth for CVE. //' //' Estimates a propper bandwidth \code{h} according @@ -72,18 +70,27 @@ arma::mat rStiefel(arma::uword p, arma::uword q) { return Q; } +//' Gradient computation of the loss `L_n(V)`. +//' +//' The loss is defined as +//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n y_2(V, X_j) - y_1(V, X_j)^2}{L_n(V) := 1/n sum_j( (y_2(V, X_j) - y_1(V, X_j)^2 )} +//' with +//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)} +//' +//' @rdname optStiefel +//' @keywords internal +//' @name gradient double gradient(const arma::mat& X, const arma::mat& X_diff, const arma::mat& Y, const arma::mat& Y_rep, const arma::mat& V, const double h, - arma::mat* G = 0 + arma::mat* G = 0 // out ) { using namespace arma; uword n = X.n_rows; - uword p = X.n_cols; // orthogonal projection matrix `Q = I - VV'` for dist computation mat Q = -(V * V.t()); @@ -138,18 +145,17 @@ double gradient(const arma::mat& X, //' orthogonal space spaned by \code{V}. //' //' @rdname optStiefel -//' @name optStiefel //' @keywords internal -double optStiefel( +//' @name optStiefel_simple +double optStiefel_simple( const arma::mat& X, const arma::vec& Y, const int k, const double h, const double tauInitial, const double tol, - const double slack, const int maxIter, - arma::mat& V, // out + arma::mat& V, // out arma::vec& history // out ) { using namespace arma; @@ -177,9 +183,9 @@ double optStiefel( double loss; double error = datum::inf; double tau = tauInitial; - int count; + int iter; // main optimization loop - for (count = 0; count < maxIter && error > tol; ++count) { + for (iter = 0; iter < maxIter && error > tol; ++iter) { // calc gradient `G = grad_V(L)(V)` mat G; loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); @@ -190,11 +196,11 @@ double optStiefel( // loss after step `L(V(tau))` double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h); - // store `loss` in `history` and increase `count` - history(count) = loss; + // store `loss` in `history` and increase `iter` + history(iter) = loss; // validate if loss decreased - if ((loss_tau - loss) > slack * loss) { + if ((loss_tau - loss) > 0.0) { // ignore step, retry with half the step size tau = tau / 2.; error = datum::inf; @@ -208,7 +214,121 @@ double optStiefel( } // store final `loss` - history(count) = loss; + history(iter) = loss; + + return loss; +} + +//' @rdname optStiefel +//' @keywords internal +//' @name optStiefel_linesearch +double optStiefel_linesearch( + const arma::mat& X, + const arma::vec& Y, + const int k, + const double h, + const double tauInitial, + const double tol, + const int maxIter, + const double rho1, + const double rho2, + const int maxLineSearchIter, + arma::mat& V, // out + arma::vec& history // out +) { + using namespace arma; + + // get dimensions + const uword n = X.n_rows; // nr samples + const uword p = X.n_cols; // dim of random variable `X` + const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} + + // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` + mat X_diff(n * n, p); + for (uword i = 0, k = 0; i < n; ++i) { + for (uword j = 0; j < n; ++j) { + X_diff.row(k++) = X.row(i) - X.row(j); + } + } + const vec Y_rep = repmat(Y, n, 1); + const mat I_p = eye(p, p); + const mat I_2q = eye(2 * q, 2 * q); + + // initial start value for `V` + V = rStiefel(p, q); + + // first gradient initialization + mat G; + double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); + + // set first `loss` in history + history(0) = loss; + + // main curvilinear optimization loop + double error = datum::inf; + int iter = 0; + while (iter++ < maxIter && error > tol) { + // helper matrices `lU` (linesearch U), `lV` (linesearch V) + // as describet in [Wen, Yin] Lemma 4. + mat lU = join_rows(G, V); // linesearch "U" + mat lV = join_rows(V, -1.0 * G); // linesearch "V" + // `A = G V' - V G'` + mat A = lU * lV.t(); + + // set initial step size for curvilinear line search + double tau = tauInitial, lower = 0., upper = datum::inf; + + // TODO: check if `tau` is valid for inverting + + // set line search internal gradient and loss to cycle for next iteration + mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)` + mat G_tau; // gradient of `V` at `V(tau) = V_tau` + double loss_tau; // loss (objective) at position `V(tau)` + int lsIter = 0; // linesearch iter + // start line search + do { + mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t(); + // next step `V` + V_tau = V - tau * (lU * (HV * V)); + + double LprimeV = -trace(G.t() * A * V); + + mat lB = I_p - (tau / 2.) * lU * HV; + + loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau); + + double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau)); + + // Armijo condition + if (loss_tau > loss + (rho1 * tau * LprimeV)) { + upper = tau; + tau = (lower + upper) / 2.; + // Wolfe condition + } else if (LprimeV_tau < rho2 * LprimeV) { + lower = tau; + if (upper == datum::inf) { + tau = 2. * lower; + } else { + tau = (lower + upper) / 2.; + } + } else { + break; + } + } while (++lsIter < maxLineSearchIter); + + // compute error (break condition) + // Note: `error` is the decrease of the objective `L_n(V)` and not the + // norm of the gradient as proposed in [Wen, Yin] Algorithm 1. + error = loss - loss_tau; + + // cycle `V`, `G` and `loss` for next iteration + V = V_tau; + loss = loss_tau; + G = G_tau; + + // store final `loss` + history(iter) = loss; + } return loss; } @@ -232,18 +352,20 @@ double optStiefel( //' and the matrix \code{B} estimated for the model as a orthogonal basis of the //' orthogonal space spaned by \code{V}. //' -//' @rdname cve_cpp_V1 -//' @export +//' @keywords internal // [[Rcpp::export]] Rcpp::List cve_cpp( const arma::mat& X, const arma::vec& Y, + const std::string method, const int k, const double nObs, const double tauInitial = 1., + const double rho1 = 0.1, + const double rho2 = 0.9, const double tol = 1e-5, - const double slack = -1e-10, const int maxIter = 50, + const int maxLineSearchIter = 10, const int attempts = 10 ) { using namespace arma; @@ -263,12 +385,27 @@ Rcpp::List cve_cpp( // declare output variables mat V; // estimated `V` space vec hist = vec(history.n_rows, fill::zeros); // optimization history - double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist); + double loss; + if (method == "simple") { + loss = optStiefel_simple( + X, Y, k, h, + tauInitial, tol, maxIter, + V, hist + ); + } else if (method == "linesearch") { + loss = optStiefel_linesearch( + X, Y, k, h, + tauInitial, tol, maxIter, rho1, rho2, maxLineSearchIter, + V, hist + ); + } else { + throw std::invalid_argument("Unknown method."); + } if (loss < loss_best) { loss_best = loss; V_best = V; } - // write history to history collection + // add history to history collection history.col(i) = hist; } diff --git a/CVE/src/RcppExports.cpp b/CVE/src/RcppExports.cpp index 590c0c1..9ce7a63 100644 --- a/CVE/src/RcppExports.cpp +++ b/CVE/src/RcppExports.cpp @@ -32,21 +32,24 @@ BEGIN_RCPP END_RCPP } // cve_cpp -Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const int k, const double nObs, const double tauInitial, const double tol, const double slack, const int maxIter, const int attempts); -RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP tolSEXP, SEXP slackSEXP, SEXP maxIterSEXP, SEXP attemptsSEXP) { +Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const std::string method, const int k, const double nObs, const double tauInitial, const double rho1, const double rho2, const double tol, const int maxIter, const int maxLineSearchIter, const int attempts); +RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP rho1SEXP, SEXP rho2SEXP, SEXP tolSEXP, SEXP maxIterSEXP, SEXP maxLineSearchIterSEXP, SEXP attemptsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type Y(YSEXP); + Rcpp::traits::input_parameter< const std::string >::type method(methodSEXP); Rcpp::traits::input_parameter< const int >::type k(kSEXP); Rcpp::traits::input_parameter< const double >::type nObs(nObsSEXP); Rcpp::traits::input_parameter< const double >::type tauInitial(tauInitialSEXP); + Rcpp::traits::input_parameter< const double >::type rho1(rho1SEXP); + Rcpp::traits::input_parameter< const double >::type rho2(rho2SEXP); Rcpp::traits::input_parameter< const double >::type tol(tolSEXP); - Rcpp::traits::input_parameter< const double >::type slack(slackSEXP); Rcpp::traits::input_parameter< const int >::type maxIter(maxIterSEXP); + Rcpp::traits::input_parameter< const int >::type maxLineSearchIter(maxLineSearchIterSEXP); Rcpp::traits::input_parameter< const int >::type attempts(attemptsSEXP); - rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts)); + rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts)); return rcpp_result_gen; END_RCPP } @@ -54,7 +57,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_CVE_estimateBandwidth", (DL_FUNC) &_CVE_estimateBandwidth, 3}, {"_CVE_rStiefel", (DL_FUNC) &_CVE_rStiefel, 2}, - {"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 9}, + {"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 12}, {NULL, NULL, 0} }; diff --git a/README.md b/README.md index e69de29..1a0c581 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,7 @@ + +# Overview +- **CVE/**: Contains actual `R` package. +- **CVE_legacy/**: Contains original (first) `R` implementatin of the CVE method. +The `*.R` and `*.cpp` files in the root directory are _development_ and _test_ files. + +## TODO: README.md diff --git a/cve_V1.cpp b/cve_V1.cpp index 314553a..88f2260 100644 --- a/cve_V1.cpp +++ b/cve_V1.cpp @@ -1,5 +1,5 @@ // -// Standalone implementation for development. +// Development file. // // Usage: // ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')" @@ -74,6 +74,15 @@ arma::mat rStiefel(arma::uword p, arma::uword q) { return Q; } +//' Gradient computation of the loss `L_n(V)`. +//' +//' The loss is defined as +//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)} +//' with +//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)} +//' +//' @rdname optStiefel +//' @keywords internal double gradient(const arma::mat& X, const arma::mat& X_diff, const arma::mat& Y, @@ -178,9 +187,9 @@ double optStiefel( double loss; double error = datum::inf; double tau = tauInitial; - int count; + int iter; // main optimization loop - for (count = 0; count < maxIter && error > tol; ++count) { + for (iter = 0; iter < maxIter && error > tol; ++iter) { // calc gradient `G = grad_V(L)(V)` mat G; loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); @@ -191,8 +200,8 @@ double optStiefel( // loss after step `L(V(tau))` double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h); - // store `loss` in `history` and increase `count` - history(count) = loss; + // store `loss` in `history` and increase `iter` + history(iter) = loss; // validate if loss decreased if ((loss_tau - loss) > slack * loss) { @@ -209,7 +218,7 @@ double optStiefel( } // store final `loss` - history(count) = loss; + history(iter) = loss; return loss; } @@ -233,7 +242,7 @@ double optStiefel( //' and the matrix \code{B} estimated for the model as a orthogonal basis of the //' orthogonal space spaned by \code{V}. //' -//' @rdname cve_cpp_V1 +//' @rdname cve_cpp //' @export // [[Rcpp::export]] Rcpp::List cve_cpp( diff --git a/cve_V2.cpp b/cve_V2.cpp index 6ce888f..f47095a 100644 --- a/cve_V2.cpp +++ b/cve_V2.cpp @@ -1,5 +1,5 @@ // -// Standalone implementation for development. +// Development file. // // Usage: // ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')" @@ -149,12 +149,12 @@ double optStiefel( const int k, const double h, const double tauInitial, - const double rho1, - const double rho2, const double tol, const int maxIter, + const double rho1, + const double rho2, const int maxLineSeachIter, - arma::mat& V, // out + arma::mat& V, // out arma::vec& history // out ) { using namespace arma; @@ -309,7 +309,7 @@ Rcpp::List cve_cpp( mat V; // estimated `V` space vec hist = vec(history.n_rows, fill::zeros); // optimization history double loss = optStiefel(X, Y, k, h, - tauInitial, rho1, rho2, tol, maxIter, maxLineSeachIter, V, hist + tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist ); if (loss < loss_best) { loss_best = loss; diff --git a/runtime_tests_grad.cpp b/runtime_tests_grad.cpp index d13ea3d..a3571d0 100644 --- a/runtime_tests_grad.cpp +++ b/runtime_tests_grad.cpp @@ -252,27 +252,49 @@ arma::mat grad_p(const arma::mat& X_ref, loss /= n; // scaling for gradient summation - for (uword k = 0; k < n; ++k) { - for (uword l = 0; l < n; ++l) { + // this scaling matrix is the lower triangular matrix defined as + // + // S_kl := (s_{kl} + s_{lk}) D_{kl} + // s_ij := (L_n(V, X_j) - (Y_i - y1(V, X_j))^2) W_ij + double factor; + for (uword l = 0; l < n; ++l) { + for (uword k = l + 1; k < n; ++k) { tmp = Y[k] - y1[l]; - S[l * n + k] = (L[l] - (tmp * tmp)) * W[l * n + k] * D[l * n + k]; + factor = (L[l] - (tmp * tmp)) * W[l * n + k]; // \tile{S}_{kl} + tmp = Y[l] - y1[k]; + factor += (L[k] - (tmp * tmp)) * W[k * n + l]; // \tile{S}_{lk} + S[l * n + k] = factor * D[l * n + k]; // (s_kl + s_lk) * D_kl } } // gradient - double factor = -2. / (n * h * h); - for (uword j = 0; j < q; ++j) { + // reuse memory area of `Q` + // no longer needed and provides enough space (`q < p`) + double* GD = Q; + const double* X_l; + const double* X_k; + for (uword j = 0; j < p; ++j) { for (uword i = 0; i < p; ++i) { sum = 0.0; - for (uword k = 0; k < n; ++k) { - for (uword l = 0; l < n; ++l) { - mvm = 0.0; - for (uword m = 0; m < p; ++m) { - mvm += (X[l * p + m] - X[k * p + m]) * V[j * p + m]; - } - sum += S[l * n + k] * (X[l * p + i] - X[k * p + i]) * mvm; + for (uword l = 0; l < n; ++l) { + X_l = X + (l * p); + for (uword k = l + 1; k < n; ++k) { + X_k = X + (k * p); + sum += S[l * n + k] * (X_l[i] - X_k[i]) * (X_l[j] - X_k[j]); } } + GD[j * p + i] = sum; + } + } + + // distance gradient `DG` to gradient by multiplying with `V` + factor = -2. / (n * h * h); + for (uword i = 0; i < p; ++i) { + for (uword j = 0; j < q; ++j) { + sum = 0.0; + for (uword k = 0; k < p; ++k) { + sum += GD[k * p + i] * V[j * p + k]; + } G[j * p + i] = factor * sum; } } @@ -352,16 +374,17 @@ setup.tests <- function () { } counter <<- counter + 1 } -(mbm <- microbenchmark( +mbm <- microbenchmark( arma = arma_grad(X, X_diff, Y, Y_rep, V, h), grad = grad(Xt, Y, V, h), grad_p = grad_p(Xt, Y, V, h), check = comp.all, setup = setup.tests(), times = 100L -)) - -cat("Total time:", format(Sys.time() - time.start), '\n') +) +cat("\033[1m\033[92mTotal time:", format(Sys.time() - time.start), '\n') +print(mbm) +cat("\033[0m") boxplot(mbm, las = 2, xlab = NULL)