// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*- // // Usage: // ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')" // // only `RcppArmadillo.h` which includes `Rcpp.h` #include // through the depends attribute `Rcpp` is tolled to create // hooks for `RcppArmadillo` needed by the build process. // // [[Rcpp::depends(RcppArmadillo)]] // required for `R::qchisq()` used in `estimateBandwidth()` #include //' Estimated bandwidth for CVE. //' //' Estimates a propper bandwidth \code{h} according //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{% //' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} //' //' @param X data matrix of dimension (n x p) with n data points X_i of dimension //' q. Therefor each row represents a datapoint of dimension p. //' @param k Guess for rank(B). //' @param nObs Ether numeric of a function. If specified as numeric value //' its used in the computation of the bandwidth directly. If its a function //' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not //' supplied at all is to use \code{nObs <- nrow(x)^0.5}. //' //' @seealso [qchisq()] //' //' @export // [[Rcpp::export]] double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { using namespace arma; uword n = X.n_rows; // nr samples uword p = X.n_cols; // dimension of rand. var. `X` // column mean mat M = mean(X); // center `X` mat C = X.each_row() - M; // trace of covariance matrix, `traceSigma = Tr(C' C)` double traceSigma = accu(C % C); // compute estimated bandwidth double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast(k), 1, 0); return 2.0 * qchi2 * traceSigma / (p * n); } //' Random element from Stiefel Manifold `S(p, q)`. //' //' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. //' This is done by taking the Q-component of the QR decomposition //' from a `(p, q)` Matrix with independent standart normal entries. //' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. //' //' @param p Row dimension //' @param q Column dimension //' //' @returns Matrix of dim `(p, q)`. //' //' @seealso //' //' @export // [[Rcpp::export]] arma::mat rStiefel(arma::uword p, arma::uword q) { arma::mat Q, R; arma::qr_econ(Q, R, arma::randn(p, q)); return Q; } double gradient(const arma::mat& X, const arma::mat& X_diff, const arma::mat& Y, const arma::mat& Y_rep, const arma::mat& V, const double h, arma::mat* G = 0 ) { using namespace arma; uword n = X.n_rows; uword p = X.n_cols; // orthogonal projection matrix `Q = I - VV'` for dist computation mat Q = -(V * V.t()); Q.diag() += 1; // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` vec D_vec = sum(square(X_diff * Q), 1); mat D = reshape(D_vec, n, n); // calc weights as `W` with `W(i, j) = w_i(V, X_j)` mat W = exp(D / (-2.0 * h)); // column-wise normalization via 1-norm W = normalise(W, 1); vec W_vec = vectorise(W); // weighted `Y` means (first and second order) vec y1 = W.t() * Y; vec y2 = W.t() * square(Y); // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)` vec L = y2 - square(y1); // `loss = L_n(V)` double loss = mean(L); // check if gradient as output variable is set if (G != 0) { // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V` vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; mat X_diff_scale = X_diff.each_col() % scale; (*G) = X_diff_scale.t() * X_diff * V; (*G) *= -2.0 / (h * h * n); } return loss; } //' Stiefel Optimization with curvilinear linesearch. //' //' TODO: finish doc. comment //' Stiefel Optimization for \code{V} given a dataset \code{X} and responces //' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} //' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% //' span(B) = orth(span(B))}. //' The curvilinear linesearch uses Armijo-Wolfe conditions: // \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'} //' \deqn{L(V(tau))' < rho_2 * L(V(0))'} //' //' @param X data points //' @param Y response //' @param k assumed \eqn{rank(B)} //' @param nObs parameter for bandwidth estimation, typical value //' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. //' @param tau Initial step size //' @param tol Tolerance for update error used for stopping criterion //' @param maxIter Upper bound of optimization iterations //' //' @return List containing the bandwidth \code{h}, optimization objective \code{V} //' and the matrix \code{B} estimated for the model as a orthogonal basis of the //' orthogonal space spaned by \code{V}. //' //' @rdname optStiefel double optStiefel( const arma::mat& X, const arma::vec& Y, const int k, const double h, const double tauInitial, const double rho1, const double rho2, const double tol, const int maxIter, const int maxLineSeachIter, arma::mat& V, // out arma::vec& history // out ) { using namespace arma; // get dimensions const uword n = X.n_rows; // nr samples const uword p = X.n_cols; // dim of random variable `X` const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` mat X_diff(n * n, p); for (uword i = 0, k = 0; i < n; ++i) { for (uword j = 0; j < n; ++j) { X_diff.row(k++) = X.row(i) - X.row(j); } } const vec Y_rep = repmat(Y, n, 1); const mat I_p = eye(p, p); const mat I_2q = eye(2 * q, 2 * q); // initial start value for `V` V = rStiefel(p, q); // first gradient initialization mat G; double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); // set first `loss` in history history(0) = loss; // main curvilinear optimization loop double error = datum::inf; int iter = 0; while (iter++ < maxIter && error > tol) { // helper matrices `lU` (linesearch U), `lV` (linesearch V) // as describet in [Wen, Yin] Lemma 4. mat lU = join_rows(G, V); // linesearch "U" mat lV = join_rows(V, -1.0 * G); // linesearch "V" // `A = G V' - V G'` mat A = lU * lV.t(); // set initial step size for curvilinear line search double tau = tauInitial, lower = 0., upper = datum::inf; // check if `tau` is valid for inverting // set line search internal gradient and loss to cycle for next iteration mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)` mat G_tau; // gradient of `V` at `V(tau) = V_tau` double loss_tau; // loss (objective) at position `V(tau)` int lsIter = 0; // linesearch iter // start line search do { mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t(); // next step `V` V_tau = V - tau * (lU * (HV * V)); double LprimeV = -trace(G.t() * A * V); mat lB = I_p - (tau / 2.) * lU * HV; loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau); double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau)); // Armijo condition if (loss_tau > loss + (rho1 * tau * LprimeV)) { upper = tau; tau = (lower + upper) / 2.; // Wolfe condition } else if (LprimeV_tau < rho2 * LprimeV) { lower = tau; if (upper == datum::inf) { tau = 2. * lower; } else { tau = (lower + upper) / 2.; } } else { break; } } while (++lsIter < maxLineSeachIter); // compute error (break condition) // Note: `error` is the decrease of the objective `L_n(V)` and not the // norm of the gradient as proposed in [Wen, Yin] Algorithm 1. error = loss - loss_tau; // cycle `V`, `G` and `loss` for next iteration V = V_tau; loss = loss_tau; G = G_tau; // store final `loss` history(iter) = loss; } return loss; } //' Conditional Variance Estimation (CVE) method. //' //' This version uses a curvilinear linesearch for the stiefel optimization. //' //' @param X data points //' @param Y response //' @param k assumed \eqn{rank(B)} //' @param nObs parameter for bandwidth estimation, typical value //' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. //' @param tau Initial step size (default 1) //' @param tol Tolerance for update error used for stopping criterion (default 1e-5) //' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) //' @param maxIter Upper bound of optimization iterations (default 50) //' @param attempts Number of tryes with new random optimization starting points (default 10) //' //' @return List containing the bandwidth \code{h}, optimization objective \code{V} //' and the matrix \code{B} estimated for the model as a orthogonal basis of the //' orthogonal space spaned by \code{V}. //' //' @rdname cve_cpp_V2 //' @export // [[Rcpp::export]] Rcpp::List cve_cpp( const arma::mat& X, const arma::vec& Y, const int k, const double nObs, const double tauInitial = 1., const double rho1 = 0.05, const double rho2 = 0.95, const double tol = 1e-6, const int maxIter = 50, const int maxLineSeachIter = 10, const int attempts = 10 ) { using namespace arma; // tracker of current best results mat V_best; double loss_best = datum::inf; // estimate bandwidth double h = estimateBandwidth(X, k, nObs); // loss history for each optimization attempt // each column contaions the iteration history for the loss mat history = mat(maxIter + 1, attempts); // multiple stiefel optimization attempts for (int i = 0; i < attempts; ++i) { // declare output variables mat V; // estimated `V` space vec hist = vec(history.n_rows, fill::zeros); // optimization history double loss = optStiefel(X, Y, k, h, tauInitial, rho1, rho2, tol, maxIter, maxLineSeachIter, V, hist ); if (loss < loss_best) { loss_best = loss; V_best = V; } // write history to history collection history.col(i) = hist; } // get `B` as kernal of `V.t()` mat B = null(V_best.t()); return Rcpp::List::create( Rcpp::Named("history") = history, Rcpp::Named("loss") = loss_best, Rcpp::Named("h") = h, Rcpp::Named("V") = V_best, Rcpp::Named("B") = B ); } /*** R source("CVE/R/datasets.R") ds <- dataset() print(system.time( cve.res <- cve_cpp( X = ds$X, Y = ds$Y, k = ncol(ds$B), nObs = sqrt(nrow(ds$X)) ) )) pdf('plots/cve_V2_history.pdf') H <- cve.res$history H_i <- H[H[, 1] > 0, 1] plot(1:length(H_i), H_i, main = "History cve_V2", xlab = "Iterations i", ylab = expression(loss == L[n](V^{(i)})), xlim = c(1, nrow(H)), ylim = c(0, max(H)), type = "l" ) for (i in 2:ncol(H)) { H_i <- H[H[, i] > 0, i] lines(1:length(H_i), H_i) } x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) points(x.ends, y.ends) */