diff --git a/CVE_R/R/CVE.R b/CVE_R/R/CVE.R index e6e6ee2..773d62c 100644 --- a/CVE_R/R/CVE.R +++ b/CVE_R/R/CVE.R @@ -134,8 +134,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, } # augment result information - dr.k$method <- method - dr.k$call <- call + dr$method <- method + dr$call <- call class(dr) <- "cve" return(dr) } diff --git a/CVE_R/demo/00Index b/CVE_R/demo/00Index index 7930296..46dac9a 100644 --- a/CVE_R/demo/00Index +++ b/CVE_R/demo/00Index @@ -1,2 +1,2 @@ runtime_test Runtime comparison of CVE against MAVE for M1 - M5 datasets. -logging Example of a logger function for cve algorithm analysis. \ No newline at end of file +logging Example of a logger function for cve algorithm analysis. diff --git a/cve_V0.R b/cve_V0.R deleted file mode 100644 index f1abfbe..0000000 --- a/cve_V0.R +++ /dev/null @@ -1,173 +0,0 @@ - -#' Euclidean vector norm (2-norm) -#' -#' @param x Numeric vector -#' @return Numeric -norm2 <- function(x) { return(sum(x^2)) } - -#' Samples uniform from the Stiefel Manifold -#' -#' @param p row dim. -#' @param q col dim. -#' @return `(p, q)` semi-orthogonal matrix -rStiefl <- function(p, q) { - return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) -} - -#' Matrix Trace -#' -#' @param M Square matrix -#' @return Trace \eqn{Tr(M)} -Tr <- function(M) { - return(sum(diag(M))) -} - -#' Null space basis of given matrix `B` -#' -#' @param B `(p, q)` matrix -#' @return Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B` -null <- function(M) { - tmp <- qr(M) - set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank) - return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) -} - -#### -#chooses bandwith h according to formula in paper -#dim...dimension of X vector -#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model -# N...sample size -#nObs... nObs in bandwith formula -#tr...trace of sample covariance matrix of X -estimateBandwidth<-function(X, k, nObs) { - n <- nrow(X) - p <- ncol(X) - - X_centered <- scale(X, center = TRUE, scale = FALSE) - Sigma <- (1 / n) * t(X_centered) %*% X_centered - - quantil <- qchisq((nObs - 1) / (n - 1), k) - return(2 * quantil * Tr(Sigma) / p) -} - -########### -# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) -# V... (dim times q) matrix -# Xl... output of Xl_fun -# dtemp...vector with pairwise distances |X_i - X_j| -# q...output of q_ind function -# Y... vector with N Y_i values -# if grad=T, gradient of L(V) also returned -LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { - N <- length(Y) - k <- if (is.vector(V)) { 1 } else { ncol(V) } - Xlv <- Xl %*% V - d <- dtemp - ((Xlv^2) %*% rep(1, k)) - w <- dnorm(d / h) / dnorm(0) - w <- matrix(w, N, q) - w <- apply(w, 2, function(x) { x / sum(x) }) - y1 <- t(w) %*% Y - y2 <- t(w) %*% (Y^2) - sig <- y2 - y1^2 - - result <- list(var = mean(sig), sig = sig) - if (grad == TRUE) { - tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2) - if (k == 1) { - grad_d <- -2 * Xl * as.vector(Xlv) - grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 - } else { - grad <- matrix(0, nrow(V), ncol(V)) - for (j in 1:k) { - grad_d <- -2 * Xl * as.vector(Xlv[ ,j]) - grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 - } - } - result$grad = grad - } - - return(result) -} -#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V) -#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold -#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns -#consists of X data matrix, (i.e. dat=cbind(Y,X)) -#h... bandwidth -#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B -#k0... number of arbitrary starting values -#p...fraction of data points used as shifting point -#maxIter... number of maximal iterations in curvilinear search -#nObs.. nObs parameter for choosing bandwidth if no h is supplied -#lambda_0...initial stepsize -#tol...tolerance for stoping iterations -#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced -#output: -#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix -#var...value of L_n(Vhat_k) -#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} -#count...number of iterations -#h...bandwidth -cve_R <- function( - X, Y, k, - nObs = sqrt(nrow(X)), - tauInitial = 1.0, - tol = 1e-3, - slack = 0, - maxIter = 50L, - attempts = 10L -) { - # get dimensions - n <- nrow(X) - p <- ncol(X) - q <- p - k - - Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) - Xd <- apply(Xl, 1, norm2) - I_p <- diag(1, p) - # estimate bandwidth - h <- estimateBandwidth(X, k, nObs) - - Lbest <- Inf - Vend <- mat.or.vec(p, q) - - for (. in 1:attempts) { - Vnew <- Vold <- rStiefl(p, q) - Lnew <- Lold <- exp(10000) - tau <- tauInitial - - error <- Inf - count <- 0 - while (error > tol & count < maxIter) { - - tmp <- LV(Vold, Xl, Xd, h, n, Y) - G <- tmp$grad - Lold <- tmp$var - - W <- tau * (G %*% t(Vold) - Vold %*% t(G)) - Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold - - Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var - - if ((Lnew - Lold) > slack * Lold) { - tau = tau / 2 - error <- Inf - } else { - error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k) - Vold <- Vnew - } - count <- count + 1 - } - - if (Lbest > Lnew) { - Lbest <- Lnew - Vend <- Vnew - } - } - - return(list( - loss = Lbest, - V = Vend, - B = null(Vend), - h = h - )) -} diff --git a/cve_V1.cpp b/cve_V1.cpp deleted file mode 100644 index 88f2260..0000000 --- a/cve_V1.cpp +++ /dev/null @@ -1,330 +0,0 @@ -// -// Development file. -// -// Usage: -// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')" -// - -// only `RcppArmadillo.h` which includes `Rcpp.h` -#include - -// through the depends attribute `Rcpp` is tolled to create -// hooks for `RcppArmadillo` needed by the build process. -// -// [[Rcpp::depends(RcppArmadillo)]] - -// required for `R::qchisq()` used in `estimateBandwidth()` -#include - -//' Estimated bandwidth for CVE. -//' -//' Estimates a propper bandwidth \code{h} according -//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% -//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} -//' -//' @param X data matrix of dimension (n x p) with n data points X_i of dimension -//' q. Therefor each row represents a datapoint of dimension p. -//' @param k Guess for rank(B). -//' @param nObs Ether numeric of a function. If specified as numeric value -//' its used in the computation of the bandwidth directly. If its a function -//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not -//' supplied at all is to use \code{nObs <- nrow(x)^0.5}. -//' -//' @seealso [qchisq()] -//' -//' @export -// [[Rcpp::export]] -double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { - using namespace arma; - - uword n = X.n_rows; // nr samples - uword p = X.n_cols; // dimension of rand. var. `X` - - // column mean - mat M = mean(X); - // center `X` - mat C = X.each_row() - M; - // trace of covariance matrix, `traceSigma = Tr(C' C)` - double traceSigma = accu(C % C); - // compute estimated bandwidth - double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast(k), 1, 0); - - return 2.0 * qchi2 * traceSigma / (p * n); -} - -//' Random element from Stiefel Manifold `S(p, q)`. -//' -//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. -//' This is done by taking the Q-component of the QR decomposition -//' from a `(p, q)` Matrix with independent standart normal entries. -//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. -//' -//' @param p Row dimension -//' @param q Column dimension -//' -//' @return Matrix of dim `(p, q)`. -//' -//' @seealso -//' -//' @export -// [[Rcpp::export]] -arma::mat rStiefel(arma::uword p, arma::uword q) { - arma::mat Q, R; - arma::qr_econ(Q, R, arma::randn(p, q)); - return Q; -} - -//' Gradient computation of the loss `L_n(V)`. -//' -//' The loss is defined as -//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)} -//' with -//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)} -//' -//' @rdname optStiefel -//' @keywords internal -double gradient(const arma::mat& X, - const arma::mat& X_diff, - const arma::mat& Y, - const arma::mat& Y_rep, - const arma::mat& V, - const double h, - arma::mat* G = 0 -) { - using namespace arma; - - uword n = X.n_rows; - uword p = X.n_cols; - - // orthogonal projection matrix `Q = I - VV'` for dist computation - mat Q = -(V * V.t()); - Q.diag() += 1; - // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` - vec D_vec = sum(square(X_diff * Q), 1); - mat D = reshape(D_vec, n, n); - // calc weights as `W` with `W(i, j) = w_i(V, X_j)` - mat W = exp(D / (-2.0 * h)); - // column-wise normalization via 1-norm - W = normalise(W, 1); - vec W_vec = vectorise(W); - // weighted `Y` means (first and second order) - vec y1 = W.t() * Y; - vec y2 = W.t() * square(Y); - // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)` - vec L = y2 - square(y1); - // `loss = L_n(V)` - double loss = mean(L); - // check if gradient as output variable is set - if (G != 0) { - // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V` - vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; - mat X_diff_scale = X_diff.each_col() % scale; - (*G) = X_diff_scale.t() * X_diff * V; - (*G) *= -2.0 / (h * h * n); - } - - return loss; -} - -//' Stiefel Optimization. -//' -//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces -//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} -//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% -//' span(B) = orth(span(B))}. -//' -//' @param X data points -//' @param Y response -//' @param k assumed \eqn{rank(B)} -//' @param nObs parameter for bandwidth estimation, typical value -//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. -//' @param tau Initial step size -//' @param tol Tolerance for update error used for stopping criterion -//' \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{% -//' \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}. -//' @param maxIter Upper bound of optimization iterations -//' -//' @return List containing the bandwidth \code{h}, optimization objective \code{V} -//' and the matrix \code{B} estimated for the model as a orthogonal basis of the -//' orthogonal space spaned by \code{V}. -//' -//' @rdname optStiefel -//' @keywords internal -double optStiefel( - const arma::mat& X, - const arma::vec& Y, - const int k, - const double h, - const double tauInitial, - const double tol, - const double slack, - const int maxIter, - arma::mat& V, // out - arma::vec& history // out -) { - using namespace arma; - - // get dimensions - const uword n = X.n_rows; // nr samples - const uword p = X.n_cols; // dim of random variable `X` - const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} - - // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` - mat X_diff(n * n, p); - for (uword i = 0, k = 0; i < n; ++i) { - for (uword j = 0; j < n; ++j) { - X_diff.row(k++) = X.row(i) - X.row(j); - } - } - const vec Y_rep = repmat(Y, n, 1); - const mat I_p = eye(p, p); - - // initial start value for `V` - V = rStiefel(p, q); - - // init optimization `loss`es, `error` and stepsize `tau` - // double loss_next = datum::inf; - double loss; - double error = datum::inf; - double tau = tauInitial; - int iter; - // main optimization loop - for (iter = 0; iter < maxIter && error > tol; ++iter) { - // calc gradient `G = grad_V(L)(V)` - mat G; - loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); - // matrix `A` for colescy-transform of the gradient - mat A = tau * ((G * V.t()) - (V * G.t())); - // next iteration step of `V` - mat V_tau = inv(I_p + A) * (I_p - A) * V; - // loss after step `L(V(tau))` - double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h); - - // store `loss` in `history` and increase `iter` - history(iter) = loss; - - // validate if loss decreased - if ((loss_tau - loss) > slack * loss) { - // ignore step, retry with half the step size - tau = tau / 2.; - error = datum::inf; - } else { - // compute step error (break condition) - error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q); - // shift for next iteration - V = V_tau; - loss = loss_tau; - } - } - - // store final `loss` - history(iter) = loss; - - return loss; -} - -//' Conditional Variance Estimation (CVE) method. -//' -//' This version uses a "simple" stiefel optimization schema. -//' -//' @param X data points -//' @param Y response -//' @param k assumed \eqn{rank(B)} -//' @param nObs parameter for bandwidth estimation, typical value -//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. -//' @param tau Initial step size (default 1) -//' @param tol Tolerance for update error used for stopping criterion (default 1e-5) -//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) -//' @param maxIter Upper bound of optimization iterations (default 50) -//' @param attempts Number of tryes with new random optimization starting points (default 10) -//' -//' @return List containing the bandwidth \code{h}, optimization objective \code{V} -//' and the matrix \code{B} estimated for the model as a orthogonal basis of the -//' orthogonal space spaned by \code{V}. -//' -//' @rdname cve_cpp -//' @export -// [[Rcpp::export]] -Rcpp::List cve_cpp( - const arma::mat& X, - const arma::vec& Y, - const int k, - const double nObs, - const double tauInitial = 1., - const double tol = 1e-5, - const double slack = -1e-10, - const int maxIter = 50, - const int attempts = 10 -) { - using namespace arma; - - // tracker of current best results - mat V_best; - double loss_best = datum::inf; - // estimate bandwidth - double h = estimateBandwidth(X, k, nObs); - - // loss history for each optimization attempt - // each column contaions the iteration history for the loss - mat history = mat(maxIter + 1, attempts); - - // multiple stiefel optimization attempts - for (int i = 0; i < attempts; ++i) { - // declare output variables - mat V; // estimated `V` space - vec hist = vec(history.n_rows, fill::zeros); // optimization history - double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist); - if (loss < loss_best) { - loss_best = loss; - V_best = V; - } - // write history to history collection - history.col(i) = hist; - } - - // get `B` as kernal of `V.t()` - mat B = null(V_best.t()); - - return Rcpp::List::create( - Rcpp::Named("history") = history, - Rcpp::Named("loss") = loss_best, - Rcpp::Named("h") = h, - Rcpp::Named("V") = V_best, - Rcpp::Named("B") = B - ); -} - -/*** R - -source("CVE/R/datasets.R") -ds <- dataset() - -print(system.time( - cve.res <- cve_cpp( - X = ds$X, - Y = ds$Y, - k = ncol(ds$B), - nObs = sqrt(nrow(ds$X)) - ) -)) - -pdf('plots/cve_V1_history.pdf') -H <- cve.res$history -H_i <- H[H[, 1] > 0, 1] -plot(1:length(H_i), H_i, - main = "History cve_V1", - xlab = "Iterations i", - ylab = expression(loss == L[n](V^{(i)})), - xlim = c(1, nrow(H)), - ylim = c(0, max(H)), - type = "l" -) -for (i in 2:ncol(H)) { - H_i <- H[H[, i] > 0, i] - lines(1:length(H_i), H_i) -} -x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) -y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) -points(x.ends, y.ends) - -*/ diff --git a/cve_V2.cpp b/cve_V2.cpp deleted file mode 100644 index f47095a..0000000 --- a/cve_V2.cpp +++ /dev/null @@ -1,367 +0,0 @@ -// -// Development file. -// -// Usage: -// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')" -// - -// only `RcppArmadillo.h` which includes `Rcpp.h` -#include - -// through the depends attribute `Rcpp` is tolled to create -// hooks for `RcppArmadillo` needed by the build process. -// -// [[Rcpp::depends(RcppArmadillo)]] - -// required for `R::qchisq()` used in `estimateBandwidth()` -#include - -//' Estimated bandwidth for CVE. -//' -//' Estimates a propper bandwidth \code{h} according -//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% -//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} -//' -//' @param X data matrix of dimension (n x p) with n data points X_i of dimension -//' q. Therefor each row represents a datapoint of dimension p. -//' @param k Guess for rank(B). -//' @param nObs Ether numeric of a function. If specified as numeric value -//' its used in the computation of the bandwidth directly. If its a function -//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not -//' supplied at all is to use \code{nObs <- nrow(x)^0.5}. -//' -//' @seealso [qchisq()] -//' -//' @export -// [[Rcpp::export]] -double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { - using namespace arma; - - uword n = X.n_rows; // nr samples - uword p = X.n_cols; // dimension of rand. var. `X` - - // column mean - mat M = mean(X); - // center `X` - mat C = X.each_row() - M; - // trace of covariance matrix, `traceSigma = Tr(C' C)` - double traceSigma = accu(C % C); - // compute estimated bandwidth - double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast(k), 1, 0); - - return 2.0 * qchi2 * traceSigma / (p * n); -} - -//' Random element from Stiefel Manifold `S(p, q)`. -//' -//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. -//' This is done by taking the Q-component of the QR decomposition -//' from a `(p, q)` Matrix with independent standart normal entries. -//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. -//' -//' @param p Row dimension -//' @param q Column dimension -//' -//' @return Matrix of dim `(p, q)`. -//' -//' @seealso -//' -//' @export -// [[Rcpp::export]] -arma::mat rStiefel(arma::uword p, arma::uword q) { - arma::mat Q, R; - arma::qr_econ(Q, R, arma::randn(p, q)); - return Q; -} - -double gradient(const arma::mat& X, - const arma::mat& X_diff, - const arma::mat& Y, - const arma::mat& Y_rep, - const arma::mat& V, - const double h, - arma::mat* G = 0 -) { - using namespace arma; - - uword n = X.n_rows; - uword p = X.n_cols; - - // orthogonal projection matrix `Q = I - VV'` for dist computation - mat Q = -(V * V.t()); - Q.diag() += 1; - // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` - vec D_vec = sum(square(X_diff * Q), 1); - mat D = reshape(D_vec, n, n); - // calc weights as `W` with `W(i, j) = w_i(V, X_j)` - mat W = exp(D / (-2.0 * h)); - // column-wise normalization via 1-norm - W = normalise(W, 1); - vec W_vec = vectorise(W); - // weighted `Y` means (first and second order) - vec y1 = W.t() * Y; - vec y2 = W.t() * square(Y); - // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)` - vec L = y2 - square(y1); - // `loss = L_n(V)` - double loss = mean(L); - // check if gradient as output variable is set - if (G != 0) { - // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V` - vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; - mat X_diff_scale = X_diff.each_col() % scale; - (*G) = X_diff_scale.t() * X_diff * V; - (*G) *= -2.0 / (h * h * n); - } - - return loss; -} - -//' Stiefel Optimization with curvilinear linesearch. -//' -//' TODO: finish doc. comment -//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces -//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} -//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% -//' span(B) = orth(span(B))}. -//' The curvilinear linesearch uses Armijo-Wolfe conditions: -// \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'} -//' \deqn{L(V(tau))' < rho_2 * L(V(0))'} -//' -//' @param X data points -//' @param Y response -//' @param k assumed \eqn{rank(B)} -//' @param nObs parameter for bandwidth estimation, typical value -//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. -//' @param tau Initial step size -//' @param tol Tolerance for update error used for stopping criterion -//' @param maxIter Upper bound of optimization iterations -//' -//' @return List containing the bandwidth \code{h}, optimization objective \code{V} -//' and the matrix \code{B} estimated for the model as a orthogonal basis of the -//' orthogonal space spaned by \code{V}. -//' -//' @rdname optStiefel -//' @keywords internal -double optStiefel( - const arma::mat& X, - const arma::vec& Y, - const int k, - const double h, - const double tauInitial, - const double tol, - const int maxIter, - const double rho1, - const double rho2, - const int maxLineSeachIter, - arma::mat& V, // out - arma::vec& history // out -) { - using namespace arma; - - // get dimensions - const uword n = X.n_rows; // nr samples - const uword p = X.n_cols; // dim of random variable `X` - const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} - - // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` - mat X_diff(n * n, p); - for (uword i = 0, k = 0; i < n; ++i) { - for (uword j = 0; j < n; ++j) { - X_diff.row(k++) = X.row(i) - X.row(j); - } - } - const vec Y_rep = repmat(Y, n, 1); - const mat I_p = eye(p, p); - const mat I_2q = eye(2 * q, 2 * q); - - // initial start value for `V` - V = rStiefel(p, q); - - // first gradient initialization - mat G; - double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); - - // set first `loss` in history - history(0) = loss; - - // main curvilinear optimization loop - double error = datum::inf; - int iter = 0; - while (iter++ < maxIter && error > tol) { - // helper matrices `lU` (linesearch U), `lV` (linesearch V) - // as describet in [Wen, Yin] Lemma 4. - mat lU = join_rows(G, V); // linesearch "U" - mat lV = join_rows(V, -1.0 * G); // linesearch "V" - // `A = G V' - V G'` - mat A = lU * lV.t(); - - // set initial step size for curvilinear line search - double tau = tauInitial, lower = 0., upper = datum::inf; - - // check if `tau` is valid for inverting - - // set line search internal gradient and loss to cycle for next iteration - mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)` - mat G_tau; // gradient of `V` at `V(tau) = V_tau` - double loss_tau; // loss (objective) at position `V(tau)` - int lsIter = 0; // linesearch iter - // start line search - do { - mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t(); - // next step `V` - V_tau = V - tau * (lU * (HV * V)); - - double LprimeV = -trace(G.t() * A * V); - - mat lB = I_p - (tau / 2.) * lU * HV; - - loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau); - - double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau)); - - // Armijo condition - if (loss_tau > loss + (rho1 * tau * LprimeV)) { - upper = tau; - tau = (lower + upper) / 2.; - // Wolfe condition - } else if (LprimeV_tau < rho2 * LprimeV) { - lower = tau; - if (upper == datum::inf) { - tau = 2. * lower; - } else { - tau = (lower + upper) / 2.; - } - } else { - break; - } - } while (++lsIter < maxLineSeachIter); - - // compute error (break condition) - // Note: `error` is the decrease of the objective `L_n(V)` and not the - // norm of the gradient as proposed in [Wen, Yin] Algorithm 1. - error = loss - loss_tau; - - // cycle `V`, `G` and `loss` for next iteration - V = V_tau; - loss = loss_tau; - G = G_tau; - - // store final `loss` - history(iter) = loss; - - } - - return loss; -} - - -//' Conditional Variance Estimation (CVE) method. -//' -//' This version uses a curvilinear linesearch for the stiefel optimization. -//' -//' @param X data points -//' @param Y response -//' @param k assumed \eqn{rank(B)} -//' @param nObs parameter for bandwidth estimation, typical value -//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. -//' @param tau Initial step size (default 1) -//' @param tol Tolerance for update error used for stopping criterion (default 1e-5) -//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) -//' @param maxIter Upper bound of optimization iterations (default 50) -//' @param attempts Number of tryes with new random optimization starting points (default 10) -//' -//' @return List containing the bandwidth \code{h}, optimization objective \code{V} -//' and the matrix \code{B} estimated for the model as a orthogonal basis of the -//' orthogonal space spaned by \code{V}. -//' -//' @rdname cve_cpp_V2 -//' @export -// [[Rcpp::export]] -Rcpp::List cve_cpp( - const arma::mat& X, - const arma::vec& Y, - const int k, - const double nObs, - const double tauInitial = 1., - const double rho1 = 0.05, - const double rho2 = 0.95, - const double tol = 1e-6, - const int maxIter = 50, - const int maxLineSeachIter = 10, - const int attempts = 10 -) { - using namespace arma; - - // tracker of current best results - mat V_best; - double loss_best = datum::inf; - // estimate bandwidth - double h = estimateBandwidth(X, k, nObs); - - // loss history for each optimization attempt - // each column contaions the iteration history for the loss - mat history = mat(maxIter + 1, attempts); - - // multiple stiefel optimization attempts - for (int i = 0; i < attempts; ++i) { - // declare output variables - mat V; // estimated `V` space - vec hist = vec(history.n_rows, fill::zeros); // optimization history - double loss = optStiefel(X, Y, k, h, - tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist - ); - if (loss < loss_best) { - loss_best = loss; - V_best = V; - } - // write history to history collection - history.col(i) = hist; - } - - // get `B` as kernal of `V.t()` - mat B = null(V_best.t()); - - return Rcpp::List::create( - Rcpp::Named("history") = history, - Rcpp::Named("loss") = loss_best, - Rcpp::Named("h") = h, - Rcpp::Named("V") = V_best, - Rcpp::Named("B") = B - ); -} - -/*** R - -source("CVE/R/datasets.R") -ds <- dataset() - -print(system.time( - cve.res <- cve_cpp( - X = ds$X, - Y = ds$Y, - k = ncol(ds$B), - nObs = sqrt(nrow(ds$X)) - ) -)) - -pdf('plots/cve_V2_history.pdf') -H <- cve.res$history -H_i <- H[H[, 1] > 0, 1] -plot(1:length(H_i), H_i, - main = "History cve_V2", - xlab = "Iterations i", - ylab = expression(loss == L[n](V^{(i)})), - xlim = c(1, nrow(H)), - ylim = c(0, max(H)), - type = "l" -) -for (i in 2:ncol(H)) { - H_i <- H[H[, i] > 0, i] - lines(1:length(H_i), H_i) -} -x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) -y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) -points(x.ends, y.ends) - -*/ diff --git a/runtime_test.R b/runtime_test.R index 5f66084..c0ae73b 100644 --- a/runtime_test.R +++ b/runtime_test.R @@ -1,5 +1,7 @@ # Usage: # ~$ Rscript runtime_test.R +# library(CVEpureR) # load CVE's pure R implementation +library(CVE) # load CVE #' Writes log information to console. (to not get bored^^) tell.user <- function(name, start.time, i, length) { @@ -24,7 +26,6 @@ dataset.names <- c("M1", "M2", "M3", "M4", "M5") # Set used CVE method methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch") -library(CVE) # load CVE if ("legacy" %in% methods) { # Source legacy code (but only if needed) source("CVE_legacy/function_script.R")