wip: cleanup
This commit is contained in:
parent
03153ef05e
commit
6f46cb2eaf
|
@ -134,8 +134,8 @@ cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5,
|
||||||
}
|
}
|
||||||
|
|
||||||
# augment result information
|
# augment result information
|
||||||
dr.k$method <- method
|
dr$method <- method
|
||||||
dr.k$call <- call
|
dr$call <- call
|
||||||
class(dr) <- "cve"
|
class(dr) <- "cve"
|
||||||
return(dr)
|
return(dr)
|
||||||
}
|
}
|
||||||
|
|
173
cve_V0.R
173
cve_V0.R
|
@ -1,173 +0,0 @@
|
||||||
|
|
||||||
#' Euclidean vector norm (2-norm)
|
|
||||||
#'
|
|
||||||
#' @param x Numeric vector
|
|
||||||
#' @return Numeric
|
|
||||||
norm2 <- function(x) { return(sum(x^2)) }
|
|
||||||
|
|
||||||
#' Samples uniform from the Stiefel Manifold
|
|
||||||
#'
|
|
||||||
#' @param p row dim.
|
|
||||||
#' @param q col dim.
|
|
||||||
#' @return `(p, q)` semi-orthogonal matrix
|
|
||||||
rStiefl <- function(p, q) {
|
|
||||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Matrix Trace
|
|
||||||
#'
|
|
||||||
#' @param M Square matrix
|
|
||||||
#' @return Trace \eqn{Tr(M)}
|
|
||||||
Tr <- function(M) {
|
|
||||||
return(sum(diag(M)))
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Null space basis of given matrix `B`
|
|
||||||
#'
|
|
||||||
#' @param B `(p, q)` matrix
|
|
||||||
#' @return Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B`
|
|
||||||
null <- function(M) {
|
|
||||||
tmp <- qr(M)
|
|
||||||
set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank)
|
|
||||||
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
|
||||||
}
|
|
||||||
|
|
||||||
####
|
|
||||||
#chooses bandwith h according to formula in paper
|
|
||||||
#dim...dimension of X vector
|
|
||||||
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
|
||||||
# N...sample size
|
|
||||||
#nObs... nObs in bandwith formula
|
|
||||||
#tr...trace of sample covariance matrix of X
|
|
||||||
estimateBandwidth<-function(X, k, nObs) {
|
|
||||||
n <- nrow(X)
|
|
||||||
p <- ncol(X)
|
|
||||||
|
|
||||||
X_centered <- scale(X, center = TRUE, scale = FALSE)
|
|
||||||
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
|
||||||
|
|
||||||
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
|
||||||
return(2 * quantil * Tr(Sigma) / p)
|
|
||||||
}
|
|
||||||
|
|
||||||
###########
|
|
||||||
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
|
||||||
# V... (dim times q) matrix
|
|
||||||
# Xl... output of Xl_fun
|
|
||||||
# dtemp...vector with pairwise distances |X_i - X_j|
|
|
||||||
# q...output of q_ind function
|
|
||||||
# Y... vector with N Y_i values
|
|
||||||
# if grad=T, gradient of L(V) also returned
|
|
||||||
LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) {
|
|
||||||
N <- length(Y)
|
|
||||||
k <- if (is.vector(V)) { 1 } else { ncol(V) }
|
|
||||||
Xlv <- Xl %*% V
|
|
||||||
d <- dtemp - ((Xlv^2) %*% rep(1, k))
|
|
||||||
w <- dnorm(d / h) / dnorm(0)
|
|
||||||
w <- matrix(w, N, q)
|
|
||||||
w <- apply(w, 2, function(x) { x / sum(x) })
|
|
||||||
y1 <- t(w) %*% Y
|
|
||||||
y2 <- t(w) %*% (Y^2)
|
|
||||||
sig <- y2 - y1^2
|
|
||||||
|
|
||||||
result <- list(var = mean(sig), sig = sig)
|
|
||||||
if (grad == TRUE) {
|
|
||||||
tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2)
|
|
||||||
if (k == 1) {
|
|
||||||
grad_d <- -2 * Xl * as.vector(Xlv)
|
|
||||||
grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
|
||||||
} else {
|
|
||||||
grad <- matrix(0, nrow(V), ncol(V))
|
|
||||||
for (j in 1:k) {
|
|
||||||
grad_d <- -2 * Xl * as.vector(Xlv[ ,j])
|
|
||||||
grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
result$grad = grad
|
|
||||||
}
|
|
||||||
|
|
||||||
return(result)
|
|
||||||
}
|
|
||||||
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
|
||||||
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
|
||||||
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
|
||||||
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
|
||||||
#h... bandwidth
|
|
||||||
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
|
||||||
#k0... number of arbitrary starting values
|
|
||||||
#p...fraction of data points used as shifting point
|
|
||||||
#maxIter... number of maximal iterations in curvilinear search
|
|
||||||
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
|
||||||
#lambda_0...initial stepsize
|
|
||||||
#tol...tolerance for stoping iterations
|
|
||||||
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
|
||||||
#output:
|
|
||||||
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
|
||||||
#var...value of L_n(Vhat_k)
|
|
||||||
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
|
||||||
#count...number of iterations
|
|
||||||
#h...bandwidth
|
|
||||||
cve_R <- function(
|
|
||||||
X, Y, k,
|
|
||||||
nObs = sqrt(nrow(X)),
|
|
||||||
tauInitial = 1.0,
|
|
||||||
tol = 1e-3,
|
|
||||||
slack = 0,
|
|
||||||
maxIter = 50L,
|
|
||||||
attempts = 10L
|
|
||||||
) {
|
|
||||||
# get dimensions
|
|
||||||
n <- nrow(X)
|
|
||||||
p <- ncol(X)
|
|
||||||
q <- p - k
|
|
||||||
|
|
||||||
Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
|
|
||||||
Xd <- apply(Xl, 1, norm2)
|
|
||||||
I_p <- diag(1, p)
|
|
||||||
# estimate bandwidth
|
|
||||||
h <- estimateBandwidth(X, k, nObs)
|
|
||||||
|
|
||||||
Lbest <- Inf
|
|
||||||
Vend <- mat.or.vec(p, q)
|
|
||||||
|
|
||||||
for (. in 1:attempts) {
|
|
||||||
Vnew <- Vold <- rStiefl(p, q)
|
|
||||||
Lnew <- Lold <- exp(10000)
|
|
||||||
tau <- tauInitial
|
|
||||||
|
|
||||||
error <- Inf
|
|
||||||
count <- 0
|
|
||||||
while (error > tol & count < maxIter) {
|
|
||||||
|
|
||||||
tmp <- LV(Vold, Xl, Xd, h, n, Y)
|
|
||||||
G <- tmp$grad
|
|
||||||
Lold <- tmp$var
|
|
||||||
|
|
||||||
W <- tau * (G %*% t(Vold) - Vold %*% t(G))
|
|
||||||
Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold
|
|
||||||
|
|
||||||
Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var
|
|
||||||
|
|
||||||
if ((Lnew - Lold) > slack * Lold) {
|
|
||||||
tau = tau / 2
|
|
||||||
error <- Inf
|
|
||||||
} else {
|
|
||||||
error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k)
|
|
||||||
Vold <- Vnew
|
|
||||||
}
|
|
||||||
count <- count + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Lbest > Lnew) {
|
|
||||||
Lbest <- Lnew
|
|
||||||
Vend <- Vnew
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return(list(
|
|
||||||
loss = Lbest,
|
|
||||||
V = Vend,
|
|
||||||
B = null(Vend),
|
|
||||||
h = h
|
|
||||||
))
|
|
||||||
}
|
|
330
cve_V1.cpp
330
cve_V1.cpp
|
@ -1,330 +0,0 @@
|
||||||
//
|
|
||||||
// Development file.
|
|
||||||
//
|
|
||||||
// Usage:
|
|
||||||
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
|
|
||||||
//
|
|
||||||
|
|
||||||
// only `RcppArmadillo.h` which includes `Rcpp.h`
|
|
||||||
#include <RcppArmadillo.h>
|
|
||||||
|
|
||||||
// through the depends attribute `Rcpp` is tolled to create
|
|
||||||
// hooks for `RcppArmadillo` needed by the build process.
|
|
||||||
//
|
|
||||||
// [[Rcpp::depends(RcppArmadillo)]]
|
|
||||||
|
|
||||||
// required for `R::qchisq()` used in `estimateBandwidth()`
|
|
||||||
#include <Rmath.h>
|
|
||||||
|
|
||||||
//' Estimated bandwidth for CVE.
|
|
||||||
//'
|
|
||||||
//' Estimates a propper bandwidth \code{h} according
|
|
||||||
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
|
||||||
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
|
||||||
//'
|
|
||||||
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
|
|
||||||
//' q. Therefor each row represents a datapoint of dimension p.
|
|
||||||
//' @param k Guess for rank(B).
|
|
||||||
//' @param nObs Ether numeric of a function. If specified as numeric value
|
|
||||||
//' its used in the computation of the bandwidth directly. If its a function
|
|
||||||
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
|
||||||
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
|
||||||
//'
|
|
||||||
//' @seealso [qchisq()]
|
|
||||||
//'
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
uword n = X.n_rows; // nr samples
|
|
||||||
uword p = X.n_cols; // dimension of rand. var. `X`
|
|
||||||
|
|
||||||
// column mean
|
|
||||||
mat M = mean(X);
|
|
||||||
// center `X`
|
|
||||||
mat C = X.each_row() - M;
|
|
||||||
// trace of covariance matrix, `traceSigma = Tr(C' C)`
|
|
||||||
double traceSigma = accu(C % C);
|
|
||||||
// compute estimated bandwidth
|
|
||||||
double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
|
|
||||||
|
|
||||||
return 2.0 * qchi2 * traceSigma / (p * n);
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Random element from Stiefel Manifold `S(p, q)`.
|
|
||||||
//'
|
|
||||||
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
|
|
||||||
//' This is done by taking the Q-component of the QR decomposition
|
|
||||||
//' from a `(p, q)` Matrix with independent standart normal entries.
|
|
||||||
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
|
|
||||||
//'
|
|
||||||
//' @param p Row dimension
|
|
||||||
//' @param q Column dimension
|
|
||||||
//'
|
|
||||||
//' @return Matrix of dim `(p, q)`.
|
|
||||||
//'
|
|
||||||
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
|
|
||||||
//'
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
arma::mat rStiefel(arma::uword p, arma::uword q) {
|
|
||||||
arma::mat Q, R;
|
|
||||||
arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
|
|
||||||
return Q;
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Gradient computation of the loss `L_n(V)`.
|
|
||||||
//'
|
|
||||||
//' The loss is defined as
|
|
||||||
//' \deqn{L_n(V) := \frac{1}{n}\sum_{j=1}^n (y_2(V, X_j) - y_1(V, X_j)^2)}
|
|
||||||
//' with
|
|
||||||
//' \deqn{y_l(s_0) := \sum_{i=1}^n w_i(V, s_0)Y_i^l}{y_l(s_0) := sum_i(w_i(V, s_0) Y_i^l)}
|
|
||||||
//'
|
|
||||||
//' @rdname optStiefel
|
|
||||||
//' @keywords internal
|
|
||||||
double gradient(const arma::mat& X,
|
|
||||||
const arma::mat& X_diff,
|
|
||||||
const arma::mat& Y,
|
|
||||||
const arma::mat& Y_rep,
|
|
||||||
const arma::mat& V,
|
|
||||||
const double h,
|
|
||||||
arma::mat* G = 0
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
uword n = X.n_rows;
|
|
||||||
uword p = X.n_cols;
|
|
||||||
|
|
||||||
// orthogonal projection matrix `Q = I - VV'` for dist computation
|
|
||||||
mat Q = -(V * V.t());
|
|
||||||
Q.diag() += 1;
|
|
||||||
// calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
|
|
||||||
vec D_vec = sum(square(X_diff * Q), 1);
|
|
||||||
mat D = reshape(D_vec, n, n);
|
|
||||||
// calc weights as `W` with `W(i, j) = w_i(V, X_j)`
|
|
||||||
mat W = exp(D / (-2.0 * h));
|
|
||||||
// column-wise normalization via 1-norm
|
|
||||||
W = normalise(W, 1);
|
|
||||||
vec W_vec = vectorise(W);
|
|
||||||
// weighted `Y` means (first and second order)
|
|
||||||
vec y1 = W.t() * Y;
|
|
||||||
vec y2 = W.t() * square(Y);
|
|
||||||
// loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
|
|
||||||
vec L = y2 - square(y1);
|
|
||||||
// `loss = L_n(V)`
|
|
||||||
double loss = mean(L);
|
|
||||||
// check if gradient as output variable is set
|
|
||||||
if (G != 0) {
|
|
||||||
// `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
|
|
||||||
vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
|
|
||||||
mat X_diff_scale = X_diff.each_col() % scale;
|
|
||||||
(*G) = X_diff_scale.t() * X_diff * V;
|
|
||||||
(*G) *= -2.0 / (h * h * n);
|
|
||||||
}
|
|
||||||
|
|
||||||
return loss;
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Stiefel Optimization.
|
|
||||||
//'
|
|
||||||
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
|
|
||||||
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
|
||||||
//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
|
|
||||||
//' span(B) = orth(span(B))}.
|
|
||||||
//'
|
|
||||||
//' @param X data points
|
|
||||||
//' @param Y response
|
|
||||||
//' @param k assumed \eqn{rank(B)}
|
|
||||||
//' @param nObs parameter for bandwidth estimation, typical value
|
|
||||||
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
|
||||||
//' @param tau Initial step size
|
|
||||||
//' @param tol Tolerance for update error used for stopping criterion
|
|
||||||
//' \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{%
|
|
||||||
//' \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.
|
|
||||||
//' @param maxIter Upper bound of optimization iterations
|
|
||||||
//'
|
|
||||||
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
|
||||||
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
|
||||||
//' orthogonal space spaned by \code{V}.
|
|
||||||
//'
|
|
||||||
//' @rdname optStiefel
|
|
||||||
//' @keywords internal
|
|
||||||
double optStiefel(
|
|
||||||
const arma::mat& X,
|
|
||||||
const arma::vec& Y,
|
|
||||||
const int k,
|
|
||||||
const double h,
|
|
||||||
const double tauInitial,
|
|
||||||
const double tol,
|
|
||||||
const double slack,
|
|
||||||
const int maxIter,
|
|
||||||
arma::mat& V, // out
|
|
||||||
arma::vec& history // out
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
// get dimensions
|
|
||||||
const uword n = X.n_rows; // nr samples
|
|
||||||
const uword p = X.n_cols; // dim of random variable `X`
|
|
||||||
const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B}
|
|
||||||
|
|
||||||
// all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
|
|
||||||
mat X_diff(n * n, p);
|
|
||||||
for (uword i = 0, k = 0; i < n; ++i) {
|
|
||||||
for (uword j = 0; j < n; ++j) {
|
|
||||||
X_diff.row(k++) = X.row(i) - X.row(j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const vec Y_rep = repmat(Y, n, 1);
|
|
||||||
const mat I_p = eye<mat>(p, p);
|
|
||||||
|
|
||||||
// initial start value for `V`
|
|
||||||
V = rStiefel(p, q);
|
|
||||||
|
|
||||||
// init optimization `loss`es, `error` and stepsize `tau`
|
|
||||||
// double loss_next = datum::inf;
|
|
||||||
double loss;
|
|
||||||
double error = datum::inf;
|
|
||||||
double tau = tauInitial;
|
|
||||||
int iter;
|
|
||||||
// main optimization loop
|
|
||||||
for (iter = 0; iter < maxIter && error > tol; ++iter) {
|
|
||||||
// calc gradient `G = grad_V(L)(V)`
|
|
||||||
mat G;
|
|
||||||
loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
|
|
||||||
// matrix `A` for colescy-transform of the gradient
|
|
||||||
mat A = tau * ((G * V.t()) - (V * G.t()));
|
|
||||||
// next iteration step of `V`
|
|
||||||
mat V_tau = inv(I_p + A) * (I_p - A) * V;
|
|
||||||
// loss after step `L(V(tau))`
|
|
||||||
double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);
|
|
||||||
|
|
||||||
// store `loss` in `history` and increase `iter`
|
|
||||||
history(iter) = loss;
|
|
||||||
|
|
||||||
// validate if loss decreased
|
|
||||||
if ((loss_tau - loss) > slack * loss) {
|
|
||||||
// ignore step, retry with half the step size
|
|
||||||
tau = tau / 2.;
|
|
||||||
error = datum::inf;
|
|
||||||
} else {
|
|
||||||
// compute step error (break condition)
|
|
||||||
error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q);
|
|
||||||
// shift for next iteration
|
|
||||||
V = V_tau;
|
|
||||||
loss = loss_tau;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// store final `loss`
|
|
||||||
history(iter) = loss;
|
|
||||||
|
|
||||||
return loss;
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Conditional Variance Estimation (CVE) method.
|
|
||||||
//'
|
|
||||||
//' This version uses a "simple" stiefel optimization schema.
|
|
||||||
//'
|
|
||||||
//' @param X data points
|
|
||||||
//' @param Y response
|
|
||||||
//' @param k assumed \eqn{rank(B)}
|
|
||||||
//' @param nObs parameter for bandwidth estimation, typical value
|
|
||||||
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
|
||||||
//' @param tau Initial step size (default 1)
|
|
||||||
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
|
||||||
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
|
||||||
//' @param maxIter Upper bound of optimization iterations (default 50)
|
|
||||||
//' @param attempts Number of tryes with new random optimization starting points (default 10)
|
|
||||||
//'
|
|
||||||
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
|
||||||
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
|
||||||
//' orthogonal space spaned by \code{V}.
|
|
||||||
//'
|
|
||||||
//' @rdname cve_cpp
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
Rcpp::List cve_cpp(
|
|
||||||
const arma::mat& X,
|
|
||||||
const arma::vec& Y,
|
|
||||||
const int k,
|
|
||||||
const double nObs,
|
|
||||||
const double tauInitial = 1.,
|
|
||||||
const double tol = 1e-5,
|
|
||||||
const double slack = -1e-10,
|
|
||||||
const int maxIter = 50,
|
|
||||||
const int attempts = 10
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
// tracker of current best results
|
|
||||||
mat V_best;
|
|
||||||
double loss_best = datum::inf;
|
|
||||||
// estimate bandwidth
|
|
||||||
double h = estimateBandwidth(X, k, nObs);
|
|
||||||
|
|
||||||
// loss history for each optimization attempt
|
|
||||||
// each column contaions the iteration history for the loss
|
|
||||||
mat history = mat(maxIter + 1, attempts);
|
|
||||||
|
|
||||||
// multiple stiefel optimization attempts
|
|
||||||
for (int i = 0; i < attempts; ++i) {
|
|
||||||
// declare output variables
|
|
||||||
mat V; // estimated `V` space
|
|
||||||
vec hist = vec(history.n_rows, fill::zeros); // optimization history
|
|
||||||
double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist);
|
|
||||||
if (loss < loss_best) {
|
|
||||||
loss_best = loss;
|
|
||||||
V_best = V;
|
|
||||||
}
|
|
||||||
// write history to history collection
|
|
||||||
history.col(i) = hist;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get `B` as kernal of `V.t()`
|
|
||||||
mat B = null(V_best.t());
|
|
||||||
|
|
||||||
return Rcpp::List::create(
|
|
||||||
Rcpp::Named("history") = history,
|
|
||||||
Rcpp::Named("loss") = loss_best,
|
|
||||||
Rcpp::Named("h") = h,
|
|
||||||
Rcpp::Named("V") = V_best,
|
|
||||||
Rcpp::Named("B") = B
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*** R
|
|
||||||
|
|
||||||
source("CVE/R/datasets.R")
|
|
||||||
ds <- dataset()
|
|
||||||
|
|
||||||
print(system.time(
|
|
||||||
cve.res <- cve_cpp(
|
|
||||||
X = ds$X,
|
|
||||||
Y = ds$Y,
|
|
||||||
k = ncol(ds$B),
|
|
||||||
nObs = sqrt(nrow(ds$X))
|
|
||||||
)
|
|
||||||
))
|
|
||||||
|
|
||||||
pdf('plots/cve_V1_history.pdf')
|
|
||||||
H <- cve.res$history
|
|
||||||
H_i <- H[H[, 1] > 0, 1]
|
|
||||||
plot(1:length(H_i), H_i,
|
|
||||||
main = "History cve_V1",
|
|
||||||
xlab = "Iterations i",
|
|
||||||
ylab = expression(loss == L[n](V^{(i)})),
|
|
||||||
xlim = c(1, nrow(H)),
|
|
||||||
ylim = c(0, max(H)),
|
|
||||||
type = "l"
|
|
||||||
)
|
|
||||||
for (i in 2:ncol(H)) {
|
|
||||||
H_i <- H[H[, i] > 0, i]
|
|
||||||
lines(1:length(H_i), H_i)
|
|
||||||
}
|
|
||||||
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
|
|
||||||
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
|
|
||||||
points(x.ends, y.ends)
|
|
||||||
|
|
||||||
*/
|
|
367
cve_V2.cpp
367
cve_V2.cpp
|
@ -1,367 +0,0 @@
|
||||||
//
|
|
||||||
// Development file.
|
|
||||||
//
|
|
||||||
// Usage:
|
|
||||||
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
|
|
||||||
//
|
|
||||||
|
|
||||||
// only `RcppArmadillo.h` which includes `Rcpp.h`
|
|
||||||
#include <RcppArmadillo.h>
|
|
||||||
|
|
||||||
// through the depends attribute `Rcpp` is tolled to create
|
|
||||||
// hooks for `RcppArmadillo` needed by the build process.
|
|
||||||
//
|
|
||||||
// [[Rcpp::depends(RcppArmadillo)]]
|
|
||||||
|
|
||||||
// required for `R::qchisq()` used in `estimateBandwidth()`
|
|
||||||
#include <Rmath.h>
|
|
||||||
|
|
||||||
//' Estimated bandwidth for CVE.
|
|
||||||
//'
|
|
||||||
//' Estimates a propper bandwidth \code{h} according
|
|
||||||
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
|
||||||
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
|
||||||
//'
|
|
||||||
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
|
|
||||||
//' q. Therefor each row represents a datapoint of dimension p.
|
|
||||||
//' @param k Guess for rank(B).
|
|
||||||
//' @param nObs Ether numeric of a function. If specified as numeric value
|
|
||||||
//' its used in the computation of the bandwidth directly. If its a function
|
|
||||||
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
|
||||||
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
|
||||||
//'
|
|
||||||
//' @seealso [qchisq()]
|
|
||||||
//'
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
uword n = X.n_rows; // nr samples
|
|
||||||
uword p = X.n_cols; // dimension of rand. var. `X`
|
|
||||||
|
|
||||||
// column mean
|
|
||||||
mat M = mean(X);
|
|
||||||
// center `X`
|
|
||||||
mat C = X.each_row() - M;
|
|
||||||
// trace of covariance matrix, `traceSigma = Tr(C' C)`
|
|
||||||
double traceSigma = accu(C % C);
|
|
||||||
// compute estimated bandwidth
|
|
||||||
double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
|
|
||||||
|
|
||||||
return 2.0 * qchi2 * traceSigma / (p * n);
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Random element from Stiefel Manifold `S(p, q)`.
|
|
||||||
//'
|
|
||||||
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
|
|
||||||
//' This is done by taking the Q-component of the QR decomposition
|
|
||||||
//' from a `(p, q)` Matrix with independent standart normal entries.
|
|
||||||
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
|
|
||||||
//'
|
|
||||||
//' @param p Row dimension
|
|
||||||
//' @param q Column dimension
|
|
||||||
//'
|
|
||||||
//' @return Matrix of dim `(p, q)`.
|
|
||||||
//'
|
|
||||||
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
|
|
||||||
//'
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
arma::mat rStiefel(arma::uword p, arma::uword q) {
|
|
||||||
arma::mat Q, R;
|
|
||||||
arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
|
|
||||||
return Q;
|
|
||||||
}
|
|
||||||
|
|
||||||
double gradient(const arma::mat& X,
|
|
||||||
const arma::mat& X_diff,
|
|
||||||
const arma::mat& Y,
|
|
||||||
const arma::mat& Y_rep,
|
|
||||||
const arma::mat& V,
|
|
||||||
const double h,
|
|
||||||
arma::mat* G = 0
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
uword n = X.n_rows;
|
|
||||||
uword p = X.n_cols;
|
|
||||||
|
|
||||||
// orthogonal projection matrix `Q = I - VV'` for dist computation
|
|
||||||
mat Q = -(V * V.t());
|
|
||||||
Q.diag() += 1;
|
|
||||||
// calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
|
|
||||||
vec D_vec = sum(square(X_diff * Q), 1);
|
|
||||||
mat D = reshape(D_vec, n, n);
|
|
||||||
// calc weights as `W` with `W(i, j) = w_i(V, X_j)`
|
|
||||||
mat W = exp(D / (-2.0 * h));
|
|
||||||
// column-wise normalization via 1-norm
|
|
||||||
W = normalise(W, 1);
|
|
||||||
vec W_vec = vectorise(W);
|
|
||||||
// weighted `Y` means (first and second order)
|
|
||||||
vec y1 = W.t() * Y;
|
|
||||||
vec y2 = W.t() * square(Y);
|
|
||||||
// loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
|
|
||||||
vec L = y2 - square(y1);
|
|
||||||
// `loss = L_n(V)`
|
|
||||||
double loss = mean(L);
|
|
||||||
// check if gradient as output variable is set
|
|
||||||
if (G != 0) {
|
|
||||||
// `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
|
|
||||||
vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
|
|
||||||
mat X_diff_scale = X_diff.each_col() % scale;
|
|
||||||
(*G) = X_diff_scale.t() * X_diff * V;
|
|
||||||
(*G) *= -2.0 / (h * h * n);
|
|
||||||
}
|
|
||||||
|
|
||||||
return loss;
|
|
||||||
}
|
|
||||||
|
|
||||||
//' Stiefel Optimization with curvilinear linesearch.
|
|
||||||
//'
|
|
||||||
//' TODO: finish doc. comment
|
|
||||||
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
|
|
||||||
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
|
||||||
//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
|
|
||||||
//' span(B) = orth(span(B))}.
|
|
||||||
//' The curvilinear linesearch uses Armijo-Wolfe conditions:
|
|
||||||
// \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'}
|
|
||||||
//' \deqn{L(V(tau))' < rho_2 * L(V(0))'}
|
|
||||||
//'
|
|
||||||
//' @param X data points
|
|
||||||
//' @param Y response
|
|
||||||
//' @param k assumed \eqn{rank(B)}
|
|
||||||
//' @param nObs parameter for bandwidth estimation, typical value
|
|
||||||
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
|
||||||
//' @param tau Initial step size
|
|
||||||
//' @param tol Tolerance for update error used for stopping criterion
|
|
||||||
//' @param maxIter Upper bound of optimization iterations
|
|
||||||
//'
|
|
||||||
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
|
||||||
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
|
||||||
//' orthogonal space spaned by \code{V}.
|
|
||||||
//'
|
|
||||||
//' @rdname optStiefel
|
|
||||||
//' @keywords internal
|
|
||||||
double optStiefel(
|
|
||||||
const arma::mat& X,
|
|
||||||
const arma::vec& Y,
|
|
||||||
const int k,
|
|
||||||
const double h,
|
|
||||||
const double tauInitial,
|
|
||||||
const double tol,
|
|
||||||
const int maxIter,
|
|
||||||
const double rho1,
|
|
||||||
const double rho2,
|
|
||||||
const int maxLineSeachIter,
|
|
||||||
arma::mat& V, // out
|
|
||||||
arma::vec& history // out
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
// get dimensions
|
|
||||||
const uword n = X.n_rows; // nr samples
|
|
||||||
const uword p = X.n_cols; // dim of random variable `X`
|
|
||||||
const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B}
|
|
||||||
|
|
||||||
// all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
|
|
||||||
mat X_diff(n * n, p);
|
|
||||||
for (uword i = 0, k = 0; i < n; ++i) {
|
|
||||||
for (uword j = 0; j < n; ++j) {
|
|
||||||
X_diff.row(k++) = X.row(i) - X.row(j);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const vec Y_rep = repmat(Y, n, 1);
|
|
||||||
const mat I_p = eye<mat>(p, p);
|
|
||||||
const mat I_2q = eye<mat>(2 * q, 2 * q);
|
|
||||||
|
|
||||||
// initial start value for `V`
|
|
||||||
V = rStiefel(p, q);
|
|
||||||
|
|
||||||
// first gradient initialization
|
|
||||||
mat G;
|
|
||||||
double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
|
|
||||||
|
|
||||||
// set first `loss` in history
|
|
||||||
history(0) = loss;
|
|
||||||
|
|
||||||
// main curvilinear optimization loop
|
|
||||||
double error = datum::inf;
|
|
||||||
int iter = 0;
|
|
||||||
while (iter++ < maxIter && error > tol) {
|
|
||||||
// helper matrices `lU` (linesearch U), `lV` (linesearch V)
|
|
||||||
// as describet in [Wen, Yin] Lemma 4.
|
|
||||||
mat lU = join_rows(G, V); // linesearch "U"
|
|
||||||
mat lV = join_rows(V, -1.0 * G); // linesearch "V"
|
|
||||||
// `A = G V' - V G'`
|
|
||||||
mat A = lU * lV.t();
|
|
||||||
|
|
||||||
// set initial step size for curvilinear line search
|
|
||||||
double tau = tauInitial, lower = 0., upper = datum::inf;
|
|
||||||
|
|
||||||
// check if `tau` is valid for inverting
|
|
||||||
|
|
||||||
// set line search internal gradient and loss to cycle for next iteration
|
|
||||||
mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)`
|
|
||||||
mat G_tau; // gradient of `V` at `V(tau) = V_tau`
|
|
||||||
double loss_tau; // loss (objective) at position `V(tau)`
|
|
||||||
int lsIter = 0; // linesearch iter
|
|
||||||
// start line search
|
|
||||||
do {
|
|
||||||
mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t();
|
|
||||||
// next step `V`
|
|
||||||
V_tau = V - tau * (lU * (HV * V));
|
|
||||||
|
|
||||||
double LprimeV = -trace(G.t() * A * V);
|
|
||||||
|
|
||||||
mat lB = I_p - (tau / 2.) * lU * HV;
|
|
||||||
|
|
||||||
loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau);
|
|
||||||
|
|
||||||
double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau));
|
|
||||||
|
|
||||||
// Armijo condition
|
|
||||||
if (loss_tau > loss + (rho1 * tau * LprimeV)) {
|
|
||||||
upper = tau;
|
|
||||||
tau = (lower + upper) / 2.;
|
|
||||||
// Wolfe condition
|
|
||||||
} else if (LprimeV_tau < rho2 * LprimeV) {
|
|
||||||
lower = tau;
|
|
||||||
if (upper == datum::inf) {
|
|
||||||
tau = 2. * lower;
|
|
||||||
} else {
|
|
||||||
tau = (lower + upper) / 2.;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} while (++lsIter < maxLineSeachIter);
|
|
||||||
|
|
||||||
// compute error (break condition)
|
|
||||||
// Note: `error` is the decrease of the objective `L_n(V)` and not the
|
|
||||||
// norm of the gradient as proposed in [Wen, Yin] Algorithm 1.
|
|
||||||
error = loss - loss_tau;
|
|
||||||
|
|
||||||
// cycle `V`, `G` and `loss` for next iteration
|
|
||||||
V = V_tau;
|
|
||||||
loss = loss_tau;
|
|
||||||
G = G_tau;
|
|
||||||
|
|
||||||
// store final `loss`
|
|
||||||
history(iter) = loss;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return loss;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//' Conditional Variance Estimation (CVE) method.
|
|
||||||
//'
|
|
||||||
//' This version uses a curvilinear linesearch for the stiefel optimization.
|
|
||||||
//'
|
|
||||||
//' @param X data points
|
|
||||||
//' @param Y response
|
|
||||||
//' @param k assumed \eqn{rank(B)}
|
|
||||||
//' @param nObs parameter for bandwidth estimation, typical value
|
|
||||||
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
|
||||||
//' @param tau Initial step size (default 1)
|
|
||||||
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
|
||||||
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
|
||||||
//' @param maxIter Upper bound of optimization iterations (default 50)
|
|
||||||
//' @param attempts Number of tryes with new random optimization starting points (default 10)
|
|
||||||
//'
|
|
||||||
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
|
||||||
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
|
||||||
//' orthogonal space spaned by \code{V}.
|
|
||||||
//'
|
|
||||||
//' @rdname cve_cpp_V2
|
|
||||||
//' @export
|
|
||||||
// [[Rcpp::export]]
|
|
||||||
Rcpp::List cve_cpp(
|
|
||||||
const arma::mat& X,
|
|
||||||
const arma::vec& Y,
|
|
||||||
const int k,
|
|
||||||
const double nObs,
|
|
||||||
const double tauInitial = 1.,
|
|
||||||
const double rho1 = 0.05,
|
|
||||||
const double rho2 = 0.95,
|
|
||||||
const double tol = 1e-6,
|
|
||||||
const int maxIter = 50,
|
|
||||||
const int maxLineSeachIter = 10,
|
|
||||||
const int attempts = 10
|
|
||||||
) {
|
|
||||||
using namespace arma;
|
|
||||||
|
|
||||||
// tracker of current best results
|
|
||||||
mat V_best;
|
|
||||||
double loss_best = datum::inf;
|
|
||||||
// estimate bandwidth
|
|
||||||
double h = estimateBandwidth(X, k, nObs);
|
|
||||||
|
|
||||||
// loss history for each optimization attempt
|
|
||||||
// each column contaions the iteration history for the loss
|
|
||||||
mat history = mat(maxIter + 1, attempts);
|
|
||||||
|
|
||||||
// multiple stiefel optimization attempts
|
|
||||||
for (int i = 0; i < attempts; ++i) {
|
|
||||||
// declare output variables
|
|
||||||
mat V; // estimated `V` space
|
|
||||||
vec hist = vec(history.n_rows, fill::zeros); // optimization history
|
|
||||||
double loss = optStiefel(X, Y, k, h,
|
|
||||||
tauInitial, tol, maxIter, rho1, rho2, maxLineSeachIter, V, hist
|
|
||||||
);
|
|
||||||
if (loss < loss_best) {
|
|
||||||
loss_best = loss;
|
|
||||||
V_best = V;
|
|
||||||
}
|
|
||||||
// write history to history collection
|
|
||||||
history.col(i) = hist;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get `B` as kernal of `V.t()`
|
|
||||||
mat B = null(V_best.t());
|
|
||||||
|
|
||||||
return Rcpp::List::create(
|
|
||||||
Rcpp::Named("history") = history,
|
|
||||||
Rcpp::Named("loss") = loss_best,
|
|
||||||
Rcpp::Named("h") = h,
|
|
||||||
Rcpp::Named("V") = V_best,
|
|
||||||
Rcpp::Named("B") = B
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*** R
|
|
||||||
|
|
||||||
source("CVE/R/datasets.R")
|
|
||||||
ds <- dataset()
|
|
||||||
|
|
||||||
print(system.time(
|
|
||||||
cve.res <- cve_cpp(
|
|
||||||
X = ds$X,
|
|
||||||
Y = ds$Y,
|
|
||||||
k = ncol(ds$B),
|
|
||||||
nObs = sqrt(nrow(ds$X))
|
|
||||||
)
|
|
||||||
))
|
|
||||||
|
|
||||||
pdf('plots/cve_V2_history.pdf')
|
|
||||||
H <- cve.res$history
|
|
||||||
H_i <- H[H[, 1] > 0, 1]
|
|
||||||
plot(1:length(H_i), H_i,
|
|
||||||
main = "History cve_V2",
|
|
||||||
xlab = "Iterations i",
|
|
||||||
ylab = expression(loss == L[n](V^{(i)})),
|
|
||||||
xlim = c(1, nrow(H)),
|
|
||||||
ylim = c(0, max(H)),
|
|
||||||
type = "l"
|
|
||||||
)
|
|
||||||
for (i in 2:ncol(H)) {
|
|
||||||
H_i <- H[H[, i] > 0, i]
|
|
||||||
lines(1:length(H_i), H_i)
|
|
||||||
}
|
|
||||||
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
|
|
||||||
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
|
|
||||||
points(x.ends, y.ends)
|
|
||||||
|
|
||||||
*/
|
|
|
@ -1,5 +1,7 @@
|
||||||
# Usage:
|
# Usage:
|
||||||
# ~$ Rscript runtime_test.R
|
# ~$ Rscript runtime_test.R
|
||||||
|
# library(CVEpureR) # load CVE's pure R implementation
|
||||||
|
library(CVE) # load CVE
|
||||||
|
|
||||||
#' Writes log information to console. (to not get bored^^)
|
#' Writes log information to console. (to not get bored^^)
|
||||||
tell.user <- function(name, start.time, i, length) {
|
tell.user <- function(name, start.time, i, length) {
|
||||||
|
@ -24,7 +26,6 @@ dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||||
# Set used CVE method
|
# Set used CVE method
|
||||||
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
||||||
|
|
||||||
library(CVE) # load CVE
|
|
||||||
if ("legacy" %in% methods) {
|
if ("legacy" %in% methods) {
|
||||||
# Source legacy code (but only if needed)
|
# Source legacy code (but only if needed)
|
||||||
source("CVE_legacy/function_script.R")
|
source("CVE_legacy/function_script.R")
|
||||||
|
|
Loading…
Reference in New Issue