add: runtime test,
add: new CVE pure R implementation, fix: small adaptation of legacy code to make it run, wip: ...
parent a796a38799
commit 7155d0e9db
@ -5,7 +5,7 @@ Version: 1.0
|
|||
Date: 2019-07-29
|
||||
Author: Loki
|
||||
Maintainer: Loki <loki@no.mail>
|
||||
Description: More about what it does (maybe more than one line)
|
||||
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written using Rcpp and RcppArmadillo.
|
||||
License: GPL-3
|
||||
Imports: Rcpp (>= 1.0.2)
|
||||
LinkingTo: Rcpp, RcppArmadillo
|
||||
|
|
|
@ -94,6 +94,8 @@ rStiefel <- function(p, q) {
|
|||
#' @param k assumed \eqn{rank(B)}
|
||||
#' @param nObs parameter for bandwidth estimation, typical value
|
||||
#' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||
#' @param h Bandwidth; if not specified, \code{nObs} is used to compute a bandwidth.
|
||||
#' If given (a positive floating point number), \code{nObs} is ignored.
|
||||
#' @param tau Initial step size (default 1)
|
||||
#' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
||||
#' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
||||
|
@ -105,7 +107,7 @@ rStiefel <- function(p, q) {
|
|||
#' orthogonal space spanned by \code{V}.
|
||||
#'
|
||||
#' @keywords internal
|
||||
cve_cpp <- function(X, Y, method, k, nObs, tauInitial = 1., rho1 = 0.1, rho2 = 0.9, tol = 1e-5, maxIter = 50L, maxLineSearchIter = 10L, attempts = 10L) {
|
||||
.Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts)
|
||||
cve_cpp <- function(X, Y, method, k, nObs, h = -1., tauInitial = 1., rho1 = 0.1, rho2 = 0.9, tol = 1e-5, maxIter = 50L, maxLineSearchIter = 10L, attempts = 10L) {
|
||||
.Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, method, k, nObs, h, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts)
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
\alias{cve_cpp}
|
||||
\title{Conditional Variance Estimation (CVE) method.}
|
||||
\usage{
|
||||
cve_cpp(X, Y, method, k, nObs, tauInitial = 1, rho1 = 0.1,
|
||||
cve_cpp(X, Y, method, k, nObs, h = -1, tauInitial = 1, rho1 = 0.1,
|
||||
rho2 = 0.9, tol = 1e-05, maxIter = 50L, maxLineSearchIter = 10L,
|
||||
attempts = 10L)
|
||||
}
|
||||
|
@ -18,6 +18,9 @@ cve_cpp(X, Y, method, k, nObs, tauInitial = 1, rho1 = 0.1,
|
|||
\item{nObs}{parameter for bandwidth estimation, typical value
|
||||
\code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].}
|
||||
|
||||
\item{h}{Bandwidth; if not specified, \code{nObs} is used to compute a bandwidth.
|
||||
If given (a positive floating point number), \code{nObs} is ignored.}
|
||||
|
||||
\item{tol}{Tolerance for update error used for stopping criterion (default 1e-5)}
|
||||
|
||||
\item{maxIter}{Upper bound of optimization iterations (default 50)}
|
||||
|
|
|
@ -342,6 +342,8 @@ double optStiefel_linesearch(
|
|||
//' @param k assumed \eqn{rank(B)}
|
||||
//' @param nObs parameter for bandwidth estimation, typical value
|
||||
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||
//' @param h Bandwidth; if not specified, \code{nObs} is used to compute a bandwidth.
|
||||
//' If given (a positive floating point number), \code{nObs} is ignored.
|
||||
//' @param tau Initial step size (default 1)
|
||||
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
||||
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
||||
|
@ -360,6 +362,7 @@ Rcpp::List cve_cpp(
|
|||
const std::string method,
|
||||
const int k,
|
||||
const double nObs,
|
||||
double h = -1., // default value to be overwritten
|
||||
const double tauInitial = 1.,
|
||||
const double rho1 = 0.1,
|
||||
const double rho2 = 0.9,
|
||||
|
@ -374,7 +377,9 @@ Rcpp::List cve_cpp(
|
|||
mat V_best;
|
||||
double loss_best = datum::inf;
|
||||
// estimate bandwidth
|
||||
double h = estimateBandwidth(X, k, nObs);
|
||||
if (h <= 0.0) {
|
||||
h = estimateBandwidth(X, k, nObs);
|
||||
}
|
||||
|
||||
// loss history for each optimization attempt
|
||||
// each column contains the iteration history for the loss
|
||||
|
|
|
@ -32,8 +32,8 @@ BEGIN_RCPP
|
|||
END_RCPP
|
||||
}
|
||||
// cve_cpp
|
||||
Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const std::string method, const int k, const double nObs, const double tauInitial, const double rho1, const double rho2, const double tol, const int maxIter, const int maxLineSearchIter, const int attempts);
|
||||
RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP rho1SEXP, SEXP rho2SEXP, SEXP tolSEXP, SEXP maxIterSEXP, SEXP maxLineSearchIterSEXP, SEXP attemptsSEXP) {
|
||||
Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const std::string method, const int k, const double nObs, double h, const double tauInitial, const double rho1, const double rho2, const double tol, const int maxIter, const int maxLineSearchIter, const int attempts);
|
||||
RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP hSEXP, SEXP tauInitialSEXP, SEXP rho1SEXP, SEXP rho2SEXP, SEXP tolSEXP, SEXP maxIterSEXP, SEXP maxLineSearchIterSEXP, SEXP attemptsSEXP) {
|
||||
BEGIN_RCPP
|
||||
Rcpp::RObject rcpp_result_gen;
|
||||
Rcpp::RNGScope rcpp_rngScope_gen;
|
||||
|
@ -42,6 +42,7 @@ BEGIN_RCPP
|
|||
Rcpp::traits::input_parameter< const std::string >::type method(methodSEXP);
|
||||
Rcpp::traits::input_parameter< const int >::type k(kSEXP);
|
||||
Rcpp::traits::input_parameter< const double >::type nObs(nObsSEXP);
|
||||
Rcpp::traits::input_parameter< double >::type h(hSEXP);
|
||||
Rcpp::traits::input_parameter< const double >::type tauInitial(tauInitialSEXP);
|
||||
Rcpp::traits::input_parameter< const double >::type rho1(rho1SEXP);
|
||||
Rcpp::traits::input_parameter< const double >::type rho2(rho2SEXP);
|
||||
|
@ -49,7 +50,7 @@ BEGIN_RCPP
|
|||
Rcpp::traits::input_parameter< const int >::type maxIter(maxIterSEXP);
|
||||
Rcpp::traits::input_parameter< const int >::type maxLineSearchIter(maxLineSearchIterSEXP);
|
||||
Rcpp::traits::input_parameter< const int >::type attempts(attemptsSEXP);
|
||||
rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, method, k, nObs, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts));
|
||||
rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, method, k, nObs, h, tauInitial, rho1, rho2, tol, maxIter, maxLineSearchIter, attempts));
|
||||
return rcpp_result_gen;
|
||||
END_RCPP
|
||||
}
|
||||
|
@ -57,7 +58,7 @@ END_RCPP
|
|||
static const R_CallMethodDef CallEntries[] = {
|
||||
{"_CVE_estimateBandwidth", (DL_FUNC) &_CVE_estimateBandwidth, 3},
|
||||
{"_CVE_rStiefel", (DL_FUNC) &_CVE_rStiefel, 2},
|
||||
{"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 12},
|
||||
{"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 13},
|
||||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
Package: CVE
|
||||
Type: Package
|
||||
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||
Version: 0.1
|
||||
Date: 2019-08-29
|
||||
Author: Loki
|
||||
Maintainer: Loki <loki@no.mail>
|
||||
Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is written in pure R.
|
||||
License: GPL-3
|
||||
Encoding: UTF-8
|
||||
RoxygenNote: 6.1.1
|
|
@ -0,0 +1,24 @@
|
|||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method(plot,cve)
|
||||
S3method(summary,cve)
|
||||
export(col.pair.apply)
|
||||
export(cve)
|
||||
export(cve.call)
|
||||
export(cve.grid.search)
|
||||
export(cve_sgd)
|
||||
export(cve_simple)
|
||||
export(dataset)
|
||||
export(elem.pairs)
|
||||
export(estimate.bandwidth)
|
||||
export(grad)
|
||||
export(null)
|
||||
export(rStiefl)
|
||||
export(row.pair.apply)
|
||||
import(stats)
|
||||
importFrom(graphics,lines)
|
||||
importFrom(graphics,plot)
|
||||
importFrom(graphics,points)
|
||||
importFrom(stats,model.frame)
|
||||
importFrom(stats,rbinom)
|
||||
importFrom(stats,rnorm)
|
|
@ -0,0 +1,188 @@
|
|||
#' Implementation of the CVE method.
|
||||
#'
|
||||
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
#' reduction (SDR) method assuming a model
|
||||
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
#' where B'X is a lower dimensional projection of the predictors.
|
||||
#'
|
||||
#' @param formula Formula for the regression model defining `X`, `Y`.
|
||||
#' See: \code{\link{formula}}.
|
||||
#' @param data data.frame holding data for formula.
|
||||
#' @param method The methods only differ in the optimization used.
|
||||
#' All of them are Gradient based optimization on a Stiefel manifold.
|
||||
#' \itemize{
|
||||
#' \item "simple" Simple reduction of stepsize.
|
||||
#' \item "sgd" stocastic gradient decent.
|
||||
#' \item TODO: further
|
||||
#' }
|
||||
#' @param ... Further parameters depending on the used method.
|
||||
#' @examples
|
||||
#' library(CVE)
|
||||
#'
|
||||
#' # sample dataset
|
||||
#' ds <- dataset("M5")
|
||||
#'
|
||||
#' # call `cve` with default method (aka "simple")
|
||||
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
#' # plot optimization history (loss via iteration)
|
||||
#' plot(dr.simple, main = "CVE M5 simple")
|
||||
#'
|
||||
#' # call `cve` with method "linesearch" using `data.frame` as data.
|
||||
#' data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
#' # Note: `Y`, `X` are NOT defined, they are extracted from `data`.
|
||||
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
#' plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
#'
|
||||
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
|
||||
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
if (missing(data)) {
|
||||
data <- environment(formula)
|
||||
} else if (!is.data.frame(data)) {
|
||||
stop('Parameter `data` must be a `data.frame` or missing.')
|
||||
}
|
||||
|
||||
# extract `X`, `Y` from `formula` with `data`
|
||||
model <- stats::model.frame(formula, data)
|
||||
X <- as.matrix(model[,-1, drop = FALSE])
|
||||
Y <- as.matrix(model[, 1, drop = FALSE])
|
||||
|
||||
# pass extracted data on to [cve.call()]
|
||||
dr <- cve.call(X, Y, method = method, ...)
|
||||
|
||||
# overwrite `call` property from [cve.call()]
|
||||
dr$call <- match.call()
|
||||
return(dr)
|
||||
}
|
||||
|
||||
#' @param nObs As described in the paper.
|
||||
#' @param X Data
|
||||
#' @param Y Responses
|
||||
#' @param nObs Like in the paper.
|
||||
#' @param k guess for SDR dimension.
|
||||
#' @param ... Method specific parameters.
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, k, ...) {
|
||||
|
||||
# TODO: replace default value of `k` by `max.dim` when fast enough
|
||||
if (missing(k)) {
|
||||
stop("TODO: parameter `k` (rank(B)) is required, replace by `max.dim`.")
|
||||
}
|
||||
|
||||
# parameter checking
|
||||
if (!is.matrix(X)) {
|
||||
stop('X should be a matrix.')
|
||||
}
|
||||
if (is.matrix(Y)) {
|
||||
Y <- as.vector(Y)
|
||||
}
|
||||
if (nrow(X) != length(Y)) {
|
||||
stop('Rows of X and number of Y elements are not compatible.')
|
||||
}
|
||||
if (ncol(X) < 2) {
|
||||
stop('X is one dimensional, no need for dimension reduction.')
|
||||
}
|
||||
|
||||
# Call specified method.
|
||||
method <- tolower(method)
|
||||
if (method == 'simple') {
|
||||
dr <- cve_simple(X, Y, k, nObs = nObs, ...)
|
||||
} else if (method == 'sgd') {
|
||||
dr <- cve_sgd(X, Y, k, nObs = nObs, ...)
|
||||
} else {
|
||||
stop('Got unknown method.')
|
||||
}
|
||||
|
||||
# augment result information
|
||||
dr$method <- method
|
||||
dr$call <- match.call()
|
||||
class(dr) <- "cve"
|
||||
return(dr)
|
||||
}
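
# A minimal direct-call sketch for `cve.call` (a sketch only; it assumes the
# `dataset` generator from this package and uses the default "simple" method):
#
#   ds <- dataset("M2")
#   dr <- cve.call(ds$X, ds$Y, method = "simple", k = ncol(ds$B))
#   dr$loss   # best loss over all attempts
#   dr$B      # estimated basis of the reduction space (orthogonal to V)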
|
||||
|
||||
# TODO: write summary
|
||||
# summary.cve <- function() {
|
||||
# # code #
|
||||
# }
|
||||
|
||||
#' Plotting helper for objects of class \code{cve}.
|
||||
#'
|
||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||
#' @param content Specifies what to plot:
|
||||
#' \itemize{
|
||||
#' \item "history" Plots the loss history from stiefel optimization
|
||||
#' (default).
|
||||
#' \item ... TODO: add (if there are any)
|
||||
#' }
|
||||
#' @param ... Pass through parameters to [plot()] and [lines()]
|
||||
#'
|
||||
#' @usage ## S3 method for class 'cve'
|
||||
#' plot(x, content = "history", ...)
|
||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||
#' as well as \code{\link{plot}} for standard plot utility.
|
||||
#' @importFrom graphics plot lines points
|
||||
#' @method plot cve
|
||||
#' @export
|
||||
plot.cve <- function(x, ...) {
|
||||
|
||||
H <- x$history
|
||||
H_1 <- H[!is.na(H[, 1]), 1]
|
||||
|
||||
defaults <- list(
|
||||
main = "History",
|
||||
xlab = "Iterations i",
|
||||
ylab = expression(loss == L[n](V^{(i)})),
|
||||
xlim = c(1, nrow(H)),
|
||||
ylim = c(0, max(H)),
|
||||
type = "l"
|
||||
)
|
||||
|
||||
call.plot <- match.call()
|
||||
keys <- names(defaults)
|
||||
keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0]
|
||||
|
||||
for (key in keys) {
|
||||
call.plot[[key]] <- defaults[[key]]
|
||||
}
|
||||
|
||||
call.plot[[1L]] <- quote(plot)
|
||||
call.plot$x <- quote(1:length(H_1))
|
||||
call.plot$y <- quote(H_1)
|
||||
|
||||
eval(call.plot)
|
||||
|
||||
if (ncol(H) > 1) {
|
||||
for (i in 2:ncol(H)) {
|
||||
H_i <- H[H[, i] > 0, i]
|
||||
lines(1:length(H_i), H_i)
|
||||
}
|
||||
}
|
||||
x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) })
|
||||
y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) })
|
||||
points(x.ends, y.ends)
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
#' @param object Instance of 'cve' as return of \code{cve}.
|
||||
#' @method summary cve
|
||||
#' @export
|
||||
summary.cve <- function(object, ...) {
|
||||
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
||||
'\n',
|
||||
'Dataset size: ', nrow(object$X), '\n',
|
||||
'Data Dimension: ', ncol(object$X), '\n',
|
||||
'SDR Dimension: ', object$k, '\n',
|
||||
'loss: ', object$loss, '\n',
|
||||
'\n',
|
||||
'Called via:\n',
|
||||
' ',
|
||||
sep='')
|
||||
print(object$call)
|
||||
}
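
# Usage sketch for the S3 methods above, mirroring the roxygen example of `cve`
# (note: `plot.cve` expects a `history` matrix and `summary.cve` expects `X`,
# `k` and `loss` in the result; the pure R methods are still work in progress):
#
#   ds <- dataset("M1")
#   dr <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
#   plot(dr, main = "M1 loss history")   # dispatches to plot.cve
#   summary(dr)                          # dispatches to summary.cve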
|
|
@ -0,0 +1,90 @@
|
|||
#' Stochastic gradient descent (SGD) implementation of the CVE method. Gradient
|
||||
#' steps are computed on mini-batches of the data.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_sgd <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 0.01,
|
||||
epochs = 50L,
|
||||
batch.size = 16L,
|
||||
attempts = 10L
|
||||
) {
|
||||
# Get dimensions.
|
||||
n <- nrow(X) # Number of samples.
|
||||
p <- ncol(X) # Data dimensions
|
||||
q <- p - k # Complement dimension of the SDR space.
|
||||
|
||||
# Save initial learning rate `tau`.
|
||||
tau.init <- tau
|
||||
|
||||
# Estimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Init a list of data indices (shuffled for batching).
|
||||
indices <- seq(n)
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init tracking of the current best result (across multiple attempts).
|
||||
V.best <- NULL
|
||||
loss.best <- Inf
|
||||
|
||||
# Start loop for multiple attempts.
|
||||
for (attempt in 1:attempts) {
|
||||
# Reset learning rate `tau`.
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a starting basis from the Stiefel manifold.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
# Repeat `epochs` times
|
||||
for (epoch in 1:epochs) {
|
||||
# Shuffle batches
|
||||
batch.shuffle <- sample(indices)
|
||||
|
||||
# Make a step for each batch.
|
||||
for (start in seq(1, n, batch.size)) {
|
||||
# Select batch data indices.
|
||||
batch <- batch.shuffle[start:(start + batch.size - 1)]
|
||||
# Remove `NA`'s (when `n` isn't a multiple of `batch.size`).
|
||||
batch <- batch[!is.na(batch)]
|
||||
|
||||
# Compute batch gradient.
|
||||
loss <- NULL
|
||||
G <- grad(X[batch, ], Y[batch], V, h)
|
||||
|
||||
# Cayley transform matrix.
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Apply learning rate `tau`.
|
||||
A.tau <- tau * A
|
||||
# Parallel transport (on the Stiefel manifold) in the direction of `G`.
|
||||
V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
}
|
||||
}
|
||||
# Compute the actual loss after finishing optimization.
|
||||
loss <- grad(X, Y, V, h, loss.only = TRUE)
|
||||
# After each attempt, check if last attempt reached a better result.
|
||||
if (!is.null(V.best)) { # Only required if there is already a result.
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
} else {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
}
|
||||
|
||||
return(list(
|
||||
X = X, Y = Y, k = k,
|
||||
nObs = nObs, h = h, tau = tau,
|
||||
epochs = epochs, batch.size = batch.size, attempts = attempts,
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best)
|
||||
))
|
||||
}
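
# Direct call sketch for `cve_sgd` (normally dispatched via `cve.call`; the
# chosen epochs/batch.size values are illustrative assumptions only):
#
#   ds <- dataset("M3")
#   fit <- cve_sgd(ds$X, ds$Y, k = ncol(ds$B),
#                  epochs = 20L, batch.size = 8L, attempts = 3L)
#   fit$loss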
|
|
@ -0,0 +1,137 @@
|
|||
#' Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
#' a classic GD method using no further tricks.
|
||||
#'
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
cve_simple <- function(X, Y, k,
|
||||
nObs = sqrt(nrow(X)),
|
||||
h = NULL,
|
||||
tau = 1.0,
|
||||
tol = 1e-3,
|
||||
slack = 0,
|
||||
epochs = 50L,
|
||||
attempts = 10L
|
||||
) {
|
||||
# Adapt tolerance for break condition
|
||||
tol <- sqrt(2 * k) * tol
|
||||
tau.init <- tau # remember to reset for new attempt
|
||||
|
||||
# Get dimensions.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
q <- p - k
|
||||
|
||||
# Estimate bandwidth if not given.
|
||||
if (missing(h) || !is.numeric(h)) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
|
||||
# Compute all static data.
|
||||
X_diff <- row.pair.apply(X, `-`)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
tri.i <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { i })
|
||||
tri.j <- row.pair.apply(index[, 1, drop = FALSE], function(i, j) { j })
|
||||
lower.tri.ind <- index[lower.tri(index)]
|
||||
upper.tri.ind <- t(index)[lower.tri.ind] # ATTENTION: correct order
|
||||
|
||||
I_p <- diag(1, p)
|
||||
|
||||
# Init variables for best attempt
|
||||
loss.best <- Inf
|
||||
V.best <- NULL
|
||||
|
||||
# Take a few attempts with different starting values
|
||||
for (attempt in 1:attempts) {
|
||||
|
||||
# reset step width `tau`
|
||||
tau <- tau.init
|
||||
|
||||
# Sample a `(p, q)` dimensional matrix from the Stiefel manifold as
|
||||
# optimization start value.
|
||||
V <- rStiefl(p, q)
|
||||
|
||||
## Initial loss calculation
|
||||
# Orthogonal projection to `span(V)`.
|
||||
Q <- I_p - (V %*% t(V))
|
||||
|
||||
# Compute vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
# Compute weights matrix `W`
|
||||
W <- matrix(1, n, n) # init (`exp(0) = 1` in the diagonal)
|
||||
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
|
||||
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
|
||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
|
||||
|
||||
# Weighted `Y` momentums
|
||||
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
|
||||
y2 <- Y^2 %*% W
|
||||
|
||||
# Get per sample loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
# Average to the total loss `L(V)`
|
||||
loss <- mean(L)
|
||||
|
||||
## Start optimization loop.
|
||||
for (iter in 1:epochs) {
|
||||
|
||||
# index according a lower triangular matrix stored in column major order
|
||||
# by only the `i` or `j` index.
|
||||
# vecW <- lower.tri.vector(W) + upper.tri.vector(W)
|
||||
vecW <- W[lower.tri.ind] + W[upper.tri.ind]
|
||||
S <- (L[tri.j] - (Y[tri.i] - y1[tri.j])^2) * vecW * vecD
|
||||
|
||||
# Gradient
|
||||
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, S, `*`);
|
||||
G <- (-2 / (n * h^2)) * G
|
||||
|
||||
# Cayley transform matrix `A`
|
||||
A <- (G %*% t(V)) - (V %*% t(G))
|
||||
|
||||
# Compute next `V` with step size `tau` using the Cayley transform
|
||||
# via a parallel transport into the gradient direction.
|
||||
A.tau <- tau * A
|
||||
V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V)
|
||||
|
||||
# Orthogonal projection to `span(V.tau)`.
|
||||
Q <- I_p - (V.tau %*% t(V.tau))
|
||||
|
||||
# Compute vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
# Compute weights matrix `W` (only update values, diag keeps 1's)
|
||||
W[lower.tri.ind] <- exp(vecD / (-2 * h)) # set lower triangular part
|
||||
W[upper.tri.ind] <- t(W)[upper.tri.ind] # mirror to upper triangular part
|
||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # normalize columns
|
||||
|
||||
# Weighted `Y` momentums
|
||||
y1 <- Y %*% W # is 1D anyway, avoid transposing `W`
|
||||
y2 <- Y^2 %*% W
|
||||
|
||||
# Get per sample loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
# Average to the total loss `L(V)`
|
||||
loss.tau <- mean(L)
|
||||
|
||||
# Check if step is appropriate
|
||||
if (loss != Inf & loss.tau - loss > slack * loss) {
|
||||
tau <- tau / 2
|
||||
} else {
|
||||
loss <- loss.tau
|
||||
V <- V.tau
|
||||
}
|
||||
}
|
||||
|
||||
# Check if current attempt improved previous ones
|
||||
if (loss < loss.best) {
|
||||
loss.best <- loss
|
||||
V.best <- V
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(list(
|
||||
loss = loss.best,
|
||||
V = V.best,
|
||||
B = null(V.best),
|
||||
h = h
|
||||
))
|
||||
}
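
# Sanity-check sketch for the Cayley update used above: since `A` is skew
# symmetric, `solve(I + tau*A) %*% (I - tau*A)` is orthogonal, so the updated
# basis stays (numerically) semi-orthogonal. (Illustrative values only.)
#
#   p <- 6; q <- 2
#   V <- rStiefl(p, q)
#   G <- matrix(rnorm(p * q), p, q)               # stand-in gradient
#   A <- (G %*% t(V)) - (V %*% t(G))
#   A.tau <- 0.5 * A
#   V.new <- solve(diag(p) + A.tau) %*% ((diag(p) - A.tau) %*% V)
#   max(abs(t(V.new) %*% V.new - diag(q)))        # ~ 1e-15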
|
|
@ -0,0 +1,109 @@
|
|||
#' Generates test datasets.
|
||||
#'
|
||||
#' Provides sample datasets. There are 5 different datasets named
|
||||
#' M1, M2, M3, M4 and M5 described in the paper referenced below.
|
||||
#' The general model is given by:
|
||||
#' \deqn{Y ~ g(B'X) + \epsilon}
|
||||
#'
|
||||
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
||||
#' @param n Number of samples.
|
||||
#' @param p Dim. of random variable \code{X}.
|
||||
#' @param p.mix Only for \code{"M4"}, see: below.
|
||||
#' @param lambda Only for \code{"M4"}, see: below.
|
||||
#'
|
||||
#' @return List with elements
|
||||
#' \itemize{
|
||||
#' \item{X}{data}
|
||||
#' \item{Y}{response}
|
||||
#' \item{B}{Used dim-reduction matrix}
|
||||
#' \item{name}{Name of the dataset (name parameter)}
|
||||
#' }
|
||||
#'
|
||||
#' @section M1:
|
||||
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||
#' @section M2:
|
||||
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||
#' @section M3:
|
||||
#' TODO:
|
||||
#' @section M4:
|
||||
#' TODO:
|
||||
#' @section M5:
|
||||
#' TODO:
|
||||
#'
|
||||
#' @import stats
|
||||
#' @importFrom stats rnorm rbinom
|
||||
#' @export
|
||||
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
|
||||
# validate parameters
|
||||
stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
|
||||
|
||||
# set default values if not supplied
|
||||
if (missing(n)) {
|
||||
n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
|
||||
}
|
||||
if (missing(B)) {
|
||||
p <- 12
|
||||
if (name == "M1") {
|
||||
B <- cbind(
|
||||
c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
|
||||
c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
|
||||
) / sqrt(6)
|
||||
} else if (name == "M2") {
|
||||
B <- cbind(
|
||||
c(c(1, 0), rep(0, 10)),
|
||||
c(c(0, 1), rep(0, 10))
|
||||
)
|
||||
} else {
|
||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
|
||||
}
|
||||
} else {
|
||||
p <- dim(B)[1]
|
||||
# validate col. nr to match dataset `k = dim(B)[2]`
|
||||
stopifnot(
|
||||
name %in% c("M1", "M2") && dim(B)[2] == 2,
|
||||
name %in% c("M3", "M4", "M5") && dim(B)[2] == 1
|
||||
)
|
||||
}
|
||||
|
||||
# set link function `g` for model `Y ~ g(B'X) + epsilon`
|
||||
if (name == "M1") {
|
||||
g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
|
||||
} else if (name == "M2") {
|
||||
g <- function(BX) { BX[1] * BX[2]^2 }
|
||||
} else if (name %in% c("M3", "M4")) {
|
||||
g <- function(BX) { cos(BX[1]) }
|
||||
} else { # name == "M5"
|
||||
g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
|
||||
}
|
||||
|
||||
# compute X
|
||||
if (name != "M4") {
|
||||
# compute root of the covariance matrix according the dataset
|
||||
if (name %in% c("M1", "M3")) {
|
||||
# Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
|
||||
# `\Sigma_{i, j} = 0.5^{|i - j|}`.
|
||||
Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
|
||||
# decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
|
||||
Sigma.root <- chol(Sigma)
|
||||
} else { # name %in% c("M2", "M5")
|
||||
Sigma.root <- diag(rep(1, p)) # d-dim identity
|
||||
}
|
||||
# data `X` as multivariate random normal variable with
|
||||
# variance matrix `Sigma`.
|
||||
X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
|
||||
} else { # name == "M4"
|
||||
X <- t(replicate(n, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
|
||||
}
|
||||
|
||||
# response `Y ~ g(B'X) + epsilon` with `epsilon ~ N(0, sd = 0.5)`
|
||||
Y <- apply(X, 1, function(X_i) {
|
||||
g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
|
||||
})
|
||||
|
||||
return(list(X = X, Y = Y, B = B, name = name))
|
||||
}
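
# Quick sketch: draw one sample from model "M1" and inspect its shape
# (defaults for this model are n = 200, p = 12, k = 2):
#
#   ds <- dataset("M1")
#   dim(ds$X)       # 200 x 12
#   length(ds$Y)    # 200
#   dim(ds$B)       # 12 x 2, the true reduction matrix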
|
|
@ -0,0 +1,27 @@
|
|||
#' Estimated bandwidth for CVE.
|
||||
#'
|
||||
#' Estimates a proper bandwidth \code{h} according to
|
||||
#' \deqn{%
|
||||
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||
#'
|
||||
#' @param X Data matrix of dimension (n x p) with n data points X_i of dimension
|
||||
#' p. Therefore each row represents a data point of dimension p.
|
||||
#' @param k Guess for rank(B).
|
||||
#' @param nObs Either a numeric or a function. If specified as a numeric value
|
||||
#' it is used in the computation of the bandwidth directly. If it is a function,
|
||||
#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
|
||||
#' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
||||
#'
|
||||
#' @seealso [\code{\link{qchisq}}]
|
||||
#' @export
|
||||
estimate.bandwidth <- function(X, k, nObs) {
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
X_centered <- scale(X, center=TRUE, scale=FALSE)
|
||||
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
||||
|
||||
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
||||
return(2 * quantil * sum(diag(Sigma)) / p)
|
||||
}
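
# Usage sketch (assumes the `dataset` generator from this package):
#
#   ds <- dataset("M1")
#   h <- estimate.bandwidth(ds$X, k = ncol(ds$B), nObs = sqrt(nrow(ds$X)))
#   h   # scalar bandwidth used by the CVE loss and gradient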
|
|
@ -0,0 +1,65 @@
|
|||
#' Compute the gradient of `L(V)` given a dataset `X`.
|
||||
#'
|
||||
#' @param X Data matrix.
|
||||
#' @param Y Response.
|
||||
#' @param V Position to compute the gradient at, i.e. a point on the Stiefel manifold.
|
||||
#' @param h Bandwidth
|
||||
#' @param loss.only Boolean to only compute the loss; if \code{TRUE} a single
|
||||
#' loss value is returned and the gradient is not computed.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
grad <- function(X, Y, V, h, loss.only = FALSE, loss.out = FALSE) {
|
||||
# Get number of samples and dimension.
|
||||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
# Compute lookup indexes for symmetry, lower/upper
|
||||
# triangular parts and vectorization.
|
||||
pair.index <- elem.pairs(seq(n))
|
||||
i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
|
||||
j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
|
||||
# Matrix of vectorized indices. (vec(index) -> seq)
|
||||
index <- matrix(seq(n * n), n, n)
|
||||
lower <- index[lower.tri(index)]
|
||||
upper <- t(index)[lower]
|
||||
|
||||
# Projection matrix onto `span(V)`
|
||||
Q <- diag(1, p) - (V %*% t(V))
|
||||
|
||||
# Create all pairwise differences of rows of `X`.
|
||||
X_diff <- X[i, , drop=F] - X[j, , drop=F]
|
||||
|
||||
# Vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
|
||||
# Weight matrix `W` (dnorm ... Gaussian density function)
|
||||
W <- matrix(dnorm(0), n, n)
|
||||
W[lower] <- dnorm(vecD / h) # Set lower tri. part
|
||||
W[upper] <- t(W)[upper] # Mirror lower tri. to upper
|
||||
W <- sweep(W, 2, colSums(W), FUN=`/`) # Col-Normalize
|
||||
|
||||
# Weighted `Y` momentums
|
||||
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
||||
y2 <- Y^2 %*% W
|
||||
|
||||
# Per example loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
if (loss.only) {
|
||||
# Mean for total loss `L(V)`.
|
||||
return(mean(L))
|
||||
} else if (loss.out) {
|
||||
# Bubble environments up and write to loss variable, aka out param.
|
||||
loss <<- mean(L)
|
||||
}
|
||||
|
||||
# Vectorized Weights with forced symmetry
|
||||
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
|
||||
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
|
||||
# Compute scaling of `X` row differences
|
||||
vecS <- vecS * vecD
|
||||
|
||||
# The gradient.
|
||||
G <- t(X_diff) %*% sweep(X_diff %*% V, 1, vecS, `*`)
|
||||
G <- (-2 / (n * h^2)) * G
|
||||
return(G)
|
||||
}
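
# Evaluation sketch: the loss at a random V can be obtained via
# `loss.only = TRUE` (assumes `dataset`, `rStiefl` and `estimate.bandwidth`):
#
#   ds <- dataset("M2")
#   p <- ncol(ds$X); k <- ncol(ds$B)
#   V <- rStiefl(p, p - k)
#   h <- estimate.bandwidth(ds$X, k, sqrt(nrow(ds$X)))
#   grad(ds$X, ds$Y, V, h, loss.only = TRUE)   # scalar loss L_n(V)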
|
|
@ -0,0 +1,43 @@
|
|||
|
||||
#' Performs a grid search for parameters over a parameter grid.
|
||||
#' @examples
|
||||
#' args <- list(
|
||||
#' h = c(0.05, 0.1, 0.2),
|
||||
#' method = c("simple", "sgd"),
|
||||
#' tau = c(0.5, 0.1, 0.01)
|
||||
#' )
|
||||
#' cve.grid.search(args)
|
||||
#' @export
|
||||
cve.grid.search <- function(X, Y, k, args) {
|
||||
|
||||
args$stringsAsFactors = FALSE
|
||||
args$KEEP.OUT.ATTRS = FALSE
|
||||
grid <- do.call(expand.grid, args)
|
||||
grid.length <- length(grid[[1]])
|
||||
|
||||
print(grid)
|
||||
|
||||
for (i in 1:grid.length) {
|
||||
arguments <- as.list(grid[i, ])
|
||||
# Set required arguments
|
||||
arguments$X <- X
|
||||
arguments$Y <- Y
|
||||
arguments$k <- k
|
||||
# print(arguments)
|
||||
dr <- do.call(cve.call, arguments)
|
||||
print(dr$loss)
|
||||
}
|
||||
}
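
# A fuller call sketch including the required data arguments (the roxygen
# example above omits X, Y and k); the grid values are illustrative only:
#
#   ds <- dataset("M1")
#   args <- list(h = c(0.1, 0.2), method = c("simple", "sgd"),
#                tau = c(0.1, 0.01), attempts = c(1L))
#   cve.grid.search(ds$X, ds$Y, ncol(ds$B), args)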
|
||||
|
||||
# ds <- dataset()
|
||||
# X <- ds$X
|
||||
# Y <- ds$Y
|
||||
# (k <- ncol(ds$B))
|
||||
# args <- list(
|
||||
# h = c(0.05, 0.1, 0.2),
|
||||
# method = c("simple", "sgd"),
|
||||
# tau = c(0.5, 0.1, 0.01),
|
||||
# attempts = c(1L)
|
||||
# )
|
||||
|
||||
# cve.grid.search(X, Y, k, args)
|
|
@ -0,0 +1,63 @@
|
|||
#' Samples uniformly from the Stiefel manifold.
|
||||
#'
|
||||
#' @param p row dim.
|
||||
#' @param q col dim.
|
||||
#' @return `(p, q)` semi-orthogonal matrix
|
||||
#' @examples
|
||||
#' V <- rStiefl(6, 4)
|
||||
#' @export
|
||||
rStiefl <- function(p, q) {
|
||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||
}
|
||||
|
||||
#' Null space basis of given matrix `V`
|
||||
#'
|
||||
#' @param V `(p, q)` matrix
|
||||
#' @return Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
null <- function(V) {
|
||||
tmp <- qr(V)
|
||||
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
|
||||
return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE])
|
||||
}
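
# Check sketch: the columns of `null(V)` are orthogonal to `span(V)`.
#
#   V <- rStiefl(6, 2)
#   B <- null(V)              # 6 x 4
#   max(abs(t(V) %*% B))      # ~ 0 up to rounding error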
|
||||
|
||||
#' Creates a (numeric) matrix where each row contains
|
||||
#' an element to element matching.
|
||||
#' @param elements numeric vector of elements to match
|
||||
#' @return Matrix of size `(n * (n - 1) / 2, 2)` for an argument of length `n`.
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
elem.pairs <- function(elements) {
|
||||
# Number of elements to match.
|
||||
n <- length(elements)
|
||||
# Create all combinations.
|
||||
pairs <- cbind(rep(elements, each=n), rep(elements, n))
|
||||
# Select unique combinations without self interaction.
|
||||
return(pairs[pairs[, 1] < pairs[, 2], ])
|
||||
}
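
# Tiny illustration of the pair enumeration above:
#
#   elem.pairs(1:4)   # 6 x 2 matrix of all pairs (i, j) with i < j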
|
||||
|
||||
#' Apply function to pairs of matrix rows or columns.
|
||||
#'
|
||||
#' \code{row.pair.apply} applies \code{FUN} to each unique row pair without self
|
||||
#' interaction while \code{col.pair.apply} does the same for columns.
|
||||
#' @param X Matrix.
|
||||
#' @param FUN Function to apply to each pair.
|
||||
#' @examples
|
||||
#' X <- matrix(seq(12), 4, 3)
|
||||
#' # matrix containing all row to row differences.
|
||||
#' row.pair.apply(X, `-`)
|
||||
#' @aliases row.pair.apply col.pair.apply
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
row.pair.apply <- function(X, FUN) {
|
||||
pairs <- elem.pairs(seq(nrow(X)))
|
||||
return(FUN(X[pairs[, 1], ], X[pairs[, 2], ]))
|
||||
}
|
||||
#' @rdname row.pair.apply
|
||||
#' @keywords internal
|
||||
#' @export
|
||||
col.pair.apply <- function(X, FUN) {
|
||||
pairs <- elem.pairs(seq(ncol(X)))
|
||||
return(FUN(X[, pairs[, 1]], X[, pairs[, 2]]))
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,70 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/CVE.R
|
||||
\name{cve}
|
||||
\alias{cve}
|
||||
\alias{cve.call}
|
||||
\title{Implementation of the CVE method.}
|
||||
\usage{
|
||||
cve(formula, data, method = "simple", ...)
|
||||
|
||||
cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, k, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{formula}{Formula for the regression model defining `X`, `Y`.
|
||||
See: \code{\link{formula}}.}
|
||||
|
||||
\item{data}{data.frame holding data for formula.}
|
||||
|
||||
\item{method}{The methods only differ in the optimization used.
|
||||
All of them are Gradient based optimization on a Stiefel manifold.
|
||||
\itemize{
|
||||
\item "simple" Simple reduction of stepsize.
|
||||
\item "sgd" stocastic gradient decent.
|
||||
\item TODO: further
|
||||
}}
|
||||
|
||||
\item{...}{Further parameters depending on the used method.}
|
||||
|
||||
\item{X}{Data}
|
||||
|
||||
\item{Y}{Responses}
|
||||
|
||||
\item{nObs}{As described in the paper.}
|
||||
|
||||
\item{k}{guess for SDR dimension.}
|
||||
|
||||
\item{nObs}{Like in the paper.}
|
||||
|
||||
\item{...}{Method specific parameters.}
|
||||
}
|
||||
\description{
|
||||
Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
reduction (SDR) method assuming a model
|
||||
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
where B'X is a lower dimensional projection of the predictors.
|
||||
}
|
||||
\examples{
|
||||
library(CVE)
|
||||
|
||||
# sample dataset
|
||||
ds <- dataset("M5")
|
||||
|
||||
# call `cve` with default method (aka "simple")
|
||||
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
# plot optimization history (loss via iteration)
|
||||
plot(dr.simple, main = "CVE M5 simple")
|
||||
|
||||
# call `cve` with method "linesearch" using `data.frame` as data.
|
||||
data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
# Note: `Y`, `X` are NOT defined, they are extracted from `data`.
|
||||
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
|
||||
}
|
||||
\references{
|
||||
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{formula}}. For a complete parameters list (dependent on
|
||||
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/gridSearch.R
|
||||
\name{cve.grid.search}
|
||||
\alias{cve.grid.search}
|
||||
\title{Performs a grid search for parameters over a parameter grid.}
|
||||
\usage{
|
||||
cve.grid.search(X, Y, k, args)
|
||||
}
|
||||
\description{
|
||||
Performs a grid search for parameters over a parameter grid.
|
||||
}
|
||||
\examples{
|
||||
args <- list(
|
||||
h = c(0.05, 0.1, 0.2),
|
||||
method = c("simple", "sgd"),
|
||||
tau = c(0.5, 0.1, 0.01)
|
||||
)
|
||||
cve.grid.search(args)
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_sgd.R
|
||||
\name{cve_sgd}
|
||||
\alias{cve_sgd}
|
||||
\title{Stochastic gradient descent (SGD) implementation of the CVE method. Gradient
|
||||
steps are computed on mini-batches of the data.}
|
||||
\usage{
|
||||
cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01,
|
||||
epochs = 50L, batch.size = 16L, attempts = 10L)
|
||||
}
|
||||
\description{
|
||||
Stochastic gradient descent (SGD) implementation of the CVE method. Gradient
|
||||
steps are computed on mini-batches of the data.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,15 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/cve_simple.R
|
||||
\name{cve_simple}
|
||||
\alias{cve_simple}
|
||||
\title{Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method using no further tricks.}
|
||||
\usage{
|
||||
cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1,
|
||||
tol = 0.001, slack = 0, epochs = 50L, attempts = 10L)
|
||||
}
|
||||
\description{
|
||||
Simple implementation of the CVE method. 'Simple' means that this method is
|
||||
a classic GD method using no further tricks.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,64 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/datasets.R
|
||||
\name{dataset}
|
||||
\alias{dataset}
|
||||
\title{Generates test datasets.}
|
||||
\usage{
|
||||
dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
|
||||
}
|
||||
\arguments{
|
||||
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}}
|
||||
|
||||
\item{n}{Number of samples.}
|
||||
|
||||
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{lambda}{Only for \code{"M4"}, see: below.}
|
||||
|
||||
\item{p}{Dim. of random variable \code{X}.}
|
||||
}
|
||||
\value{
|
||||
List with elements
|
||||
\itemize{
|
||||
\item{X}{data}
|
||||
\item{Y}{response}
|
||||
\item{B}{Used dim-reduction matrix}
|
||||
\item{name}{Name of the dataset (name parameter)}
|
||||
}
|
||||
}
|
||||
\description{
|
||||
Provides sample datasets. There are 5 different datasets named
|
||||
M1, M2, M3, M4 and M5 described in the paper referenced below.
|
||||
The general model is given by:
|
||||
\deqn{Y ~ g(B'X) + \epsilon}
|
||||
}
|
||||
\section{M1}{
|
||||
|
||||
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||
}
|
||||
|
||||
\section{M2}{
|
||||
|
||||
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||
}
|
||||
|
||||
\section{M3}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
||||
\section{M4}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
||||
\section{M5}{
|
||||
|
||||
TODO:
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{elem.pairs}
|
||||
\alias{elem.pairs}
|
||||
\title{Creates a (numeric) matrix where each row contains
|
||||
an element to element matching.}
|
||||
\usage{
|
||||
elem.pairs(elements)
|
||||
}
|
||||
\arguments{
|
||||
\item{elements}{numeric vector of elements to match}
|
||||
}
|
||||
\value{
|
||||
Matrix of size `(n * (n - 1) / 2, 2)` for an argument of length `n`.
|
||||
}
|
||||
\description{
|
||||
Creates a (numeric) matrix where each row contains
|
||||
an element to element matching.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,28 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/estimateBandwidth.R
|
||||
\name{estimate.bandwidth}
|
||||
\alias{estimate.bandwidth}
|
||||
\title{Estimated bandwidth for CVE.}
|
||||
\usage{
|
||||
estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Data matrix of dimension (n x p) with n data points X_i of dimension
|
||||
p. Therefore each row represents a data point of dimension p.}
|
||||
|
||||
\item{k}{Guess for rank(B).}
|
||||
|
||||
\item{nObs}{Either a numeric or a function. If specified as a numeric value
|
||||
it is used in the computation of the bandwidth directly. If it is a function,
|
||||
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviour if not
|
||||
supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
|
||||
}
|
||||
\description{
|
||||
Estimates a proper bandwidth \code{h} according to
|
||||
\deqn{%
|
||||
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||
}
|
||||
\seealso{
|
||||
[\code{\link{qchisq}}]
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/gradient.R
|
||||
\name{grad}
|
||||
\alias{grad}
|
||||
\title{Compute the gradient of `L(V)` given a dataset `X`.}
|
||||
\usage{
|
||||
grad(X, Y, V, h, loss.only = FALSE, loss.out = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Data matrix.}
|
||||
|
||||
\item{Y}{Response.}
|
||||
|
||||
\item{V}{Position to compute the gradient at, i.e. a point on the Stiefel manifold.}
|
||||
|
||||
\item{h}{Bandwidth}
|
||||
|
||||
\item{loss.only}{Boolean to only compute the loss; if \code{TRUE} a single
|
||||
loss value is returned and the gradient is not computed.}
|
||||
}
|
||||
\description{
|
||||
Compute the gradient of `L(V)` given a dataset `X`.
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,18 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{null}
|
||||
\alias{null}
|
||||
\title{Null space basis of given matrix `V`}
|
||||
\usage{
|
||||
null(V)
|
||||
}
|
||||
\arguments{
|
||||
\item{V}{`(p, q)` matrix}
|
||||
}
|
||||
\value{
|
||||
Semi-orthogonal `(p, p - q)` matrix spanning the null space of `V`.
|
||||
}
|
||||
\description{
|
||||
Null space basis of given matrix `V`
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,28 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/CVE.R
|
||||
\name{plot.cve}
|
||||
\alias{plot.cve}
|
||||
\title{Plotting helper for objects of class \code{cve}.}
|
||||
\usage{
|
||||
## S3 method for class 'cve'
|
||||
plot(x, content = "history", ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object of class \code{cve} (result of [cve()]).}
|
||||
|
||||
\item{...}{Pass through parameters to [plot()] and [lines()]}
|
||||
|
||||
\item{content}{Specifies what to plot:
|
||||
\itemize{
|
||||
\item "history" Plots the loss history from stiefel optimization
|
||||
(default).
|
||||
\item ... TODO: add (if there are any)
|
||||
}}
|
||||
}
|
||||
\description{
|
||||
Plotting helper for objects of class \code{cve}.
|
||||
}
|
||||
\seealso{
|
||||
see \code{\link{par}} for graphical parameters to pass through
|
||||
as well as \code{\link{plot}} for standard plot utility.
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{rStiefl}
|
||||
\alias{rStiefl}
|
||||
\title{Samples uniformly from the Stiefel manifold}
|
||||
\usage{
|
||||
rStiefl(p, q)
|
||||
}
|
||||
\arguments{
|
||||
\item{p}{row dim.}
|
||||
|
||||
\item{q}{col dim.}
|
||||
}
|
||||
\value{
|
||||
`(p, q)` semi-orthogonal matrix
|
||||
}
|
||||
\description{
|
||||
Samples uniformly from the Stiefel manifold.
|
||||
}
|
||||
\examples{
|
||||
V <- rStiefl(6, 4)
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/util.R
|
||||
\name{row.pair.apply}
|
||||
\alias{row.pair.apply}
|
||||
\alias{row.pair.apply,}
|
||||
\alias{col.pair.apply}
|
||||
\title{Apply function to pairs of matrix rows or columns.}
|
||||
\usage{
|
||||
row.pair.apply(X, FUN)
|
||||
|
||||
col.pair.apply(X, FUN)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{Matrix.}
|
||||
|
||||
\item{FUN}{Function to apply to each pair.}
|
||||
}
|
||||
\description{
|
||||
\code{row.pair.apply} applies \code{FUN} to each unique row pair without self
|
||||
interaction while \code{col.pair.apply} does the same for columns.
|
||||
}
|
||||
\examples{
|
||||
X <- matrix(seq(12), 4, 3)
|
||||
# matrix containing all row to row differences.
|
||||
row.pair.apply(X, `-`)
|
||||
}
|
||||
\keyword{internal}
|
|
@ -0,0 +1,14 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/CVE.R
|
||||
\name{summary.cve}
|
||||
\alias{summary.cve}
|
||||
\title{Prints a summary of a \code{cve} result.}
|
||||
\usage{
|
||||
\method{summary}{cve}(object, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Instance of 'cve' as return of \code{cve}.}
|
||||
}
|
||||
\description{
|
||||
Prints a summary of a \code{cve} result.
|
||||
}
|
|
@ -1,15 +1,15 @@
|
|||
library("Matrix")
|
||||
library("fields")
|
||||
library('dr')
|
||||
library('mvtnorm')
|
||||
library(mgcv)
|
||||
library(pracma)
|
||||
library(RVAideMemoire)
|
||||
# library("fields")
|
||||
# library('dr')
|
||||
# library('mvtnorm')
|
||||
# library(mgcv)
|
||||
# library(pracma)
|
||||
# library(RVAideMemoire)
|
||||
library("MAVE")
|
||||
library(pls)
|
||||
library(LaplacesDemon)
|
||||
library(earth)
|
||||
library("latex2exp")
|
||||
# library(pls)
|
||||
# library(LaplacesDemon)
|
||||
# library(earth)
|
||||
# library("latex2exp")
|
||||
|
||||
#################################################
|
||||
|
||||
|
@ -269,6 +269,9 @@ LV<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
|||
#h...bandwidth
|
||||
#count2...number of times sclack_para is active
|
||||
stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
|
||||
history <- matrix(NA, maxit, k0)
|
||||
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
|
@ -324,15 +327,20 @@ stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),
|
|||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
|
||||
# Write to history
|
||||
history[count, u] <- Lnew
|
||||
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
ret<-list(Vend,best,sig,count,h,count2, history)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2', 'history')
|
||||
return(ret)
|
||||
}
|
||||
##### calculates distance between two subspaces
|
||||
|
|
|
@ -0,0 +1,114 @@
|
|||
# Usage:
|
||||
# ~$ Rscript runtime_test.R
|
||||
|
||||
#' Writes progress information to the console (so the user does not get bored).
|
||||
tell.user <- function(name, start.time, i, length) {
|
||||
cat("\rRunning Test (", name, "):",
|
||||
i, "/", length,
|
||||
" - elapsed:", format(Sys.time() - start.time), "\033[K")
|
||||
}
|
||||
|
||||
library(CVE) # load CVE
|
||||
source("CVE_legacy/function_script.R") # Source legacy code
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 50
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
ATTEMPTS <- 10
|
||||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
# methods <- c("legacy", "simple", "sgd")
|
||||
methods <- c("legacy", "simple", "sgd")
|
||||
|
||||
# Setup error and time tracking variables
|
||||
error <- matrix(NA, SIM.NR, length(methods) * length(dataset.names))
|
||||
time <- matrix(NA, SIM.NR, ncol(error))
|
||||
colnames(error) <- kronecker(paste0(dataset.names, '-'), methods, paste0)
|
||||
colnames(time) <- colnames(error)
|
||||
|
||||
# Create new log file and write CSV (actually TSV) header.
|
||||
# (do not overwrite existing logs)
|
||||
log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
|
||||
file <- file.path('tmp', paste0('test', log.nr, '.log'))
|
||||
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
|
||||
# Open a new pdf device for plotting into (do not overwrite existing ones)
|
||||
pdf(file.path('tmp', paste0('test', log.nr, '.pdf')))
|
||||
|
||||
# only for telling user (to stdout)
|
||||
count <- 0
|
||||
start.time <- Sys.time()
|
||||
# Start simulation loop.
|
||||
for (sim in 1:SIM.NR) {
|
||||
# Repeat for each dataset.
|
||||
for (name in dataset.names) {
|
||||
count <- count + 1
|
||||
tell.user(name, start.time, count, SIM.NR * length(dataset.names))
|
||||
|
||||
# Create a new dataset
|
||||
ds <- dataset(name)
|
||||
# Prepare X, Y and combine to data matrix
|
||||
Y <- ds$Y
|
||||
X <- ds$X
|
||||
data <- cbind(Y, X)
|
||||
# get dimensions
|
||||
dim <- ncol(X)
|
||||
truedim <- ncol(ds$B)
|
||||
|
||||
for (method in methods) {
|
||||
if (tolower(method) == "legacy") {
|
||||
dr.time <- system.time(
|
||||
dr <- stiefl_opt(data,
|
||||
k = dim - truedim,
|
||||
k0 = ATTEMPTS,
|
||||
h = estimate.bandwidth(X, k = truedim, nObs = sqrt(nrow(X))),
|
||||
maxit = MAXIT
|
||||
)
|
||||
)
|
||||
dr$B <- fill_base(dr$est_base)[, 1:truedim]
|
||||
} else {
|
||||
dr.time <- system.time(
|
||||
dr <- cve.call(X, Y,
|
||||
method = method,
|
||||
k = truedim,
|
||||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
error[sim, key] <- subspace_dist(dr$B, ds$B) / sqrt(2 * truedim)
|
||||
time[sim, key] <- dr.time["elapsed"]
|
||||
|
||||
# Log results to file (mostly for long running simulations)
|
||||
cat(paste0(
|
||||
c(name, method, error[sim, key], time[sim, key]),
|
||||
collapse = '\t'
|
||||
), '\n',
|
||||
sep = '', file = file, append = TRUE
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cat("\n\n## Time [sec] Means:\n")
|
||||
print(colMeans(time))
|
||||
cat("\n## Error Means:\n")
|
||||
print(colMeans(error))
|
||||
|
||||
at <- seq(ncol(error)) + rep(seq(ncol(error) / length(methods)) - 1, each = length(methods))
|
||||
boxplot(error,
|
||||
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Error",
|
||||
las = 2,
|
||||
at = at
|
||||
)
|
||||
boxplot(time,
|
||||
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Time [sec]",
|
||||
las = 2,
|
||||
at = at
|
||||
)
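
# Close the pdf device when running interactively (Rscript closes it on exit):
# dev.off()
#
# Post-processing sketch: the tab separated log written above can be summarized
# per dataset and method ("tmp/test0.log" is an assumed name for the first run):
#
# res <- read.table("tmp/test0.log", header = TRUE, sep = "\t")
# aggregate(cbind(error, time) ~ dataset + method, data = res, FUN = mean)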
|
||||
|