parent
50302ce18a
commit
dea610bcb9
159
CVE_C/R/CVE.R
159
CVE_C/R/CVE.R
|
@ -1,59 +1,70 @@
|
|||
#' Conditional Variance Estimator (CVE)
|
||||
#' Conditional Variance Estimator (CVE) Package.
|
||||
#'
|
||||
#' Conditional Variance Estimator for Sufficient Dimension
|
||||
#' Reduction
|
||||
#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
||||
#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
||||
#' where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
|
||||
#' similarly to its main competitor, the mean average variance estimation
|
||||
#' (MAVE), is not based on inverse regression, and does not require the
|
||||
#' restrictive linearity and constant variance conditions of moment based SDR
|
||||
#' methods. CVE is data-driven and applies to additive error regressions with
|
||||
#' continuous predictors and link function. The effectiveness and accuracy of
|
||||
#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
||||
#' studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
||||
#' remains largely on par under most others.
|
||||
#'
|
||||
#' TODO: And some details
|
||||
#' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
|
||||
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||
#' \eqn{Y} and \eqn{X} is modelled by
|
||||
#' \deqn{Y = g(B'X) + \epsilon}
|
||||
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||
#' is an unknown, continuous non-constant function,
|
||||
#' and \eqn{B = (b_1, ..., b_k)} is
|
||||
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
|
||||
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||
#'
|
||||
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
|
||||
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
|
||||
#' Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @importFrom stats model.frame
|
||||
#' @docType package
|
||||
#' @author Loki
|
||||
#' @useDynLib CVE, .registration = TRUE
|
||||
"_PACKAGE"
|
||||
|
||||
#' Implementation of the CVE method.
|
||||
#' Conditional Variance Estimator (CVE).
|
||||
#'
|
||||
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
#' reduction (SDR) method assuming a model
|
||||
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
#' where B'X is a lower dimensional projection of the predictors.
|
||||
#' TODO: reuse of package description and details!!!!
|
||||
#'
|
||||
#' @param formula Formel for the regression model defining `X`, `Y`.
|
||||
#' See: \code{\link{formula}}.
|
||||
#' @param data data.frame holding data for formula.
|
||||
#' @param method The different only differe in the used optimization.
|
||||
#' All of them are Gradient based optimization on a Stiefel manifold.
|
||||
#' @param formula an object of class \code{"formula"} which is a symbolic
|
||||
#' description of the model to be fitted.
|
||||
#' @param data an optional data frame, containing the data for the formula if
|
||||
#' supplied.
|
||||
#' @param method specifies the CVE method variation as one of
|
||||
#' \itemize{
|
||||
#' \item "simple" Simple reduction of stepsize.
|
||||
#' \item "sgd" stocastic gradient decent.
|
||||
#' \item TODO: further
|
||||
#' \item "simple" exact implementation as describet in the paper listed
|
||||
#' below.
|
||||
#' \item "weighted" variation with addaptive weighting of slices.
|
||||
#' }
|
||||
#' @param ... Further parameters depending on the used method.
|
||||
#' @param ... Parameters passed on to \code{cve.call}.
|
||||
#' @examples
|
||||
#' library(CVE)
|
||||
#'
|
||||
#' # sample dataset
|
||||
#' ds <- dataset("M5")
|
||||
#' # create dataset
|
||||
#' n <- 200
|
||||
#' p <- 12
|
||||
#' X <- matrix(rnorm(n * p), n, p)
|
||||
#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
|
||||
#' Y <- X %*% B
|
||||
#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
|
||||
#'
|
||||
#' # call ´cve´ with default method (aka "simple")
|
||||
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
#' # plot optimization history (loss via iteration)
|
||||
#' plot(dr.simple, main = "CVE M5 simple")
|
||||
#' # Call the CVE method.
|
||||
#' dr <- cve(Y ~ X)
|
||||
#' round(dr[[2]]$B, 1)
|
||||
#'
|
||||
#' # call ´cve´ with method "linesearch" using ´data.frame´ as data.
|
||||
#' data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
|
||||
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
#' plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
#'
|
||||
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
#'
|
||||
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
|
||||
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
#' @import stats
|
||||
#' @importFrom stats model.frame
|
||||
#' @seealso For a detailed description of the formula parameter see
|
||||
#' [\code{\link{formula}}].
|
||||
#' @export
|
||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||
# check for type of `data` if supplied and set default
|
||||
|
@ -76,12 +87,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
|||
return(dr)
|
||||
}
|
||||
|
||||
#' @param nObs as describet in the Paper.
|
||||
#' @param X Data
|
||||
#' @param Y Responces
|
||||
#' @param nObs Like in the paper.
|
||||
#' @param k guess for SDR dimension.
|
||||
#' @param ... Method specific parameters.
|
||||
#' @param nObs parameter for choosing bandwidth \code{h} using
|
||||
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
|
||||
#' @param X data matrix with samples in its rows.
|
||||
#' @param Y Responces (1 dimensional).
|
||||
#' @param k Dimension of lower dimensional projection, if given only the
|
||||
#' specified dimension is estimated.
|
||||
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
|
||||
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
|
||||
#' @param tau Initial step-size.
|
||||
#' @param tol Tolerance for break condition.
|
||||
#' @param epochs maximum number of optimization steps.
|
||||
#' @param attempts number of arbitrary different starting points.
|
||||
#' @param logger a logger function (only for addvanced user).
|
||||
#' @rdname cve
|
||||
#' @export
|
||||
cve.call <- function(X, Y, method = "simple",
|
||||
|
@ -122,9 +140,16 @@ cve.call <- function(X, Y, method = "simple",
|
|||
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
|
||||
}
|
||||
|
||||
if (is.function(h)) {
|
||||
if (missing(h) || is.null(h)) {
|
||||
estimate <- TRUE
|
||||
} else if (is.function(h)) {
|
||||
estimate <- TRUE
|
||||
estimate.bandwidth <- h
|
||||
h <- NULL
|
||||
} else if (is.numeric(h) && h > 0.0) {
|
||||
estimate <- FALSE
|
||||
h <- as.double(h)
|
||||
} else {
|
||||
stop("Bandwidth 'h' must be positive numeric.")
|
||||
}
|
||||
|
||||
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
|
||||
|
@ -167,12 +192,8 @@ cve.call <- function(X, Y, method = "simple",
|
|||
dr <- list()
|
||||
for (k in min.dim:max.dim) {
|
||||
|
||||
if (missing(h) || is.null(h)) {
|
||||
if (estimate) {
|
||||
h <- estimate.bandwidth(X, k, nObs)
|
||||
} else if (is.numeric(h) && h > 0.0) {
|
||||
h <- as.double(h)
|
||||
} else {
|
||||
stop("Bandwidth 'h' must be positive numeric.")
|
||||
}
|
||||
|
||||
if (method == 'simple') {
|
||||
|
@ -206,32 +227,22 @@ cve.call <- function(X, Y, method = "simple",
|
|||
}
|
||||
|
||||
# augment result information
|
||||
dr$X <- X
|
||||
dr$Y <- Y
|
||||
dr$method <- method
|
||||
dr$call <- call
|
||||
class(dr) <- "cve"
|
||||
return(dr)
|
||||
}
|
||||
|
||||
# TODO: write summary
|
||||
# summary.cve <- function() {
|
||||
# # code #
|
||||
# }
|
||||
|
||||
#' Ploting helper for objects of class \code{cve}.
|
||||
#' Loss distribution kink plot.
|
||||
#'
|
||||
#' @param x Object of class \code{cve} (result of [cve()]).
|
||||
#' @param content Specifies what to plot:
|
||||
#' \itemize{
|
||||
#' \item "history" Plots the loss history from stiefel optimization
|
||||
#' (default).
|
||||
#' \item ... TODO: add (if there are any)
|
||||
#' }
|
||||
#' @param ... Pass through parameters to [plot()] and [lines()]
|
||||
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
|
||||
#' @param ... Pass through parameters to [\code{\link{plot}}] and
|
||||
#' [\code{\link{lines}}]
|
||||
#'
|
||||
#' @usage ## S3 method for class 'cve'
|
||||
#' plot(x, content = "history", ...)
|
||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||
#' as well as \code{\link{plot}} for standard plot utility.
|
||||
#' as well as \code{\link{plot}}, the standard plot utility.
|
||||
#' @importFrom graphics plot lines points
|
||||
#' @method plot cve
|
||||
#' @export
|
||||
|
@ -244,12 +255,12 @@ plot.cve <- function(x, ...) {
|
|||
L <- c(L, dr.k$L)
|
||||
}
|
||||
}
|
||||
L <- matrix(L, ncol = length(k))
|
||||
boxplot(L, main = "Loss ...",
|
||||
xlab = "SDR dimension k",
|
||||
ylab = expression(L(V, X[i])),
|
||||
L <- matrix(L, ncol = length(k)) / var(x$Y)
|
||||
boxplot(L, main = "Kink plot",
|
||||
xlab = "SDR dimension",
|
||||
ylab = "Sample loss distribution",
|
||||
names = k)
|
||||
|
||||
# lines(apply(L, 2, mean)) # TODO: ?
|
||||
}
|
||||
|
||||
#' Prints a summary of a \code{cve} result.
|
||||
|
|
|
@ -23,13 +23,15 @@
|
|||
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
#' g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
#' @section M2:
|
||||
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
#' default of \eqn{n = 200} data points.
|
||||
#' The link function \eqn{g} is given as
|
||||
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||
#' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
#' @section M3:
|
||||
#' TODO:
|
||||
#' \deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
#' @section M4:
|
||||
#' TODO:
|
||||
#' @section M5:
|
||||
|
|
|
@ -1,17 +1,16 @@
|
|||
#' Estimated bandwidth for CVE.
|
||||
#' Bandwidth estimation for CVE.
|
||||
#'
|
||||
#' Estimates a propper bandwidth \code{h} according
|
||||
#' \deqn{%
|
||||
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||
#' h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
#' h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(\Sigma) / p)}
|
||||
#' with \eqn{n} the number of sample and \eqn{p} its dimension
|
||||
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
|
||||
#' which is given by the standard maximum likelihood estimate.
|
||||
#'
|
||||
#' @param X data matrix of dimension (n x p) with n data points X_i of dimension
|
||||
#' q. Therefor each row represents a datapoint of dimension p.
|
||||
#' @param k Guess for rank(B).
|
||||
#' @param nObs Ether numeric of a function. If specified as numeric value
|
||||
#' its used in the computation of the bandwidth directly. If its a function
|
||||
#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
||||
#' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
||||
#' @param nObs Expected number of points in a slice, see paper.
|
||||
#' @param X data matrix with samples in its rows.
|
||||
#' @param k Dimension of lower dimensional projection.
|
||||
#'
|
||||
#' @seealso [\code{\link{qchisq}}]
|
||||
#' @export
|
||||
|
@ -19,7 +18,7 @@ estimate.bandwidth <- function(X, k, nObs) {
|
|||
n <- nrow(X)
|
||||
p <- ncol(X)
|
||||
|
||||
X_centered <- scale(X, center=TRUE, scale=FALSE)
|
||||
X_centered <- scale(X, center = TRUE, scale = FALSE)
|
||||
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
||||
|
||||
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
||||
|
|
|
@ -4,17 +4,37 @@
|
|||
\name{CVE-package}
|
||||
\alias{CVE}
|
||||
\alias{CVE-package}
|
||||
\title{Conditional Variance Estimator (CVE)}
|
||||
\title{Conditional Variance Estimator (CVE) Package.}
|
||||
\description{
|
||||
Conditional Variance Estimator for Sufficient Dimension
|
||||
Reduction
|
||||
Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
||||
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
||||
where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
|
||||
similarly to its main competitor, the mean average variance estimation
|
||||
(MAVE), is not based on inverse regression, and does not require the
|
||||
restrictive linearity and constant variance conditions of moment based SDR
|
||||
methods. CVE is data-driven and applies to additive error regressions with
|
||||
continuous predictors and link function. The effectiveness and accuracy of
|
||||
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
||||
studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
||||
remains largely on par under most others.
|
||||
}
|
||||
\details{
|
||||
TODO: And some details
|
||||
Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
|
||||
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||
\eqn{Y} and \eqn{X} is modelled by
|
||||
\deqn{Y = g(B'X) + \epsilon}
|
||||
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||
is an unknown, continuous non-constant function,
|
||||
and \eqn{B = (b_1, ..., b_k)} is
|
||||
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
|
||||
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||
}
|
||||
\references{
|
||||
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
|
||||
Sufficient Dimension Reduction, 2019
|
||||
}
|
||||
\author{
|
||||
Loki
|
||||
Daniel Kapla, Lukas Fertl, Bura Efstathia
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
\name{cve}
|
||||
\alias{cve}
|
||||
\alias{cve.call}
|
||||
\title{Implementation of the CVE method.}
|
||||
\title{Conditional Variance Estimator (CVE).}
|
||||
\usage{
|
||||
cve(formula, data, method = "simple", max.dim = 10L, ...)
|
||||
|
||||
|
@ -12,61 +12,65 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
|||
epochs = 50L, attempts = 10L, logger = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{formula}{Formel for the regression model defining `X`, `Y`.
|
||||
See: \code{\link{formula}}.}
|
||||
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
||||
description of the model to be fitted.}
|
||||
|
||||
\item{data}{data.frame holding data for formula.}
|
||||
\item{data}{an optional data frame, containing the data for the formula if
|
||||
supplied.}
|
||||
|
||||
\item{method}{The different only differe in the used optimization.
|
||||
All of them are Gradient based optimization on a Stiefel manifold.
|
||||
\item{method}{specifies the CVE method variation as one of
|
||||
\itemize{
|
||||
\item "simple" Simple reduction of stepsize.
|
||||
\item "sgd" stocastic gradient decent.
|
||||
\item TODO: further
|
||||
\item "simple" exact implementation as describet in the paper listed
|
||||
below.
|
||||
\item "weighted" variation with addaptive weighting of slices.
|
||||
}}
|
||||
|
||||
\item{...}{Further parameters depending on the used method.}
|
||||
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||
|
||||
\item{X}{Data}
|
||||
\item{...}{Parameters passed on to \code{cve.call}.}
|
||||
|
||||
\item{Y}{Responces}
|
||||
\item{X}{data matrix with samples in its rows.}
|
||||
|
||||
\item{nObs}{as describet in the Paper.}
|
||||
\item{Y}{Responces (1 dimensional).}
|
||||
|
||||
\item{k}{guess for SDR dimension.}
|
||||
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
||||
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
||||
|
||||
\item{nObs}{Like in the paper.}
|
||||
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||
|
||||
\item{...}{Method specific parameters.}
|
||||
\item{k}{Dimension of lower dimensional projection, if given only the
|
||||
specified dimension is estimated.}
|
||||
|
||||
\item{tau}{Initial step-size.}
|
||||
|
||||
\item{tol}{Tolerance for break condition.}
|
||||
|
||||
\item{epochs}{maximum number of optimization steps.}
|
||||
|
||||
\item{attempts}{number of arbitrary different starting points.}
|
||||
|
||||
\item{logger}{a logger function (only for addvanced user).}
|
||||
}
|
||||
\description{
|
||||
Conditional Variance Estimator (CVE) is a novel sufficient dimension
|
||||
reduction (SDR) method assuming a model
|
||||
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||
where B'X is a lower dimensional projection of the predictors.
|
||||
TODO: reuse of package description and details!!!!
|
||||
}
|
||||
\examples{
|
||||
library(CVE)
|
||||
|
||||
# sample dataset
|
||||
ds <- dataset("M5")
|
||||
# create dataset
|
||||
n <- 200
|
||||
p <- 12
|
||||
X <- matrix(rnorm(n * p), n, p)
|
||||
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
|
||||
Y <- X \%*\% B
|
||||
Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
|
||||
|
||||
# call ´cve´ with default method (aka "simple")
|
||||
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B))
|
||||
# plot optimization history (loss via iteration)
|
||||
plot(dr.simple, main = "CVE M5 simple")
|
||||
# Call the CVE method.
|
||||
dr <- cve(Y ~ X)
|
||||
round(dr[[2]]$B, 1)
|
||||
|
||||
# call ´cve´ with method "linesearch" using ´data.frame´ as data.
|
||||
data <- data.frame(Y = ds$Y, X = ds$X)
|
||||
# Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
|
||||
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
|
||||
plot(dr.linesearch, main = "CVE M5 linesearch")
|
||||
|
||||
}
|
||||
\references{
|
||||
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{formula}}. For a complete parameters list (dependent on
|
||||
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
|
||||
For a detailed description of the formula parameter see
|
||||
[\code{\link{formula}}].
|
||||
}
|
||||
|
|
|
@ -37,19 +37,21 @@ The general model is given by:
|
|||
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
|
||||
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M2}{
|
||||
|
||||
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
|
||||
default of \eqn{n = 200} data points.
|
||||
The link function \eqn{g} is given as
|
||||
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||
\deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M3}{
|
||||
|
||||
TODO:
|
||||
\deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
|
||||
}
|
||||
|
||||
\section{M4}{
|
||||
|
|
|
@ -2,26 +2,25 @@
|
|||
% Please edit documentation in R/estimateBandwidth.R
|
||||
\name{estimate.bandwidth}
|
||||
\alias{estimate.bandwidth}
|
||||
\title{Estimated bandwidth for CVE.}
|
||||
\title{Bandwidth estimation for CVE.}
|
||||
\usage{
|
||||
estimate.bandwidth(X, k, nObs)
|
||||
}
|
||||
\arguments{
|
||||
\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension
|
||||
q. Therefor each row represents a datapoint of dimension p.}
|
||||
\item{X}{data matrix with samples in its rows.}
|
||||
|
||||
\item{k}{Guess for rank(B).}
|
||||
\item{k}{Dimension of lower dimensional projection.}
|
||||
|
||||
\item{nObs}{Ether numeric of a function. If specified as numeric value
|
||||
its used in the computation of the bandwidth directly. If its a function
|
||||
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
||||
supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
|
||||
\item{nObs}{Expected number of points in a slice, see paper.}
|
||||
}
|
||||
\description{
|
||||
Estimates a propper bandwidth \code{h} according
|
||||
\deqn{%
|
||||
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||
h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
|
||||
h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(\Sigma) / p)}
|
||||
with \eqn{n} the number of sample and \eqn{p} its dimension
|
||||
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
|
||||
which is given by the standard maximum likelihood estimate.
|
||||
}
|
||||
\seealso{
|
||||
[\code{\link{qchisq}}]
|
||||
|
|
|
@ -2,27 +2,20 @@
|
|||
% Please edit documentation in R/CVE.R
|
||||
\name{plot.cve}
|
||||
\alias{plot.cve}
|
||||
\title{Ploting helper for objects of class \code{cve}.}
|
||||
\title{Creates a kink plot of the sample loss distribution over SDR dimensions.}
|
||||
\usage{
|
||||
## S3 method for class 'cve'
|
||||
plot(x, content = "history", ...)
|
||||
\method{plot}{cve}(x, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object of class \code{cve} (result of [cve()]).}
|
||||
\item{x}{Object of class \code{"cve"} (result of [\code{\link{cve}}]).}
|
||||
|
||||
\item{...}{Pass through parameters to [plot()] and [lines()]}
|
||||
|
||||
\item{content}{Specifies what to plot:
|
||||
\itemize{
|
||||
\item "history" Plots the loss history from stiefel optimization
|
||||
(default).
|
||||
\item ... TODO: add (if there are any)
|
||||
}}
|
||||
\item{...}{Pass through parameters to [\code{\link{plot}}] and
|
||||
[\code{\link{lines}}]}
|
||||
}
|
||||
\description{
|
||||
Ploting helper for objects of class \code{cve}.
|
||||
Creates a kink plot of the sample loss distribution over SDR dimensions.
|
||||
}
|
||||
\seealso{
|
||||
see \code{\link{par}} for graphical parameters to pass through
|
||||
as well as \code{\link{plot}} for standard plot utility.
|
||||
as well as \code{\link{plot}}, the standard plot utility.
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[T1]{fontenc}
|
||||
\usepackage{amsmath, amsfonts, amssymb, amsthm}
|
||||
\usepackage{tikz}
|
||||
\usepackage{fullpage}
|
||||
|
||||
\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
|
||||
|
@ -43,4 +44,18 @@ The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given
|
|||
(\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
|
||||
\end{displaymath}
|
||||
|
||||
\begin{center}
|
||||
\begin{tikzpicture}[xscale=1,yscale=-1]
|
||||
% \foreach \i in {0,...,5} {
|
||||
% \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$};
|
||||
% }
|
||||
\foreach \i in {1,...,4} {
|
||||
\foreach \j in {1,...,\i} {
|
||||
\node at (\j, \i) {$\i,\j$};
|
||||
}
|
||||
}
|
||||
|
||||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
\end{document}
|
114
benchmark.R
114
benchmark.R
|
@ -13,7 +13,29 @@ rowSums.c <- function(M) {
|
|||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSums', PACKAGE = 'wip', M)
|
||||
.Call('R_rowSums', PACKAGE = 'benchmark', M)
|
||||
}
|
||||
rowSumsV2.c <- function(M) {
|
||||
stopifnot(
|
||||
is.matrix(M),
|
||||
is.numeric(M)
|
||||
)
|
||||
if (!is.double(M)) {
|
||||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
|
||||
}
|
||||
rowSumsV3.c <- function(M) {
|
||||
stopifnot(
|
||||
is.matrix(M),
|
||||
is.numeric(M)
|
||||
)
|
||||
if (!is.double(M)) {
|
||||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
|
||||
}
|
||||
colSums.c <- function(M) {
|
||||
stopifnot(
|
||||
|
@ -24,7 +46,7 @@ colSums.c <- function(M) {
|
|||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_colSums', PACKAGE = 'wip', M)
|
||||
.Call('R_colSums', PACKAGE = 'benchmark', M)
|
||||
}
|
||||
rowSquareSums.c <- function(M) {
|
||||
stopifnot(
|
||||
|
@ -35,7 +57,7 @@ rowSquareSums.c <- function(M) {
|
|||
M <- matrix(as.double(M), nrow = nrow(M))
|
||||
}
|
||||
|
||||
.Call('R_rowSquareSums', PACKAGE = 'wip', M)
|
||||
.Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
|
||||
}
|
||||
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
|
||||
stopifnot(
|
||||
|
@ -47,7 +69,7 @@ rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
|
|||
if (!is.double(vecA)) {
|
||||
vecA <- as.double(vecA)
|
||||
}
|
||||
.Call('R_rowSumsSymVec', PACKAGE = 'wip',
|
||||
.Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
|
||||
vecA, as.integer(nrow), as.double(diag))
|
||||
}
|
||||
rowSweep.c <- function(A, v, op = '-') {
|
||||
|
@ -66,7 +88,7 @@ rowSweep.c <- function(A, v, op = '-') {
|
|||
op %in% c('+', '-', '*', '/')
|
||||
)
|
||||
|
||||
.Call('R_rowSweep', PACKAGE = 'wip', A, v, op)
|
||||
.Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
|
||||
}
|
||||
|
||||
## row*, col* tests ------------------------------------------------------------
|
||||
|
@ -74,7 +96,15 @@ n <- 3000
|
|||
M <- matrix(runif(n * 12), n, 12)
|
||||
stopifnot(
|
||||
all.equal(rowSums(M^2), rowSums.c(M^2)),
|
||||
all.equal(colSums(M), colSums.c(M))
|
||||
all.equal(colSums(M), colSums.c(M)),
|
||||
all.equal(rowSums(M), rowSumsV2.c(M)),
|
||||
all.equal(rowSums(M), rowSumsV3.c(M))
|
||||
)
|
||||
microbenchmark(
|
||||
rowSums = rowSums(M),
|
||||
rowSums.c = rowSums.c(M),
|
||||
rowSumsV2.c = rowSumsV2.c(M),
|
||||
rowSumsV3.c = rowSumsV3.c(M)
|
||||
)
|
||||
microbenchmark(
|
||||
rowSums = rowSums(M^2),
|
||||
|
@ -126,7 +156,23 @@ transpose.c <- function(A) {
|
|||
A <- matrix(as.double(A), nrow(A), ncol(A))
|
||||
}
|
||||
|
||||
.Call('R_transpose', PACKAGE = 'wip', A)
|
||||
.Call('R_transpose', PACKAGE = 'benchmark', A)
|
||||
}
|
||||
|
||||
sympMV.c <- function(vecA, x) {
|
||||
stopifnot(
|
||||
is.vector(vecA), is.numeric(vecA),
|
||||
is.vector(x), is.numeric(x),
|
||||
length(x) * (length(x) + 1) == 2 * length(vecA)
|
||||
)
|
||||
if (!is.double(vecA)) {
|
||||
vecA <- as.double(vecA)
|
||||
}
|
||||
if (!is.double(x)) {
|
||||
x <- as.double(x)
|
||||
}
|
||||
|
||||
.Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
|
||||
}
|
||||
|
||||
matrixprod.c <- function(A, B) {
|
||||
|
@ -142,7 +188,7 @@ matrixprod.c <- function(A, B) {
|
|||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_matrixprod', PACKAGE = 'wip', A, B)
|
||||
.Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
|
||||
}
|
||||
crossprod.c <- function(A, B) {
|
||||
stopifnot(
|
||||
|
@ -157,7 +203,7 @@ crossprod.c <- function(A, B) {
|
|||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_crossprod', PACKAGE = 'wip', A, B)
|
||||
.Call('R_crossprod', PACKAGE = 'benchmark', A, B)
|
||||
}
|
||||
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
|
||||
stopifnot(
|
||||
|
@ -174,7 +220,7 @@ skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
|
|||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_skewSymRank2k', PACKAGE = 'wip', A, B,
|
||||
.Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
|
||||
as.double(alpha), as.double(beta))
|
||||
}
|
||||
|
||||
|
@ -193,6 +239,18 @@ microbenchmark(
|
|||
transpose.c(A)
|
||||
)
|
||||
|
||||
Sym <- tcrossprod(runif(n))
|
||||
vecSym <- Sym[lower.tri(Sym, diag = T)]
|
||||
x <- runif(n)
|
||||
stopifnot(all.equal(
|
||||
as.double(Sym %*% x),
|
||||
sympMV.c(vecSym, x)
|
||||
))
|
||||
microbenchmark(
|
||||
Sym %*% x,
|
||||
sympMV.c = sympMV.c(vecSym, x)
|
||||
)
|
||||
|
||||
stopifnot(
|
||||
all.equal(A %*% B, matrixprod.c(A, B))
|
||||
)
|
||||
|
@ -232,7 +290,7 @@ nullProj.c <- function(B) {
|
|||
B <- matrix(as.double(B), nrow = nrow(B))
|
||||
}
|
||||
|
||||
.Call('R_nullProj', PACKAGE = 'wip', B)
|
||||
.Call('R_nullProj', PACKAGE = 'benchmark', B)
|
||||
}
|
||||
## Orthogonal projection onto null space tests --------------------------------
|
||||
p <- 12
|
||||
|
@ -252,7 +310,7 @@ microbenchmark(
|
|||
|
||||
# ## WIP for gradient. ----------------------------------------------------------
|
||||
|
||||
gradient.c <- function(X, X_diff, Y, V, h) {
|
||||
grad.c <- function(X, X_diff, Y, V, h) {
|
||||
stopifnot(
|
||||
is.matrix(X), is.double(X),
|
||||
is.matrix(X_diff), is.double(X_diff),
|
||||
|
@ -264,7 +322,7 @@ gradient.c <- function(X, X_diff, Y, V, h) {
|
|||
is.vector(h), is.numeric(h), length(h) == 1
|
||||
)
|
||||
|
||||
.Call('R_gradient', PACKAGE = 'wip',
|
||||
.Call('R_grad', PACKAGE = 'benchmark',
|
||||
X, X_diff, as.double(Y), V, as.double(h));
|
||||
}
|
||||
|
||||
|
@ -294,25 +352,23 @@ grad <- function(X, Y, V, h, persistent = TRUE) {
|
|||
# Vectorized distance matrix `D`.
|
||||
vecD <- rowSums((X_diff %*% Q)^2)
|
||||
|
||||
|
||||
# Weight matrix `W` (dnorm ... gaussean density function)
|
||||
W <- matrix(1, n, n) # `exp(0) == 1`
|
||||
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
||||
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
|
||||
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
|
||||
# Create Kernel matrix (aka. apply kernel to distances)
|
||||
K <- matrix(1, n, n) # `exp(0) == 1`
|
||||
K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
|
||||
K[upper] <- t(K)[upper] # Mirror lower tri. to upper
|
||||
|
||||
# Weighted `Y` momentums
|
||||
y1 <- Y %*% W # Result is 1D -> transposition irrelevant
|
||||
y2 <- Y^2 %*% W
|
||||
|
||||
colSumsK <- colSums(K)
|
||||
y1 <- (K %*% Y) / colSumsK
|
||||
y2 <- (K %*% Y^2) / colSumsK
|
||||
# Per example loss `L(V, X_i)`
|
||||
L <- y2 - y1^2
|
||||
|
||||
# Vectorized Weights with forced symmetry
|
||||
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower]
|
||||
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper])
|
||||
# Compute scaling of `X` row differences
|
||||
vecS <- vecS * vecD
|
||||
# Compute scaling vector `vecS` for `X_diff`.
|
||||
tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
|
||||
tmp <- as.vector(L) - tmp
|
||||
tmp <- tmp * K / colSumsK
|
||||
vecS <- (tmp + t(tmp))[lower] * vecD
|
||||
|
||||
G <- crossprod(X_diff, X_diff * vecS) %*% V
|
||||
G <- (-2 / (n * h^2)) * G
|
||||
|
@ -340,9 +396,9 @@ X_diff <- X[i, , drop = F] - X[j, , drop = F]
|
|||
|
||||
stopifnot(all.equal(
|
||||
grad(X, Y, V, h),
|
||||
gradient.c(X, X_diff, Y, V, h)
|
||||
grad.c(X, X_diff, Y, V, h)
|
||||
))
|
||||
microbenchmark(
|
||||
grad = grad(X, Y, V, h),
|
||||
gradient.c = gradient.c(X, X_diff, Y, V, h)
|
||||
grad.c = grad.c(X, X_diff, Y, V, h)
|
||||
)
|
||||
|
|
191
benchmark.c
191
benchmark.c
|
@ -6,9 +6,59 @@
|
|||
#include <R_ext/Error.h>
|
||||
// #include <Rmath.h>
|
||||
|
||||
#include "wip.h"
|
||||
#include "benchmark.h"
|
||||
|
||||
static inline void rowSums(const double *A,
|
||||
void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
for (i = 0; i < nrow; i += block_size_i) {
|
||||
// Reset `A` to new block beginning.
|
||||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
block_size_i = block_size;
|
||||
}
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j];
|
||||
sum[j + 1] = A[j + 1];
|
||||
sum[j + 2] = A[j + 2];
|
||||
sum[j + 3] = A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j];
|
||||
}
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j];
|
||||
sum[j + 1] += A[j + 1];
|
||||
sum[j + 2] += A[j + 2];
|
||||
sum[j + 3] += A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j];
|
||||
}
|
||||
}
|
||||
// Step one block forth.
|
||||
A_block += block_size_i;
|
||||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
void rowSumsV2(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
|
@ -45,33 +95,54 @@ static inline void rowSums(const double *A,
|
|||
sum += block_size_i;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void colSums(const double *A,
|
||||
void rowSumsV3(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int j;
|
||||
double *sum_end = sum + ncol;
|
||||
int i, onei = 1;
|
||||
double* ones = (double*)malloc(ncol * sizeof(double));
|
||||
const double one = 1.0;
|
||||
const double zero = 0.0;
|
||||
|
||||
memset(sum, 0, sizeof(double) * ncol);
|
||||
for (; sum < sum_end; ++sum) {
|
||||
for (j = 0; j < nrow; ++j) {
|
||||
*sum += A[j];
|
||||
for (i = 0; i < ncol; ++i) {
|
||||
ones[i] = 1.0;
|
||||
}
|
||||
|
||||
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
|
||||
free(ones);
|
||||
}
|
||||
|
||||
void colSums(const double *A, const int nrow, const int ncol,
|
||||
double *sums) {
|
||||
int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow.
|
||||
double sum;
|
||||
|
||||
for (j = 0; j < ncol; ++j) {
|
||||
sum = 0.0;
|
||||
for (i = 0; i < nrowb; i += 4) {
|
||||
sum += A[i]
|
||||
+ A[i + 1]
|
||||
+ A[i + 2]
|
||||
+ A[i + 3];
|
||||
}
|
||||
for (; i < nrow; ++i) {
|
||||
sum += A[i];
|
||||
}
|
||||
*(sums++) = sum;
|
||||
A += nrow;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rowSquareSums(const double *A,
|
||||
void rowSquareSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum) {
|
||||
int i, j, block_size, block_size_i;
|
||||
const double *A_block = A;
|
||||
const double *A_end = A + nrow * ncol;
|
||||
|
||||
if (nrow < CVE_MEM_CHUNK_SIZE) {
|
||||
block_size = nrow;
|
||||
if (nrow > 508) {
|
||||
block_size = 508;
|
||||
} else {
|
||||
block_size = CVE_MEM_CHUNK_SIZE;
|
||||
block_size = nrow;
|
||||
}
|
||||
|
||||
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
||||
|
@ -80,16 +151,29 @@ static inline void rowSquareSums(const double *A,
|
|||
A = A_block;
|
||||
// Take block size of eveything left and reduce to max size.
|
||||
block_size_i = nrow - i;
|
||||
if (block_size_i > block_size) {
|
||||
if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!!
|
||||
block_size_i = block_size;
|
||||
} /// ...
|
||||
// TODO:
|
||||
// Copy blocks first column.
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] = A[j] * A[j];
|
||||
sum[j + 1] = A[j + 1] * A[j + 1];
|
||||
sum[j + 2] = A[j + 2] * A[j + 2];
|
||||
sum[j + 3] = A[j + 3] * A[j + 3];
|
||||
}
|
||||
// Compute first blocks column,
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] = A[j] * A[j];
|
||||
}
|
||||
// and sum the following columns to the first one.
|
||||
// Sum following columns to the first one.
|
||||
for (A += nrow; A < A_end; A += nrow) {
|
||||
for (j = 0; j < block_size_i; ++j) {
|
||||
for (j = 0; j < block_size_i; j += 4) {
|
||||
sum[j] += A[j] * A[j];
|
||||
sum[j + 1] += A[j + 1] * A[j + 1];
|
||||
sum[j + 2] += A[j + 2] * A[j + 2];
|
||||
sum[j + 3] += A[j + 3] * A[j + 3];
|
||||
}
|
||||
for (; j < block_size_i; ++j) {
|
||||
sum[j] += A[j] * A[j];
|
||||
}
|
||||
}
|
||||
|
@ -99,7 +183,7 @@ static inline void rowSquareSums(const double *A,
|
|||
}
|
||||
}
|
||||
|
||||
static inline void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double diag,
|
||||
double *sum) {
|
||||
int i, j;
|
||||
|
@ -121,7 +205,7 @@ static inline void rowSumsSymVec(const double *Avec, const int nrow,
|
|||
}
|
||||
|
||||
/* C[, j] = A[, j] * v for each j = 1 to ncol */
|
||||
static void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C) {
|
||||
|
@ -241,7 +325,20 @@ void transpose(const double *A, const int nrow, const int ncol, double* T) {
|
|||
}
|
||||
}
|
||||
|
||||
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
// Symmetric Packed matrix vector product.
|
||||
// Computes
|
||||
// y <- Ax
|
||||
// where A is supplied as packed lower triangular part of a symmetric
|
||||
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
|
||||
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
|
||||
double one = 1.0;
|
||||
double zero = 0.0;
|
||||
int onei = 1;
|
||||
|
||||
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
|
||||
}
|
||||
|
||||
void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C) {
|
||||
const double one = 1.0;
|
||||
|
@ -254,7 +351,7 @@ static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
|
|||
&zero, C, &nrowA);
|
||||
}
|
||||
|
||||
static inline void crossprod(const double *A, const int nrowA, const int ncolA,
|
||||
void crossprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C) {
|
||||
const double one = 1.0;
|
||||
|
@ -267,7 +364,7 @@ static inline void crossprod(const double *A, const int nrowA, const int ncolA,
|
|||
&zero, C, &ncolA);
|
||||
}
|
||||
|
||||
static inline void nullProj(const double *B, const int nrowB, const int ncolB,
|
||||
void nullProj(const double *B, const int nrowB, const int ncolB,
|
||||
double *Q) {
|
||||
const double minusOne = -1.0;
|
||||
const double one = 1.0;
|
||||
|
@ -286,7 +383,7 @@ static inline void nullProj(const double *B, const int nrowB, const int ncolB,
|
|||
&one, Q, &nrowB);
|
||||
}
|
||||
|
||||
static inline void rangePairs(const int from, const int to, int *pairs) {
|
||||
void rangePairs(const int from, const int to, int *pairs) {
|
||||
int i, j;
|
||||
for (i = from; i < to; ++i) {
|
||||
for (j = i + 1; j < to; ++j) {
|
||||
|
@ -300,7 +397,7 @@ static inline void rangePairs(const int from, const int to, int *pairs) {
|
|||
// A dence skwe-symmetric rank 2 update.
|
||||
// Perform the update
|
||||
// C := alpha (A * B^T - B * A^T) + beta C
|
||||
static void skewSymRank2k(const int nrow, const int ncol,
|
||||
void skewSymRank2k(const int nrow, const int ncol,
|
||||
double alpha, const double *A, const double *B,
|
||||
double beta,
|
||||
double *C) {
|
||||
|
@ -308,7 +405,6 @@ static void skewSymRank2k(const int nrow, const int ncol,
|
|||
&nrow, &nrow, &ncol,
|
||||
&alpha, A, &nrow, B, &nrow,
|
||||
&beta, C, &nrow);
|
||||
|
||||
alpha *= -1.0;
|
||||
beta = 1.0;
|
||||
F77_NAME(dgemm)("N", "T",
|
||||
|
@ -316,32 +412,41 @@ static void skewSymRank2k(const int nrow, const int ncol,
|
|||
&alpha, B, &nrow, A, &nrow,
|
||||
&beta, C, &nrow);
|
||||
}
|
||||
// TODO: clarify
|
||||
static inline double gaussKernel(const double x, const double scale) {
|
||||
return exp(scale * x * x);
|
||||
}
|
||||
|
||||
// TODO: mutch potential for optimization!!!
|
||||
static inline void weightedYandLoss(const int n,
|
||||
static void weightedYandLoss(const int n,
|
||||
const double *Y,
|
||||
const double *vecD,
|
||||
const double *vecW,
|
||||
const double *colSums,
|
||||
double *y1, double *L, double *vecS,
|
||||
double *const loss) {
|
||||
double *loss) {
|
||||
int i, j, k, N = n * (n - 1) / 2;
|
||||
double l;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
y1[i] = Y[i] / colSums[i];
|
||||
L[i] = Y[i] * Y[i] / colSums[i];
|
||||
y1[i] = Y[i];
|
||||
L[i] = Y[i] * Y[i];
|
||||
}
|
||||
|
||||
for (k = j = 0; j < n; ++j) {
|
||||
for (i = j + 1; i < n; ++i, ++k) {
|
||||
y1[i] += Y[j] * vecW[k] / colSums[i];
|
||||
y1[j] += Y[i] * vecW[k] / colSums[j];
|
||||
L[i] += Y[j] * Y[j] * vecW[k] / colSums[i];
|
||||
L[j] += Y[i] * Y[i] * vecW[k] / colSums[j];
|
||||
y1[i] += Y[j] * vecW[k];
|
||||
y1[j] += Y[i] * vecW[k];
|
||||
L[i] += Y[j] * Y[j] * vecW[k];
|
||||
L[j] += Y[i] * Y[i] * vecW[k];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
y1[i] /= colSums[i];
|
||||
L[i] /= colSums[i];
|
||||
}
|
||||
|
||||
l = 0.0;
|
||||
for (i = 0; i < n; ++i) {
|
||||
l += (L[i] -= y1[i] * y1[i]);
|
||||
|
@ -362,17 +467,13 @@ static inline void weightedYandLoss(const int n,
|
|||
}
|
||||
}
|
||||
|
||||
inline double gaussKernel(const double x, const double scale) {
|
||||
return exp(scale * x * x);
|
||||
}
|
||||
|
||||
static void gradient(const int n, const int p, const int q,
|
||||
void grad(const int n, const int p, const int q,
|
||||
const double *X,
|
||||
const double *X_diff,
|
||||
const double *Y,
|
||||
const double *V,
|
||||
const double h,
|
||||
double *G, double *const loss) {
|
||||
double *G, double *loss) {
|
||||
// Number of X_i to X_j not trivial pairs.
|
||||
int i, N = (n * (n - 1)) / 2;
|
||||
double scale = -0.5 / h;
|
||||
|
@ -398,12 +499,12 @@ static void gradient(const int n, const int p, const int q,
|
|||
rowSquareSums(X_proj, N, p, vecD);
|
||||
|
||||
// Apply kernel to distence vector for weights computation.
|
||||
double *vecW = X_proj; // reuse memory area, no longer needed.
|
||||
double *vecK = X_proj; // reuse memory area, no longer needed.
|
||||
for (i = 0; i < N; ++i) {
|
||||
vecW[i] = gaussKernel(vecD[i], scale);
|
||||
vecK[i] = gaussKernel(vecD[i], scale);
|
||||
}
|
||||
double *colSums = X_proj + N; // still allocated!
|
||||
rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
|
||||
rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym
|
||||
|
||||
// compute weighted responces of first end second momontum, aka y1, y2.
|
||||
double *y1 = X_proj + N + n;
|
||||
|
@ -411,7 +512,7 @@ static void gradient(const int n, const int p, const int q,
|
|||
// Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
|
||||
// used symultanious to X_proj in final gradient computation.
|
||||
double *vecS = (double*)malloc(N * sizeof(double));
|
||||
weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);
|
||||
weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);
|
||||
|
||||
// compute the gradient using X_proj for intermidiate scaled X_diff.
|
||||
rowSweep(X_diff, N, p, "*", vecS, X_proj);
|
||||
|
|
58
benchmark.h
58
benchmark.h
|
@ -6,7 +6,7 @@
|
|||
#define CVE_MEM_CHUNK_SMALL 1016
|
||||
#define CVE_MEM_CHUNK_SIZE 2032
|
||||
|
||||
static inline void rowSums(const double *A,
|
||||
void rowSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_rowSums(SEXP A) {
|
||||
|
@ -17,8 +17,30 @@ SEXP R_rowSums(SEXP A) {
|
|||
UNPROTECT(1);
|
||||
return sums;
|
||||
}
|
||||
void rowSumsV2(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_rowSumsV2(SEXP A) {
|
||||
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
|
||||
|
||||
static inline void colSums(const double *A,
|
||||
rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));
|
||||
|
||||
UNPROTECT(1);
|
||||
return sums;
|
||||
}
|
||||
void rowSumsV3(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_rowSumsV3(SEXP A) {
|
||||
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
|
||||
|
||||
rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));
|
||||
|
||||
UNPROTECT(1);
|
||||
return sums;
|
||||
}
|
||||
|
||||
void colSums(const double *A,
|
||||
const int nrow, const int ncol,
|
||||
double *sum);
|
||||
SEXP R_colSums(SEXP A) {
|
||||
|
@ -30,7 +52,7 @@ SEXP R_colSums(SEXP A) {
|
|||
return sums;
|
||||
}
|
||||
|
||||
static inline void rowSquareSums(const double*, const int, const int, double*);
|
||||
void rowSquareSums(const double*, const int, const int, double*);
|
||||
SEXP R_rowSquareSums(SEXP A) {
|
||||
SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));
|
||||
|
||||
|
@ -40,7 +62,7 @@ SEXP R_rowSquareSums(SEXP A) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
void rowSumsSymVec(const double *Avec, const int nrow,
|
||||
const double diag,
|
||||
double *sum);
|
||||
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
|
||||
|
@ -52,7 +74,7 @@ SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
|
|||
return sum;
|
||||
}
|
||||
|
||||
static void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
void rowSweep(const double *A, const int nrow, const int ncol,
|
||||
const char* op,
|
||||
const double *v, // vector of length nrow
|
||||
double *C);
|
||||
|
@ -77,7 +99,17 @@ SEXP R_transpose(SEXP A) {
|
|||
return T;
|
||||
}
|
||||
|
||||
static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
void sympMV(const double* vecA, const int nrow, const double* x, double* y);
|
||||
SEXP R_sympMV(SEXP vecA, SEXP x) {
|
||||
SEXP y = PROTECT(allocVector(REALSXP, length(x)));
|
||||
|
||||
sympMV(REAL(vecA), length(x), REAL(x), REAL(y));
|
||||
|
||||
UNPROTECT(1); /* y */
|
||||
return y;
|
||||
}
|
||||
|
||||
void matrixprod(const double *A, const int nrowA, const int ncolA,
|
||||
const double *B, const int nrowB, const int ncolB,
|
||||
double *C);
|
||||
SEXP R_matrixprod(SEXP A, SEXP B) {
|
||||
|
@ -91,7 +123,7 @@ SEXP R_matrixprod(SEXP A, SEXP B) {
|
|||
return C;
|
||||
}
|
||||
|
||||
static inline void crossprod(const double* A, const int nrowA, const int ncolA,
|
||||
void crossprod(const double* A, const int nrowA, const int ncolA,
|
||||
const double* B, const int ncolB, const int nrowB,
|
||||
double* C);
|
||||
SEXP R_crossprod(SEXP A, SEXP B) {
|
||||
|
@ -105,7 +137,7 @@ SEXP R_crossprod(SEXP A, SEXP B) {
|
|||
return C;
|
||||
}
|
||||
|
||||
static void skewSymRank2k(const int n, const int k,
|
||||
void skewSymRank2k(const int n, const int k,
|
||||
double alpha, const double *A, const double *B,
|
||||
double beta,
|
||||
double *C);
|
||||
|
@ -121,7 +153,7 @@ SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
|
|||
return C;
|
||||
}
|
||||
|
||||
static inline void nullProj(const double* B, const int nrowB, const int ncolB,
|
||||
void nullProj(const double* B, const int nrowB, const int ncolB,
|
||||
double* Q);
|
||||
SEXP R_nullProj(SEXP B) {
|
||||
SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));
|
||||
|
@ -132,7 +164,7 @@ SEXP R_nullProj(SEXP B) {
|
|||
return Q;
|
||||
}
|
||||
|
||||
static inline void rangePairs(const int from, const int to, int *pairs);
|
||||
void rangePairs(const int from, const int to, int *pairs);
|
||||
SEXP R_rangePairs(SEXP from, SEXP to) {
|
||||
int start = asInteger(from);
|
||||
int end = asInteger(to) + 1;
|
||||
|
@ -145,20 +177,20 @@ SEXP R_rangePairs(SEXP from, SEXP to) {
|
|||
return out;
|
||||
}
|
||||
|
||||
static void gradient(const int n, const int p, const int q,
|
||||
void grad(const int n, const int p, const int q,
|
||||
const double *X,
|
||||
const double *X_diff,
|
||||
const double *Y,
|
||||
const double *V,
|
||||
const double h,
|
||||
double *G, double *const loss);
|
||||
SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
|
||||
SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
|
||||
int N = (nrows(X) * (nrows(X) - 1)) / 2;
|
||||
|
||||
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
|
||||
SEXP loss = PROTECT(allocVector(REALSXP, 1));
|
||||
|
||||
gradient(nrows(X), ncols(X), ncols(V),
|
||||
grad(nrows(X), ncols(X), ncols(V),
|
||||
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
|
||||
REAL(G), REAL(loss));
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ subspace.dist <- function(B1, B2){
|
|||
}
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 20
|
||||
SIM.NR <- 50
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
|
@ -24,7 +24,7 @@ ATTEMPTS <- 10
|
|||
# set names of datasets
|
||||
dataset.names <- c("M1", "M2", "M3", "M4", "M5")
|
||||
# Set used CVE method
|
||||
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch")
|
||||
methods <- c("simple") # c("legacy", "simple", "linesearch", "sgd")
|
||||
|
||||
if ("legacy" %in% methods) {
|
||||
# Source legacy code (but only if needed)
|
||||
|
@ -43,7 +43,9 @@ log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
|
|||
file <- file.path('tmp', paste0('test', log.nr, '.log'))
|
||||
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
|
||||
# Open a new pdf device for plotting into (do not overwrite existing ones)
|
||||
pdf(file.path('tmp', paste0('test', log.nr, '.pdf')))
|
||||
path <- paste0('test', log.nr, '.pdf')
|
||||
pdf(file.path('tmp', path))
|
||||
cat('Plotting to file:', path, '\n')
|
||||
|
||||
# only for telling user (to stdout)
|
||||
count <- 0
|
||||
|
|
Loading…
Reference in New Issue