2
0
Fork 0

wip: benchmarking,

wip: doc
This commit is contained in:
Daniel Kapla 2019-10-18 09:06:36 +02:00
parent 50302ce18a
commit dea610bcb9
13 changed files with 545 additions and 309 deletions

View File

@ -1,59 +1,70 @@
#' Conditional Variance Estimator (CVE) #' Conditional Variance Estimator (CVE) Package.
#' #'
#' Conditional Variance Estimator for Sufficient Dimension #' Conditional Variance Estimation (CVE) is a novel sufficient dimension
#' Reduction #' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
#' where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
#' similarly to its main competitor, the mean average variance estimation
#' (MAVE), is not based on inverse regression, and does not require the
#' restrictive linearity and constant variance conditions of moment based SDR
#' methods. CVE is data-driven and applies to additive error regressions with
#' continuous predictors and link function. The effectiveness and accuracy of
#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
#' studies. CVE is shown to outperform MAVE in some model set-ups, while it
#' remains largely on par under most others.
#' #'
#' TODO: And some details #' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
#' \eqn{Y} and \eqn{X} is modelled by
#' \deqn{Y = g(B'X) + \epsilon}
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
#' #'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
#' Sufficient Dimension Reduction, 2019
#' #'
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 #' @importFrom stats model.frame
#'
#' @docType package #' @docType package
#' @author Loki
#' @useDynLib CVE, .registration = TRUE #' @useDynLib CVE, .registration = TRUE
"_PACKAGE" "_PACKAGE"
#' Implementation of the CVE method. #' Conditional Variance Estimator (CVE).
#' #'
#' Conditional Variance Estimator (CVE) is a novel sufficient dimension #' TODO: reuse of package description and details!!!!
#' reduction (SDR) method assuming a model
#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
#' where B'X is a lower dimensional projection of the predictors.
#' #'
#' @param formula Formel for the regression model defining `X`, `Y`. #' @param formula an object of class \code{"formula"} which is a symbolic
#' See: \code{\link{formula}}. #' description of the model to be fitted.
#' @param data data.frame holding data for formula. #' @param data an optional data frame, containing the data for the formula if
#' @param method The different only differe in the used optimization. #' supplied.
#' All of them are Gradient based optimization on a Stiefel manifold. #' @param method specifies the CVE method variation as one of
#' \itemize{ #' \itemize{
#' \item "simple" Simple reduction of stepsize. #' \item "simple" exact implementation as describet in the paper listed
#' \item "sgd" stocastic gradient decent. #' below.
#' \item TODO: further #' \item "weighted" variation with addaptive weighting of slices.
#' } #' }
#' @param ... Further parameters depending on the used method. #' @param ... Parameters passed on to \code{cve.call}.
#' @examples #' @examples
#' library(CVE) #' library(CVE)
#' #'
#' # sample dataset #' # create dataset
#' ds <- dataset("M5") #' n <- 200
#' p <- 12
#' X <- matrix(rnorm(n * p), n, p)
#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
#' Y <- X %*% B
#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
#' #'
#' # call ´cve´ with default method (aka "simple") #' # Call the CVE method.
#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) #' dr <- cve(Y ~ X)
#' # plot optimization history (loss via iteration) #' round(dr[[2]]$B, 1)
#' plot(dr.simple, main = "CVE M5 simple")
#' #'
#' # call ´cve´ with method "linesearch" using ´data.frame´ as data. #' @seealso For a detailed description of the formula parameter see
#' data <- data.frame(Y = ds$Y, X = ds$X) #' [\code{\link{formula}}].
#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
#' plot(dr.linesearch, main = "CVE M5 linesearch")
#'
#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
#'
#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on
#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}}
#' @import stats
#' @importFrom stats model.frame
#' @export #' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default # check for type of `data` if supplied and set default
@ -76,12 +87,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
return(dr) return(dr)
} }
#' @param nObs as describet in the Paper. #' @param nObs parameter for choosing bandwidth \code{h} using
#' @param X Data #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param Y Responces #' @param X data matrix with samples in its rows.
#' @param nObs Like in the paper. #' @param Y Responces (1 dimensional).
#' @param k guess for SDR dimension. #' @param k Dimension of lower dimensional projection, if given only the
#' @param ... Method specific parameters. #' specified dimension is estimated.
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param tau Initial step-size.
#' @param tol Tolerance for break condition.
#' @param epochs maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points.
#' @param logger a logger function (only for addvanced user).
#' @rdname cve #' @rdname cve
#' @export #' @export
cve.call <- function(X, Y, method = "simple", cve.call <- function(X, Y, method = "simple",
@ -122,9 +140,16 @@ cve.call <- function(X, Y, method = "simple",
stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.") stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
} }
if (is.function(h)) { if (missing(h) || is.null(h)) {
estimate <- TRUE
} else if (is.function(h)) {
estimate <- TRUE
estimate.bandwidth <- h estimate.bandwidth <- h
h <- NULL } else if (is.numeric(h) && h > 0.0) {
estimate <- FALSE
h <- as.double(h)
} else {
stop("Bandwidth 'h' must be positive numeric.")
} }
if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) { if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
@ -167,12 +192,8 @@ cve.call <- function(X, Y, method = "simple",
dr <- list() dr <- list()
for (k in min.dim:max.dim) { for (k in min.dim:max.dim) {
if (missing(h) || is.null(h)) { if (estimate) {
h <- estimate.bandwidth(X, k, nObs) h <- estimate.bandwidth(X, k, nObs)
} else if (is.numeric(h) && h > 0.0) {
h <- as.double(h)
} else {
stop("Bandwidth 'h' must be positive numeric.")
} }
if (method == 'simple') { if (method == 'simple') {
@ -206,32 +227,22 @@ cve.call <- function(X, Y, method = "simple",
} }
# augment result information # augment result information
dr$X <- X
dr$Y <- Y
dr$method <- method dr$method <- method
dr$call <- call dr$call <- call
class(dr) <- "cve" class(dr) <- "cve"
return(dr) return(dr)
} }
# TODO: write summary #' Loss distribution kink plot.
# summary.cve <- function() {
# # code #
# }
#' Ploting helper for objects of class \code{cve}.
#' #'
#' @param x Object of class \code{cve} (result of [cve()]). #' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param content Specifies what to plot: #' @param ... Pass through parameters to [\code{\link{plot}}] and
#' \itemize{ #' [\code{\link{lines}}]
#' \item "history" Plots the loss history from stiefel optimization
#' (default).
#' \item ... TODO: add (if there are any)
#' }
#' @param ... Pass through parameters to [plot()] and [lines()]
#' #'
#' @usage ## S3 method for class 'cve'
#' plot(x, content = "history", ...)
#' @seealso see \code{\link{par}} for graphical parameters to pass through #' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}} for standard plot utility. #' as well as \code{\link{plot}}, the standard plot utility.
#' @importFrom graphics plot lines points #' @importFrom graphics plot lines points
#' @method plot cve #' @method plot cve
#' @export #' @export
@ -244,12 +255,12 @@ plot.cve <- function(x, ...) {
L <- c(L, dr.k$L) L <- c(L, dr.k$L)
} }
} }
L <- matrix(L, ncol = length(k)) L <- matrix(L, ncol = length(k)) / var(x$Y)
boxplot(L, main = "Loss ...", boxplot(L, main = "Kink plot",
xlab = "SDR dimension k", xlab = "SDR dimension",
ylab = expression(L(V, X[i])), ylab = "Sample loss distribution",
names = k) names = k)
# lines(apply(L, 2, mean)) # TODO: ?
} }
#' Prints a summary of a \code{cve} result. #' Prints a summary of a \code{cve} result.

View File

@ -23,13 +23,15 @@
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace #' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. #' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as #' The link function \eqn{g} is given as
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} #' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
#' g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
#' @section M2: #' @section M2:
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. #' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
#' default of \eqn{n = 200} data points.
#' The link function \eqn{g} is given as #' The link function \eqn{g} is given as
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} #' \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
#' @section M3: #' @section M3:
#' TODO: #' \deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
#' @section M4: #' @section M4:
#' TODO: #' TODO:
#' @section M5: #' @section M5:

View File

@ -1,17 +1,16 @@
#' Estimated bandwidth for CVE. #' Bandwidth estimation for CVE.
#' #'
#' Estimates a propper bandwidth \code{h} according #' Estimates a propper bandwidth \code{h} according
#' \deqn{% #' \deqn{%
#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% #' h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} #' h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(\Sigma) / p)}
#' with \eqn{n} the number of sample and \eqn{p} its dimension
#' (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
#' which is given by the standard maximum likelihood estimate.
#' #'
#' @param X data matrix of dimension (n x p) with n data points X_i of dimension #' @param nObs Expected number of points in a slice, see paper.
#' q. Therefor each row represents a datapoint of dimension p. #' @param X data matrix with samples in its rows.
#' @param k Guess for rank(B). #' @param k Dimension of lower dimensional projection.
#' @param nObs Ether numeric of a function. If specified as numeric value
#' its used in the computation of the bandwidth directly. If its a function
#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
#' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
#' #'
#' @seealso [\code{\link{qchisq}}] #' @seealso [\code{\link{qchisq}}]
#' @export #' @export

View File

@ -4,17 +4,37 @@
\name{CVE-package} \name{CVE-package}
\alias{CVE} \alias{CVE}
\alias{CVE-package} \alias{CVE-package}
\title{Conditional Variance Estimator (CVE)} \title{Conditional Variance Estimator (CVE) Package.}
\description{ \description{
Conditional Variance Estimator for Sufficient Dimension Conditional Variance Estimation (CVE) is a novel sufficient dimension
Reduction reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
similarly to its main competitor, the mean average variance estimation
(MAVE), is not based on inverse regression, and does not require the
restrictive linearity and constant variance conditions of moment based SDR
methods. CVE is data-driven and applies to additive error regressions with
continuous predictors and link function. The effectiveness and accuracy of
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
studies. CVE is shown to outperform MAVE in some model set-ups, while it
remains largely on par under most others.
} }
\details{ \details{
TODO: And some details Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
\eqn{p}-dimensional covariate vector. We assume that the dependence of
\eqn{Y} and \eqn{X} is modelled by
\deqn{Y = g(B'X) + \epsilon}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
} }
\references{ \references{
Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
Sufficient Dimension Reduction, 2019
} }
\author{ \author{
Loki Daniel Kapla, Lukas Fertl, Bura Efstathia
} }

View File

@ -3,7 +3,7 @@
\name{cve} \name{cve}
\alias{cve} \alias{cve}
\alias{cve.call} \alias{cve.call}
\title{Implementation of the CVE method.} \title{Conditional Variance Estimator (CVE).}
\usage{ \usage{
cve(formula, data, method = "simple", max.dim = 10L, ...) cve(formula, data, method = "simple", max.dim = 10L, ...)
@ -12,61 +12,65 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
epochs = 50L, attempts = 10L, logger = NULL) epochs = 50L, attempts = 10L, logger = NULL)
} }
\arguments{ \arguments{
\item{formula}{Formel for the regression model defining `X`, `Y`. \item{formula}{an object of class \code{"formula"} which is a symbolic
See: \code{\link{formula}}.} description of the model to be fitted.}
\item{data}{data.frame holding data for formula.} \item{data}{an optional data frame, containing the data for the formula if
supplied.}
\item{method}{The different only differe in the used optimization. \item{method}{specifies the CVE method variation as one of
All of them are Gradient based optimization on a Stiefel manifold.
\itemize{ \itemize{
\item "simple" Simple reduction of stepsize. \item "simple" exact implementation as describet in the paper listed
\item "sgd" stocastic gradient decent. below.
\item TODO: further \item "weighted" variation with addaptive weighting of slices.
}} }}
\item{...}{Further parameters depending on the used method.} \item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{X}{Data} \item{...}{Parameters passed on to \code{cve.call}.}
\item{Y}{Responces} \item{X}{data matrix with samples in its rows.}
\item{nObs}{as describet in the Paper.} \item{Y}{Responces (1 dimensional).}
\item{k}{guess for SDR dimension.} \item{nObs}{parameter for choosing bandwidth \code{h} using
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
\item{nObs}{Like in the paper.} \item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{...}{Method specific parameters.} \item{k}{Dimension of lower dimensional projection, if given only the
specified dimension is estimated.}
\item{tau}{Initial step-size.}
\item{tol}{Tolerance for break condition.}
\item{epochs}{maximum number of optimization steps.}
\item{attempts}{number of arbitrary different starting points.}
\item{logger}{a logger function (only for addvanced user).}
} }
\description{ \description{
Conditional Variance Estimator (CVE) is a novel sufficient dimension TODO: reuse of package description and details!!!!
reduction (SDR) method assuming a model
\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
where B'X is a lower dimensional projection of the predictors.
} }
\examples{ \examples{
library(CVE) library(CVE)
# sample dataset # create dataset
ds <- dataset("M5") n <- 200
p <- 12
X <- matrix(rnorm(n * p), n, p)
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
Y <- X \%*\% B
Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
# call ´cve´ with default method (aka "simple") # Call the CVE method.
dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) dr <- cve(Y ~ X)
# plot optimization history (loss via iteration) round(dr[[2]]$B, 1)
plot(dr.simple, main = "CVE M5 simple")
# call ´cve´ with method "linesearch" using ´data.frame´ as data.
data <- data.frame(Y = ds$Y, X = ds$X)
# Note: ´Y, X´ are NOT defined, they are extracted from ´data´.
dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B))
plot(dr.linesearch, main = "CVE M5 linesearch")
}
\references{
Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
} }
\seealso{ \seealso{
\code{\link{formula}}. For a complete parameters list (dependent on For a detailed description of the formula parameter see
the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} [\code{\link{formula}}].
} }

View File

@ -37,19 +37,21 @@ The general model is given by:
The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
The link function \eqn{g} is given as The link function \eqn{g} is given as
\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + \epsilon / 2}{%
g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + epsilon / 2}
} }
\section{M2}{ \section{M2}{
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a
default of \eqn{n = 200} data points.
The link function \eqn{g} is given as The link function \eqn{g} is given as
\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} \deqn{g(x) = (b_1^T X) (b_2^T X)^2 + \epsilon / 2}
} }
\section{M3}{ \section{M3}{
TODO: \deqn{g(x) = cos(b_1^T X) + \epsilon / 2}
} }
\section{M4}{ \section{M4}{

View File

@ -2,26 +2,25 @@
% Please edit documentation in R/estimateBandwidth.R % Please edit documentation in R/estimateBandwidth.R
\name{estimate.bandwidth} \name{estimate.bandwidth}
\alias{estimate.bandwidth} \alias{estimate.bandwidth}
\title{Estimated bandwidth for CVE.} \title{Bandwidth estimation for CVE.}
\usage{ \usage{
estimate.bandwidth(X, k, nObs) estimate.bandwidth(X, k, nObs)
} }
\arguments{ \arguments{
\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension \item{X}{data matrix with samples in its rows.}
q. Therefor each row represents a datapoint of dimension p.}
\item{k}{Guess for rank(B).} \item{k}{Dimension of lower dimensional projection.}
\item{nObs}{Ether numeric of a function. If specified as numeric value \item{nObs}{Expected number of points in a slice, see paper.}
its used in the computation of the bandwidth directly. If its a function
`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
supplied at all is to use \code{nObs <- nrow(x)^0.5}.}
} }
\description{ \description{
Estimates a propper bandwidth \code{h} according Estimates a propper bandwidth \code{h} according
\deqn{% \deqn{%
h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% h = \chi_{k}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} h = qchisq( (nObs - 1)/(n - 1), k ) * (2 tr(\Sigma) / p)}
with \eqn{n} the number of sample and \eqn{p} its dimension
(\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma}
which is given by the standard maximum likelihood estimate.
} }
\seealso{ \seealso{
[\code{\link{qchisq}}] [\code{\link{qchisq}}]

View File

@ -2,27 +2,20 @@
% Please edit documentation in R/CVE.R % Please edit documentation in R/CVE.R
\name{plot.cve} \name{plot.cve}
\alias{plot.cve} \alias{plot.cve}
\title{Ploting helper for objects of class \code{cve}.} \title{Creates a kink plot of the sample loss distribution over SDR dimensions.}
\usage{ \usage{
## S3 method for class 'cve' \method{plot}{cve}(x, ...)
plot(x, content = "history", ...)
} }
\arguments{ \arguments{
\item{x}{Object of class \code{cve} (result of [cve()]).} \item{x}{Object of class \code{"cve"} (result of [\code{\link{cve}}]).}
\item{...}{Pass through parameters to [plot()] and [lines()]} \item{...}{Pass through parameters to [\code{\link{plot}}] and
[\code{\link{lines}}]}
\item{content}{Specifies what to plot:
\itemize{
\item "history" Plots the loss history from stiefel optimization
(default).
\item ... TODO: add (if there are any)
}}
} }
\description{ \description{
Ploting helper for objects of class \code{cve}. Creates a kink plot of the sample loss distribution over SDR dimensions.
} }
\seealso{ \seealso{
see \code{\link{par}} for graphical parameters to pass through see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}} for standard plot utility. as well as \code{\link{plot}}, the standard plot utility.
} }

View File

@ -3,6 +3,7 @@
\usepackage[utf8]{inputenc} \usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc} \usepackage[T1]{fontenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm} \usepackage{amsmath, amsfonts, amssymb, amsthm}
\usepackage{tikz}
\usepackage{fullpage} \usepackage{fullpage}
\newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}} \newcommand{\vecl}{\ensuremath{\operatorname{vec}_l}}
@ -43,4 +44,18 @@ The relation between the matrix indices $i,j$ and the $\vecl$ index $k$ is given
(\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n. (\vecl(S)_k = s_{i,j} \quad\Leftrightarrow\quad k = jn+i) : j \in \{0,...,n-2\} \land j < i < n.
\end{displaymath} \end{displaymath}
\begin{center}
\begin{tikzpicture}[xscale=1,yscale=-1]
% \foreach \i in {0,...,5} {
% \node at ({mod(\i, 3)}, {int(\i / 3)}) {$\i$};
% }
\foreach \i in {1,...,4} {
\foreach \j in {1,...,\i} {
\node at (\j, \i) {$\i,\j$};
}
}
\end{tikzpicture}
\end{center}
\end{document} \end{document}

View File

@ -13,7 +13,29 @@ rowSums.c <- function(M) {
M <- matrix(as.double(M), nrow = nrow(M)) M <- matrix(as.double(M), nrow = nrow(M))
} }
.Call('R_rowSums', PACKAGE = 'wip', M) .Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
stopifnot(
is.matrix(M),
is.numeric(M)
)
if (!is.double(M)) {
M <- matrix(as.double(M), nrow = nrow(M))
}
.Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
} }
colSums.c <- function(M) { colSums.c <- function(M) {
stopifnot( stopifnot(
@ -24,7 +46,7 @@ colSums.c <- function(M) {
M <- matrix(as.double(M), nrow = nrow(M)) M <- matrix(as.double(M), nrow = nrow(M))
} }
.Call('R_colSums', PACKAGE = 'wip', M) .Call('R_colSums', PACKAGE = 'benchmark', M)
} }
rowSquareSums.c <- function(M) { rowSquareSums.c <- function(M) {
stopifnot( stopifnot(
@ -35,7 +57,7 @@ rowSquareSums.c <- function(M) {
M <- matrix(as.double(M), nrow = nrow(M)) M <- matrix(as.double(M), nrow = nrow(M))
} }
.Call('R_rowSquareSums', PACKAGE = 'wip', M) .Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
} }
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) { rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
stopifnot( stopifnot(
@ -47,7 +69,7 @@ rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
if (!is.double(vecA)) { if (!is.double(vecA)) {
vecA <- as.double(vecA) vecA <- as.double(vecA)
} }
.Call('R_rowSumsSymVec', PACKAGE = 'wip', .Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
vecA, as.integer(nrow), as.double(diag)) vecA, as.integer(nrow), as.double(diag))
} }
rowSweep.c <- function(A, v, op = '-') { rowSweep.c <- function(A, v, op = '-') {
@ -66,7 +88,7 @@ rowSweep.c <- function(A, v, op = '-') {
op %in% c('+', '-', '*', '/') op %in% c('+', '-', '*', '/')
) )
.Call('R_rowSweep', PACKAGE = 'wip', A, v, op) .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
} }
## row*, col* tests ------------------------------------------------------------ ## row*, col* tests ------------------------------------------------------------
@ -74,7 +96,15 @@ n <- 3000
M <- matrix(runif(n * 12), n, 12) M <- matrix(runif(n * 12), n, 12)
stopifnot( stopifnot(
all.equal(rowSums(M^2), rowSums.c(M^2)), all.equal(rowSums(M^2), rowSums.c(M^2)),
all.equal(colSums(M), colSums.c(M)) all.equal(colSums(M), colSums.c(M)),
all.equal(rowSums(M), rowSumsV2.c(M)),
all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
rowSums = rowSums(M),
rowSums.c = rowSums.c(M),
rowSumsV2.c = rowSumsV2.c(M),
rowSumsV3.c = rowSumsV3.c(M)
) )
microbenchmark( microbenchmark(
rowSums = rowSums(M^2), rowSums = rowSums(M^2),
@ -126,7 +156,23 @@ transpose.c <- function(A) {
A <- matrix(as.double(A), nrow(A), ncol(A)) A <- matrix(as.double(A), nrow(A), ncol(A))
} }
.Call('R_transpose', PACKAGE = 'wip', A) .Call('R_transpose', PACKAGE = 'benchmark', A)
}
sympMV.c <- function(vecA, x) {
stopifnot(
is.vector(vecA), is.numeric(vecA),
is.vector(x), is.numeric(x),
length(x) * (length(x) + 1) == 2 * length(vecA)
)
if (!is.double(vecA)) {
vecA <- as.double(vecA)
}
if (!is.double(x)) {
x <- as.double(x)
}
.Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
} }
matrixprod.c <- function(A, B) { matrixprod.c <- function(A, B) {
@ -142,7 +188,7 @@ matrixprod.c <- function(A, B) {
B <- matrix(as.double(B), nrow = nrow(B)) B <- matrix(as.double(B), nrow = nrow(B))
} }
.Call('R_matrixprod', PACKAGE = 'wip', A, B) .Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
} }
crossprod.c <- function(A, B) { crossprod.c <- function(A, B) {
stopifnot( stopifnot(
@ -157,7 +203,7 @@ crossprod.c <- function(A, B) {
B <- matrix(as.double(B), nrow = nrow(B)) B <- matrix(as.double(B), nrow = nrow(B))
} }
.Call('R_crossprod', PACKAGE = 'wip', A, B) .Call('R_crossprod', PACKAGE = 'benchmark', A, B)
} }
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) { skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
stopifnot( stopifnot(
@ -174,7 +220,7 @@ skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
B <- matrix(as.double(B), nrow = nrow(B)) B <- matrix(as.double(B), nrow = nrow(B))
} }
.Call('R_skewSymRank2k', PACKAGE = 'wip', A, B, .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
as.double(alpha), as.double(beta)) as.double(alpha), as.double(beta))
} }
@ -193,6 +239,18 @@ microbenchmark(
transpose.c(A) transpose.c(A)
) )
Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = T)]
x <- runif(n)
stopifnot(all.equal(
as.double(Sym %*% x),
sympMV.c(vecSym, x)
))
microbenchmark(
Sym %*% x,
sympMV.c = sympMV.c(vecSym, x)
)
stopifnot( stopifnot(
all.equal(A %*% B, matrixprod.c(A, B)) all.equal(A %*% B, matrixprod.c(A, B))
) )
@ -232,7 +290,7 @@ nullProj.c <- function(B) {
B <- matrix(as.double(B), nrow = nrow(B)) B <- matrix(as.double(B), nrow = nrow(B))
} }
.Call('R_nullProj', PACKAGE = 'wip', B) .Call('R_nullProj', PACKAGE = 'benchmark', B)
} }
## Orthogonal projection onto null space tests -------------------------------- ## Orthogonal projection onto null space tests --------------------------------
p <- 12 p <- 12
@ -252,7 +310,7 @@ microbenchmark(
# ## WIP for gradient. ---------------------------------------------------------- # ## WIP for gradient. ----------------------------------------------------------
gradient.c <- function(X, X_diff, Y, V, h) { grad.c <- function(X, X_diff, Y, V, h) {
stopifnot( stopifnot(
is.matrix(X), is.double(X), is.matrix(X), is.double(X),
is.matrix(X_diff), is.double(X_diff), is.matrix(X_diff), is.double(X_diff),
@ -264,7 +322,7 @@ gradient.c <- function(X, X_diff, Y, V, h) {
is.vector(h), is.numeric(h), length(h) == 1 is.vector(h), is.numeric(h), length(h) == 1
) )
.Call('R_gradient', PACKAGE = 'wip', .Call('R_grad', PACKAGE = 'benchmark',
X, X_diff, as.double(Y), V, as.double(h)); X, X_diff, as.double(Y), V, as.double(h));
} }
@ -294,25 +352,23 @@ grad <- function(X, Y, V, h, persistent = TRUE) {
# Vectorized distance matrix `D`. # Vectorized distance matrix `D`.
vecD <- rowSums((X_diff %*% Q)^2) vecD <- rowSums((X_diff %*% Q)^2)
# Create Kernel matrix (aka. apply kernel to distances)
# Weight matrix `W` (dnorm ... gaussean density function) K <- matrix(1, n, n) # `exp(0) == 1`
W <- matrix(1, n, n) # `exp(0) == 1` K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
W[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part K[upper] <- t(K)[upper] # Mirror lower tri. to upper
W[upper] <- t.default(W)[upper] # Mirror lower tri. to upper
W <- sweep(W, 2, colSums(W), FUN = `/`) # Col-Normalize
# Weighted `Y` momentums # Weighted `Y` momentums
y1 <- Y %*% W # Result is 1D -> transposition irrelevant colSumsK <- colSums(K)
y2 <- Y^2 %*% W y1 <- (K %*% Y) / colSumsK
y2 <- (K %*% Y^2) / colSumsK
# Per example loss `L(V, X_i)` # Per example loss `L(V, X_i)`
L <- y2 - y1^2 L <- y2 - y1^2
# Vectorized Weights with forced symmetry # Compute scaling vector `vecS` for `X_diff`.
vecS <- (L[i] - (Y[j] - y1[i])^2) * W[lower] tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
vecS <- vecS + ((L[j] - (Y[i] - y1[j])^2) * W[upper]) tmp <- as.vector(L) - tmp
# Compute scaling of `X` row differences tmp <- tmp * K / colSumsK
vecS <- vecS * vecD vecS <- (tmp + t(tmp))[lower] * vecD
G <- crossprod(X_diff, X_diff * vecS) %*% V G <- crossprod(X_diff, X_diff * vecS) %*% V
G <- (-2 / (n * h^2)) * G G <- (-2 / (n * h^2)) * G
@ -340,9 +396,9 @@ X_diff <- X[i, , drop = F] - X[j, , drop = F]
stopifnot(all.equal( stopifnot(all.equal(
grad(X, Y, V, h), grad(X, Y, V, h),
gradient.c(X, X_diff, Y, V, h) grad.c(X, X_diff, Y, V, h)
)) ))
microbenchmark( microbenchmark(
grad = grad(X, Y, V, h), grad = grad(X, Y, V, h),
gradient.c = gradient.c(X, X_diff, Y, V, h) grad.c = grad.c(X, X_diff, Y, V, h)
) )

View File

@ -6,9 +6,59 @@
#include <R_ext/Error.h> #include <R_ext/Error.h>
// #include <Rmath.h> // #include <Rmath.h>
#include "wip.h" #include "benchmark.h"
static inline void rowSums(const double *A, void rowSums(const double *A,
const int nrow, const int ncol,
double *sum) {
int i, j, block_size, block_size_i;
const double *A_block = A;
const double *A_end = A + nrow * ncol;
if (nrow > 508) {
block_size = 508;
} else {
block_size = nrow;
}
// Iterate `(block_size_i, ncol)` submatrix blocks.
for (i = 0; i < nrow; i += block_size_i) {
// Reset `A` to new block beginning.
A = A_block;
// Take block size of eveything left and reduce to max size.
block_size_i = nrow - i;
if (block_size_i > block_size) {
block_size_i = block_size;
}
// Copy blocks first column.
for (j = 0; j < block_size_i; j += 4) {
sum[j] = A[j];
sum[j + 1] = A[j + 1];
sum[j + 2] = A[j + 2];
sum[j + 3] = A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] = A[j];
}
// Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) {
for (j = 0; j < block_size_i; j += 4) {
sum[j] += A[j];
sum[j + 1] += A[j + 1];
sum[j + 2] += A[j + 2];
sum[j + 3] += A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j];
}
}
// Step one block forth.
A_block += block_size_i;
sum += block_size_i;
}
}
void rowSumsV2(const double *A,
const int nrow, const int ncol, const int nrow, const int ncol,
double *sum) { double *sum) {
int i, j, block_size, block_size_i; int i, j, block_size, block_size_i;
@ -45,33 +95,54 @@ static inline void rowSums(const double *A,
sum += block_size_i; sum += block_size_i;
} }
} }
void rowSumsV3(const double *A,
static inline void colSums(const double *A,
const int nrow, const int ncol, const int nrow, const int ncol,
double *sum) { double *sum) {
int j; int i, onei = 1;
double *sum_end = sum + ncol; double* ones = (double*)malloc(ncol * sizeof(double));
const double one = 1.0;
const double zero = 0.0;
memset(sum, 0, sizeof(double) * ncol); for (i = 0; i < ncol; ++i) {
for (; sum < sum_end; ++sum) { ones[i] = 1.0;
for (j = 0; j < nrow; ++j) {
*sum += A[j];
} }
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
free(ones);
}
void colSums(const double *A, const int nrow, const int ncol,
double *sums) {
int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow.
double sum;
for (j = 0; j < ncol; ++j) {
sum = 0.0;
for (i = 0; i < nrowb; i += 4) {
sum += A[i]
+ A[i + 1]
+ A[i + 2]
+ A[i + 3];
}
for (; i < nrow; ++i) {
sum += A[i];
}
*(sums++) = sum;
A += nrow; A += nrow;
} }
} }
static inline void rowSquareSums(const double *A, void rowSquareSums(const double *A,
const int nrow, const int ncol, const int nrow, const int ncol,
double *sum) { double *sum) {
int i, j, block_size, block_size_i; int i, j, block_size, block_size_i;
const double *A_block = A; const double *A_block = A;
const double *A_end = A + nrow * ncol; const double *A_end = A + nrow * ncol;
if (nrow < CVE_MEM_CHUNK_SIZE) { if (nrow > 508) {
block_size = nrow; block_size = 508;
} else { } else {
block_size = CVE_MEM_CHUNK_SIZE; block_size = nrow;
} }
// Iterate `(block_size_i, ncol)` submatrix blocks. // Iterate `(block_size_i, ncol)` submatrix blocks.
@ -80,16 +151,29 @@ static inline void rowSquareSums(const double *A,
A = A_block; A = A_block;
// Take block size of eveything left and reduce to max size. // Take block size of eveything left and reduce to max size.
block_size_i = nrow - i; block_size_i = nrow - i;
if (block_size_i > block_size) { if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!!
block_size_i = block_size; block_size_i = block_size;
} /// ...
// TODO:
// Copy blocks first column.
for (j = 0; j < block_size_i; j += 4) {
sum[j] = A[j] * A[j];
sum[j + 1] = A[j + 1] * A[j + 1];
sum[j + 2] = A[j + 2] * A[j + 2];
sum[j + 3] = A[j + 3] * A[j + 3];
} }
// Compute first blocks column, for (; j < block_size_i; ++j) {
for (j = 0; j < block_size_i; ++j) {
sum[j] = A[j] * A[j]; sum[j] = A[j] * A[j];
} }
// and sum the following columns to the first one. // Sum following columns to the first one.
for (A += nrow; A < A_end; A += nrow) { for (A += nrow; A < A_end; A += nrow) {
for (j = 0; j < block_size_i; ++j) { for (j = 0; j < block_size_i; j += 4) {
sum[j] += A[j] * A[j];
sum[j + 1] += A[j + 1] * A[j + 1];
sum[j + 2] += A[j + 2] * A[j + 2];
sum[j + 3] += A[j + 3] * A[j + 3];
}
for (; j < block_size_i; ++j) {
sum[j] += A[j] * A[j]; sum[j] += A[j] * A[j];
} }
} }
@ -99,7 +183,7 @@ static inline void rowSquareSums(const double *A,
} }
} }
static inline void rowSumsSymVec(const double *Avec, const int nrow, void rowSumsSymVec(const double *Avec, const int nrow,
const double diag, const double diag,
double *sum) { double *sum) {
int i, j; int i, j;
@ -121,7 +205,7 @@ static inline void rowSumsSymVec(const double *Avec, const int nrow,
} }
/* C[, j] = A[, j] * v for each j = 1 to ncol */ /* C[, j] = A[, j] * v for each j = 1 to ncol */
static void rowSweep(const double *A, const int nrow, const int ncol, void rowSweep(const double *A, const int nrow, const int ncol,
const char* op, const char* op,
const double *v, // vector of length nrow const double *v, // vector of length nrow
double *C) { double *C) {
@ -241,7 +325,20 @@ void transpose(const double *A, const int nrow, const int ncol, double* T) {
} }
} }
static inline void matrixprod(const double *A, const int nrowA, const int ncolA, // Symmetric Packed matrix vector product.
// Computes
// y <- Ax
// where A is supplied as packed lower triangular part of a symmetric
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
double one = 1.0;
double zero = 0.0;
int onei = 1;
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB, const double *B, const int nrowB, const int ncolB,
double *C) { double *C) {
const double one = 1.0; const double one = 1.0;
@ -254,7 +351,7 @@ static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
&zero, C, &nrowA); &zero, C, &nrowA);
} }
static inline void crossprod(const double *A, const int nrowA, const int ncolA, void crossprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB, const double *B, const int nrowB, const int ncolB,
double *C) { double *C) {
const double one = 1.0; const double one = 1.0;
@ -267,7 +364,7 @@ static inline void crossprod(const double *A, const int nrowA, const int ncolA,
&zero, C, &ncolA); &zero, C, &ncolA);
} }
static inline void nullProj(const double *B, const int nrowB, const int ncolB, void nullProj(const double *B, const int nrowB, const int ncolB,
double *Q) { double *Q) {
const double minusOne = -1.0; const double minusOne = -1.0;
const double one = 1.0; const double one = 1.0;
@ -286,7 +383,7 @@ static inline void nullProj(const double *B, const int nrowB, const int ncolB,
&one, Q, &nrowB); &one, Q, &nrowB);
} }
static inline void rangePairs(const int from, const int to, int *pairs) { void rangePairs(const int from, const int to, int *pairs) {
int i, j; int i, j;
for (i = from; i < to; ++i) { for (i = from; i < to; ++i) {
for (j = i + 1; j < to; ++j) { for (j = i + 1; j < to; ++j) {
@ -300,7 +397,7 @@ static inline void rangePairs(const int from, const int to, int *pairs) {
// A dence skwe-symmetric rank 2 update. // A dence skwe-symmetric rank 2 update.
// Perform the update // Perform the update
// C := alpha (A * B^T - B * A^T) + beta C // C := alpha (A * B^T - B * A^T) + beta C
static void skewSymRank2k(const int nrow, const int ncol, void skewSymRank2k(const int nrow, const int ncol,
double alpha, const double *A, const double *B, double alpha, const double *A, const double *B,
double beta, double beta,
double *C) { double *C) {
@ -308,7 +405,6 @@ static void skewSymRank2k(const int nrow, const int ncol,
&nrow, &nrow, &ncol, &nrow, &nrow, &ncol,
&alpha, A, &nrow, B, &nrow, &alpha, A, &nrow, B, &nrow,
&beta, C, &nrow); &beta, C, &nrow);
alpha *= -1.0; alpha *= -1.0;
beta = 1.0; beta = 1.0;
F77_NAME(dgemm)("N", "T", F77_NAME(dgemm)("N", "T",
@ -316,32 +412,41 @@ static void skewSymRank2k(const int nrow, const int ncol,
&alpha, B, &nrow, A, &nrow, &alpha, B, &nrow, A, &nrow,
&beta, C, &nrow); &beta, C, &nrow);
} }
// TODO: clarify
static inline double gaussKernel(const double x, const double scale) {
return exp(scale * x * x);
}
// TODO: mutch potential for optimization!!! // TODO: mutch potential for optimization!!!
static inline void weightedYandLoss(const int n, static void weightedYandLoss(const int n,
const double *Y, const double *Y,
const double *vecD, const double *vecD,
const double *vecW, const double *vecW,
const double *colSums, const double *colSums,
double *y1, double *L, double *vecS, double *y1, double *L, double *vecS,
double *const loss) { double *loss) {
int i, j, k, N = n * (n - 1) / 2; int i, j, k, N = n * (n - 1) / 2;
double l; double l;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
y1[i] = Y[i] / colSums[i]; y1[i] = Y[i];
L[i] = Y[i] * Y[i] / colSums[i]; L[i] = Y[i] * Y[i];
} }
for (k = j = 0; j < n; ++j) { for (k = j = 0; j < n; ++j) {
for (i = j + 1; i < n; ++i, ++k) { for (i = j + 1; i < n; ++i, ++k) {
y1[i] += Y[j] * vecW[k] / colSums[i]; y1[i] += Y[j] * vecW[k];
y1[j] += Y[i] * vecW[k] / colSums[j]; y1[j] += Y[i] * vecW[k];
L[i] += Y[j] * Y[j] * vecW[k] / colSums[i]; L[i] += Y[j] * Y[j] * vecW[k];
L[j] += Y[i] * Y[i] * vecW[k] / colSums[j]; L[j] += Y[i] * Y[i] * vecW[k];
} }
} }
for (i = 0; i < n; ++i) {
y1[i] /= colSums[i];
L[i] /= colSums[i];
}
l = 0.0; l = 0.0;
for (i = 0; i < n; ++i) { for (i = 0; i < n; ++i) {
l += (L[i] -= y1[i] * y1[i]); l += (L[i] -= y1[i] * y1[i]);
@ -362,17 +467,13 @@ static inline void weightedYandLoss(const int n,
} }
} }
inline double gaussKernel(const double x, const double scale) { void grad(const int n, const int p, const int q,
return exp(scale * x * x);
}
static void gradient(const int n, const int p, const int q,
const double *X, const double *X,
const double *X_diff, const double *X_diff,
const double *Y, const double *Y,
const double *V, const double *V,
const double h, const double h,
double *G, double *const loss) { double *G, double *loss) {
// Number of X_i to X_j not trivial pairs. // Number of X_i to X_j not trivial pairs.
int i, N = (n * (n - 1)) / 2; int i, N = (n * (n - 1)) / 2;
double scale = -0.5 / h; double scale = -0.5 / h;
@ -398,12 +499,12 @@ static void gradient(const int n, const int p, const int q,
rowSquareSums(X_proj, N, p, vecD); rowSquareSums(X_proj, N, p, vecD);
// Apply kernel to distence vector for weights computation. // Apply kernel to distence vector for weights computation.
double *vecW = X_proj; // reuse memory area, no longer needed. double *vecK = X_proj; // reuse memory area, no longer needed.
for (i = 0; i < N; ++i) { for (i = 0; i < N; ++i) {
vecW[i] = gaussKernel(vecD[i], scale); vecK[i] = gaussKernel(vecD[i], scale);
} }
double *colSums = X_proj + N; // still allocated! double *colSums = X_proj + N; // still allocated!
rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym
// compute weighted responces of first end second momontum, aka y1, y2. // compute weighted responces of first end second momontum, aka y1, y2.
double *y1 = X_proj + N + n; double *y1 = X_proj + N + n;
@ -411,7 +512,7 @@ static void gradient(const int n, const int p, const int q,
// Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
// used symultanious to X_proj in final gradient computation. // used symultanious to X_proj in final gradient computation.
double *vecS = (double*)malloc(N * sizeof(double)); double *vecS = (double*)malloc(N * sizeof(double));
weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss); weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);
// compute the gradient using X_proj for intermidiate scaled X_diff. // compute the gradient using X_proj for intermidiate scaled X_diff.
rowSweep(X_diff, N, p, "*", vecS, X_proj); rowSweep(X_diff, N, p, "*", vecS, X_proj);

View File

@ -6,7 +6,7 @@
#define CVE_MEM_CHUNK_SMALL 1016 #define CVE_MEM_CHUNK_SMALL 1016
#define CVE_MEM_CHUNK_SIZE 2032 #define CVE_MEM_CHUNK_SIZE 2032
static inline void rowSums(const double *A, void rowSums(const double *A,
const int nrow, const int ncol, const int nrow, const int ncol,
double *sum); double *sum);
SEXP R_rowSums(SEXP A) { SEXP R_rowSums(SEXP A) {
@ -17,8 +17,30 @@ SEXP R_rowSums(SEXP A) {
UNPROTECT(1); UNPROTECT(1);
return sums; return sums;
} }
void rowSumsV2(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV2(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
static inline void colSums(const double *A, rowSumsV2(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void rowSumsV3(const double *A,
const int nrow, const int ncol,
double *sum);
SEXP R_rowSumsV3(SEXP A) {
SEXP sums = PROTECT(allocVector(REALSXP, nrows(A)));
rowSumsV3(REAL(A), nrows(A), ncols(A), REAL(sums));
UNPROTECT(1);
return sums;
}
void colSums(const double *A,
const int nrow, const int ncol, const int nrow, const int ncol,
double *sum); double *sum);
SEXP R_colSums(SEXP A) { SEXP R_colSums(SEXP A) {
@ -30,7 +52,7 @@ SEXP R_colSums(SEXP A) {
return sums; return sums;
} }
static inline void rowSquareSums(const double*, const int, const int, double*); void rowSquareSums(const double*, const int, const int, double*);
SEXP R_rowSquareSums(SEXP A) { SEXP R_rowSquareSums(SEXP A) {
SEXP result = PROTECT(allocVector(REALSXP, nrows(A))); SEXP result = PROTECT(allocVector(REALSXP, nrows(A)));
@ -40,7 +62,7 @@ SEXP R_rowSquareSums(SEXP A) {
return result; return result;
} }
static inline void rowSumsSymVec(const double *Avec, const int nrow, void rowSumsSymVec(const double *Avec, const int nrow,
const double diag, const double diag,
double *sum); double *sum);
SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) { SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
@ -52,7 +74,7 @@ SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
return sum; return sum;
} }
static void rowSweep(const double *A, const int nrow, const int ncol, void rowSweep(const double *A, const int nrow, const int ncol,
const char* op, const char* op,
const double *v, // vector of length nrow const double *v, // vector of length nrow
double *C); double *C);
@ -77,7 +99,17 @@ SEXP R_transpose(SEXP A) {
return T; return T;
} }
static inline void matrixprod(const double *A, const int nrowA, const int ncolA, void sympMV(const double* vecA, const int nrow, const double* x, double* y);
SEXP R_sympMV(SEXP vecA, SEXP x) {
SEXP y = PROTECT(allocVector(REALSXP, length(x)));
sympMV(REAL(vecA), length(x), REAL(x), REAL(y));
UNPROTECT(1); /* y */
return y;
}
void matrixprod(const double *A, const int nrowA, const int ncolA,
const double *B, const int nrowB, const int ncolB, const double *B, const int nrowB, const int ncolB,
double *C); double *C);
SEXP R_matrixprod(SEXP A, SEXP B) { SEXP R_matrixprod(SEXP A, SEXP B) {
@ -91,7 +123,7 @@ SEXP R_matrixprod(SEXP A, SEXP B) {
return C; return C;
} }
static inline void crossprod(const double* A, const int nrowA, const int ncolA, void crossprod(const double* A, const int nrowA, const int ncolA,
const double* B, const int ncolB, const int nrowB, const double* B, const int ncolB, const int nrowB,
double* C); double* C);
SEXP R_crossprod(SEXP A, SEXP B) { SEXP R_crossprod(SEXP A, SEXP B) {
@ -105,7 +137,7 @@ SEXP R_crossprod(SEXP A, SEXP B) {
return C; return C;
} }
static void skewSymRank2k(const int n, const int k, void skewSymRank2k(const int n, const int k,
double alpha, const double *A, const double *B, double alpha, const double *A, const double *B,
double beta, double beta,
double *C); double *C);
@ -121,7 +153,7 @@ SEXP R_skewSymRank2k(SEXP A, SEXP B, SEXP alpha, SEXP beta) {
return C; return C;
} }
static inline void nullProj(const double* B, const int nrowB, const int ncolB, void nullProj(const double* B, const int nrowB, const int ncolB,
double* Q); double* Q);
SEXP R_nullProj(SEXP B) { SEXP R_nullProj(SEXP B) {
SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B))); SEXP Q = PROTECT(allocMatrix(REALSXP, nrows(B), nrows(B)));
@ -132,7 +164,7 @@ SEXP R_nullProj(SEXP B) {
return Q; return Q;
} }
static inline void rangePairs(const int from, const int to, int *pairs); void rangePairs(const int from, const int to, int *pairs);
SEXP R_rangePairs(SEXP from, SEXP to) { SEXP R_rangePairs(SEXP from, SEXP to) {
int start = asInteger(from); int start = asInteger(from);
int end = asInteger(to) + 1; int end = asInteger(to) + 1;
@ -145,20 +177,20 @@ SEXP R_rangePairs(SEXP from, SEXP to) {
return out; return out;
} }
static void gradient(const int n, const int p, const int q, void grad(const int n, const int p, const int q,
const double *X, const double *X,
const double *X_diff, const double *X_diff,
const double *Y, const double *Y,
const double *V, const double *V,
const double h, const double h,
double *G, double *const loss); double *G, double *const loss);
SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { SEXP R_grad(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
int N = (nrows(X) * (nrows(X) - 1)) / 2; int N = (nrows(X) * (nrows(X) - 1)) / 2;
SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V))); SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V)));
SEXP loss = PROTECT(allocVector(REALSXP, 1)); SEXP loss = PROTECT(allocVector(REALSXP, 1));
gradient(nrows(X), ncols(X), ncols(V), grad(nrows(X), ncols(X), ncols(V),
REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h), REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h),
REAL(G), REAL(loss)); REAL(G), REAL(loss));

View File

@ -16,7 +16,7 @@ subspace.dist <- function(B1, B2){
} }
# Number of simulations # Number of simulations
SIM.NR <- 20 SIM.NR <- 50
# maximal number of iterations in curvilinear search algorithm # maximal number of iterations in curvilinear search algorithm
MAXIT <- 50 MAXIT <- 50
# number of arbitrary starting values for curvilinear optimization # number of arbitrary starting values for curvilinear optimization
@ -24,7 +24,7 @@ ATTEMPTS <- 10
# set names of datasets # set names of datasets
dataset.names <- c("M1", "M2", "M3", "M4", "M5") dataset.names <- c("M1", "M2", "M3", "M4", "M5")
# Set used CVE method # Set used CVE method
methods <- c("simple") # c("legacy", "simple", "sgd", "linesearch") methods <- c("simple") # c("legacy", "simple", "linesearch", "sgd")
if ("legacy" %in% methods) { if ("legacy" %in% methods) {
# Source legacy code (but only if needed) # Source legacy code (but only if needed)
@ -43,7 +43,9 @@ log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
file <- file.path('tmp', paste0('test', log.nr, '.log')) file <- file.path('tmp', paste0('test', log.nr, '.log'))
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file) cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
# Open a new pdf device for plotting into (do not overwrite existing ones) # Open a new pdf device for plotting into (do not overwrite existing ones)
pdf(file.path('tmp', paste0('test', log.nr, '.pdf'))) path <- paste0('test', log.nr, '.pdf')
pdf(file.path('tmp', path))
cat('Plotting to file:', path, '\n')
# only for telling user (to stdout) # only for telling user (to stdout)
count <- 0 count <- 0