2
0
Fork 0

update: documentation

This commit is contained in:
Daniel Kapla 2021-03-05 14:52:45 +01:00
parent 8e95902e79
commit 2344120dd9
20 changed files with 323 additions and 500 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
*.pdf filter=lfs diff=lfs merge=lfs -text

16
.gitignore vendored Normal file
View File

@ -0,0 +1,16 @@
simulations/results/*
literature/*
CVE/src/*.o
CVE/src/*.so
CVE/src/*.dll
*.tgz
*.tar.xz
*.tar.gz
*.zip
*.so
*.Rcheck/*
tmp/*
wip/*

View File

@ -1,12 +1,12 @@
Package: CVE
Type: Package
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
Version: 0.2
Date: 2019-12-20
Version: 0.3
Date: 2021-03-04
Author: Daniel Kapla <daniel@kapla.at>, Lukas Fertl <lukas.fertl@chello.at>
Maintainer: Daniel Kapla <daniel@kapla.at>
Description: Implementation of the Conditional Variance Estimation (CVE) method.
License: GPL-3
Encoding: UTF-8
Imports: stats,graphics,mda
RoxygenNote: 6.1.1
Imports: stats,mda
RoxygenNote: 7.0.2

View File

@ -2,7 +2,6 @@
S3method(coef,cve)
S3method(directions,cve)
S3method(plot,cve)
S3method(predict,cve)
S3method(summary,cve)
export(cve)
@ -14,10 +13,6 @@ export(null)
export(predict_dim)
export(rStiefel)
import(stats)
importFrom(graphics,boxplot)
importFrom(graphics,lines)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(mda,mars)
importFrom(stats,model.frame)
importFrom(stats,rbinom)

View File

@ -3,7 +3,7 @@
#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
#' where \eqn{B'X} is a lower dimensional projection of the predictors and
#' \eqn{Y} is a univariate responce. CVE,
#' \eqn{Y} is a univariate response. CVE,
#' similarly to its main competitor, the mean average variance estimation
#' (MAVE), is not based on inverse regression, and does not require the
#' restrictive linearity and constant variance conditions of moment based SDR
@ -11,7 +11,9 @@
#' continuous predictors and link function. Let \eqn{X} be a real
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
#' \eqn{Y} and \eqn{X} is modelled by
#'
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
@ -20,9 +22,28 @@
#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
#'
#' Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
#' implemented which is a SDR method in regressions with continuous response and
#' predictors. ECVE applies to general non-additive error regression models.
#'
#' \deqn{Y = g(B'X, \epsilon)}
#'
#' It operates under the assumption that the predictors can be replaced by a
#' lower dimensional projection without loss of information.It is a
#' semiparametric forward regression model based exhaustive sufficient dimension
#' reduction estimation method that is shown to be consistent under mild
#' assumptions.
#'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @references
#' [1] Fertl, L. and Bura, E. (2021), Conditional Variance
#' Estimation for Sufficient Dimension Reduction.
#' arXiv:2102.08782
#'
#' [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
#' Estimation for Sufficient Dimension Reduction.
#' arXiv:2102.13435
#'
#' @docType package
#' @useDynLib CVE, .registration = TRUE
@ -30,43 +51,25 @@
#' Conditional Variance Estimator (CVE).
#'
#' @description
#' This is the main function in the \code{CVE} package. It creates objects of
#' class \code{"cve"} to estimate the mean subspace. Helper functions that
#' require a \code{"cve"} object can then be applied to the output from this
#' function.
#'
#' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
#' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
#' \eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if
#' supplied like \code{data <- data.frame(Y, X)} with dimension
#' \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#' the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#' options are
#' \itemize{
#' \item "simple" implementation,
#' \item "weighted" variation with adaptive weighting of slices.
#' }
#' see Fertl, L. and Bura, E. (2019).
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param ... optional parameters passed on to \code{cve.call}.
#'
#'
#' Conditional Variance Estimation (CVE) is a sufficient dimension reduction
#' (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
#' expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
#' function provides methods for estimating the dimension and the subspace
#' spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
#' rank \eqn{k} such that
#' \deqn{%
#' E(Y|X) = E(Y|B'X) %
#' }
#'
#' \deqn{E(Y|X) = E(Y|B'X)}
#'
#' or, equivalently,
#' \deqn{%
#' Y = g(B'X) + \epsilon %
#' }
#'
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
@ -81,6 +84,41 @@
#' The primary output from this method is a set of orthonormal vectors,
#' \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
#'
#' The method central implements the Ensemble Conditional Variance Estimation
#' (ECVE) as described in [2]. It augments the CVE method by applying an
#' ensemble of functions (parameter \code{func_list}) to the response to
#' estimate the central subspace. This corresponds to the generalization
#'
#' \deqn{F(Y|X) = F(Y|B'X)}
#'
#' or, equivalently,
#'
#' \deqn{Y = g(B'X, \epsilon)}
#'
#' where \eqn{F} is the conditional cumulative distribution function.
#'
#' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
#' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
#' \eqn{X} is a \eqn{n\times p}{n x p} matrix of the predictors.
#' @param data an optional data frame, containing the data for the formula if
#' supplied like \code{data <- data.frame(Y, X)} with dimension
#' \eqn{n \times (p + 1)}{n x (p + 1)}. By default the variables are taken from
#' the environment from which \code{cve} is called.
#' @param method This character string specifies the method of fitting. The
#' options are
#' \itemize{
#' \item \code{"mean"} method to estimate the mean subspace, see [1].
#' \item \code{"central"} ensemble method to estimate the central subspace,
#' see [2].
#' \item \code{"weighted.mean"} variation of \code{"mean"} method with
#' adaptive weighting of slices, see [1].
#' \item \code{"weighted.central"} variation of \code{"central"} method with
#' adaptive weighting of slices, see [2].
#' }
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param ... optional parameters passed on to \code{\link{cve.call}}.
#'
#' @return an S3 object of class \code{cve} with components:
#' \describe{
#' \item{X}{design matrix of predictor vector used for calculating
@ -108,40 +146,42 @@
#'
#' @examples
#' # set dimensions for simulation model
#' p <- 8
#' p <- 5
#' k <- 2
#' # create B for simulation
#' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p)
#' B <- cbind(b1, b2)
#' # sample size
#' n <- 200
#' n <- 100
#' set.seed(21)
#'
#' # creat predictor data x ~ N(0, I_p)
#' x <- matrix(rnorm(n * p), n, p)
#' # simulate response variable
#' # y = f(B'x) + err
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
#' # calculate cve with method 'mean' for k unknown in 1, ..., 4
#' cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'mean'
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(n)
#'
#' # calculate cve with method 'mean' for k unknown in 1, ..., 3
#' cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
#' # calculate cve with method 'weighed' for k = 2
#' cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
#' # estimate dimension from cve.obj.s
#' khat <- predict_dim(cve.obj.s)$k
#' # get cve-estimate for B with dimensions (p, k = khat)
#' B2 <- coef(cve.obj.s, k = khat)
#' B2 <- coef(cve.obj.s, k = 2)
#'
#' # get projected X data (same as cve.obj.s$X %*% B2)
#' proj.X <- directions(cve.obj.s, k = khat)
#' proj.X <- directions(cve.obj.s, k = 2)
#' # plot y against projected data
#' plot(proj.X[, 1], y)
#' plot(proj.X[, 2], y)
#'
#' # creat 10 new x points and y according to model
#' x.new <- matrix(rnorm(10 * p), 10, p)
#' y.new <- (x.new %*% b1)^2 + 2 * (x.new %*% b2) + 0.25 * rnorm(10)
#' # predict y.new
#' yhat <- predict(cve.obj.s, x.new, khat)
#' yhat <- predict(cve.obj.s, x.new, 2)
#' plot(y.new, yhat)
#'
#' # projection matrix on span(B)
#' # same as B %*% t(B) since B is semi-orthogonal
#' PB <- B %*% solve(t(B) %*% B) %*% t(B)
@ -158,8 +198,15 @@
#'
#' @seealso For a detailed description of \code{formula} see
#' \code{\link{formula}}.
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @references
#' [1] Fertl, L. and Bura, E. (2021), Conditional Variance
#' Estimation for Sufficient Dimension Reduction.
#' arXiv:2102.08782
#'
#' [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
#' Estimation for Sufficient Dimension Reduction.
#' arXiv:2102.13435
#'
#' @importFrom stats model.frame
#' @export
@ -191,19 +238,27 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
#' @inherit cve description
#'
#' @param X Design predictor matrix.
#' @param Y \eqn{n}-dimensional vector of responces.
#' @param Y \eqn{n}-dimensional vector of responses.
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
#' estimated bandwidth.
#' @param nObs parameter for choosing bandwidth \code{h} using
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param method specifies the CVE method variation as one of
#' @param method This character string specifies the method of fitting. The
#' options are
#' \itemize{
#' \item "simple" exact implementation as described in the paper listed
#' below.
#' \item "weighted" variation with addaptive weighting of slices.
#' \item \code{"mean"} method to estimate the mean subspace, see [1].
#' \item \code{"central"} ensemble method to estimate the central subspace,
#' see [2].
#' \item \code{"weighted.mean"} variation of \code{"mean"} method with
#' adaptive weighting of slices, see [1].
#' \item \code{"weighted.central"} variation of \code{"central"} method with
#' adaptive weighting of slices, see [2].
#' }
#' @param func_list a list of functions applied to \code{Y} to form the ensamble
#' CVE for central sub-space estimation.
#' @param func_list a list of functions applied to \code{Y} used by ECVE
#' (see [2]) for central subspace estimation. The default ensemble are
#' indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
#' percent response quantiles. (only relevant if \code{method} is
#' \code{"central"} or \code{"weighted.central"}, ignored otherwise)
#' @param k Dimension of lower dimensional projection, if \code{k} is given
#' only the specified dimension \code{B} matrix is estimated.
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
@ -214,6 +269,9 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
#' @param attempts If \code{V.init} not supplied, the optimization is carried
#' out \code{attempts} times with starting values drawn from the invariant
#' measure on the Stiefel manifold (see \code{\link{rStiefel}}).
#' @param nr.proj The number of projection used for projective resampling for
#' multivariate response \eqn{Y} (under active development, ignored for
#' univariate response).
#' @param momentum number of \eqn{[0, 1)} giving the ration of momentum for
#' eucledian gradient update with a momentum term. \code{momentum = 0}
#' corresponds to normal gradient descend.
@ -230,6 +288,7 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
#' computation).
#'
#' @inherit cve return
#' @inherit cve references
#'
#' @examples
#' # create B for simulation (k = 1)
@ -262,7 +321,7 @@ cve.call <- function(X, Y,
momentum = 0.0, tau = 1.0, tol = 1e-3,
slack = 0.0, gamma = 0.5,
V.init = NULL,
max.iter = 50L, attempts = 10L, nr.proj = 500L,
max.iter = 50L, attempts = 10L, nr.proj = 1L,
logger = NULL
) {
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")

View File

@ -14,7 +14,7 @@
#' # set dimensions for simulation model
#' p <- 8 # sample dimension
#' k <- 2 # real dimension of SDR subspace
#' n <- 200 # samplesize
#' n <- 100 # samplesize
#' # create B for simulation
#' b1 <- rep(1 / sqrt(p), p)
#' b2 <- (-1)^seq(1, p) / sqrt(p)
@ -25,10 +25,10 @@
#' x <- matrix(rnorm(n * p), n, p)
#' # simulate response variable
#' # y = f(B'x) + err
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
#' # calculate cve for k = 1, ..., 5
#' cve.obj <- cve(y ~ x, max.dim = 5)
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.1 * rnorm(100)
#' # calculate cve for k = 2, 3
#' cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
#' # get cve-estimate for B with dimensions (p, k = 2)
#' B2 <- coef(cve.obj, k = 2)
#'

View File

@ -200,8 +200,10 @@ rlaplace <- function(n = 1, mu = 0, sd = 1) {
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.25}.
#'
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#' @references
#' Fertl, L. and Bura, E. (2021), Conditional Variance
#' Estimation for Sufficient Dimension Reduction.
#' arXiv:2102.08782
#'
#' @import stats
#' @importFrom stats rnorm rbinom

View File

@ -1,62 +0,0 @@
#' Elbow plot of the loss function.
#'
#' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
#' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
#' to \eqn{L_n(V, X_i)} where \eqn{V} is the minimizer of \eqn{L_n(V)} where
#' \eqn{V} is an element of a Stiefel manifold (see
#' Fertl, L. and Bura, E. (2019)).
#'
#' @param x an object of class \code{"cve"}, usually, a result of a call to
#' \code{\link{cve}} or \code{\link{cve.call}}.
#' @param ... Pass through parameters to [\code{\link{plot}}] and
#' [\code{\link{lines}}]
#'
#' @examples
#' # create B for simulation
#' B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
#'
#' set.seed(21)
#' # creat predictor data x ~ N(0, I_p)
#' X <- matrix(rnorm(600), 100)
#'
#' # simulate response variable
#' # y = f(B'x) + err
#' # with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
#' Y <- (X %*% B[, 1])^2 + 2 * X %*% B[, 2] + rnorm(100, 0, .1)
#'
#' # Create bandwidth estimation function
#' estimate.bandwidth <- function(X, k, nObs) {
#' n <- nrow(X)
#' p <- ncol(X)
#' X_c <- scale(X, center = TRUE, scale = FALSE)
#' 2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
#' }
#' # calculate cve with method 'simple' for k = min.dim,...,max.dim
#' cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
#'
#' # elbow plot
#' plot(cve.obj.simple)
#'
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
#' Estimation for Sufficient Dimension Reduction. Working Paper.
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
#' @importFrom graphics plot lines points boxplot
#' @export
plot.cve <- function(x, ...) {
L <- c()
k <- c()
for (dr.k in x$res) {
if (class(dr.k) == 'cve.k') {
k <- c(k, as.character(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k)) / var(x$Y)
boxplot(L, main = "elbow plot",
xlab = "SDR dimension",
ylab = "Sample loss distribution",
names = k)
}

View File

@ -1,7 +1,7 @@
#' Predict method for CVE Fits.
#'
#' Predict response using projected data. The forward model \eqn{g(B' X)} is
#' estimated with \code{\link{mars}} in the \code{\pkg{mda}} package.
#' estimated with \code{\link{mars}} in the \pkg{mda} package.
#'
#' @param object an object of class \code{"cve"}, usually, a result of a call to
#' \code{\link{cve}} or \code{\link{cve.call}}.

Binary file not shown.

View File

@ -9,7 +9,7 @@
Conditional Variance Estimation (CVE) is a novel sufficient dimension
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
where \eqn{B'X} is a lower dimensional projection of the predictors and
\eqn{Y} is a univariate responce. CVE,
\eqn{Y} is a univariate response. CVE,
similarly to its main competitor, the mean average variance estimation
(MAVE), is not based on inverse regression, and does not require the
restrictive linearity and constant variance conditions of moment based SDR
@ -17,7 +17,10 @@ methods. CVE is data-driven and applies to additive error regressions with
continuous predictors and link function. Let \eqn{X} be a real
\eqn{p}-dimensional covariate vector. We assume that the dependence of
\eqn{Y} and \eqn{X} is modelled by
}
\details{
\deqn{Y = g(B'X) + \epsilon}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
@ -25,10 +28,27 @@ is an unknown, continuous non-constant function,
and \eqn{B = (b_1, ..., b_k)} is
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Without loss of generality \eqn{B} is assumed to be orthonormal.
Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
implemented which is a SDR method in regressions with continuous response and
predictors. ECVE applies to general non-additive error regression models.
\deqn{Y = g(B'X, \epsilon)}
It operates under the assumption that the predictors can be replaced by a
lower dimensional projection without loss of information.It is a
semiparametric forward regression model based exhaustive sufficient dimension
reduction estimation method that is shown to be consistent under mild
assumptions.
}
\references{
Fertl, L. and Bura, E. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
[1] Fertl, L. and Bura, E. (2021), Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.08782
[2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.13435
}
\author{
Daniel Kapla, Lukas Fertl, Bura Efstathia

View File

@ -25,7 +25,7 @@ cve-estimate of \eqn{B} with dimension \eqn{p\times k}{p x k}.
# set dimensions for simulation model
p <- 8 # sample dimension
k <- 2 # real dimension of SDR subspace
n <- 200 # samplesize
n <- 100 # samplesize
# create B for simulation
b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p)
@ -36,10 +36,10 @@ set.seed(21)
x <- matrix(rnorm(n * p), n, p)
# simulate response variable
# y = f(B'x) + err
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
# calculate cve for k = 1, ..., 5
cve.obj <- cve(y ~ x, max.dim = 5)
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.1 * rnorm(100)
# calculate cve for k = 2, 3
cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
# get cve-estimate for B with dimensions (p, k = 2)
B2 <- coef(cve.obj, k = 2)

View File

@ -4,7 +4,7 @@
\alias{cve}
\title{Conditional Variance Estimator (CVE).}
\usage{
cve(formula, data, method = "simple", max.dim = 10L, ...)
cve(formula, data, method = "mean", max.dim = 10L, ...)
}
\arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic
@ -20,42 +20,17 @@ the environment from which \code{cve} is called.}
\item{method}{This character string specifies the method of fitting. The
options are
\itemize{
\item "simple" implementation,
\item "weighted" variation with adaptive weighting of slices.
}
see Fertl, L. and Bura, E. (2019).}
\item "mean" method to estimate the mean subspace, see [1].
\item "central" ensemble method to estimate the central subspace, see [2].
\item "weighted.mean" variation of `"mean"` method with adaptive weighting
of slices, see [1].
\item "weighted.central" variation of `"central"` method with adaptive
weighting of slices, see [2].
}}
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{...}{optional parameters passed on to \code{cve.call}.
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
function provides methods for estimating the dimension and the subspace
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
rank \eqn{k} such that
\deqn{%
E(Y|X) = E(Y|B'X) %
}
or, equivalently,
\deqn{%
Y = g(B'X) + \epsilon %
}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
CVE method makes very few assumptions.
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
The primary output from this method is a set of orthonormal vectors,
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.}
\item{...}{optional parameters passed on to \code{\link{cve.call}}.}
}
\value{
an S3 object of class \code{cve} with components:
@ -88,61 +63,108 @@ This is the main function in the \code{CVE} package. It creates objects of
class \code{"cve"} to estimate the mean subspace. Helper functions that
require a \code{"cve"} object can then be applied to the output from this
function.
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
function provides methods for estimating the dimension and the subspace
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
rank \eqn{k} such that
\deqn{E(Y|X) = E(Y|B'X)}
or, equivalently,
\deqn{Y = g(B'X) + \epsilon}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
CVE method makes very few assumptions.
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
The primary output from this method is a set of orthonormal vectors,
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
The method central implements the Ensemble Conditional Variance Estimation
(ECVE) as described in [2]. It augments the CVE method by applying an
ensemble of functions (parameter \code{func_list}) to the response to
estimate the central subspace. This corresponds to the generalization
\deqn{F(Y|X) = F(Y|B'X)}
or, equivalently,
\deqn{Y = g(B'X, \epsilon)}
where \eqn{F} is the conditional cumulative distribution function.
}
\examples{
# set dimensions for simulation model
p <- 8
p <- 5
k <- 2
# create B for simulation
b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2)
# sample size
n <- 200
n <- 100
set.seed(21)
# creat predictor data x ~ N(0, I_p)
x <- matrix(rnorm(n * p), n, p)
# simulate response variable
# y = f(B'x) + err
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
# calculate cve with method 'simple' for k unknown in 1, ..., 4
cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(n)
# calculate cve with method 'mean' for k unknown in 1, ..., 3
cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
# calculate cve with method 'weighed' for k = 2
cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
# estimate dimension from cve.obj.s
khat <- predict_dim(cve.obj.s)$k
# get cve-estimate for B with dimensions (p, k = khat)
B2 <- coef(cve.obj.s, k = khat)
B2 <- coef(cve.obj.s, k = 2)
# get projected X data (same as cve.obj.s$X \%*\% B2)
proj.X <- directions(cve.obj.s, k = khat)
proj.X <- directions(cve.obj.s, k = 2)
# plot y against projected data
plot(proj.X[, 1], y)
plot(proj.X[, 2], y)
# creat 10 new x points and y according to model
x.new <- matrix(rnorm(10 * p), 10, p)
y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(10)
# predict y.new
yhat <- predict(cve.obj.s, x.new, khat)
yhat <- predict(cve.obj.s, x.new, 2)
plot(y.new, yhat)
# projection matrix on span(B)
# same as B \%*\% t(B) since B is semi-orthogonal
PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B)
# cve estimates for B with simple and weighted method
# cve estimates for B with mean and weighted method
B.s <- coef(cve.obj.s, k = 2)
B.w <- coef(cve.obj.w, k = 2)
# same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same vor B.w)
PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s)
PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w)
# compare estimation accuracy of simple and weighted cve estimate by
# compare estimation accuracy of mean and weighted cve estimate by
# Frobenius norm of difference of projections.
norm(PB - PB.s, type = 'F')
norm(PB - PB.w, type = 'F')
}
\references{
Fertl, L. and Bura, E. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
[1] Fertl, L. and Bura, E. (2021), Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.08782
[2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.13435
}
\seealso{
For a detailed description of \code{formula} see

View File

@ -7,8 +7,8 @@
cve.call(
X,
Y,
method = "simple",
func_list = list(function(x) x),
method = c("mean", "weighted.mean", "central", "weighted.central"),
func_list = NULL,
nObs = sqrt(nrow(X)),
h = NULL,
min.dim = 1L,
@ -22,23 +22,31 @@ cve.call(
V.init = NULL,
max.iter = 50L,
attempts = 10L,
nr.proj = 1L,
logger = NULL
)
}
\arguments{
\item{X}{Design predictor matrix.}
\item{Y}{\eqn{n}-dimensional vector of responces.}
\item{Y}{\eqn{n}-dimensional vector of responses.}
\item{method}{specifies the CVE method variation as one of
\item{method}{This character string specifies the method of fitting. The
options are
\itemize{
\item "simple" exact implementation as described in the paper listed
below.
\item "weighted" variation with addaptive weighting of slices.
\item "mean" method to estimate the mean subspace, see [1].
\item "central" ensemble method to estimate the central subspace, see [2].
\item "weighted.mean" variation of `"mean"` method with adaptive weighting
of slices, see [1].
\item "weighted.central" variation of `"central"` method with adaptive
weighting of slices, see [2].
}}
\item{func_list}{a list of functions applied to `Y` to form the ensamble
CVE for central sub-space estimation.}
\item{func_list}{a list of functions applied to \code{Y} used by ECVE
(see [2]) for central subspace estimation. The default ensemble are
indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
percent response quantiles. (only relevant if \code{method} is
\code{"central"} or \code{"weighted.central"}, ignored otherwise)}
\item{nObs}{parameter for choosing bandwidth \code{h} using
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
@ -79,6 +87,10 @@ used as starting value in the optimization. (If supplied,
out \code{attempts} times with starting values drawn from the invariant
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
\item{nr.proj}{The number of projection used for projective resampling for
multivariate response \eqn{Y} (under active development, ignored for
univariate response).}
\item{logger}{a logger function (only for advanced users, slows down the
computation).}
}
@ -113,6 +125,46 @@ This is the main function in the \code{CVE} package. It creates objects of
class \code{"cve"} to estimate the mean subspace. Helper functions that
require a \code{"cve"} object can then be applied to the output from this
function.
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
function provides methods for estimating the dimension and the subspace
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
rank \eqn{k} such that
\deqn{E(Y|X) = E(Y|B'X)}
or, equivalently,
\deqn{Y = g(B'X) + \epsilon}
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
CVE method makes very few assumptions.
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
The primary output from this method is a set of orthonormal vectors,
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
The method central implements the Ensemble Conditional Variance Estimation
(ECVE) as described in [2]. It augments the CVE method by applying an
ensemble of functions (parameter \code{func_list}) to the response to
estimate the central subspace. This corresponds to the generalization
\deqn{F(Y|X) = F(Y|B'X)}
or, equivalently,
\deqn{Y = g(B'X, \epsilon)}
where \eqn{F} is the conditional cumulative distribution function.
}
\examples{
# create B for simulation (k = 1)
@ -138,3 +190,12 @@ cve.obj.simple2 <- cve.call(X, Y, k = 1)
coef(cve.obj.simple1, k = 1)
coef(cve.obj.simple2, k = 1)
}
\references{
[1] Fertl, L. and Bura, E. (2021), Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.08782
[2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.13435
}

View File

@ -122,6 +122,7 @@ location 0, shape-parameter 1, and the scale-parameter is chosen such that
}
\references{
Fertl, L. and Bura, E. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
Fertl, L. and Bura, E. (2021), Conditional Variance
Estimation for Sufficient Dimension Reduction.
arXiv:2102.08782
}

View File

@ -33,11 +33,11 @@ x <- matrix(rnorm(500), 100, 5)
# y = f(B'x) + err
# with f(x1) = x1 and err ~ N(0, 0.25^2)
y <- x \%*\% B + 0.25 * rnorm(100)
# calculate cve with method 'simple' for k = 1
# calculate cve with method 'mean' for k = 1
set.seed(21)
cve.obj.simple <- cve(y ~ x, k = 1, method = 'mean')
cve.obj.mean <- cve(y ~ x, k = 1, method = 'mean')
# get projected data for k = 1
x.proj <- directions(cve.obj.simple, k = 1)
x.proj <- directions(cve.obj.mean, k = 1)
# plot y against projected data
plot(x.proj, y)

View File

@ -1,57 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot.R
\name{plot.cve}
\alias{plot.cve}
\title{Elbow plot of the loss function.}
\usage{
\method{plot}{cve}(x, ...)
}
\arguments{
\item{x}{an object of class \code{"cve"}, usually, a result of a call to
\code{\link{cve}} or \code{\link{cve.call}}.}
\item{...}{Pass through parameters to [\code{\link{plot}}] and
[\code{\link{lines}}]}
}
\description{
Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
\code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
to \eqn{L_n(V, X_i)} where \eqn{V} is the minimizer of \eqn{L_n(V)} where
\eqn{V} is an element of a Stiefel manifold (see
Fertl, L. and Bura, E. (2019)).
}
\examples{
# create B for simulation
B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
set.seed(21)
# creat predictor data x ~ N(0, I_p)
X <- matrix(rnorm(600), 100)
# simulate response variable
# y = f(B'x) + err
# with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
Y <- (X \%*\% B[, 1])^2 + 2 * X \%*\% B[, 2] + rnorm(100, 0, .1)
# Create bandwidth estimation function
estimate.bandwidth <- function(X, k, nObs) {
n <- nrow(X)
p <- ncol(X)
X_c <- scale(X, center = TRUE, scale = FALSE)
2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
}
# calculate cve with method 'simple' for k = min.dim,...,max.dim
cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
# elbow plot
plot(cve.obj.simple)
}
\references{
Fertl, L. and Bura, E. (2019), Conditional Variance
Estimation for Sufficient Dimension Reduction. Working Paper.
}
\seealso{
see \code{\link{par}} for graphical parameters to pass through
as well as \code{\link{plot}}, the standard plot utility.
}

View File

@ -21,7 +21,7 @@ prediced respone(s) for \code{newdata}.
}
\description{
Predict response using projected data. The forward model \eqn{g(B' X)} is
estimated with \code{\link{mars}} in the \code{\pkg{mda}} package.
estimated with \code{\link{mars}} in the \pkg{mda} package.
}
\examples{
# create B for simulation

View File

@ -1,235 +0,0 @@
library(CVE)
library(reticulate)
library(tensorflow)
#' Null space basis of given matrix `V`
#'
#' @param V `(p, q)` matrix
#' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`.
#' @keywords internal
#' @export
null <- function(V) {
tmp <- qr(V)
set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
}
subspace_dist <- function(A, B) {
P <- A %*% solve(t(A) %*% A, t(A))
Q <- B %*% solve(t(B) %*% B, t(B))
norm(P - Q, 'F') / sqrt(ncol(A) + ncol(B))
}
estimate.bandwidth <- function (X, k, nObs = sqrt(nrow(X)), version = 1L) {
n <- nrow(X)
p <- ncol(X)
X_c <- scale(X, center = TRUE, scale = FALSE)
if (version == 1) {
(2 * sum(X_c^2) / (n * p)) * (1.2 * n^(-1 / (4 + k)))^2
} else if (version == 2) {
2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
} else {
stop("Unknown version.")
}
}
tf_Variable <- function(obj, dtype = "float32", ...) {
tf$Variable(obj, dtype = dtype, ...)
}
tf_constant <- function(obj, dtype = "float32", ...) {
tf$constant(obj, dtype = dtype, ...)
}
cve.tf <- function(X, Y, k, h = estimate.bandwidth(X, k, sqrt(nrow(X))),
V.init = NULL, optimizer_initialier = tf$optimizers$RMSprop, attempts = 10L,
nr.projections = nrow(X)^(3 / 2),
sd_noise = 0, method = c("simple", "weighted")
) {
method <- match.arg(method)
`-0.5` <- tf_constant(-0.5)
`1` <- tf_constant(1)
`2` <- tf_constant(2)
n <- nrow(X)
p <- ncol(X)
k <- as.integer(k)
q <- p - k
if (!is.matrix(Y))
Y <- as.matrix(Y)
# Projective resampling.
if (ncol(Y) > 1L) {
R <- matrix(rnorm(ncol(Y) * nr.projections), ncol(Y))
R <- t(t(R) / sqrt(colSums(R^2)))
Y <- Y %*% R
}
X <- tf_constant(scale(X))
Y <- tf_constant(scale(Y))
I <- tf_constant(diag(1, p))
h <- tf_Variable(h)
loss <- tf_function(function(V) {
Q <- I - tf$matmul(V, V, transpose_b = TRUE)
if (sd_noise > 0)
XQ <- tf$matmul(X + tf$random$normal(list(n, p), stddev = sd_noise), Q)
else
XQ <- tf$matmul(X, Q)
S <- tf$matmul(XQ, XQ, transpose_b = TRUE)
d <- tf$linalg$diag_part(S)
D <- tf$reshape(d, list(n, 1L)) + tf$reshape(d, list(1L, n)) - `2` * S
K <- tf$exp((`-0.5` / h) * tf$pow(D, 2L))
w <- tf$reduce_sum(K, 1L, keepdims = TRUE)
y1 <- tf$divide(tf$matmul(K, Y), w)
y2 <- tf$divide(tf$matmul(K, tf$pow(Y, 2L)), w)
if (method == "simple") {
l <- tf$reduce_mean(y2 - tf$pow(y1, 2L))
} else {# weighted
w <- w - `1`
w <- w / tf$reduce_sum(w)
l <- tf$reduce_sum(w * (y2 - tf$pow(y1, 2L)))
l <- l / tf$cast(tf$shape(Y)[2], "float32")
}
l
})
if (is.null(V.init))
V.init <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
else
attempts <- 1L
V <- tf_Variable(V.init, constraint = function(w) { tf$linalg$qr(w)$q })
min.loss <- Inf
for (attempt in seq_len(attempts)) {
optimizer = optimizer_initialier()
out <- tf$while_loop(
cond = tf_function(function(i, L) i < 400L),
body = tf_function(function(i, L) {
with(tf$GradientTape() %as% tape, {
tape$watch(V)
L <- loss(V)
})
grad <- tape$gradient(L, V)
optimizer$apply_gradients(list(list(grad, V)))
list(i + 1L, L)
}),
loop_vars = list(tf_constant(0L, "int32"), tf_constant(Inf))
)
if (as.numeric(out[[2]]) < min.loss) {
min.loss <- as.numeric(out[[2]])
min.V <- as.matrix(V)
}
V$assign(qr.Q(qr(matrix(rnorm(p * q), p, q))))
}
list(B = null(min.V), V = min.V, loss = min.loss)
}
# ds <- dataset(1)
# out <- cve.call2(ds$X, ds$Y, ncol(ds$B))
plot.sim <- function(sim) {
name <- deparse(substitute(sim))
ssd <- sapply(sim, function(s) subspace_dist(s$B.true, s$B.est))
print(summary(ssd))
h <- hist(ssd, freq = FALSE, breaks = seq(0, 1, 0.1), main = name,
xlab = "Subspace Distance")
lines(density(ssd, from = 0, to = 1))
stat <- c(Median = median(ssd), Mean = mean(ssd))
abline(v = stat, lty = 2)
text(stat, max(h$density), names(stat),
pos = if(diff(stat) > 0) c("2", "4") else c("4", "2"))
}
multivariate.dataset <- function(dataset = 1, n = 100, p = 6, q = 4) {
CVE <- getNamespace('CVE')
X <- matrix(rnorm(n * p), n, p)
if (dataset == 1) {
Delta <- diag(1, q, q)
Delta[1, 2] <- Delta[2, 1] <- -0.5
epsilon <- CVE$rmvnorm(n, sigma = Delta)
B <- matrix(0, p, q)
B[1, 1] <- 1
B[2, 2] <- 2 / sqrt(5)
B[3, 2] <- 1 / sqrt(5)
Y <- X %*% B + epsilon
B <- B[, 1:2]
}
if (dataset == 2) {
B <- matrix(c(0.8, 0.6, 0, 0, 0, 0))
eps <- matrix(0, n, q)
Delta <- diag(1, q, q)
for(i in 1:n) {
Delta[1, 2] <- Delta[2, 1] <- sin(X[i, ] %*% B)
eps[i, ] <- CVE$rmvnorm(1, sigma = Delta)
}
Y<-cbind(exp(eps[, 1]), eps[, 2:4])
}
list(X = X, Y = Y, B = B)
}
set.seed(42)
reps <- 5L
sim.cve.m <- vector("list", reps)
sim.cve.c <- vector("list", reps)
sim.cve.wm <- vector("list", reps)
sim.cve.wc <- vector("list", reps)
sim.tf1 <- vector("list", reps)
sim.tf2 <- vector("list", reps)
start <- Sys.time()
for (i in 1:reps) {
# ds <- dataset(1)
ds <- multivariate.dataset(2, n = 400)
sim.cve.m[[i]] <- list(
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "mean"), ncol(ds$B)),
B.true = ds$B
)
sim.cve.c[[i]] <- list(
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "central"), ncol(ds$B)),
B.true = ds$B
)
sim.cve.wm[[i]] <- list(
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.mean"), ncol(ds$B)),
B.true = ds$B
)
sim.cve.wc[[i]] <- list(
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.central"), ncol(ds$B)),
B.true = ds$B
)
# sim.tf1[[i]] <- list(
# B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
# optimizer_initialier = tf$optimizers$Adam)$B,
# B.true = ds$B
# )
# sim.tf2[[i]] <- list(
# B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
# optimizer_initialier = tf$optimizers$Adam,
# method = "weighted")$B,
# B.true = ds$B
# )
cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n')
}
# pdf('subspace_comp.pdf')
plot.sim(sim.cve)
plot.sim(sim.tf1)
plot.sim(sim.tf2)
par(mfrow = c(2, 2))
plot.sim(sim.cve.m)
plot.sim(sim.cve.c)
plot.sim(sim.cve.wm)
plot.sim(sim.cve.wc)
# dev.off()