203 lines
6.9 KiB
R
203 lines
6.9 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/CVE.R
|
|
\name{cve.call}
|
|
\alias{cve.call}
|
|
\title{Conditional Variance Estimator (CVE).}
|
|
\usage{
|
|
cve.call(
|
|
X,
|
|
Y,
|
|
method = c("mean", "weighted.mean", "central", "weighted.central"),
|
|
func_list = NULL,
|
|
nObs = sqrt(nrow(X)),
|
|
h = NULL,
|
|
min.dim = 1L,
|
|
max.dim = 10L,
|
|
k = NULL,
|
|
momentum = 0,
|
|
tau = 1,
|
|
tol = 0.001,
|
|
slack = 0,
|
|
gamma = 0.5,
|
|
V.init = NULL,
|
|
max.iter = 50L,
|
|
attempts = 10L,
|
|
nr.proj = 1L,
|
|
logger = NULL
|
|
)
|
|
}
|
|
\arguments{
|
|
\item{X}{Design predictor matrix.}
|
|
|
|
\item{Y}{\eqn{n}-dimensional vector of responses.}
|
|
|
|
\item{method}{This character string specifies the method of fitting. The
|
|
options are
|
|
\itemize{
|
|
\item \code{"mean"} method to estimate the mean subspace, see [1].
|
|
\item \code{"central"} ensemble method to estimate the central subspace,
|
|
see [2].
|
|
\item \code{"weighted.mean"} variation of \code{"mean"} method with
|
|
adaptive weighting of slices, see [1].
|
|
\item \code{"weighted.central"} variation of \code{"central"} method with
|
|
adaptive weighting of slices, see [2].
|
|
}}
|
|
|
|
\item{func_list}{a list of functions applied to \code{Y} used by ECVE
|
|
(see [2]) for central subspace estimation. The default ensemble consists of
|
|
indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
|
|
percent response quantiles. (only relevant if \code{method} is
|
|
\code{"central"} or \code{"weighted.central"}, ignored otherwise)}
|
|
|
|
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
|
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
|
|
|
\item{h}{bandwidth or function to estimate bandwidth, defaults to internally
|
|
estimated bandwidth.}
|
|
|
|
\item{min.dim}{lower bound for \code{k}, (ignored if \code{k} is supplied).}
|
|
|
|
\item{max.dim}{upper bound for \code{k}, (ignored if \code{k} is supplied).}
|
|
|
|
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
|
|
only the specified dimension \code{B} matrix is estimated.}
|
|
|
|
\item{momentum}{number in \eqn{[0, 1)} giving the ratio of momentum for
|
|
Euclidean gradient update with a momentum term. \code{momentum = 0}
|
|
corresponds to normal gradient descent.}
|
|
|
|
\item{tau}{Initial step-size.}
|
|
|
|
\item{tol}{Tolerance for break condition.}
|
|
|
|
\item{slack}{Positive scaling to allow small increases of the loss while
|
|
optimizing, i.e. \code{slack = 0.1} allows the target function to
|
|
increase up to \eqn{10 \%} in one optimization step.}
|
|
|
|
\item{gamma}{step-size reduction multiple. If gradient step with step size
|
|
\code{tau} is not accepted \code{gamma * tau} is set to the next step
|
|
size.}
|
|
|
|
\item{V.init}{Semi-orthogonal matrix of dimensions \code{(ncol(X), ncol(X) - k)}
|
|
used as starting value in the optimization. (If supplied,
|
|
\code{attempts} is set to 0 and \code{k} to match dimension).}
|
|
|
|
\item{max.iter}{maximum number of optimization steps.}
|
|
|
|
\item{attempts}{If \code{V.init} not supplied, the optimization is carried
|
|
out \code{attempts} times with starting values drawn from the invariant
|
|
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
|
|
|
|
\item{nr.proj}{The number of projections used for projective resampling for
|
|
multivariate response \eqn{Y} (under active development, ignored for
|
|
univariate response).}
|
|
|
|
\item{logger}{a logger function (only for advanced users, slows down the
|
|
computation).}
|
|
}
|
|
\value{
|
|
an S3 object of class \code{cve} with components:
|
|
\describe{
|
|
\item{X}{design matrix of predictor vector used for calculating
|
|
cve-estimate,}
|
|
\item{Y}{\eqn{n}-dimensional vector of responses used for calculating
|
|
cve-estimate,}
|
|
\item{method}{Name of used method,}
|
|
\item{call}{the matched call,}
|
|
\item{res}{list of components \code{V, L, B, loss, h} for
|
|
each \code{k = min.dim, ..., max.dim}. If \code{k} was supplied in the
|
|
call \code{min.dim = max.dim = k}.
|
|
\itemize{
|
|
\item \code{B} is the cve-estimate with dimension
|
|
\eqn{p\times k}{p x k}.
|
|
\item \code{V} is the orthogonal complement of \eqn{B}.
|
|
\item \code{L} is the loss for each sample separately such that
|
|
its mean is \code{loss}.
|
|
\item \code{loss} is the value of the target function that is
|
|
minimized, evaluated at \eqn{V}.
|
|
\item \code{h} bandwidth parameter used to calculate
|
|
\code{B, V, loss, L}.
|
|
}
|
|
}
|
|
}
|
|
}
|
|
\description{
|
|
This is the main function in the \code{CVE} package. It creates objects of
|
|
class \code{"cve"} to estimate the mean subspace. Helper functions that
|
|
require a \code{"cve"} object can then be applied to the output from this
|
|
function.
|
|
|
|
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
|
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
|
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
|
function provides methods for estimating the dimension and the subspace
|
|
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
|
rank \eqn{k} such that
|
|
|
|
\deqn{E(Y|X) = E(Y|B'X)}
|
|
|
|
or, equivalently,
|
|
|
|
\deqn{Y = g(B'X) + \epsilon}
|
|
|
|
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
|
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
|
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
|
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
|
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
|
|
|
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
|
CVE method makes very few assumptions.
|
|
|
|
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
|
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
|
The primary output from this method is a set of orthonormal vectors,
|
|
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
|
|
|
|
The method \code{"central"} implements the Ensemble Conditional Variance Estimation
|
|
(ECVE) as described in [2]. It augments the CVE method by applying an
|
|
ensemble of functions (parameter \code{func_list}) to the response to
|
|
estimate the central subspace. This corresponds to the generalization
|
|
|
|
\deqn{F(Y|X) = F(Y|B'X)}
|
|
|
|
or, equivalently,
|
|
|
|
\deqn{Y = g(B'X, \epsilon)}
|
|
|
|
where \eqn{F} is the conditional cumulative distribution function.
|
|
}
|
|
\examples{
|
|
# create B for simulation (k = 1)
|
|
B <- rep(1, 5) / sqrt(5)
|
|
|
|
set.seed(21)
|
|
# create predictor data X ~ N(0, I_p)
|
|
X <- matrix(rnorm(500), 100, 5)
|
|
# simulate response variable
|
|
# Y = f(B'X) + err
|
|
# with f(x1) = x1 and err ~ N(0, 0.25^2)
|
|
Y <- X \%*\% B + 0.25 * rnorm(100)
|
|
|
|
# calculate cve with method 'mean' (the default) for k = 1
|
|
set.seed(21)
|
|
cve.obj.simple1 <- cve(Y ~ X, k = 1)
|
|
|
|
# same as
|
|
set.seed(21)
|
|
cve.obj.simple2 <- cve.call(X, Y, k = 1)
|
|
|
|
# extract estimated B's.
|
|
coef(cve.obj.simple1, k = 1)
|
|
coef(cve.obj.simple2, k = 1)
|
|
}
|
|
\references{
|
|
[1] Fertl, L. and Bura, E. (2021), Conditional Variance
|
|
Estimation for Sufficient Dimension Reduction.
|
|
arXiv:2102.08782
|
|
|
|
[2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
|
|
Estimation for Sufficient Dimension Reduction.
|
|
arXiv:2102.13435
|
|
}
|