121 lines
4.5 KiB
R
121 lines
4.5 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/CVE.R
|
|
\name{cve}
|
|
\alias{cve}
|
|
\title{Conditional Variance Estimator (CVE)}
|
|
\usage{
|
|
cve(formula, data, method = "simple", max.dim = 10L, ...)
|
|
}
|
|
\arguments{
|
|
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
|
description of the model to be fitted.}
|
|
|
|
\item{data}{an optional data frame, containing the data for the formula if
|
|
supplied.}
|
|
|
|
\item{method}{specifies the CVE method variation as one of
|
|
\itemize{
|
|
\item "simple" exact implementation as described in the paper listed
|
|
below.
|
|
\item "weighted" variation with adaptive weighting of slices.
|
|
}}
|
|
|
|
\item{max.dim}{upper bound for \code{k} (ignored if \code{k} is supplied).}
|
|
|
|
\item{...}{Parameters passed on to \code{cve.call}.}
|
|
}
|
|
\value{
|
|
an S3 object of class \code{cve} with components:
|
|
\describe{
|
|
\item{X}{Original training data,}
|
|
\item{Y}{Response of original training data,}
|
|
\item{method}{Name of used method,}
|
|
\item{call}{the matched call,}
|
|
\item{res}{list of components \code{V, L, B, loss, h} and \code{k} for
|
|
each \eqn{k=min.dim,...,max.dim} (dimension).}
|
|
}
|
|
}
|
|
\description{
|
|
Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
|
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
|
where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
|
|
similarly to its main competitor, the minimum average variance estimation
|
|
(MAVE), is not based on inverse regression, and does not require the
|
|
restrictive linearity and constant variance conditions of moment based SDR
|
|
methods. CVE is data-driven and applies to additive error regressions with
|
|
continuous predictors and link function. The effectiveness and accuracy of
|
|
CVE compared to MAVE and other SDR techniques is demonstrated in simulation
|
|
studies. CVE is shown to outperform MAVE in some model set-ups, while it
|
|
remains largely on par under most others.
|
|
Let \eqn{Y} be a real univariate response and \eqn{X} a real
|
|
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
|
\eqn{Y} and \eqn{X} is modelled by
|
|
\deqn{Y = g(B'X) + \epsilon}
|
|
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
|
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
|
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
|
is an unknown, continuous non-constant function,
|
|
and \eqn{B = (b_1, ..., b_k)} is
|
|
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
|
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
|
}
|
|
\examples{
# Simulation example: fit CVE on data generated from a known two-dimensional
# reduction B, then estimate the dimension, project, predict and compare the
# 'simple' and 'weighted' estimates against the true B.

# set dimensions for simulation model
p <- 8
k <- 2
# create B for simulation (columns b1, b2 are orthonormal for even p)
b1 <- rep(1 / sqrt(p), p)
b2 <- (-1)^seq(1, p) / sqrt(p)
B <- cbind(b1, b2)
# sample size
n <- 200
set.seed(21)
# create predictor data x ~ N(0, I_p)
x <- matrix(rnorm(n * p), n, p)
# simulate response variable
#     y = f(B'x) + err
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
# (draw one error per observation; a shorter vector would be silently
#  recycled and the errors would no longer be independent)
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(n)

# calculate cve with method 'simple' for k unknown in 1, ..., 4
cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
# calculate cve with method 'weighted' for k = 2
cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted')
# estimate dimension from cve.obj.s
khat <- predict_dim(cve.obj.s)$k
# get cve-estimate for B with dimensions (p, k = khat)
B2 <- coef(cve.obj.s, k = khat)
# get projected X data (same as cve.obj.s$X \%*\% B2)
proj.X <- directions(cve.obj.s, k = khat)
# plot y against projected data
plot(proj.X[, 1], y)
plot(proj.X[, 2], y)

# create n.new new x points and y according to model
n.new <- 10
x.new <- matrix(rnorm(n.new * p), n.new, p)
y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(n.new)
# predict y.new
yhat <- predict(cve.obj.s, x.new, khat)
plot(y.new, yhat)

# projection matrix on span(B)
# same as B \%*\% t(B) since B is semi-orthogonal
PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B)
# cve estimates for B with simple and weighted method
B.s <- coef(cve.obj.s, k = 2)
B.w <- coef(cve.obj.w, k = 2)
# same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same for B.w)
PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s)
PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w)
# compare estimation accuracy of simple and weighted cve estimate by
# Frobenius norm of difference of projections.
norm(PB - PB.s, type = 'F')
norm(PB - PB.w, type = 'F')
}
|
|
\references{
|
|
Fertl Lukas, Bura Efstathia. (2019), Conditional Variance
|
|
Estimation for Sufficient Dimension Reduction. Working Paper.
|
|
}
|
|
\seealso{
|
|
For a detailed description of \code{formula} see
|
|
\code{\link{formula}}.
|
|
}
|