Compare commits
20 Commits
Author | SHA1 | Date |
---|---|---|
Daniel Kapla | 9d0f551dc3 | |
Daniel Kapla | f641f2132c | |
Daniel Kapla | ac34244094 | |
Daniel Kapla | a419a8cdc7 | |
Daniel Kapla | c554ae6e9c | |
Daniel Kapla | 1454833f7d | |
Daniel Kapla | 2344120dd9 | |
Daniel Kapla | 8e95902e79 | |
Daniel Kapla | 25b20984d5 | |
Daniel Kapla | e93bfdda05 | |
Daniel Kapla | ad7ae9eeec | |
Daniel Kapla | 05c2aea44a | |
Daniel Kapla | 025b9eb2af | |
Daniel Kapla | 4a950d6df2 | |
Daniel Kapla | 70ceccb599 | |
Daniel Kapla | 54b7c9de2d | |
Daniel Kapla | 4696620363 | |
Michael Vesely | 1ac3b4c9e6 | |
Daniel Kapla | 377b3503ab | |
Daniel Kapla | 02ca1868ac |
|
@ -0,0 +1,2 @@
|
||||||
|
*.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.jpg filter=lfs diff=lfs merge=lfs -text
|
|
@ -0,0 +1,17 @@
|
||||||
|
simulations/results/*
|
||||||
|
literature/*
|
||||||
|
doc/*
|
||||||
|
|
||||||
|
CVarE/src/*.o
|
||||||
|
CVarE/src/*.so
|
||||||
|
CVarE/src/*.dll
|
||||||
|
|
||||||
|
*.tgz
|
||||||
|
*.tar.xz
|
||||||
|
*.tar.gz
|
||||||
|
*.zip
|
||||||
|
*.so
|
||||||
|
*.Rcheck/*
|
||||||
|
|
||||||
|
tmp/*
|
||||||
|
wip/*
|
|
@ -1,12 +0,0 @@
|
||||||
Package: CVE
|
|
||||||
Type: Package
|
|
||||||
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
|
||||||
Version: 0.2
|
|
||||||
Date: 2019-12-20
|
|
||||||
Author: Daniel Kapla <daniel@kapla.at>, Lukas Fertl <lukas.fertl@chello.at>
|
|
||||||
Maintainer: Daniel Kapla <daniel@kapla.at>
|
|
||||||
Description: Implementation of the Conditional Variance Estimation (CVE) method.
|
|
||||||
License: GPL-3
|
|
||||||
Encoding: UTF-8
|
|
||||||
Imports: stats,graphics,mda
|
|
||||||
RoxygenNote: 6.1.1
|
|
62
CVE/R/plot.R
62
CVE/R/plot.R
|
@ -1,62 +0,0 @@
|
||||||
#' Elbow plot of the loss function.
|
|
||||||
#'
|
|
||||||
#' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
|
|
||||||
#' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
|
|
||||||
#' to \eqn{L_n(V, X_i)} where \eqn{V} is a stiefel manifold element as
|
|
||||||
#' minimizer of
|
|
||||||
#' \eqn{L_n(V)}, for further details see Fertl, L. and Bura, E. (2019).
|
|
||||||
#'
|
|
||||||
#' @param x an object of class \code{"cve"}, usually, a result of a call to
|
|
||||||
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
|
||||||
#' @param ... Pass through parameters to [\code{\link{plot}}] and
|
|
||||||
#' [\code{\link{lines}}]
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' # create B for simulation
|
|
||||||
#' B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
|
|
||||||
#'
|
|
||||||
#' set.seed(21)
|
|
||||||
#' # creat predictor data x ~ N(0, I_p)
|
|
||||||
#' X <- matrix(rnorm(600), 100)
|
|
||||||
#'
|
|
||||||
#' # simulate response variable
|
|
||||||
#' # y = f(B'x) + err
|
|
||||||
#' # with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
|
|
||||||
#' Y <- (X %*% B[, 1])^2 + 2 * X %*% B[, 2] + rnorm(100, 0, .1)
|
|
||||||
#'
|
|
||||||
#' # Create bandwidth estimation function
|
|
||||||
#' estimate.bandwidth <- function(X, k, nObs) {
|
|
||||||
#' n <- nrow(X)
|
|
||||||
#' p <- ncol(X)
|
|
||||||
#' X_c <- scale(X, center = TRUE, scale = FALSE)
|
|
||||||
#' 2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
|
|
||||||
#' }
|
|
||||||
#' # calculate cve with method 'simple' for k = min.dim,...,max.dim
|
|
||||||
#' cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
|
|
||||||
#'
|
|
||||||
#' # elbow plot
|
|
||||||
#' plot(cve.obj.simple)
|
|
||||||
#'
|
|
||||||
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
|
|
||||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
|
||||||
#'
|
|
||||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
|
||||||
#' as well as \code{\link{plot}}, the standard plot utility.
|
|
||||||
#' @method plot cve
|
|
||||||
#' @importFrom graphics plot lines points boxplot
|
|
||||||
#' @export
|
|
||||||
plot.cve <- function(x, ...) {
|
|
||||||
L <- c()
|
|
||||||
k <- c()
|
|
||||||
for (dr.k in x$res) {
|
|
||||||
if (class(dr.k) == 'cve.k') {
|
|
||||||
k <- c(k, as.character(dr.k$k))
|
|
||||||
L <- c(L, dr.k$L)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
L <- matrix(L, ncol = length(k)) / var(x$Y)
|
|
||||||
boxplot(L, main = "elbow plot",
|
|
||||||
xlab = "SDR dimension",
|
|
||||||
ylab = "Sample loss distribution",
|
|
||||||
names = k)
|
|
||||||
}
|
|
Binary file not shown.
|
@ -1,57 +0,0 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/plot.R
|
|
||||||
\name{plot.cve}
|
|
||||||
\alias{plot.cve}
|
|
||||||
\title{Elbow plot of the loss function.}
|
|
||||||
\usage{
|
|
||||||
\method{plot}{cve}(x, ...)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{x}{an object of class \code{"cve"}, usually, a result of a call to
|
|
||||||
\code{\link{cve}} or \code{\link{cve.call}}.}
|
|
||||||
|
|
||||||
\item{...}{Pass through parameters to [\code{\link{plot}}] and
|
|
||||||
[\code{\link{lines}}]}
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
|
|
||||||
\code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
|
|
||||||
to \eqn{L_n(V, X_i)} where \eqn{V} is a stiefel manifold element as
|
|
||||||
minimizer of
|
|
||||||
\eqn{L_n(V)}, for further details see Fertl, L. and Bura, E. (2019).
|
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
# create B for simulation
|
|
||||||
B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
|
|
||||||
|
|
||||||
set.seed(21)
|
|
||||||
# creat predictor data x ~ N(0, I_p)
|
|
||||||
X <- matrix(rnorm(600), 100)
|
|
||||||
|
|
||||||
# simulate response variable
|
|
||||||
# y = f(B'x) + err
|
|
||||||
# with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
|
|
||||||
Y <- (X \%*\% B[, 1])^2 + 2 * X \%*\% B[, 2] + rnorm(100, 0, .1)
|
|
||||||
|
|
||||||
# Create bandwidth estimation function
|
|
||||||
estimate.bandwidth <- function(X, k, nObs) {
|
|
||||||
n <- nrow(X)
|
|
||||||
p <- ncol(X)
|
|
||||||
X_c <- scale(X, center = TRUE, scale = FALSE)
|
|
||||||
2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
|
|
||||||
}
|
|
||||||
# calculate cve with method 'simple' for k = min.dim,...,max.dim
|
|
||||||
cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
|
|
||||||
|
|
||||||
# elbow plot
|
|
||||||
plot(cve.obj.simple)
|
|
||||||
|
|
||||||
}
|
|
||||||
\references{
|
|
||||||
Fertl, L. and Bura, E. (2019), Conditional Variance
|
|
||||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
|
||||||
}
|
|
||||||
\seealso{
|
|
||||||
see \code{\link{par}} for graphical parameters to pass through
|
|
||||||
as well as \code{\link{plot}}, the standard plot utility.
|
|
||||||
}
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
Package: CVarE
|
||||||
|
Type: Package
|
||||||
|
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||||
|
Version: 1.1
|
||||||
|
Date: 2021-03-09
|
||||||
|
Maintainer: Daniel Kapla <daniel@kapla.at>
|
||||||
|
Author: Daniel Kapla [aut, cph, cre],
|
||||||
|
Lukas Fertl [aut, cph],
|
||||||
|
Efstathia Bura [ctb]
|
||||||
|
Authors@R: c(
|
||||||
|
person("Daniel", "Kapla", role = c("aut", "cph", "cre"), email = "daniel@kapla.at"),
|
||||||
|
person("Lukas", "Fertl", role = c("aut", "cph")),
|
||||||
|
person("Efstathia", "Bura", role = "ctb")
|
||||||
|
)
|
||||||
|
Description: Implementation of the CVE (Conditional Variance Estimation) method
|
||||||
|
proposed by Fertl, L. and Bura, E. (2021) <arXiv:2102.08782> and the ECVE
|
||||||
|
(Ensemble Conditional Variance Estimation) method introduced in
|
||||||
|
Fertl, L. and Bura, E. (2021) <arXiv:2102.13435>.
|
||||||
|
CVE and ECVE are sufficient dimension reduction methods
|
||||||
|
in regressions with continuous response and predictors. CVE applies to general
|
||||||
|
additive error regression models while ECVE generalizes to non-additive error
|
||||||
|
regression models. They operate under the assumption that the predictors can
|
||||||
|
be replaced by a lower dimensional projection without loss of information.
|
||||||
|
It is a semiparametric forward regression model based exhaustive sufficient
|
||||||
|
dimension reduction estimation method that is shown to be consistent under mild
|
||||||
|
assumptions.
|
||||||
|
License: GPL-3
|
||||||
|
Contact: <daniel@kapla.at>
|
||||||
|
URL: https://git.art-ist.cc/daniel/CVE
|
||||||
|
Encoding: UTF-8
|
||||||
|
NeedsCompilation: yes
|
||||||
|
Imports: stats, mda
|
||||||
|
RoxygenNote: 7.0.2
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
S3method(coef,cve)
|
S3method(coef,cve)
|
||||||
S3method(directions,cve)
|
S3method(directions,cve)
|
||||||
S3method(plot,cve)
|
|
||||||
S3method(predict,cve)
|
S3method(predict,cve)
|
||||||
S3method(summary,cve)
|
S3method(summary,cve)
|
||||||
export(cve)
|
export(cve)
|
||||||
|
@ -14,12 +13,8 @@ export(null)
|
||||||
export(predict_dim)
|
export(predict_dim)
|
||||||
export(rStiefel)
|
export(rStiefel)
|
||||||
import(stats)
|
import(stats)
|
||||||
importFrom(graphics,boxplot)
|
|
||||||
importFrom(graphics,lines)
|
|
||||||
importFrom(graphics,plot)
|
|
||||||
importFrom(graphics,points)
|
|
||||||
importFrom(mda,mars)
|
importFrom(mda,mars)
|
||||||
importFrom(stats,model.frame)
|
importFrom(stats,model.frame)
|
||||||
importFrom(stats,rbinom)
|
importFrom(stats,rbinom)
|
||||||
importFrom(stats,rnorm)
|
importFrom(stats,rnorm)
|
||||||
useDynLib(CVE, .registration = TRUE)
|
useDynLib(CVarE, .registration = TRUE)
|
|
@ -3,7 +3,7 @@
|
||||||
#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
||||||
#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
||||||
#' where \eqn{B'X} is a lower dimensional projection of the predictors and
|
#' where \eqn{B'X} is a lower dimensional projection of the predictors and
|
||||||
#' \eqn{Y} is a univariate responce. CVE,
|
#' \eqn{Y} is a univariate response. CVE,
|
||||||
#' similarly to its main competitor, the mean average variance estimation
|
#' similarly to its main competitor, the mean average variance estimation
|
||||||
#' (MAVE), is not based on inverse regression, and does not require the
|
#' (MAVE), is not based on inverse regression, and does not require the
|
||||||
#' restrictive linearity and constant variance conditions of moment based SDR
|
#' restrictive linearity and constant variance conditions of moment based SDR
|
||||||
|
@ -11,7 +11,9 @@
|
||||||
#' continuous predictors and link function. Let \eqn{X} be a real
|
#' continuous predictors and link function. Let \eqn{X} be a real
|
||||||
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
|
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||||
#' \eqn{Y} and \eqn{X} is modelled by
|
#' \eqn{Y} and \eqn{X} is modelled by
|
||||||
|
#'
|
||||||
#' \deqn{Y = g(B'X) + \epsilon}
|
#' \deqn{Y = g(B'X) + \epsilon}
|
||||||
|
#'
|
||||||
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||||
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||||
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||||
|
@ -20,21 +22,81 @@
|
||||||
#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
#' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
||||||
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
|
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||||
#'
|
#'
|
||||||
|
#' Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
|
||||||
|
#' implemented which is a SDR method in regressions with continuous response and
|
||||||
|
#' predictors. ECVE applies to general non-additive error regression models.
|
||||||
|
#'
|
||||||
|
#' \deqn{Y = g(B'X, \epsilon)}
|
||||||
|
#'
|
||||||
|
#' It operates under the assumption that the predictors can be replaced by a
|
||||||
|
#' lower dimensional projection without loss of information.It is a
|
||||||
|
#' semiparametric forward regression model based exhaustive sufficient dimension
|
||||||
|
#' reduction estimation method that is shown to be consistent under mild
|
||||||
|
#' assumptions.
|
||||||
|
#'
|
||||||
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
|
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
|
||||||
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
|
#'
|
||||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
#' @references
|
||||||
|
#' [1] Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
|
#' Estimation for Sufficient Dimension Reduction"
|
||||||
|
#' <arXiv:2102.08782>
|
||||||
|
#'
|
||||||
|
#' [2] Fertl, L. and Bura, E. (2021) "Ensemble Conditional Variance
|
||||||
|
#' Estimation for Sufficient Dimension Reduction"
|
||||||
|
#' <arXiv:2102.13435>
|
||||||
#'
|
#'
|
||||||
#' @docType package
|
#' @docType package
|
||||||
#' @useDynLib CVE, .registration = TRUE
|
#' @useDynLib CVarE, .registration = TRUE
|
||||||
"_PACKAGE"
|
"_PACKAGE"
|
||||||
|
|
||||||
#' Conditional Variance Estimator (CVE).
|
#' Conditional Variance Estimator (CVE).
|
||||||
#'
|
#'
|
||||||
|
#' @description
|
||||||
#' This is the main function in the \code{CVE} package. It creates objects of
|
#' This is the main function in the \code{CVE} package. It creates objects of
|
||||||
#' class \code{"cve"} to estimate the mean subspace. Helper functions that
|
#' class \code{"cve"} to estimate the mean subspace. Helper functions that
|
||||||
#' require a \code{"cve"} object can then be applied to the output from this
|
#' require a \code{"cve"} object can then be applied to the output from this
|
||||||
#' function.
|
#' function.
|
||||||
#'
|
#'
|
||||||
|
#' Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
||||||
|
#' (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
||||||
|
#' expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
||||||
|
#' function provides methods for estimating the dimension and the subspace
|
||||||
|
#' spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
||||||
|
#' rank \eqn{k} such that
|
||||||
|
#'
|
||||||
|
#' \deqn{E(Y|X) = E(Y|B'X)}
|
||||||
|
#'
|
||||||
|
#' or, equivalently,
|
||||||
|
#'
|
||||||
|
#' \deqn{Y = g(B'X) + \epsilon}
|
||||||
|
#'
|
||||||
|
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||||
|
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||||
|
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||||
|
#' is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
||||||
|
#' is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
||||||
|
#'
|
||||||
|
#' Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
||||||
|
#' CVE method makes very few assumptions.
|
||||||
|
#'
|
||||||
|
#' A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
||||||
|
#' of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
||||||
|
#' The primary output from this method is a set of orthonormal vectors,
|
||||||
|
#' \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
|
||||||
|
#'
|
||||||
|
#' The method central implements the Ensemble Conditional Variance Estimation
|
||||||
|
#' (ECVE) as described in [2]. It augments the CVE method by applying an
|
||||||
|
#' ensemble of functions (parameter \code{func_list}) to the response to
|
||||||
|
#' estimate the central subspace. This corresponds to the generalization
|
||||||
|
#'
|
||||||
|
#' \deqn{F(Y|X) = F(Y|B'X)}
|
||||||
|
#'
|
||||||
|
#' or, equivalently,
|
||||||
|
#'
|
||||||
|
#' \deqn{Y = g(B'X, \epsilon)}
|
||||||
|
#'
|
||||||
|
#' where \eqn{F} is the conditional cumulative distribution function.
|
||||||
|
#'
|
||||||
#' @param formula an object of class \code{"formula"} which is a symbolic
|
#' @param formula an object of class \code{"formula"} which is a symbolic
|
||||||
#' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
|
#' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
|
||||||
#' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
|
#' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
|
||||||
|
@ -46,41 +108,17 @@
|
||||||
#' @param method This character string specifies the method of fitting. The
|
#' @param method This character string specifies the method of fitting. The
|
||||||
#' options are
|
#' options are
|
||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item "simple" implementation,
|
#' \item \code{"mean"} method to estimate the mean subspace, see [1].
|
||||||
#' \item "weighted" variation with adaptive weighting of slices.
|
#' \item \code{"central"} ensemble method to estimate the central subspace,
|
||||||
|
#' see [2].
|
||||||
|
#' \item \code{"weighted.mean"} variation of \code{"mean"} method with
|
||||||
|
#' adaptive weighting of slices, see [1].
|
||||||
|
#' \item \code{"weighted.central"} variation of \code{"central"} method with
|
||||||
|
#' adaptive weighting of slices, see [2].
|
||||||
#' }
|
#' }
|
||||||
#' see Fertl, L. and Bura, E. (2019).
|
|
||||||
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
|
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
|
||||||
#' @param ... optional parameters passed on to \code{cve.call}.
|
#' @param ... optional parameters passed on to \code{\link{cve.call}}.
|
||||||
#'
|
#'
|
||||||
#'
|
|
||||||
#' Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
|
||||||
#' (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
|
||||||
#' expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
|
||||||
#' function provides methods for estimating the dimension and the subspace
|
|
||||||
#' spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
|
||||||
#' rank \eqn{k} such that
|
|
||||||
#' \deqn{%
|
|
||||||
#' E(Y|X) = E(Y|B'X) %
|
|
||||||
#' }
|
|
||||||
#' or, equivalently,
|
|
||||||
#' \deqn{%
|
|
||||||
#' Y = g(B'X) + \epsilon %
|
|
||||||
#' }
|
|
||||||
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
|
||||||
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
|
||||||
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
|
||||||
#' is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
|
||||||
#' is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
|
||||||
#'
|
|
||||||
#' Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
|
||||||
#' CVE method makes very few assumptions.
|
|
||||||
#'
|
|
||||||
#' A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
|
||||||
#' of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
|
||||||
#' The primary output from this method is a set of orthonormal vectors,
|
|
||||||
#' \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
|
|
||||||
#'
|
|
||||||
#' @return an S3 object of class \code{cve} with components:
|
#' @return an S3 object of class \code{cve} with components:
|
||||||
#' \describe{
|
#' \describe{
|
||||||
#' \item{X}{design matrix of predictor vector used for calculating
|
#' \item{X}{design matrix of predictor vector used for calculating
|
||||||
|
@ -108,62 +146,71 @@
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # set dimensions for simulation model
|
#' # set dimensions for simulation model
|
||||||
#' p <- 8
|
#' p <- 5
|
||||||
#' k <- 2
|
#' k <- 2
|
||||||
#' # create B for simulation
|
#' # create B for simulation
|
||||||
#' b1 <- rep(1 / sqrt(p), p)
|
#' b1 <- rep(1 / sqrt(p), p)
|
||||||
#' b2 <- (-1)^seq(1, p) / sqrt(p)
|
#' b2 <- (-1)^seq(1, p) / sqrt(p)
|
||||||
#' B <- cbind(b1, b2)
|
#' B <- cbind(b1, b2)
|
||||||
#' # sample size
|
#' # sample size
|
||||||
#' n <- 200
|
#' n <- 100
|
||||||
#' set.seed(21)
|
#' set.seed(21)
|
||||||
|
#'
|
||||||
#' # creat predictor data x ~ N(0, I_p)
|
#' # creat predictor data x ~ N(0, I_p)
|
||||||
#' x <- matrix(rnorm(n * p), n, p)
|
#' x <- matrix(rnorm(n * p), n, p)
|
||||||
#' # simulate response variable
|
#' # simulate response variable
|
||||||
#' # y = f(B'x) + err
|
#' # y = f(B'x) + err
|
||||||
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
||||||
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
|
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(n)
|
||||||
#' # calculate cve with method 'simple' for k unknown in 1, ..., 4
|
#'
|
||||||
#' cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
|
#' # calculate cve with method 'mean' for k unknown in 1, ..., 3
|
||||||
|
#' cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
|
||||||
#' # calculate cve with method 'weighed' for k = 2
|
#' # calculate cve with method 'weighed' for k = 2
|
||||||
#' cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted')
|
#' cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
|
||||||
#' # estimate dimension from cve.obj.s
|
#' B2 <- coef(cve.obj.s, k = 2)
|
||||||
#' khat <- predict_dim(cve.obj.s)$k
|
#'
|
||||||
#' # get cve-estimate for B with dimensions (p, k = khat)
|
|
||||||
#' B2 <- coef(cve.obj.s, k = khat)
|
|
||||||
#' # get projected X data (same as cve.obj.s$X %*% B2)
|
#' # get projected X data (same as cve.obj.s$X %*% B2)
|
||||||
#' proj.X <- directions(cve.obj.s, k = khat)
|
#' proj.X <- directions(cve.obj.s, k = 2)
|
||||||
#' # plot y against projected data
|
#' # plot y against projected data
|
||||||
#' plot(proj.X[, 1], y)
|
#' plot(proj.X[, 1], y)
|
||||||
#' plot(proj.X[, 2], y)
|
#' plot(proj.X[, 2], y)
|
||||||
|
#'
|
||||||
#' # creat 10 new x points and y according to model
|
#' # creat 10 new x points and y according to model
|
||||||
#' x.new <- matrix(rnorm(10 * p), 10, p)
|
#' x.new <- matrix(rnorm(10 * p), 10, p)
|
||||||
#' y.new <- (x.new %*% b1)^2 + 2 * (x.new %*% b2) + 0.25 * rnorm(10)
|
#' y.new <- (x.new %*% b1)^2 + 2 * (x.new %*% b2) + 0.25 * rnorm(10)
|
||||||
#' # predict y.new
|
#' # predict y.new
|
||||||
#' yhat <- predict(cve.obj.s, x.new, khat)
|
#' yhat <- predict(cve.obj.s, x.new, 2)
|
||||||
#' plot(y.new, yhat)
|
#' plot(y.new, yhat)
|
||||||
|
#'
|
||||||
#' # projection matrix on span(B)
|
#' # projection matrix on span(B)
|
||||||
#' # same as B %*% t(B) since B is semi-orthogonal
|
#' # same as B %*% t(B) since B is semi-orthogonal
|
||||||
#' PB <- B %*% solve(t(B) %*% B) %*% t(B)
|
#' PB <- B %*% solve(t(B) %*% B) %*% t(B)
|
||||||
#' # cve estimates for B with simple and weighted method
|
#' # cve estimates for B with mean and weighted method
|
||||||
#' B.s <- coef(cve.obj.s, k = 2)
|
#' B.s <- coef(cve.obj.s, k = 2)
|
||||||
#' B.w <- coef(cve.obj.w, k = 2)
|
#' B.w <- coef(cve.obj.w, k = 2)
|
||||||
#' # same as B.s %*% t(B.s) since B.s is semi-orthogonal (same vor B.w)
|
#' # same as B.s %*% t(B.s) since B.s is semi-orthogonal (same vor B.w)
|
||||||
#' PB.s <- B.s %*% solve(t(B.s) %*% B.s) %*% t(B.s)
|
#' PB.s <- B.s %*% solve(t(B.s) %*% B.s) %*% t(B.s)
|
||||||
#' PB.w <- B.w %*% solve(t(B.w) %*% B.w) %*% t(B.w)
|
#' PB.w <- B.w %*% solve(t(B.w) %*% B.w) %*% t(B.w)
|
||||||
#' # compare estimation accuracy of simple and weighted cve estimate by
|
#' # compare estimation accuracy of mean and weighted cve estimate by
|
||||||
#' # Frobenius norm of difference of projections.
|
#' # Frobenius norm of difference of projections.
|
||||||
#' norm(PB - PB.s, type = 'F')
|
#' norm(PB - PB.s, type = 'F')
|
||||||
#' norm(PB - PB.w, type = 'F')
|
#' norm(PB - PB.w, type = 'F')
|
||||||
#'
|
#'
|
||||||
#' @seealso For a detailed description of \code{formula} see
|
#' @seealso For a detailed description of \code{formula} see
|
||||||
#' \code{\link{formula}}.
|
#' \code{\link{formula}}.
|
||||||
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
|
#'
|
||||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
#' @references
|
||||||
|
#' [1] Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
|
#' Estimation for Sufficient Dimension Reduction"
|
||||||
|
#' <arXiv:2102.08782>
|
||||||
|
#'
|
||||||
|
#' [2] Fertl, L. and Bura, E. (2021) "Ensemble Conditional Variance
|
||||||
|
#' Estimation for Sufficient Dimension Reduction"
|
||||||
|
#' <arXiv:2102.13435>
|
||||||
#'
|
#'
|
||||||
#' @importFrom stats model.frame
|
#' @importFrom stats model.frame
|
||||||
#' @export
|
#' @export
|
||||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
|
||||||
# check for type of `data` if supplied and set default
|
# check for type of `data` if supplied and set default
|
||||||
if (missing(data)) {
|
if (missing(data)) {
|
||||||
data <- environment(formula)
|
data <- environment(formula)
|
||||||
|
@ -173,8 +220,11 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
|
|
||||||
# extract `X`, `Y` from `formula` with `data`
|
# extract `X`, `Y` from `formula` with `data`
|
||||||
model <- stats::model.frame(formula, data)
|
model <- stats::model.frame(formula, data)
|
||||||
X <- as.matrix(model[ ,-1L, drop = FALSE])
|
Y <- stats::model.response(model, "double")
|
||||||
Y <- as.double(model[ , 1L])
|
X <- stats::model.matrix(model, data)
|
||||||
|
if ("(Intercept)" %in% colnames(X)) {
|
||||||
|
X <- X[, "(Intercept)" != colnames(X), drop = FALSE]
|
||||||
|
}
|
||||||
|
|
||||||
# pass extracted data on to [cve.call()]
|
# pass extracted data on to [cve.call()]
|
||||||
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
||||||
|
@ -188,17 +238,27 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' @inherit cve description
|
#' @inherit cve description
|
||||||
#'
|
#'
|
||||||
#' @param X Design predictor matrix.
|
#' @param X Design predictor matrix.
|
||||||
#' @param Y \eqn{n}-dimensional vector of responces.
|
#' @param Y \eqn{n}-dimensional vector of responses.
|
||||||
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
|
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
|
||||||
#' estimated bandwidth.
|
#' estimated bandwidth.
|
||||||
#' @param nObs parameter for choosing bandwidth \code{h} using
|
#' @param nObs parameter for choosing bandwidth \code{h} using
|
||||||
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
|
#' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
|
||||||
#' @param method specifies the CVE method variation as one of
|
#' @param method This character string specifies the method of fitting. The
|
||||||
|
#' options are
|
||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item "simple" exact implementation as described in the paper listed
|
#' \item \code{"mean"} method to estimate the mean subspace, see [1].
|
||||||
#' below.
|
#' \item \code{"central"} ensemble method to estimate the central subspace,
|
||||||
#' \item "weighted" variation with addaptive weighting of slices.
|
#' see [2].
|
||||||
|
#' \item \code{"weighted.mean"} variation of \code{"mean"} method with
|
||||||
|
#' adaptive weighting of slices, see [1].
|
||||||
|
#' \item \code{"weighted.central"} variation of \code{"central"} method with
|
||||||
|
#' adaptive weighting of slices, see [2].
|
||||||
#' }
|
#' }
|
||||||
|
#' @param func_list a list of functions applied to \code{Y} used by ECVE
|
||||||
|
#' (see [2]) for central subspace estimation. The default ensemble are
|
||||||
|
#' indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
|
||||||
|
#' percent response quantiles. (only relevant if \code{method} is
|
||||||
|
#' \code{"central"} or \code{"weighted.central"}, ignored otherwise)
|
||||||
#' @param k Dimension of lower dimensional projection, if \code{k} is given
|
#' @param k Dimension of lower dimensional projection, if \code{k} is given
|
||||||
#' only the specified dimension \code{B} matrix is estimated.
|
#' only the specified dimension \code{B} matrix is estimated.
|
||||||
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
|
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
|
||||||
|
@ -209,6 +269,9 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' @param attempts If \code{V.init} not supplied, the optimization is carried
|
#' @param attempts If \code{V.init} not supplied, the optimization is carried
|
||||||
#' out \code{attempts} times with starting values drawn from the invariant
|
#' out \code{attempts} times with starting values drawn from the invariant
|
||||||
#' measure on the Stiefel manifold (see \code{\link{rStiefel}}).
|
#' measure on the Stiefel manifold (see \code{\link{rStiefel}}).
|
||||||
|
#' @param nr.proj The number of projection used for projective resampling for
|
||||||
|
#' multivariate response \eqn{Y} (under active development, ignored for
|
||||||
|
#' univariate response).
|
||||||
#' @param momentum number of \eqn{[0, 1)} giving the ration of momentum for
|
#' @param momentum number of \eqn{[0, 1)} giving the ration of momentum for
|
||||||
#' eucledian gradient update with a momentum term. \code{momentum = 0}
|
#' eucledian gradient update with a momentum term. \code{momentum = 0}
|
||||||
#' corresponds to normal gradient descend.
|
#' corresponds to normal gradient descend.
|
||||||
|
@ -225,6 +288,7 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' computation).
|
#' computation).
|
||||||
#'
|
#'
|
||||||
#' @inherit cve return
|
#' @inherit cve return
|
||||||
|
#' @inherit cve references
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # create B for simulation (k = 1)
|
#' # create B for simulation (k = 1)
|
||||||
|
@ -250,24 +314,39 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' coef(cve.obj.simple1, k = 1)
|
#' coef(cve.obj.simple1, k = 1)
|
||||||
#' coef(cve.obj.simple2, k = 1)
|
#' coef(cve.obj.simple2, k = 1)
|
||||||
#' @export
|
#' @export
|
||||||
cve.call <- function(X, Y, method = "simple",
|
cve.call <- function(X, Y,
|
||||||
nObs = sqrt(nrow(X)), h = NULL,
|
method = c("mean", "weighted.mean", "central", "weighted.central"),
|
||||||
min.dim = 1L, max.dim = 10L, k = NULL,
|
func_list = NULL, nObs = sqrt(nrow(X)), h = NULL,
|
||||||
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
min.dim = 1L, max.dim = 10L, k = NULL,
|
||||||
slack = 0.0, gamma = 0.5,
|
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
||||||
V.init = NULL,
|
slack = 0.0, gamma = 0.5,
|
||||||
max.iter = 50L, attempts = 10L,
|
V.init = NULL,
|
||||||
logger = NULL) {
|
max.iter = 50L, attempts = 10L, nr.proj = 1L,
|
||||||
|
logger = NULL
|
||||||
|
) {
|
||||||
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")
|
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")
|
||||||
methods <- list(
|
method <- match.arg(method)
|
||||||
"simple" = 0L,
|
method_nr <- if(startsWith(method, "weighted")) 1L else 0L
|
||||||
"weighted" = 1L
|
|
||||||
)
|
# Set default functions for ensamble methods (of indentity else)
|
||||||
method_nr <- methods[[tolower(method), exact = FALSE]]
|
if (is.null(func_list)) {
|
||||||
if (!is.integer(method_nr)) {
|
if (endsWith(method, "central")) {
|
||||||
stop('Unable to determine method.')
|
func_list <- list(
|
||||||
|
function(Y) { q <- quantile(Y, 0.1); as.double(Y <= q) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.1, 0.2)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.2, 0.3)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.3, 0.4)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.4, 0.5)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.5, 0.6)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.6, 0.7)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.7, 0.8)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.8, 0.9)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, 0.9); as.double(q < Y) }
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
func_list <- list(function(Y) Y)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
method <- names(which(method_nr == methods))
|
|
||||||
|
|
||||||
# parameter checking
|
# parameter checking
|
||||||
if (!is.numeric(momentum) || length(momentum) > 1L) {
|
if (!is.numeric(momentum) || length(momentum) > 1L) {
|
||||||
|
@ -286,10 +365,13 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
if (!is.numeric(Y)) {
|
if (!is.numeric(Y)) {
|
||||||
stop("Parameter 'Y' must be numeric.")
|
stop("Parameter 'Y' must be numeric.")
|
||||||
}
|
}
|
||||||
if (is.matrix(Y) || !is.double(Y)) {
|
if (!is.double(Y)) {
|
||||||
Y <- as.double(Y)
|
storage.mode(Y) <- "double"
|
||||||
}
|
}
|
||||||
if (nrow(X) != length(Y)) {
|
if (!is.matrix(Y)) {
|
||||||
|
Y <- as.matrix(Y)
|
||||||
|
}
|
||||||
|
if (nrow(X) != nrow(Y)) {
|
||||||
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
||||||
}
|
}
|
||||||
if (ncol(X) < 2) {
|
if (ncol(X) < 2) {
|
||||||
|
@ -364,6 +446,13 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
stop("Parameter 'max.iter' must be at least 1L.")
|
stop("Parameter 'max.iter' must be at least 1L.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is.integer(nr.proj)) {
|
||||||
|
nr.proj <- as.integer(nr.proj)
|
||||||
|
}
|
||||||
|
if (length(nr.proj) > 1 || nr.proj < 1) {
|
||||||
|
stop("Parameter 'nr.proj' must be a single positive integer.")
|
||||||
|
}
|
||||||
|
|
||||||
if (is.null(V.init)) {
|
if (is.null(V.init)) {
|
||||||
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
||||||
stop("Parameter 'attempts' must be positive integer.")
|
stop("Parameter 'attempts' must be positive integer.")
|
||||||
|
@ -375,8 +464,19 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Projective resampling of the multivariate `Y` as a `n x nr.proj` matrix.
|
||||||
|
if (ncol(Y) > 1) {
|
||||||
|
projections <- matrix(rnorm(ncol(Y) * nr.proj), nr.proj)
|
||||||
|
projections <- projections / sqrt(rowSums(projections^2))
|
||||||
|
Y <- Y %*% t(projections)
|
||||||
|
}
|
||||||
|
# Evaluate each function given (possible projected) `Y` and build a
|
||||||
|
# `n x (|func_list| * nr.proj)` matrix.
|
||||||
|
Fy <- vapply(func_list, do.call, Y, list(Y))
|
||||||
|
dim(Fy) <- c(nrow(Fy), prod(dim(Fy)[-1]))
|
||||||
|
|
||||||
# Convert numerical values to "double".
|
# Convert numerical values to "double".
|
||||||
storage.mode(X) <- storage.mode(Y) <- "double"
|
storage.mode(X) <- storage.mode(Fy) <- "double"
|
||||||
|
|
||||||
if (is.function(logger)) {
|
if (is.function(logger)) {
|
||||||
loggerEnv <- environment(logger)
|
loggerEnv <- environment(logger)
|
||||||
|
@ -395,8 +495,8 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
h <- estimate.bandwidth(X, k, nObs)
|
h <- estimate.bandwidth(X, k, nObs)
|
||||||
}
|
}
|
||||||
|
|
||||||
dr.k <- .Call('cve_export', PACKAGE = 'CVE',
|
dr.k <- .Call('cve_export', PACKAGE = 'CVarE',
|
||||||
X, Y, k, h,
|
X, Fy, k, h,
|
||||||
method_nr,
|
method_nr,
|
||||||
V.init,
|
V.init,
|
||||||
momentum, tau, tol,
|
momentum, tau, tol,
|
||||||
|
@ -415,6 +515,7 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
# augment result information
|
# augment result information
|
||||||
dr$X <- X
|
dr$X <- X
|
||||||
dr$Y <- Y
|
dr$Y <- Y
|
||||||
|
dr$Fy <- Fy
|
||||||
dr$method <- method
|
dr$method <- method
|
||||||
dr$call <- call
|
dr$call <- call
|
||||||
class(dr) <- "cve"
|
class(dr) <- "cve"
|
|
@ -14,7 +14,7 @@
|
||||||
#' # set dimensions for simulation model
|
#' # set dimensions for simulation model
|
||||||
#' p <- 8 # sample dimension
|
#' p <- 8 # sample dimension
|
||||||
#' k <- 2 # real dimension of SDR subspace
|
#' k <- 2 # real dimension of SDR subspace
|
||||||
#' n <- 200 # samplesize
|
#' n <- 100 # samplesize
|
||||||
#' # create B for simulation
|
#' # create B for simulation
|
||||||
#' b1 <- rep(1 / sqrt(p), p)
|
#' b1 <- rep(1 / sqrt(p), p)
|
||||||
#' b2 <- (-1)^seq(1, p) / sqrt(p)
|
#' b2 <- (-1)^seq(1, p) / sqrt(p)
|
||||||
|
@ -25,10 +25,10 @@
|
||||||
#' x <- matrix(rnorm(n * p), n, p)
|
#' x <- matrix(rnorm(n * p), n, p)
|
||||||
#' # simulate response variable
|
#' # simulate response variable
|
||||||
#' # y = f(B'x) + err
|
#' # y = f(B'x) + err
|
||||||
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
|
||||||
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
|
#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.1 * rnorm(100)
|
||||||
#' # calculate cve for k = 1, ..., 5
|
#' # calculate cve for k = 2, 3
|
||||||
#' cve.obj <- cve(y ~ x, max.dim = 5)
|
#' cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
|
||||||
#' # get cve-estimate for B with dimensions (p, k = 2)
|
#' # get cve-estimate for B with dimensions (p, k = 2)
|
||||||
#' B2 <- coef(cve.obj, k = 2)
|
#' B2 <- coef(cve.obj, k = 2)
|
||||||
#'
|
#'
|
|
@ -10,10 +10,9 @@
|
||||||
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
|
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' \dontrun{
|
#' CVarE:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
||||||
#' rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
#' CVarE:::rmvnorm(20, mu = c(3, -1, 2))
|
||||||
#' rmvnorm(20, mu = c(3, -1, 2))
|
#'
|
||||||
#' }
|
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
||||||
if (!missing(sigma)) {
|
if (!missing(sigma)) {
|
||||||
|
@ -25,11 +24,10 @@ rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
||||||
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
stop("At least one of 'mu' or 'sigma' must be supplied.")
|
||||||
}
|
}
|
||||||
|
|
||||||
# See: https://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
|
||||||
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
|
return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Multivariate t distribution.
|
#' Multivariate t Distribution.
|
||||||
#'
|
#'
|
||||||
#' Random generation from multivariate t distribution (student distribution).
|
#' Random generation from multivariate t distribution (student distribution).
|
||||||
#'
|
#'
|
||||||
|
@ -44,11 +42,10 @@ rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
|
||||||
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
|
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' \dontrun{
|
#' CVarE:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
||||||
#' rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
#' CVarE:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
|
||||||
#' rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
|
#' CVarE:::rmvt(20, mu = c(3, -1, 2), df = 3)
|
||||||
#' rmvt(20, mu = c(3, -1, 2), 3)
|
#'
|
||||||
#' }
|
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
|
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
|
||||||
if (!missing(sigma)) {
|
if (!missing(sigma)) {
|
||||||
|
@ -80,7 +77,6 @@ rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
|
||||||
#'
|
#'
|
||||||
#' @return numeric array of length \eqn{n}.
|
#' @return numeric array of length \eqn{n}.
|
||||||
#'
|
#'
|
||||||
#' @seealso https://en.wikipedia.org/wiki/Generalized_normal_distribution
|
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
|
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
|
||||||
if (alpha <= 0 | beta <= 0) {
|
if (alpha <= 0 | beta <= 0) {
|
||||||
|
@ -101,7 +97,6 @@ rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
|
||||||
#'
|
#'
|
||||||
#' @return numeric array of length \eqn{n}.
|
#' @return numeric array of length \eqn{n}.
|
||||||
#'
|
#'
|
||||||
#' @seealso https://en.wikipedia.org/wiki/Laplace_distribution
|
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
rlaplace <- function(n = 1, mu = 0, sd = 1) {
|
rlaplace <- function(n = 1, mu = 0, sd = 1) {
|
||||||
U <- runif(n, -0.5, 0.5)
|
U <- runif(n, -0.5, 0.5)
|
||||||
|
@ -200,8 +195,10 @@ rlaplace <- function(n = 1, mu = 0, sd = 1) {
|
||||||
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
|
#' location 0, shape-parameter 1, and the scale-parameter is chosen such that
|
||||||
#' \eqn{Var(\epsilon) = 0.25}.
|
#' \eqn{Var(\epsilon) = 0.25}.
|
||||||
#'
|
#'
|
||||||
#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
|
#' @references
|
||||||
#' Estimation for Sufficient Dimension Reduction. Working Paper.
|
#' Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
|
#' Estimation for Sufficient Dimension Reduction"
|
||||||
|
#' <arXiv:2102.08782>
|
||||||
#'
|
#'
|
||||||
#' @import stats
|
#' @import stats
|
||||||
#' @importFrom stats rnorm rbinom
|
#' @importFrom stats rnorm rbinom
|
|
@ -5,8 +5,8 @@ directions <- function(object, k, ...) {
|
||||||
|
|
||||||
#' Computes projected training data \code{X} for given dimension `k`.
|
#' Computes projected training data \code{X} for given dimension `k`.
|
||||||
#'
|
#'
|
||||||
#' Returns \eqn{B'X}. That is the dimensional design matrix \eqn{X} on the
|
#' Returns \eqn{B'X}. That is, it computes the projection of the \eqn{n x p}
|
||||||
#' columnspace of the cve-estimate for given dimension \eqn{k}.
|
#' design matrix \eqn{X} on the column space of \eqn{B} of dimension \eqn{k}.
|
||||||
#'
|
#'
|
||||||
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
||||||
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
||||||
|
@ -26,11 +26,11 @@ directions <- function(object, k, ...) {
|
||||||
#' # y = f(B'x) + err
|
#' # y = f(B'x) + err
|
||||||
#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
|
#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
|
||||||
#' y <- x %*% B + 0.25 * rnorm(100)
|
#' y <- x %*% B + 0.25 * rnorm(100)
|
||||||
#' # calculate cve with method 'simple' for k = 1
|
#' # calculate cve with method 'mean' for k = 1
|
||||||
#' set.seed(21)
|
#' set.seed(21)
|
||||||
#' cve.obj.simple <- cve(y ~ x, k = 1, method = 'simple')
|
#' cve.obj.mean <- cve(y ~ x, k = 1, method = 'mean')
|
||||||
#' # get projected data for k = 1
|
#' # get projected data for k = 1
|
||||||
#' x.proj <- directions(cve.obj.simple, k = 1)
|
#' x.proj <- directions(cve.obj.mean, k = 1)
|
||||||
#' # plot y against projected data
|
#' # plot y against projected data
|
||||||
#' plot(x.proj, y)
|
#' plot(x.proj, y)
|
||||||
#'
|
#'
|
|
@ -37,7 +37,6 @@
|
||||||
#' # calculate cve with method 'simple' for k = 1
|
#' # calculate cve with method 'simple' for k = 1
|
||||||
#' set.seed(21)
|
#' set.seed(21)
|
||||||
#' cve.obj.simple <- cve(y ~ x, k = k)
|
#' cve.obj.simple <- cve(y ~ x, k = k)
|
||||||
#' print(cve.obj.simple$res$'1'$h)
|
|
||||||
#' print(estimate.bandwidth(x, k = k))
|
#' print(estimate.bandwidth(x, k = k))
|
||||||
#' @export
|
#' @export
|
||||||
estimate.bandwidth <- function (X, k, nObs, version = 1L) {
|
estimate.bandwidth <- function (X, k, nObs, version = 1L) {
|
|
@ -1,7 +1,7 @@
|
||||||
#' Predict method for CVE Fits.
|
#' Predict method for CVE Fits.
|
||||||
#'
|
#'
|
||||||
#' Predict response using projected data \eqn{B'C} by fitting
|
#' Predict response using projected data. The forward model \eqn{g(B' X)} is
|
||||||
#' \eqn{g(B'C) + \epsilon} using \code{\link{mars}}.
|
#' estimated with \code{\link{mars}} in the \pkg{mda} package.
|
||||||
#'
|
#'
|
||||||
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
||||||
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
||||||
|
@ -9,7 +9,7 @@
|
||||||
#' @param k dimension of SDR space to be used for data projection.
|
#' @param k dimension of SDR space to be used for data projection.
|
||||||
#' @param ... further arguments passed to \code{\link{mars}}.
|
#' @param ... further arguments passed to \code{\link{mars}}.
|
||||||
#'
|
#'
|
||||||
#' @return prediced response at \code{newdata}.
|
#' @return prediced respone(s) for \code{newdata}.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # create B for simulation
|
#' # create B for simulation
|
|
@ -7,8 +7,8 @@ predict_dim_cv <- function(object) {
|
||||||
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
|
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
|
||||||
X <- X %*% solve(Sigma_root)
|
X <- X %*% solve(Sigma_root)
|
||||||
|
|
||||||
pred <- matrix(0, n, length(object$res))
|
pred <- array(0, c(n, ncol(object$Fy), length(object$res)),
|
||||||
colnames(pred) <- names(object$res)
|
dimnames = list(NULL, NULL, names(object$res)))
|
||||||
for (dr.k in object$res) {
|
for (dr.k in object$res) {
|
||||||
# get "name" of current dimension
|
# get "name" of current dimension
|
||||||
k <- as.character(dr.k$k)
|
k <- as.character(dr.k$k)
|
||||||
|
@ -16,12 +16,11 @@ predict_dim_cv <- function(object) {
|
||||||
X.proj <- X %*% dr.k$B
|
X.proj <- X %*% dr.k$B
|
||||||
|
|
||||||
for (i in 1:n) {
|
for (i in 1:n) {
|
||||||
model <- mda::mars(X.proj[-i, ], object$Y[-i])
|
model <- mda::mars(X.proj[-i, ], object$Fy[-i, ])
|
||||||
pred[i, k] <- predict(model, X.proj[i, , drop = F])
|
pred[i, , k] <- predict(model, X.proj[i, , drop = FALSE])
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
MSE <- colMeans((pred - object$Y)^2)
|
MSE <- apply((pred - as.numeric(object$Fy))^2, 3, mean)
|
||||||
|
|
||||||
return(list(
|
return(list(
|
||||||
MSE = MSE,
|
MSE = MSE,
|
||||||
|
@ -30,6 +29,9 @@ predict_dim_cv <- function(object) {
|
||||||
}
|
}
|
||||||
|
|
||||||
predict_dim_elbow <- function(object) {
|
predict_dim_elbow <- function(object) {
|
||||||
|
if (ncol(object$Fy) > 1) # TODO: Implement or find better way
|
||||||
|
stop("For multivariate or central models not supported yet.")
|
||||||
|
|
||||||
# extract original data from object (cve result)
|
# extract original data from object (cve result)
|
||||||
X <- object$X
|
X <- object$X
|
||||||
Y <- object$Y
|
Y <- object$Y
|
||||||
|
@ -71,6 +73,9 @@ predict_dim_elbow <- function(object) {
|
||||||
}
|
}
|
||||||
|
|
||||||
predict_dim_wilcoxon <- function(object, p.value = 0.05) {
|
predict_dim_wilcoxon <- function(object, p.value = 0.05) {
|
||||||
|
if (ncol(object$Fy) > 1) # TODO: Implement or find better way
|
||||||
|
stop("For multivariate or central models not supported yet.")
|
||||||
|
|
||||||
# extract original data from object (cve result)
|
# extract original data from object (cve result)
|
||||||
X <- object$X
|
X <- object$X
|
||||||
Y <- object$Y
|
Y <- object$Y
|
||||||
|
@ -122,31 +127,32 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Estimate Dimension of Reduction Space.
|
#' Estimate Dimension of the Sufficient Reduction.
|
||||||
#'
|
#'
|
||||||
#' This function estimates the dimension of the mean dimension reduction space,
|
#' This function estimates the dimension, i.e. the rank of \eqn{B}. The default
|
||||||
#' i.e. number of columns of \eqn{B} matrix. The default method \code{'CV'}
|
#' method \code{'CV'} performs leave-one-out (LOO) cross-validation using
|
||||||
#' performs l.o.o cross-validation using \code{mars}. Given
|
#' \code{mars} as follows for \code{k = min.dim, ..., max.dim} a
|
||||||
#' \code{k = min.dim, ..., max.dim} a cross-validation via \code{mars} is
|
#' cross-validation via \code{mars} is
|
||||||
#' performed on the dataset \eqn{(Y_i, B_k' X_i)_{i = 1, ..., n}} where
|
#' performed on the dataset \eqn{(Y_i, B_k' X_i)_{i = 1, ..., n}} where
|
||||||
#' \eqn{B_k} is the \eqn{p \times k}{p x k} dimensional CVE estimate. The
|
#' \eqn{B_k} is the \eqn{p \times k}{p x k} dimensional CVE estimate. The
|
||||||
#' estimated SDR dimension is the \eqn{k} where the
|
#' estimated SDR dimension is the \eqn{k} where the
|
||||||
#' cross-validation mean squared error is minimal. The method \code{'elbow'}
|
#' cross-validation mean squared error is minimal. The method \code{'elbow'}
|
||||||
#' estimates the dimension via \eqn{k = argmin_k L_n(V_{p - k})} where
|
#' estimates the dimension via \eqn{k = argmin_k L_n(V_{p - k})} where
|
||||||
#' \eqn{V_{p - k}} is space that is orthogonal to the columns-space of the CVE estimate of \eqn{B_k}. Method \code{'wilcoxon'} is similar to \code{'elbow'}
|
#' \eqn{V_{p - k}} is the space that is orthogonal to the column space of the
|
||||||
#' but finds the minimum using the wilcoxon-test.
|
#' CVE estimate of \eqn{B_k}. Method \code{'wilcoxon'} finds the minimum using
|
||||||
|
#' the Wilcoxon test.
|
||||||
#'
|
#'
|
||||||
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
||||||
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
||||||
#' @param method This parameter specify which method will be used in dimension
|
#' @param method This parameter specifies which method is used in dimension
|
||||||
#' estimation. It provides three methods \code{'CV'} (default), \code{'elbow'},
|
#' estimation. It provides three options: \code{'CV'} (default),
|
||||||
#' and \code{'wilcoxon'} to estimate the dimension of the SDR.
|
#' \code{'elbow'} and \code{'wilcoxon'}.
|
||||||
#' @param ... ignored.
|
#' @param ... ignored.
|
||||||
#'
|
#'
|
||||||
#' @return list with
|
#' @return A \code{list} with
|
||||||
#' \describe{
|
#' \describe{
|
||||||
#' \item{}{cretirion of method for \code{k = min.dim, ..., max.dim}.}
|
#' \item{}{criterion for method and \code{k = min.dim, ..., max.dim}.}
|
||||||
#' \item{k}{estimated dimension as argmin over \eqn{k} of criterion.}
|
#' \item{k}{estimated dimension is the minimizer of the criterion.}
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
@ -163,7 +169,7 @@ predict_dim_wilcoxon <- function(object, p.value = 0.05) {
|
||||||
#' y <- x %*% B + 0.25 * rnorm(100)
|
#' y <- x %*% B + 0.25 * rnorm(100)
|
||||||
#'
|
#'
|
||||||
#' # Calculate cve for unknown k between min.dim and max.dim.
|
#' # Calculate cve for unknown k between min.dim and max.dim.
|
||||||
#' cve.obj.simple <- cve(y ~ x)
|
#' cve.obj.simple <- cve(y ~ x)
|
||||||
#'
|
#'
|
||||||
#' predict_dim(cve.obj.simple)
|
#' predict_dim(cve.obj.simple)
|
||||||
#'
|
#'
|
|
@ -1,18 +1,19 @@
|
||||||
#' Prints a summary of a \code{cve} result.
|
#' Prints summary statistics of the \eqn{L} \code{cve} component.
|
||||||
#'
|
#'
|
||||||
#' Prints a summary statistics of output \code{L} from \code{cve} for
|
#' Prints a summary statistics of the \code{L} component of a \code{cve} object #' for \code{k = min.dim, ..., max.dim}.
|
||||||
#' \code{k = min.dim, ..., max.dim}.
|
|
||||||
#'
|
#'
|
||||||
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
#' @param object an object of class \code{"cve"}, usually, a result of a call to
|
||||||
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
#' \code{\link{cve}} or \code{\link{cve.call}}.
|
||||||
#' @param ... ignored.
|
#' @param ... ignored.
|
||||||
#'
|
#'
|
||||||
|
#' @return No return value, prints human readable summary.
|
||||||
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # create B for simulation
|
#' # create B for simulation
|
||||||
#' B <- rep(1, 5) / sqrt(5)
|
#' B <- rep(1, 5) / sqrt(5)
|
||||||
#'
|
#'
|
||||||
#' set.seed(21)
|
#' set.seed(21)
|
||||||
#' #creat predictor data x ~ N(0, I_p)
|
#' # create predictor data x ~ N(0, I_p)
|
||||||
#' x <- matrix(rnorm(500), 100)
|
#' x <- matrix(rnorm(500), 100)
|
||||||
#'
|
#'
|
||||||
#' # simulate response variable
|
#' # simulate response variable
|
||||||
|
@ -20,7 +21,7 @@
|
||||||
#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
|
#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
|
||||||
#' y <- x %*% B + 0.25 * rnorm(100)
|
#' y <- x %*% B + 0.25 * rnorm(100)
|
||||||
#'
|
#'
|
||||||
#' # calculate cve for unknown k between min.dim and max.dim.
|
#' # calculate cve for unknown reduction dimension.
|
||||||
#' cve.obj.simple <- cve(y ~ x)
|
#' cve.obj.simple <- cve(y ~ x)
|
||||||
#'
|
#'
|
||||||
#' summary(cve.obj.simple)
|
#' summary(cve.obj.simple)
|
|
@ -1,11 +1,13 @@
|
||||||
#' Draws a sample from the invariant measure on the Stiefel manifold
|
#' Random sample from Stiefel manifold.
|
||||||
|
#'
|
||||||
|
#' Draws a random sample from the invariant measure on the Stiefel manifold
|
||||||
#' \eqn{S(p, q)}.
|
#' \eqn{S(p, q)}.
|
||||||
#'
|
#'
|
||||||
#' @param p row dimension
|
#' @param p row dimension
|
||||||
#' @param q col dimension
|
#' @param q col dimension
|
||||||
#' @return \eqn{p \times q}{p x q} semi-orthogonal matrix.
|
#' @return A \eqn{p \times q}{p x q} semi-orthogonal matrix.
|
||||||
#' @examples
|
#' @examples
|
||||||
#' V <- rStiefel(6, 4)
|
#' V <- rStiefel(6, 4)
|
||||||
#' @export
|
#' @export
|
||||||
rStiefel <- function(p, q) {
|
rStiefel <- function(p, q) {
|
||||||
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
Binary file not shown.
Binary file not shown.
|
@ -1,15 +1,15 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/CVE.R
|
% Please edit documentation in R/CVE.R
|
||||||
\docType{package}
|
\docType{package}
|
||||||
\name{CVE-package}
|
\name{CVarE-package}
|
||||||
\alias{CVE}
|
\alias{CVarE}
|
||||||
\alias{CVE-package}
|
\alias{CVarE-package}
|
||||||
\title{Conditional Variance Estimator (CVE) Package.}
|
\title{Conditional Variance Estimator (CVE) Package.}
|
||||||
\description{
|
\description{
|
||||||
Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
Conditional Variance Estimation (CVE) is a novel sufficient dimension
|
||||||
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
|
||||||
where \eqn{B'X} is a lower dimensional projection of the predictors and
|
where \eqn{B'X} is a lower dimensional projection of the predictors and
|
||||||
\eqn{Y} is a univariate responce. CVE,
|
\eqn{Y} is a univariate response. CVE,
|
||||||
similarly to its main competitor, the mean average variance estimation
|
similarly to its main competitor, the mean average variance estimation
|
||||||
(MAVE), is not based on inverse regression, and does not require the
|
(MAVE), is not based on inverse regression, and does not require the
|
||||||
restrictive linearity and constant variance conditions of moment based SDR
|
restrictive linearity and constant variance conditions of moment based SDR
|
||||||
|
@ -17,7 +17,10 @@ methods. CVE is data-driven and applies to additive error regressions with
|
||||||
continuous predictors and link function. Let \eqn{X} be a real
|
continuous predictors and link function. Let \eqn{X} be a real
|
||||||
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
\eqn{p}-dimensional covariate vector. We assume that the dependence of
|
||||||
\eqn{Y} and \eqn{X} is modelled by
|
\eqn{Y} and \eqn{X} is modelled by
|
||||||
|
}
|
||||||
|
\details{
|
||||||
\deqn{Y = g(B'X) + \epsilon}
|
\deqn{Y = g(B'X) + \epsilon}
|
||||||
|
|
||||||
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||||
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||||
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||||
|
@ -25,10 +28,34 @@ is an unknown, continuous non-constant function,
|
||||||
and \eqn{B = (b_1, ..., b_k)} is
|
and \eqn{B = (b_1, ..., b_k)} is
|
||||||
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
||||||
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
Without loss of generality \eqn{B} is assumed to be orthonormal.
|
||||||
|
|
||||||
|
Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
|
||||||
|
implemented which is a SDR method in regressions with continuous response and
|
||||||
|
predictors. ECVE applies to general non-additive error regression models.
|
||||||
|
|
||||||
|
\deqn{Y = g(B'X, \epsilon)}
|
||||||
|
|
||||||
|
It operates under the assumption that the predictors can be replaced by a
|
||||||
|
lower dimensional projection without loss of information.It is a
|
||||||
|
semiparametric forward regression model based exhaustive sufficient dimension
|
||||||
|
reduction estimation method that is shown to be consistent under mild
|
||||||
|
assumptions.
|
||||||
}
|
}
|
||||||
\references{
|
\references{
|
||||||
Fertl, L. and Bura, E. (2019), Conditional Variance
|
[1] Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.08782>
|
||||||
|
|
||||||
|
[2] Fertl, L. and Bura, E. (2021) "Ensemble Conditional Variance
|
||||||
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.13435>
|
||||||
|
}
|
||||||
|
\seealso{
|
||||||
|
Useful links:
|
||||||
|
\itemize{
|
||||||
|
\item \url{https://git.art-ist.cc/daniel/CVE}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
\author{
|
\author{
|
||||||
Daniel Kapla, Lukas Fertl, Bura Efstathia
|
Daniel Kapla, Lukas Fertl, Bura Efstathia
|
|
@ -25,7 +25,7 @@ cve-estimate of \eqn{B} with dimension \eqn{p\times k}{p x k}.
|
||||||
# set dimensions for simulation model
|
# set dimensions for simulation model
|
||||||
p <- 8 # sample dimension
|
p <- 8 # sample dimension
|
||||||
k <- 2 # real dimension of SDR subspace
|
k <- 2 # real dimension of SDR subspace
|
||||||
n <- 200 # samplesize
|
n <- 100 # samplesize
|
||||||
# create B for simulation
|
# create B for simulation
|
||||||
b1 <- rep(1 / sqrt(p), p)
|
b1 <- rep(1 / sqrt(p), p)
|
||||||
b2 <- (-1)^seq(1, p) / sqrt(p)
|
b2 <- (-1)^seq(1, p) / sqrt(p)
|
||||||
|
@ -36,10 +36,10 @@ set.seed(21)
|
||||||
x <- matrix(rnorm(n * p), n, p)
|
x <- matrix(rnorm(n * p), n, p)
|
||||||
# simulate response variable
|
# simulate response variable
|
||||||
# y = f(B'x) + err
|
# y = f(B'x) + err
|
||||||
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
|
||||||
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
|
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.1 * rnorm(100)
|
||||||
# calculate cve for k = 1, ..., 5
|
# calculate cve for k = 2, 3
|
||||||
cve.obj <- cve(y ~ x, max.dim = 5)
|
cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
|
||||||
# get cve-estimate for B with dimensions (p, k = 2)
|
# get cve-estimate for B with dimensions (p, k = 2)
|
||||||
B2 <- coef(cve.obj, k = 2)
|
B2 <- coef(cve.obj, k = 2)
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
\alias{cve}
|
\alias{cve}
|
||||||
\title{Conditional Variance Estimator (CVE).}
|
\title{Conditional Variance Estimator (CVE).}
|
||||||
\usage{
|
\usage{
|
||||||
cve(formula, data, method = "simple", max.dim = 10L, ...)
|
cve(formula, data, method = "mean", max.dim = 10L, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
||||||
|
@ -20,42 +20,18 @@ the environment from which \code{cve} is called.}
|
||||||
\item{method}{This character string specifies the method of fitting. The
|
\item{method}{This character string specifies the method of fitting. The
|
||||||
options are
|
options are
|
||||||
\itemize{
|
\itemize{
|
||||||
\item "simple" implementation,
|
\item \code{"mean"} method to estimate the mean subspace, see [1].
|
||||||
\item "weighted" variation with adaptive weighting of slices.
|
\item \code{"central"} ensemble method to estimate the central subspace,
|
||||||
}
|
see [2].
|
||||||
see Fertl, L. and Bura, E. (2019).}
|
\item \code{"weighted.mean"} variation of \code{"mean"} method with
|
||||||
|
adaptive weighting of slices, see [1].
|
||||||
|
\item \code{"weighted.central"} variation of \code{"central"} method with
|
||||||
|
adaptive weighting of slices, see [2].
|
||||||
|
}}
|
||||||
|
|
||||||
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
|
\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||||
|
|
||||||
\item{...}{optional parameters passed on to \code{cve.call}.
|
\item{...}{optional parameters passed on to \code{\link{cve.call}}.}
|
||||||
|
|
||||||
|
|
||||||
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
|
||||||
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
|
||||||
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
|
||||||
function provides methods for estimating the dimension and the subspace
|
|
||||||
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
|
||||||
rank \eqn{k} such that
|
|
||||||
\deqn{%
|
|
||||||
E(Y|X) = E(Y|B'X) %
|
|
||||||
}
|
|
||||||
or, equivalently,
|
|
||||||
\deqn{%
|
|
||||||
Y = g(B'X) + \epsilon %
|
|
||||||
}
|
|
||||||
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
|
||||||
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
|
||||||
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
|
||||||
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
|
||||||
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
|
||||||
|
|
||||||
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
|
||||||
CVE method makes very few assumptions.
|
|
||||||
|
|
||||||
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
|
||||||
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
|
||||||
The primary output from this method is a set of orthonormal vectors,
|
|
||||||
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.}
|
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
an S3 object of class \code{cve} with components:
|
an S3 object of class \code{cve} with components:
|
||||||
|
@ -88,61 +64,108 @@ This is the main function in the \code{CVE} package. It creates objects of
|
||||||
class \code{"cve"} to estimate the mean subspace. Helper functions that
|
class \code{"cve"} to estimate the mean subspace. Helper functions that
|
||||||
require a \code{"cve"} object can then be applied to the output from this
|
require a \code{"cve"} object can then be applied to the output from this
|
||||||
function.
|
function.
|
||||||
|
|
||||||
|
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
||||||
|
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
||||||
|
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
||||||
|
function provides methods for estimating the dimension and the subspace
|
||||||
|
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
||||||
|
rank \eqn{k} such that
|
||||||
|
|
||||||
|
\deqn{E(Y|X) = E(Y|B'X)}
|
||||||
|
|
||||||
|
or, equivalently,
|
||||||
|
|
||||||
|
\deqn{Y = g(B'X) + \epsilon}
|
||||||
|
|
||||||
|
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||||
|
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||||
|
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||||
|
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
||||||
|
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
||||||
|
|
||||||
|
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
||||||
|
CVE method makes very few assumptions.
|
||||||
|
|
||||||
|
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
||||||
|
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
||||||
|
The primary output from this method is a set of orthonormal vectors,
|
||||||
|
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
|
||||||
|
|
||||||
|
The method central implements the Ensemble Conditional Variance Estimation
|
||||||
|
(ECVE) as described in [2]. It augments the CVE method by applying an
|
||||||
|
ensemble of functions (parameter \code{func_list}) to the response to
|
||||||
|
estimate the central subspace. This corresponds to the generalization
|
||||||
|
|
||||||
|
\deqn{F(Y|X) = F(Y|B'X)}
|
||||||
|
|
||||||
|
or, equivalently,
|
||||||
|
|
||||||
|
\deqn{Y = g(B'X, \epsilon)}
|
||||||
|
|
||||||
|
where \eqn{F} is the conditional cumulative distribution function.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# set dimensions for simulation model
|
# set dimensions for simulation model
|
||||||
p <- 8
|
p <- 5
|
||||||
k <- 2
|
k <- 2
|
||||||
# create B for simulation
|
# create B for simulation
|
||||||
b1 <- rep(1 / sqrt(p), p)
|
b1 <- rep(1 / sqrt(p), p)
|
||||||
b2 <- (-1)^seq(1, p) / sqrt(p)
|
b2 <- (-1)^seq(1, p) / sqrt(p)
|
||||||
B <- cbind(b1, b2)
|
B <- cbind(b1, b2)
|
||||||
# sample size
|
# sample size
|
||||||
n <- 200
|
n <- 100
|
||||||
set.seed(21)
|
set.seed(21)
|
||||||
|
|
||||||
# creat predictor data x ~ N(0, I_p)
|
# creat predictor data x ~ N(0, I_p)
|
||||||
x <- matrix(rnorm(n * p), n, p)
|
x <- matrix(rnorm(n * p), n, p)
|
||||||
# simulate response variable
|
# simulate response variable
|
||||||
# y = f(B'x) + err
|
# y = f(B'x) + err
|
||||||
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
|
||||||
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
|
y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(n)
|
||||||
# calculate cve with method 'simple' for k unknown in 1, ..., 4
|
|
||||||
cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
|
# calculate cve with method 'mean' for k unknown in 1, ..., 3
|
||||||
|
cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
|
||||||
# calculate cve with method 'weighed' for k = 2
|
# calculate cve with method 'weighed' for k = 2
|
||||||
cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted')
|
cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
|
||||||
# estimate dimension from cve.obj.s
|
B2 <- coef(cve.obj.s, k = 2)
|
||||||
khat <- predict_dim(cve.obj.s)$k
|
|
||||||
# get cve-estimate for B with dimensions (p, k = khat)
|
|
||||||
B2 <- coef(cve.obj.s, k = khat)
|
|
||||||
# get projected X data (same as cve.obj.s$X \%*\% B2)
|
# get projected X data (same as cve.obj.s$X \%*\% B2)
|
||||||
proj.X <- directions(cve.obj.s, k = khat)
|
proj.X <- directions(cve.obj.s, k = 2)
|
||||||
# plot y against projected data
|
# plot y against projected data
|
||||||
plot(proj.X[, 1], y)
|
plot(proj.X[, 1], y)
|
||||||
plot(proj.X[, 2], y)
|
plot(proj.X[, 2], y)
|
||||||
|
|
||||||
# creat 10 new x points and y according to model
|
# creat 10 new x points and y according to model
|
||||||
x.new <- matrix(rnorm(10 * p), 10, p)
|
x.new <- matrix(rnorm(10 * p), 10, p)
|
||||||
y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(10)
|
y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(10)
|
||||||
# predict y.new
|
# predict y.new
|
||||||
yhat <- predict(cve.obj.s, x.new, khat)
|
yhat <- predict(cve.obj.s, x.new, 2)
|
||||||
plot(y.new, yhat)
|
plot(y.new, yhat)
|
||||||
|
|
||||||
# projection matrix on span(B)
|
# projection matrix on span(B)
|
||||||
# same as B \%*\% t(B) since B is semi-orthogonal
|
# same as B \%*\% t(B) since B is semi-orthogonal
|
||||||
PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B)
|
PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B)
|
||||||
# cve estimates for B with simple and weighted method
|
# cve estimates for B with mean and weighted method
|
||||||
B.s <- coef(cve.obj.s, k = 2)
|
B.s <- coef(cve.obj.s, k = 2)
|
||||||
B.w <- coef(cve.obj.w, k = 2)
|
B.w <- coef(cve.obj.w, k = 2)
|
||||||
# same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same vor B.w)
|
# same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same vor B.w)
|
||||||
PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s)
|
PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s)
|
||||||
PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w)
|
PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w)
|
||||||
# compare estimation accuracy of simple and weighted cve estimate by
|
# compare estimation accuracy of mean and weighted cve estimate by
|
||||||
# Frobenius norm of difference of projections.
|
# Frobenius norm of difference of projections.
|
||||||
norm(PB - PB.s, type = 'F')
|
norm(PB - PB.s, type = 'F')
|
||||||
norm(PB - PB.w, type = 'F')
|
norm(PB - PB.w, type = 'F')
|
||||||
|
|
||||||
}
|
}
|
||||||
\references{
|
\references{
|
||||||
Fertl, L. and Bura, E. (2019), Conditional Variance
|
[1] Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.08782>
|
||||||
|
|
||||||
|
[2] Fertl, L. and Bura, E. (2021) "Ensemble Conditional Variance
|
||||||
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.13435>
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
For a detailed description of \code{formula} see
|
For a detailed description of \code{formula} see
|
|
@ -4,23 +4,51 @@
|
||||||
\alias{cve.call}
|
\alias{cve.call}
|
||||||
\title{Conditional Variance Estimator (CVE).}
|
\title{Conditional Variance Estimator (CVE).}
|
||||||
\usage{
|
\usage{
|
||||||
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
cve.call(
|
||||||
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
|
X,
|
||||||
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
|
Y,
|
||||||
max.iter = 50L, attempts = 10L, logger = NULL)
|
method = c("mean", "weighted.mean", "central", "weighted.central"),
|
||||||
|
func_list = NULL,
|
||||||
|
nObs = sqrt(nrow(X)),
|
||||||
|
h = NULL,
|
||||||
|
min.dim = 1L,
|
||||||
|
max.dim = 10L,
|
||||||
|
k = NULL,
|
||||||
|
momentum = 0,
|
||||||
|
tau = 1,
|
||||||
|
tol = 0.001,
|
||||||
|
slack = 0,
|
||||||
|
gamma = 0.5,
|
||||||
|
V.init = NULL,
|
||||||
|
max.iter = 50L,
|
||||||
|
attempts = 10L,
|
||||||
|
nr.proj = 1L,
|
||||||
|
logger = NULL
|
||||||
|
)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{X}{Design predictor matrix.}
|
\item{X}{Design predictor matrix.}
|
||||||
|
|
||||||
\item{Y}{\eqn{n}-dimensional vector of responces.}
|
\item{Y}{\eqn{n}-dimensional vector of responses.}
|
||||||
|
|
||||||
\item{method}{specifies the CVE method variation as one of
|
\item{method}{This character string specifies the method of fitting. The
|
||||||
|
options are
|
||||||
\itemize{
|
\itemize{
|
||||||
\item "simple" exact implementation as described in the paper listed
|
\item \code{"mean"} method to estimate the mean subspace, see [1].
|
||||||
below.
|
\item \code{"central"} ensemble method to estimate the central subspace,
|
||||||
\item "weighted" variation with addaptive weighting of slices.
|
see [2].
|
||||||
|
\item \code{"weighted.mean"} variation of \code{"mean"} method with
|
||||||
|
adaptive weighting of slices, see [1].
|
||||||
|
\item \code{"weighted.central"} variation of \code{"central"} method with
|
||||||
|
adaptive weighting of slices, see [2].
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
\item{func_list}{a list of functions applied to \code{Y} used by ECVE
|
||||||
|
(see [2]) for central subspace estimation. The default ensemble are
|
||||||
|
indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
|
||||||
|
percent response quantiles. (only relevant if \code{method} is
|
||||||
|
\code{"central"} or \code{"weighted.central"}, ignored otherwise)}
|
||||||
|
|
||||||
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
||||||
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
||||||
|
|
||||||
|
@ -60,6 +88,10 @@ used as starting value in the optimization. (If supplied,
|
||||||
out \code{attempts} times with starting values drawn from the invariant
|
out \code{attempts} times with starting values drawn from the invariant
|
||||||
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
|
measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
|
||||||
|
|
||||||
|
\item{nr.proj}{The number of projection used for projective resampling for
|
||||||
|
multivariate response \eqn{Y} (under active development, ignored for
|
||||||
|
univariate response).}
|
||||||
|
|
||||||
\item{logger}{a logger function (only for advanced users, slows down the
|
\item{logger}{a logger function (only for advanced users, slows down the
|
||||||
computation).}
|
computation).}
|
||||||
}
|
}
|
||||||
|
@ -94,6 +126,46 @@ This is the main function in the \code{CVE} package. It creates objects of
|
||||||
class \code{"cve"} to estimate the mean subspace. Helper functions that
|
class \code{"cve"} to estimate the mean subspace. Helper functions that
|
||||||
require a \code{"cve"} object can then be applied to the output from this
|
require a \code{"cve"} object can then be applied to the output from this
|
||||||
function.
|
function.
|
||||||
|
|
||||||
|
Conditional Variance Estimation (CVE) is a sufficient dimension reduction
|
||||||
|
(SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
|
||||||
|
expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
|
||||||
|
function provides methods for estimating the dimension and the subspace
|
||||||
|
spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
|
||||||
|
rank \eqn{k} such that
|
||||||
|
|
||||||
|
\deqn{E(Y|X) = E(Y|B'X)}
|
||||||
|
|
||||||
|
or, equivalently,
|
||||||
|
|
||||||
|
\deqn{Y = g(B'X) + \epsilon}
|
||||||
|
|
||||||
|
where \eqn{X} is independent of \eqn{\epsilon} with positive definite
|
||||||
|
variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
|
||||||
|
zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
|
||||||
|
is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
|
||||||
|
is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
|
||||||
|
|
||||||
|
Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The
|
||||||
|
CVE method makes very few assumptions.
|
||||||
|
|
||||||
|
A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
|
||||||
|
of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
|
||||||
|
The primary output from this method is a set of orthonormal vectors,
|
||||||
|
\eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
|
||||||
|
|
||||||
|
The method central implements the Ensemble Conditional Variance Estimation
|
||||||
|
(ECVE) as described in [2]. It augments the CVE method by applying an
|
||||||
|
ensemble of functions (parameter \code{func_list}) to the response to
|
||||||
|
estimate the central subspace. This corresponds to the generalization
|
||||||
|
|
||||||
|
\deqn{F(Y|X) = F(Y|B'X)}
|
||||||
|
|
||||||
|
or, equivalently,
|
||||||
|
|
||||||
|
\deqn{Y = g(B'X, \epsilon)}
|
||||||
|
|
||||||
|
where \eqn{F} is the conditional cumulative distribution function.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# create B for simulation (k = 1)
|
# create B for simulation (k = 1)
|
||||||
|
@ -119,3 +191,12 @@ cve.obj.simple2 <- cve.call(X, Y, k = 1)
|
||||||
coef(cve.obj.simple1, k = 1)
|
coef(cve.obj.simple1, k = 1)
|
||||||
coef(cve.obj.simple2, k = 1)
|
coef(cve.obj.simple2, k = 1)
|
||||||
}
|
}
|
||||||
|
\references{
|
||||||
|
[1] Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.08782>
|
||||||
|
|
||||||
|
[2] Fertl, L. and Bura, E. (2021) "Ensemble Conditional Variance
|
||||||
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.13435>
|
||||||
|
}
|
|
@ -122,6 +122,7 @@ location 0, shape-parameter 1, and the scale-parameter is chosen such that
|
||||||
}
|
}
|
||||||
|
|
||||||
\references{
|
\references{
|
||||||
Fertl, L. and Bura, E. (2019), Conditional Variance
|
Fertl, L. and Bura, E. (2021) "Conditional Variance
|
||||||
Estimation for Sufficient Dimension Reduction. Working Paper.
|
Estimation for Sufficient Dimension Reduction"
|
||||||
|
<arXiv:2102.08782>
|
||||||
}
|
}
|
|
@ -20,8 +20,8 @@ the \eqn{n\times k}{n x k} dimensional matrix \eqn{X B} where \eqn{B}
|
||||||
is the cve-estimate for dimension \eqn{k}.
|
is the cve-estimate for dimension \eqn{k}.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Returns \eqn{B'X}. That is the dimensional design matrix \eqn{X} on the
|
Returns \eqn{B'X}. That is, it computes the projection of the \eqn{n x p}
|
||||||
columnspace of the cve-estimate for given dimension \eqn{k}.
|
design matrix \eqn{X} on the column space of \eqn{B} of dimension \eqn{k}.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# create B for simulation (k = 1)
|
# create B for simulation (k = 1)
|
||||||
|
@ -33,11 +33,11 @@ x <- matrix(rnorm(500), 100, 5)
|
||||||
# y = f(B'x) + err
|
# y = f(B'x) + err
|
||||||
# with f(x1) = x1 and err ~ N(0, 0.25^2)
|
# with f(x1) = x1 and err ~ N(0, 0.25^2)
|
||||||
y <- x \%*\% B + 0.25 * rnorm(100)
|
y <- x \%*\% B + 0.25 * rnorm(100)
|
||||||
# calculate cve with method 'simple' for k = 1
|
# calculate cve with method 'mean' for k = 1
|
||||||
set.seed(21)
|
set.seed(21)
|
||||||
cve.obj.simple <- cve(y ~ x, k = 1, method = 'simple')
|
cve.obj.mean <- cve(y ~ x, k = 1, method = 'mean')
|
||||||
# get projected data for k = 1
|
# get projected data for k = 1
|
||||||
x.proj <- directions(cve.obj.simple, k = 1)
|
x.proj <- directions(cve.obj.mean, k = 1)
|
||||||
# plot y against projected data
|
# plot y against projected data
|
||||||
plot(x.proj, y)
|
plot(x.proj, y)
|
||||||
|
|
|
@ -49,6 +49,5 @@ y <- x \%*\% B + 0.25 * rnorm(100)
|
||||||
# calculate cve with method 'simple' for k = 1
|
# calculate cve with method 'simple' for k = 1
|
||||||
set.seed(21)
|
set.seed(21)
|
||||||
cve.obj.simple <- cve(y ~ x, k = k)
|
cve.obj.simple <- cve(y ~ x, k = k)
|
||||||
print(cve.obj.simple$res$'1'$h)
|
|
||||||
print(estimate.bandwidth(x, k = k))
|
print(estimate.bandwidth(x, k = k))
|
||||||
}
|
}
|
|
@ -17,11 +17,11 @@
|
||||||
\item{...}{further arguments passed to \code{\link{mars}}.}
|
\item{...}{further arguments passed to \code{\link{mars}}.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
prediced response at \code{newdata}.
|
prediced respone(s) for \code{newdata}.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Predict response using projected data \eqn{B'C} by fitting
|
Predict response using projected data. The forward model \eqn{g(B' X)} is
|
||||||
\eqn{g(B'C) + \epsilon} using \code{\link{mars}}.
|
estimated with \code{\link{mars}} in the \pkg{mda} package.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# create B for simulation
|
# create B for simulation
|
|
@ -2,7 +2,7 @@
|
||||||
% Please edit documentation in R/predict_dim.R
|
% Please edit documentation in R/predict_dim.R
|
||||||
\name{predict_dim}
|
\name{predict_dim}
|
||||||
\alias{predict_dim}
|
\alias{predict_dim}
|
||||||
\title{Estimate Dimension of Reduction Space.}
|
\title{Estimate Dimension of the Sufficient Reduction.}
|
||||||
\usage{
|
\usage{
|
||||||
predict_dim(object, ..., method = "CV")
|
predict_dim(object, ..., method = "CV")
|
||||||
}
|
}
|
||||||
|
@ -12,29 +12,30 @@ predict_dim(object, ..., method = "CV")
|
||||||
|
|
||||||
\item{...}{ignored.}
|
\item{...}{ignored.}
|
||||||
|
|
||||||
\item{method}{This parameter specify which method will be used in dimension
|
\item{method}{This parameter specifies which method is used in dimension
|
||||||
estimation. It provides three methods \code{'CV'} (default), \code{'elbow'},
|
estimation. It provides three options: \code{'CV'} (default),
|
||||||
and \code{'wilcoxon'} to estimate the dimension of the SDR.}
|
\code{'elbow'} and \code{'wilcoxon'}.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
list with
|
A \code{list} with
|
||||||
\describe{
|
\describe{
|
||||||
\item{}{cretirion of method for \code{k = min.dim, ..., max.dim}.}
|
\item{}{criterion for method and \code{k = min.dim, ..., max.dim}.}
|
||||||
\item{k}{estimated dimension as argmin over \eqn{k} of criterion.}
|
\item{k}{estimated dimension is the minimizer of the criterion.}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
This function estimates the dimension of the mean dimension reduction space,
|
This function estimates the dimension, i.e. the rank of \eqn{B}. The default
|
||||||
i.e. number of columns of \eqn{B} matrix. The default method \code{'CV'}
|
method \code{'CV'} performs leave-one-out (LOO) cross-validation using
|
||||||
performs l.o.o cross-validation using \code{mars}. Given
|
\code{mars} as follows for \code{k = min.dim, ..., max.dim} a
|
||||||
\code{k = min.dim, ..., max.dim} a cross-validation via \code{mars} is
|
cross-validation via \code{mars} is
|
||||||
performed on the dataset \eqn{(Y_i, B_k' X_i)_{i = 1, ..., n}} where
|
performed on the dataset \eqn{(Y_i, B_k' X_i)_{i = 1, ..., n}} where
|
||||||
\eqn{B_k} is the \eqn{p \times k}{p x k} dimensional CVE estimate. The
|
\eqn{B_k} is the \eqn{p \times k}{p x k} dimensional CVE estimate. The
|
||||||
estimated SDR dimension is the \eqn{k} where the
|
estimated SDR dimension is the \eqn{k} where the
|
||||||
cross-validation mean squared error is minimal. The method \code{'elbow'}
|
cross-validation mean squared error is minimal. The method \code{'elbow'}
|
||||||
estimates the dimension via \eqn{k = argmin_k L_n(V_{p - k})} where
|
estimates the dimension via \eqn{k = argmin_k L_n(V_{p - k})} where
|
||||||
\eqn{V_{p - k}} is space that is orthogonal to the columns-space of the CVE estimate of \eqn{B_k}. Method \code{'wilcoxon'} is similar to \code{'elbow'}
|
\eqn{V_{p - k}} is the space that is orthogonal to the column space of the
|
||||||
but finds the minimum using the wilcoxon-test.
|
CVE estimate of \eqn{B_k}. Method \code{'wilcoxon'} finds the minimum using
|
||||||
|
the Wilcoxon test.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# create B for simulation
|
# create B for simulation
|
||||||
|
@ -50,7 +51,7 @@ x <- matrix(rnorm(500), 100)
|
||||||
y <- x \%*\% B + 0.25 * rnorm(100)
|
y <- x \%*\% B + 0.25 * rnorm(100)
|
||||||
|
|
||||||
# Calculate cve for unknown k between min.dim and max.dim.
|
# Calculate cve for unknown k between min.dim and max.dim.
|
||||||
cve.obj.simple <- cve(y ~ x)
|
cve.obj.simple <- cve(y ~ x)
|
||||||
|
|
||||||
predict_dim(cve.obj.simple)
|
predict_dim(cve.obj.simple)
|
||||||
|
|
|
@ -2,8 +2,7 @@
|
||||||
% Please edit documentation in R/util.R
|
% Please edit documentation in R/util.R
|
||||||
\name{rStiefel}
|
\name{rStiefel}
|
||||||
\alias{rStiefel}
|
\alias{rStiefel}
|
||||||
\title{Draws a sample from the invariant measure on the Stiefel manifold
|
\title{Random sample from Stiefel manifold.}
|
||||||
\eqn{S(p, q)}.}
|
|
||||||
\usage{
|
\usage{
|
||||||
rStiefel(p, q)
|
rStiefel(p, q)
|
||||||
}
|
}
|
||||||
|
@ -13,12 +12,12 @@ rStiefel(p, q)
|
||||||
\item{q}{col dimension}
|
\item{q}{col dimension}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
\eqn{p \times q}{p x q} semi-orthogonal matrix.
|
A \eqn{p \times q}{p x q} semi-orthogonal matrix.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Draws a sample from the invariant measure on the Stiefel manifold
|
Draws a random sample from the invariant measure on the Stiefel manifold
|
||||||
\eqn{S(p, q)}.
|
\eqn{S(p, q)}.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
V <- rStiefel(6, 4)
|
V <- rStiefel(6, 4)
|
||||||
}
|
}
|
|
@ -21,7 +21,4 @@ numeric array of length \eqn{n}.
|
||||||
\description{
|
\description{
|
||||||
Random generation for generalized Normal Distribution.
|
Random generation for generalized Normal Distribution.
|
||||||
}
|
}
|
||||||
\seealso{
|
|
||||||
https://en.wikipedia.org/wiki/Generalized_normal_distribution
|
|
||||||
}
|
|
||||||
\keyword{internal}
|
\keyword{internal}
|
|
@ -19,7 +19,4 @@ numeric array of length \eqn{n}.
|
||||||
\description{
|
\description{
|
||||||
Random generation for Laplace distribution.
|
Random generation for Laplace distribution.
|
||||||
}
|
}
|
||||||
\seealso{
|
|
||||||
https://en.wikipedia.org/wiki/Laplace_distribution
|
|
||||||
}
|
|
||||||
\keyword{internal}
|
\keyword{internal}
|
|
@ -21,9 +21,8 @@ Random generation for the multivariate normal distribution.
|
||||||
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
|
\deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
\dontrun{
|
CVarE:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
||||||
rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
|
CVarE:::rmvnorm(20, mu = c(3, -1, 2))
|
||||||
rmvnorm(20, mu = c(3, -1, 2))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
\keyword{internal}
|
\keyword{internal}
|
|
@ -2,7 +2,7 @@
|
||||||
% Please edit documentation in R/datasets.R
|
% Please edit documentation in R/datasets.R
|
||||||
\name{rmvt}
|
\name{rmvt}
|
||||||
\alias{rmvt}
|
\alias{rmvt}
|
||||||
\title{Multivariate t distribution.}
|
\title{Multivariate t Distribution.}
|
||||||
\usage{
|
\usage{
|
||||||
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
|
rmvt(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf)
|
||||||
}
|
}
|
||||||
|
@ -25,10 +25,9 @@ a \eqn{n\times p}{n x p} matrix with samples in its rows.
|
||||||
Random generation from multivariate t distribution (student distribution).
|
Random generation from multivariate t distribution (student distribution).
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
\dontrun{
|
CVarE:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
||||||
rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
|
CVarE:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
|
||||||
rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), 3)
|
CVarE:::rmvt(20, mu = c(3, -1, 2), df = 3)
|
||||||
rmvt(20, mu = c(3, -1, 2), 3)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
\keyword{internal}
|
\keyword{internal}
|
|
@ -2,7 +2,7 @@
|
||||||
% Please edit documentation in R/summary.R
|
% Please edit documentation in R/summary.R
|
||||||
\name{summary.cve}
|
\name{summary.cve}
|
||||||
\alias{summary.cve}
|
\alias{summary.cve}
|
||||||
\title{Prints a summary of a \code{cve} result.}
|
\title{Prints summary statistics of the \eqn{L} \code{cve} component.}
|
||||||
\usage{
|
\usage{
|
||||||
\method{summary}{cve}(object, ...)
|
\method{summary}{cve}(object, ...)
|
||||||
}
|
}
|
||||||
|
@ -12,16 +12,18 @@
|
||||||
|
|
||||||
\item{...}{ignored.}
|
\item{...}{ignored.}
|
||||||
}
|
}
|
||||||
|
\value{
|
||||||
|
No return value, prints human readable summary.
|
||||||
|
}
|
||||||
\description{
|
\description{
|
||||||
Prints a summary statistics of output \code{L} from \code{cve} for
|
Prints a summary statistics of the \code{L} component of a \code{cve} object #' for \code{k = min.dim, ..., max.dim}.
|
||||||
\code{k = min.dim, ..., max.dim}.
|
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# create B for simulation
|
# create B for simulation
|
||||||
B <- rep(1, 5) / sqrt(5)
|
B <- rep(1, 5) / sqrt(5)
|
||||||
|
|
||||||
set.seed(21)
|
set.seed(21)
|
||||||
#creat predictor data x ~ N(0, I_p)
|
# create predictor data x ~ N(0, I_p)
|
||||||
x <- matrix(rnorm(500), 100)
|
x <- matrix(rnorm(500), 100)
|
||||||
|
|
||||||
# simulate response variable
|
# simulate response variable
|
||||||
|
@ -29,7 +31,7 @@ x <- matrix(rnorm(500), 100)
|
||||||
# with f(x1) = x1 and err ~ N(0, 0.25^2)
|
# with f(x1) = x1 and err ~ N(0, 0.25^2)
|
||||||
y <- x \%*\% B + 0.25 * rnorm(100)
|
y <- x \%*\% B + 0.25 * rnorm(100)
|
||||||
|
|
||||||
# calculate cve for unknown k between min.dim and max.dim.
|
# calculate cve for unknown reduction dimension.
|
||||||
cve.obj.simple <- cve(y ~ x)
|
cve.obj.simple <- cve(y ~ x)
|
||||||
|
|
||||||
summary(cve.obj.simple)
|
summary(cve.obj.simple)
|
|
@ -1,7 +1,7 @@
|
||||||
#include "cve.h"
|
#include "cve.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calles a R function passed to the algoritm and supplied intermidiate
|
* Calls a R function passed to the algorithm and supplied intermediate
|
||||||
* optimization values for logging the optimization progress.
|
* optimization values for logging the optimization progress.
|
||||||
* The supplied parameters to the logger functions are as follows:
|
* The supplied parameters to the logger functions are as follows:
|
||||||
* - attempt: Attempts counter.
|
* - attempt: Attempts counter.
|
||||||
|
@ -19,7 +19,7 @@
|
||||||
* @param V Pointer memory area of size `nrowV * ncolV` storing `V`.
|
* @param V Pointer memory area of size `nrowV * ncolV` storing `V`.
|
||||||
* @param G Pointer memory area of size `nrowG * ncolG` storing `G`.
|
* @param G Pointer memory area of size `nrowG * ncolG` storing `G`.
|
||||||
* @param loss Current loss L(V).
|
* @param loss Current loss L(V).
|
||||||
* @param err Errof for break condition (0.0 befor first iteration).
|
* @param err Error for break condition (0.0 before first iteration).
|
||||||
* @param tau Current step-size.
|
* @param tau Current step-size.
|
||||||
*/
|
*/
|
||||||
void callLogger(SEXP logger, SEXP env,
|
void callLogger(SEXP logger, SEXP env,
|
||||||
|
@ -32,11 +32,11 @@ void callLogger(SEXP logger, SEXP env,
|
||||||
SEXP r_iter = PROTECT(ScalarInteger(iter + 1));
|
SEXP r_iter = PROTECT(ScalarInteger(iter + 1));
|
||||||
|
|
||||||
/* Create R representations of L, V and G */
|
/* Create R representations of L, V and G */
|
||||||
SEXP r_L = PROTECT(allocVector(REALSXP, L->nrow));
|
SEXP r_L = PROTECT(allocMatrix(REALSXP, L->nrow, L->ncol));
|
||||||
SEXP r_V = PROTECT(allocMatrix(REALSXP, V->nrow, V->ncol));
|
SEXP r_V = PROTECT(allocMatrix(REALSXP, V->nrow, V->ncol));
|
||||||
SEXP r_G = PROTECT(allocMatrix(REALSXP, G->nrow, G->ncol));
|
SEXP r_G = PROTECT(allocMatrix(REALSXP, G->nrow, G->ncol));
|
||||||
/* Copy data to R objects */
|
/* Copy data to R objects */
|
||||||
memcpy(REAL(r_L), L->elem, L->nrow * sizeof(double));
|
memcpy(REAL(r_L), L->elem, L->nrow * L->ncol * sizeof(double));
|
||||||
memcpy(REAL(r_V), V->elem, V->nrow * V->ncol * sizeof(double));
|
memcpy(REAL(r_V), V->elem, V->nrow * V->ncol * sizeof(double));
|
||||||
memcpy(REAL(r_G), G->elem, G->nrow * G->ncol * sizeof(double));
|
memcpy(REAL(r_G), G->elem, G->nrow * G->ncol * sizeof(double));
|
||||||
|
|
||||||
|
@ -63,6 +63,6 @@ void callLogger(SEXP logger, SEXP env,
|
||||||
/* Evaluate the logger function call expression. */
|
/* Evaluate the logger function call expression. */
|
||||||
eval(loggerCall, env);
|
eval(loggerCall, env);
|
||||||
|
|
||||||
/* Unprotect created R objects. */
|
/* Unlock created R objects. */
|
||||||
UNPROTECT(11);
|
UNPROTECT(11);
|
||||||
}
|
}
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
#include "cve.h"
|
#include "cve.h"
|
||||||
|
|
||||||
void cve(const mat *X, const mat *Y, const double h,
|
void cve(const mat *X, const mat *Fy, const double h,
|
||||||
const unsigned int method,
|
const unsigned int method,
|
||||||
const double momentum,
|
const double momentum,
|
||||||
const double tau_init, const double tol_init,
|
const double tau_init, const double tol_init,
|
||||||
|
@ -10,8 +10,7 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
const int maxIter, const int attempts,
|
const int maxIter, const int attempts,
|
||||||
mat *V, mat *L,
|
mat *V, mat *L,
|
||||||
SEXP logger, SEXP loggerEnv) {
|
SEXP logger, SEXP loggerEnv) {
|
||||||
|
|
||||||
// TODO: param and dim. validation.
|
|
||||||
int n = X->nrow, p = X->ncol, q = V->ncol;
|
int n = X->nrow, p = X->ncol, q = V->ncol;
|
||||||
int attempt = 0, iter;
|
int attempt = 0, iter;
|
||||||
double loss, loss_last, loss_best, err, tau;
|
double loss, loss_last, loss_best, err, tau;
|
||||||
|
@ -20,17 +19,16 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
double sumK;
|
double sumK;
|
||||||
double c = agility / (double)n;
|
double c = agility / (double)n;
|
||||||
|
|
||||||
// TODO: check parameters! dim, ...
|
|
||||||
|
|
||||||
/* Create further intermediate or internal variables. */
|
/* Create further intermediate or internal variables. */
|
||||||
mat *lvecD_e = (void*)0;
|
mat *lvecD_e = (void*)0;
|
||||||
mat *Ysquared = (void*)0;
|
mat *Fy_sq = (void*)0;
|
||||||
mat *XV = (void*)0;
|
mat *XV = (void*)0;
|
||||||
mat *lvecD = (void*)0; // TODO: combine. aka in-place lvecToSym
|
mat *lvecD = (void*)0;
|
||||||
mat *D = (void*)0; // TODO: combine. aka in-place lvecToSym
|
mat *D = (void*)0;
|
||||||
mat *lvecK = (void*)0; // TODO: combine. aka in-place lvecToSym
|
mat *lvecK = (void*)0;
|
||||||
mat *K = (void*)0; // TODO: combine. aka in-place lvecToSym
|
mat *K = (void*)0;
|
||||||
mat *colSumsK = (void*)0;
|
mat *colSumsK = (void*)0;
|
||||||
|
mat *rowSumsL = (void*)0;
|
||||||
mat *W = (void*)0;
|
mat *W = (void*)0;
|
||||||
mat *y1 = (void*)0;
|
mat *y1 = (void*)0;
|
||||||
mat *y2 = (void*)0;
|
mat *y2 = (void*)0;
|
||||||
|
@ -43,7 +41,7 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
mat *V_best = (void*)0;
|
mat *V_best = (void*)0;
|
||||||
mat *L_best = (void*)0;
|
mat *L_best = (void*)0;
|
||||||
|
|
||||||
/* Allocate appropiate amount of working memory. */
|
/* Allocate appropriate amount of working memory. */
|
||||||
int workLen = 2 * (p + 1) * p;
|
int workLen = 2 * (p + 1) * p;
|
||||||
if (workLen < n) {
|
if (workLen < n) {
|
||||||
workLen = n;
|
workLen = n;
|
||||||
|
@ -51,7 +49,7 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
double *workMem = (double*)R_alloc(workLen, sizeof(double));
|
double *workMem = (double*)R_alloc(workLen, sizeof(double));
|
||||||
|
|
||||||
lvecD_e = rowDiffSquareSums(X, lvecD_e);
|
lvecD_e = rowDiffSquareSums(X, lvecD_e);
|
||||||
Ysquared = hadamard(1.0, Y, Y, 0.0, Ysquared);
|
Fy_sq = hadamard(1.0, Fy, Fy, 0.0, Fy_sq);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
/* (Re)set learning rate. */
|
/* (Re)set learning rate. */
|
||||||
|
@ -59,7 +57,7 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
|
|
||||||
/* Check if start value for `V` was supplied. */
|
/* Check if start value for `V` was supplied. */
|
||||||
if (attempts > 0) {
|
if (attempts > 0) {
|
||||||
/* Sample start value from stiefel manifold. */
|
/* Sample start value from Stiefel manifold. */
|
||||||
V = rStiefel(p, q, V, workMem);
|
V = rStiefel(p, q, V, workMem);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,23 +74,27 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
colSumsK = colSums(K, colSumsK);
|
colSumsK = colSums(K, colSumsK);
|
||||||
/* Normalize K columns to obtain weight matrix W */
|
/* Normalize K columns to obtain weight matrix W */
|
||||||
W = colApply(K, '/', colSumsK, W);
|
W = colApply(K, '/', colSumsK, W);
|
||||||
/* first and second order weighted responces */
|
/* first and second order weighted responses */
|
||||||
y1 = matrixprod(1.0, W, Y, 0.0, y1);
|
y1 = matrixprod(1.0, W, Fy, 0.0, y1);
|
||||||
y2 = matrixprod(1.0, W, Ysquared, 0.0, y2);
|
y2 = matrixprod(1.0, W, Fy_sq, 0.0, y2);
|
||||||
/* Compute losses */
|
/* Compute losses */
|
||||||
L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L));
|
L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L));
|
||||||
/* Compute initial loss */
|
/* Compute initial loss */
|
||||||
if (method == weighted) {
|
if (method == weighted) {
|
||||||
colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
|
colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
|
||||||
sumK = sum(colSumsK);
|
sumK = sum(colSumsK);
|
||||||
loss_last = dot(L, '*', colSumsK) / sumK;
|
if (L->ncol == 1) {
|
||||||
|
loss_last = dot(L, '*', colSumsK) / sumK;
|
||||||
|
} else {
|
||||||
|
loss_last = dot(rowSums(L, rowSumsL), '*', colSumsK) / sumK;
|
||||||
|
}
|
||||||
c = agility / sumK;
|
c = agility / sumK;
|
||||||
/* Calculate the scaling matrix S */
|
/* Calculate the scaling matrix S */
|
||||||
S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
|
S = laplace(adjacence(L, Fy, y1, D, K, gauss, S), workMem);
|
||||||
} else { /* simple */
|
} else { /* simple */
|
||||||
loss_last = mean(L);
|
loss_last = mean(L);
|
||||||
/* Calculate the scaling matrix S */
|
/* Calculate the scaling matrix S */
|
||||||
S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem);
|
S = laplace(adjacence(L, Fy, y1, D, W, gauss, S), workMem);
|
||||||
}
|
}
|
||||||
/* Gradient */
|
/* Gradient */
|
||||||
tmp1 = matrixprod(1.0, S, X, 0.0, tmp1);
|
tmp1 = matrixprod(1.0, S, X, 0.0, tmp1);
|
||||||
|
@ -109,8 +111,8 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
A = skew(tau, G, V, 0.0, A);
|
A = skew(tau, G, V, 0.0, A);
|
||||||
|
|
||||||
for (iter = 0; iter < maxIter; ++iter) {
|
for (iter = 0; iter < maxIter; ++iter) {
|
||||||
/* Before Starting next iteration check if the Uer has requested an
|
/* Before next iteration, check if the User has requested an
|
||||||
* interupt (aka. ^C, or "Stop" button).
|
* interrupt (aka. ^C, or "Stop" button).
|
||||||
* If interrupted the algorithm will be exited here and everything
|
* If interrupted the algorithm will be exited here and everything
|
||||||
* will be discharted! */
|
* will be discharted! */
|
||||||
R_CheckUserInterrupt();
|
R_CheckUserInterrupt();
|
||||||
|
@ -131,16 +133,20 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
colSumsK = colSums(K, colSumsK);
|
colSumsK = colSums(K, colSumsK);
|
||||||
/* Normalize K columns to obtain weight matrix W */
|
/* Normalize K columns to obtain weight matrix W */
|
||||||
W = colApply(K, '/', colSumsK, W);
|
W = colApply(K, '/', colSumsK, W);
|
||||||
/* first and second order weighted responces */
|
/* first and second order weighted responses */
|
||||||
y1 = matrixprod(1.0, W, Y, 0.0, y1);
|
y1 = matrixprod(1.0, W, Fy, 0.0, y1);
|
||||||
y2 = matrixprod(1.0, W, Ysquared, 0.0, y2);
|
y2 = matrixprod(1.0, W, Fy_sq, 0.0, y2);
|
||||||
/* Compute losses */
|
/* Compute losses */
|
||||||
L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L));
|
L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L));
|
||||||
/* Compute loss */
|
/* Compute loss */
|
||||||
if (method == weighted) {
|
if (method == weighted) {
|
||||||
colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
|
colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK);
|
||||||
sumK = sum(colSumsK);
|
sumK = sum(colSumsK);
|
||||||
loss = dot(L, '*', colSumsK) / sumK;
|
if (L->ncol == 1) {
|
||||||
|
loss = dot(L, '*', colSumsK) / sumK;
|
||||||
|
} else {
|
||||||
|
loss = dot(rowSums(L, rowSumsL), '*', colSumsK) / sumK;
|
||||||
|
}
|
||||||
} else { /* simple */
|
} else { /* simple */
|
||||||
loss = mean(L);
|
loss = mean(L);
|
||||||
}
|
}
|
||||||
|
@ -158,11 +164,8 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
/* Compute error, use workMem. */
|
/* Compute error, use workMem. */
|
||||||
err = dist(V, V_tau);
|
err = dist(V, V_tau);
|
||||||
|
|
||||||
// Rprintf("%2d - iter: %2d, loss: %1.3f, err: %1.3f, tau: %1.3f, norm(G) = %1.3f\n",
|
|
||||||
// attempt, iter, loss, err, tau, sqrt(squareSum(G)));
|
|
||||||
|
|
||||||
/* Shift next step to current step and store loss to last. */
|
/* Shift next step to current step and store loss to last. */
|
||||||
V = copy(V_tau, V);
|
V = copy(V_tau, V);
|
||||||
loss_last = loss;
|
loss_last = loss;
|
||||||
|
|
||||||
if (logger) {
|
if (logger) {
|
||||||
|
@ -177,11 +180,11 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
|
|
||||||
if (method == weighted) {
|
if (method == weighted) {
|
||||||
/* Calculate the scaling matrix S */
|
/* Calculate the scaling matrix S */
|
||||||
S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem);
|
S = laplace(adjacence(L, Fy, y1, D, K, gauss, S), workMem);
|
||||||
c = agility / sumK; // n removed previousely
|
c = agility / sumK; // n removed previously
|
||||||
} else { /* simple */
|
} else { /* simple */
|
||||||
/* Calculate the scaling matrix S */
|
/* Calculate the scaling matrix S */
|
||||||
S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem);
|
S = laplace(adjacence(L, Fy, y1, D, W, gauss, S), workMem);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Gradient */
|
/* Gradient */
|
||||||
|
@ -194,8 +197,6 @@ void cve(const mat *X, const mat *Y, const double h,
|
||||||
A = skew(tau, G, V, 0.0, A);
|
A = skew(tau, G, V, 0.0, A);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rprintf("\n");
|
|
||||||
|
|
||||||
/* Check if current attempt improved previous ones */
|
/* Check if current attempt improved previous ones */
|
||||||
if (attempt == 0 || loss < loss_best) {
|
if (attempt == 0 || loss < loss_best) {
|
||||||
loss_best = loss;
|
loss_best = loss;
|
|
@ -65,9 +65,9 @@ mat* applyKernel(const mat* A, const double h, kernel kernel, mat* B) {
|
||||||
* v . . . . . .
|
* v . . . . . .
|
||||||
* s[n-1] s[2n-1] . . . s[n-1] . . . s[nn-1]
|
* s[n-1] s[2n-1] . . . s[n-1] . . . s[nn-1]
|
||||||
*
|
*
|
||||||
* @param L per sample loss vector of (lenght `n`).
|
* @param L per sample loss vector of (length `n`).
|
||||||
* @param Y responces (lenght `n`).
|
* @param Y responses (length `n`).
|
||||||
* @param y1 weighted responces (lenght `n`).
|
* @param y1 weighted responses (length `n`).
|
||||||
* @param D distance matrix (dim. `n x n`).
|
* @param D distance matrix (dim. `n x n`).
|
||||||
* @param W weight matrix (dim. `n x n`).
|
* @param W weight matrix (dim. `n x n`).
|
||||||
* @param kernel the kernel to be used.
|
* @param kernel the kernel to be used.
|
||||||
|
@ -106,57 +106,67 @@ mat* applyKernel(const mat* A, const double h, kernel kernel, mat* B) {
|
||||||
* n +--------------------+
|
* n +--------------------+
|
||||||
*/
|
*/
|
||||||
// TODO: fix: cache misses in Y?!
|
// TODO: fix: cache misses in Y?!
|
||||||
mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1,
|
mat* adjacence(const mat *mat_L, const mat *mat_Fy, const mat *mat_y1,
|
||||||
const mat *mat_D, const mat *mat_W, kernel kernel,
|
const mat *mat_D, const mat *mat_W, kernel kernel,
|
||||||
mat *mat_S) {
|
mat *mat_S) {
|
||||||
int i, j, k, n = vec_L->nrow;
|
int i, j, k, l, n = mat_L->nrow, m = mat_L->ncol;
|
||||||
int block_size, block_batch_size;
|
int block_size, block_batch_size;
|
||||||
int max_size = 64 < n ? 64 : n; // Block Size set to 64
|
int max_size = 64 < n ? 64 : n; // Block Size set to 64
|
||||||
|
|
||||||
double Y_j, tmp0, tmp1, tmp2, tmp3;
|
double Y_j, t0, t1, t2, t3; // internal temp. values.
|
||||||
double *Y = vec_Y->elem;
|
double *Y, *L, *y1;
|
||||||
double *L = vec_L->elem;
|
|
||||||
double *y1 = vec_y1->elem;
|
|
||||||
double *D, *W, *S;
|
double *D, *W, *S;
|
||||||
|
|
||||||
// TODO: Check dims.
|
|
||||||
|
|
||||||
if (!mat_S) {
|
if (!mat_S) {
|
||||||
mat_S = matrix(n, n);
|
mat_S = zero(n, n);
|
||||||
|
} else {
|
||||||
|
memset(mat_S->elem, 0, n * n * sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < n; i += block_size) {
|
for (l = 0; l < m; ++l) {
|
||||||
/* Set blocks (left upper corner) */
|
Y = mat_Fy->elem + l * n;
|
||||||
S = mat_S->elem + i;
|
L = mat_L->elem + l * n;
|
||||||
D = mat_D->elem + i;
|
y1 = mat_y1->elem + l * n;
|
||||||
W = mat_W->elem + i;
|
for (i = 0; i < n; i += block_size) {
|
||||||
/* determine block size */
|
/* Set blocks (left upper corner) */
|
||||||
block_size = n - i;
|
S = mat_S->elem + i;
|
||||||
if (block_size > max_size) {
|
D = mat_D->elem + i;
|
||||||
block_size = max_size;
|
W = mat_W->elem + i;
|
||||||
}
|
/* determine block size */
|
||||||
block_batch_size = 4 * (block_size / 4);
|
block_size = n - i;
|
||||||
/* for each column in the block */
|
if (block_size > max_size) {
|
||||||
for (j = 0; j < n; ++j, S += n, D += n, W += n) {
|
block_size = max_size;
|
||||||
Y_j = Y[j];
|
|
||||||
/* iterate over block rows */
|
|
||||||
for (k = 0; k < block_batch_size; k += 4) {
|
|
||||||
tmp0 = Y_j - y1[k];
|
|
||||||
tmp1 = Y_j - y1[k + 1];
|
|
||||||
tmp2 = Y_j - y1[k + 2];
|
|
||||||
tmp3 = Y_j - y1[k + 3];
|
|
||||||
S[k] = (L[k] - (tmp0 * tmp0)) * D[k] * W[k];
|
|
||||||
S[k + 1] = (L[k + 1] - (tmp1 * tmp1)) * D[k + 1] * W[k + 1];
|
|
||||||
S[k + 2] = (L[k + 2] - (tmp2 * tmp2)) * D[k + 2] * W[k + 2];
|
|
||||||
S[k + 3] = (L[k + 3] - (tmp3 * tmp3)) * D[k + 3] * W[k + 3];
|
|
||||||
}
|
}
|
||||||
for (; k < block_size; ++k) {
|
block_batch_size = 4 * (block_size / 4);
|
||||||
tmp0 = Y_j - y1[k];
|
/* for each column in the block */
|
||||||
S[k] = (L[k] - (tmp0 * tmp0)) * D[k] * W[k];
|
for (j = 0; j < n; ++j, S += n, D += n, W += n) {
|
||||||
|
Y_j = Y[j];
|
||||||
|
/* iterate over block rows */
|
||||||
|
for (k = 0; k < block_batch_size; k += 4) {
|
||||||
|
t0 = Y_j - y1[k];
|
||||||
|
t1 = Y_j - y1[k + 1];
|
||||||
|
t2 = Y_j - y1[k + 2];
|
||||||
|
t3 = Y_j - y1[k + 3];
|
||||||
|
S[k] += (L[k] - (t0 * t0)) * D[k] * W[k];
|
||||||
|
S[k + 1] += (L[k + 1] - (t1 * t1)) * D[k + 1] * W[k + 1];
|
||||||
|
S[k + 2] += (L[k + 2] - (t2 * t2)) * D[k + 2] * W[k + 2];
|
||||||
|
S[k + 3] += (L[k + 3] - (t3 * t3)) * D[k + 3] * W[k + 3];
|
||||||
|
}
|
||||||
|
for (; k < block_size; ++k) {
|
||||||
|
t0 = Y_j - y1[k];
|
||||||
|
S[k] += (L[k] - (t0 * t0)) * D[k] * W[k];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
L += block_size;
|
||||||
|
y1 += block_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m > 1) {
|
||||||
|
S = mat_S->elem;
|
||||||
|
for (i = 0; i < n * n; ++i) {
|
||||||
|
S[i] /= m;
|
||||||
}
|
}
|
||||||
L += block_size;
|
|
||||||
y1 += block_size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return mat_S;
|
return mat_S;
|
|
@ -7,7 +7,7 @@
|
||||||
*
|
*
|
||||||
* @details Reuses the memory area of the SEXP object, therefore manipulation
|
* @details Reuses the memory area of the SEXP object, therefore manipulation
|
||||||
* of the returned matrix works in place of the SEXP object. In addition,
|
* of the returned matrix works in place of the SEXP object. In addition,
|
||||||
* a reference to the original SEXP is stored and will be retriefed from
|
* a reference to the original SEXP is stored and will be retrieved from
|
||||||
* `asSEXP()` if the matrix was created through this function.
|
* `asSEXP()` if the matrix was created through this function.
|
||||||
*/
|
*/
|
||||||
static mat* asMat(SEXP S) {
|
static mat* asMat(SEXP S) {
|
||||||
|
@ -25,7 +25,7 @@ static mat* asMat(SEXP S) {
|
||||||
return M;
|
return M;
|
||||||
}
|
}
|
||||||
|
|
||||||
SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h,
|
SEXP cve_export(SEXP X, SEXP Fy, SEXP k, SEXP h,
|
||||||
SEXP method,
|
SEXP method,
|
||||||
SEXP V, // initial
|
SEXP V, // initial
|
||||||
SEXP momentum, SEXP tau, SEXP tol,
|
SEXP momentum, SEXP tau, SEXP tol,
|
||||||
|
@ -47,7 +47,7 @@ SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h,
|
||||||
|
|
||||||
/* Create output list. */
|
/* Create output list. */
|
||||||
SEXP Vout = PROTECT(allocMatrix(REALSXP, p, q));
|
SEXP Vout = PROTECT(allocMatrix(REALSXP, p, q));
|
||||||
SEXP Lout = PROTECT(allocVector(REALSXP, n));
|
SEXP Lout = PROTECT(allocMatrix(REALSXP, n, ncols(Fy)));
|
||||||
|
|
||||||
/* Check `attempts`, if not positive use passed values of `V` as
|
/* Check `attempts`, if not positive use passed values of `V` as
|
||||||
* optimization start value without further attempts.
|
* optimization start value without further attempts.
|
||||||
|
@ -58,7 +58,7 @@ SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Call CVE */
|
/* Call CVE */
|
||||||
cve(asMat(X), asMat(Y), asReal(h),
|
cve(asMat(X), asMat(Fy), asReal(h),
|
||||||
asInteger(method),
|
asInteger(method),
|
||||||
asReal(momentum), asReal(tau), asReal(tol),
|
asReal(momentum), asReal(tau), asReal(tol),
|
||||||
asReal(slack), asReal(gamma),
|
asReal(slack), asReal(gamma),
|
|
@ -4,7 +4,7 @@
|
||||||
#include <R_ext/Rdynload.h>
|
#include <R_ext/Rdynload.h>
|
||||||
|
|
||||||
/* .Call calls */
|
/* .Call calls */
|
||||||
extern SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h,
|
extern SEXP cve_export(SEXP X, SEXP Fy, SEXP k, SEXP h,
|
||||||
SEXP method,
|
SEXP method,
|
||||||
SEXP V, // initial
|
SEXP V, // initial
|
||||||
SEXP momentum, SEXP tau, SEXP tol,
|
SEXP momentum, SEXP tau, SEXP tol,
|
|
@ -55,9 +55,9 @@ mat* copy(mat *src, mat *dest) {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sum of all elements.
|
* Sum of all elements.
|
||||||
*
|
*
|
||||||
* @param A matrix.
|
* @param A matrix.
|
||||||
*
|
*
|
||||||
* @returns the sum of elements of `A`.
|
* @returns the sum of elements of `A`.
|
||||||
*/
|
*/
|
||||||
double sum(const mat* A) {
|
double sum(const mat* A) {
|
25
README.md
25
README.md
|
@ -5,25 +5,34 @@ install.packages("mda")
|
||||||
```
|
```
|
||||||
A release will be available in a few days.
|
A release will be available in a few days.
|
||||||
|
|
||||||
- Linux: Not available jet (one or two days, state 17.12.2019)
|
- Linux: release 1 Version 0.2 [CVE_0.2.tar.gz](https://git.art-ist.cc/daniel/CVE/releases/download/0.2/CVE_0.2.tar.gz)
|
||||||
- Windows: Not available jet (one or two days, state 17.12.2019)
|
- Windows: release 1 Version 0.2 [CVE_0.2.zip](https://git.art-ist.cc/daniel/CVE/releases/download/0.2/CVE_0.2.zip)
|
||||||
- MacOS: Not available jet (unknown)
|
- MacOS: Not available jet (unknown)
|
||||||
|
|
||||||
Open `R` and then the following:
|
Open `R` and then the following:
|
||||||
```R
|
```R
|
||||||
# addapt to download file.
|
# addapt to download file.
|
||||||
install.packages("path/to/cve_0.2.<end>", repos = NULL)
|
install.packages("path/to/cve_0.2.<end>", repos = NULL)
|
||||||
library(CVE) # Test installation.
|
library(CVarE) # Test installation.
|
||||||
```
|
```
|
||||||
Please consult the man-pages `?install.package` and `?library` for further information.
|
Please consult the man-pages `?install.package` and `?library` for further information.
|
||||||
|
|
||||||
## Installing Source
|
## Installing Source
|
||||||
Cloning the `CVE` repository and using `R`'s build and install routines from a terminal.
|
Cloning the `CVarE` repository and using `R`'s build and install routines from a terminal.
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.art-ist.cc/daniel/CVE.git # Clone repository
|
git clone https://git.art-ist.cc/daniel/CVE.git # Clone repository
|
||||||
cd CVE # Go into the repository
|
cd CVarE # Go into the repository
|
||||||
R CMD build CVE # Build package tarbal
|
R CMD build CVarE # Build package tarbal
|
||||||
R CMD INSTALL CVE_0.2.tar.gz # Install package
|
R CMD INSTALL CVarE_1.0.tar.gz # Install package
|
||||||
|
```
|
||||||
|
### Alternative Installing Source from within R
|
||||||
|
Using the `devtools` the package can also be directly installed from within `R`
|
||||||
|
```R
|
||||||
|
library(devtools) # Load the dectools
|
||||||
|
setwd('<path_to_repo>/CVarE') # Go into package root
|
||||||
|
(path <- build(vignettes = FALSE)) # Create source package
|
||||||
|
install.packages(path, repos = NULL, type = "source") # Install source package
|
||||||
|
library(CVarE) # Test
|
||||||
```
|
```
|
||||||
|
|
||||||
### Windows / macOS
|
### Windows / macOS
|
||||||
|
@ -31,4 +40,4 @@ Installing from source (for any package which contains compiled code, in our cas
|
||||||
See _R Installation and Administration_ from [r-project manuals](https://cran.r-project.org/manuals.html).
|
See _R Installation and Administration_ from [r-project manuals](https://cran.r-project.org/manuals.html).
|
||||||
|
|
||||||
# Repository Structure
|
# Repository Structure
|
||||||
The repository is structured in two directories, the `CVE/` directory which is the `R` package root and `simulations/` where all simulation scripts can be found (and `README.md` which is this).
|
The repository is structured in two directories, the `CVarE/` directory which is the `R` package root and `simulations/` where all simulation scripts can be found (and `README.md` which is this).
|
||||||
|
|
Loading…
Reference in New Issue