update: documentation

2021-03-05 14:52:45 +01:00 · 2021-03-05 14:52:45 +01:00 · 2344120dd9
parent 8e95902e79
commit 2344120dd9
20 changed files with 323 additions and 500 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1 @@
 *.pdf filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,16 @@
 simulations/results/*
 literature/*
 CVE/src/*.o
 CVE/src/*.so
 CVE/src/*.dll
 *.tgz
 *.tar.xz
 *.tar.gz
 *.zip
 *.so
 *.Rcheck/*
 tmp/*
 wip/*
--- a/CVE/DESCRIPTION
+++ b/CVE/DESCRIPTION
@ -1,12 +1,12 @@
 Package: CVE
 Type: Package
 Title: Conditional Variance Estimator for Sufficient Dimension Reduction
-Version: 0.2
+Version: 0.3
-Date: 2019-12-20
+Date: 2021-03-04
 Author: Daniel Kapla <daniel@kapla.at>, Lukas Fertl <lukas.fertl@chello.at>
 Maintainer: Daniel Kapla <daniel@kapla.at>
 Description: Implementation of the Conditional Variance Estimation (CVE) method.
 License: GPL-3
 Encoding: UTF-8
-Imports: stats,graphics,mda
+Imports: stats,mda
-RoxygenNote: 6.1.1
+RoxygenNote: 7.0.2
--- a/CVE/NAMESPACE
+++ b/CVE/NAMESPACE
@ -2,7 +2,6 @@
 S3method(coef,cve)
 S3method(directions,cve)
 S3method(plot,cve)
 S3method(predict,cve)
 S3method(summary,cve)
 export(cve)
@ -14,10 +13,6 @@ export(null)
 export(predict_dim)
 export(rStiefel)
 import(stats)
 importFrom(graphics,boxplot)
 importFrom(graphics,lines)
 importFrom(graphics,plot)
 importFrom(graphics,points)
 importFrom(mda,mars)
 importFrom(stats,model.frame)
 importFrom(stats,rbinom)
--- a/CVE/R/CVE.R
+++ b/CVE/R/CVE.R
@ -3,7 +3,7 @@
 #' Conditional Variance Estimation (CVE) is a novel sufficient dimension
 #' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
 #' where \eqn{B'X} is a lower dimensional projection of the predictors and
-#' \eqn{Y} is a univariate responce. CVE,
+#' \eqn{Y} is a univariate response. CVE,
 #' similarly to its main competitor, the mean average variance estimation
 #' (MAVE), is not based on inverse regression, and does not require the
 #' restrictive linearity and constant variance conditions of moment based SDR
@ -11,7 +11,9 @@
 #' continuous predictors and link function. Let \eqn{X} be a real
 #' \eqn{p}-dimensional covariate vector. We assume that the dependence of
 #' \eqn{Y} and \eqn{X} is modelled by
 #'
 #' \deqn{Y = g(B'X) + \epsilon}
 #'
 #' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 #' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
@ -20,9 +22,28 @@
 #' a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}. 
 #' Without loss of generality \eqn{B} is assumed to be orthonormal.
 #'
 #' Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
 #' implemented which is a SDR method in regressions with continuous response and
 #' predictors. ECVE applies to general non-additive error regression models.
 #'
 #' \deqn{Y = g(B'X, \epsilon)}
 #'
 #' It operates under the assumption that the predictors can be replaced by a
 #' lower dimensional projection without loss of information.It is a
 #' semiparametric forward regression model based exhaustive sufficient dimension
 #' reduction estimation method that is shown to be consistent under mild
 #' assumptions.
 #'
 #' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
-#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
+#'
-#' Estimation for Sufficient Dimension Reduction. Working Paper.
+#' @references
 #'    [1] Fertl, L. and Bura, E. (2021), Conditional Variance
 #'          Estimation for Sufficient Dimension Reduction.
 #'          arXiv:2102.08782
 #'
 #'    [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
 #'          Estimation for Sufficient Dimension Reduction.
 #'          arXiv:2102.13435
 #'
 #' @docType package
 #' @useDynLib CVE, .registration = TRUE
@ -30,11 +51,52 @@
 #' Conditional Variance Estimator (CVE).
 #'
 #' @description
 #' This is the main function in the \code{CVE} package. It creates objects of
 #' class \code{"cve"} to estimate the mean subspace. Helper functions that
 #' require a \code{"cve"} object can then be applied to the output from this
 #' function.
 #'
 #' Conditional Variance Estimation (CVE) is a sufficient dimension reduction
 #' (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
 #' expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
 #' function provides methods for estimating the dimension and the subspace
 #' spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
 #' rank \eqn{k} such that
 #'
 #' \deqn{E(Y|X) = E(Y|B'X)}
 #'
 #' or, equivalently,
 #' 
 #' \deqn{Y = g(B'X) + \epsilon}
 #'
 #' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 #' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 #' is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
 #' is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
 #'
 #' Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The 
 #' CVE method makes very few assumptions.
 #'
 #' A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
 #' of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
 #' The primary output from this method is a set of orthonormal vectors,
 #' \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
 #'
 #' The method central implements the Ensemble Conditional Variance Estimation
 #' (ECVE) as described in [2]. It augments the CVE method by applying an
 #' ensemble of functions (parameter \code{func_list}) to the response to
 #' estimate the central subspace. This corresponds to the generalization
 #'
 #' \deqn{F(Y|X) = F(Y|B'X)}
 #'
 #' or, equivalently,
 #'
 #' \deqn{Y = g(B'X, \epsilon)}
 #'
 #' where \eqn{F} is the conditional cumulative distribution function.
 #'
 #' @param formula an object of class \code{"formula"} which is a symbolic
 #' description of the model to be fitted like \eqn{Y\sim X}{Y ~ X} where
 #' \eqn{Y} is a \eqn{n}-dimensional vector of the response variable and
@ -46,41 +108,17 @@
 #' @param method This character string specifies the method of fitting. The
 #' options are
 #' \itemize{
-#'      \item "simple" implementation,
+#'    \item \code{"mean"} method to estimate the mean subspace, see [1].
-#'      \item "weighted" variation with adaptive weighting of slices.
+#'    \item \code{"central"} ensemble method to estimate the central subspace,
 #'      see [2].
 #'    \item \code{"weighted.mean"} variation of \code{"mean"} method with
 #'      adaptive weighting of slices, see [1].
 #'    \item \code{"weighted.central"} variation of \code{"central"} method with
 #'      adaptive weighting of slices, see [2].
 #' }
 #' see Fertl, L. and Bura, E. (2019).
 #' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
-#' @param ... optional parameters passed on to \code{cve.call}.
+#' @param ... optional parameters passed on to \code{\link{cve.call}}.
 #'
 #'  
 #' Conditional Variance Estimation (CVE) is a sufficient dimension reduction
 #' (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
 #' expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
 #' function provides methods for estimating the dimension and the subspace
 #' spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
 #' rank \eqn{k} such that
 #' \deqn{%
 #'     E(Y|X) = E(Y|B'X) %
 #' }
 #' or, equivalently, 
 #' \deqn{%
 #'     Y = g(B'X) + \epsilon %
 #' }
 #' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 #' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 #' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 #' is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
 #' is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}. 
 #' 
 #' Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The 
 #' CVE method makes very few assumptions.
 #'
 #' A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
 #' of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
 #' The primary output from this method is a set of orthonormal vectors,
 #' \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
 #' 
 #' @return an S3 object of class \code{cve} with components:
 #' \describe{
 #'    \item{X}{design matrix of predictor vector used for calculating
@ -108,40 +146,42 @@
 #'
 #' @examples
 #' # set dimensions for simulation model
-#' p <- 8
+#' p <- 5
 #' k <- 2
 #' # create B for simulation
 #' b1 <- rep(1 / sqrt(p), p)
 #' b2 <- (-1)^seq(1, p) / sqrt(p)
 #' B <- cbind(b1, b2)
 #' # sample size
-#' n <- 200
+#' n <- 100
 #' set.seed(21)
 #'
 #' # creat predictor data x ~ N(0, I_p)
 #' x <- matrix(rnorm(n * p), n, p)
 #' # simulate response variable
 #' #     y = f(B'x) + err
 #' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
-#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
+#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(n)
-#' # calculate cve with method 'mean' for k unknown in 1, ..., 4
+#'
-#' cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'mean'
+#' # calculate cve with method 'mean' for k unknown in 1, ..., 3
 #' cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
 #' # calculate cve with method 'weighed' for k = 2
 #' cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
-#' # estimate dimension from cve.obj.s
+#' B2 <- coef(cve.obj.s, k = 2)
-#' khat <- predict_dim(cve.obj.s)$k
+#'
 #' # get cve-estimate for B with dimensions (p, k = khat)
 #' B2 <- coef(cve.obj.s, k = khat)
 #' # get projected X data (same as cve.obj.s$X %*% B2)
-#' proj.X <- directions(cve.obj.s, k = khat)
+#' proj.X <- directions(cve.obj.s, k = 2)
 #' #  plot y against projected data
 #' plot(proj.X[, 1], y)
 #' plot(proj.X[, 2], y)
 #'
 #' # creat 10 new x points and y according to model
 #' x.new <- matrix(rnorm(10 * p), 10, p)
 #' y.new <- (x.new %*% b1)^2 + 2 * (x.new %*% b2) + 0.25 * rnorm(10)
 #' # predict y.new
-#' yhat <- predict(cve.obj.s, x.new, khat)
+#' yhat <- predict(cve.obj.s, x.new, 2)
 #' plot(y.new, yhat)
 #'
 #' # projection matrix on span(B)
 #' # same as B %*% t(B) since B is semi-orthogonal
 #' PB <- B %*% solve(t(B) %*% B) %*% t(B)
@ -158,8 +198,15 @@
 #'
 #' @seealso For a detailed description of \code{formula} see
 #'      \code{\link{formula}}.
-#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
+#'
-#' Estimation for Sufficient Dimension Reduction. Working Paper.
+#' @references
 #'    [1] Fertl, L. and Bura, E. (2021), Conditional Variance
 #'          Estimation for Sufficient Dimension Reduction.
 #'          arXiv:2102.08782
 #'
 #'    [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
 #'          Estimation for Sufficient Dimension Reduction.
 #'          arXiv:2102.13435
 #'
 #' @importFrom stats model.frame
 #' @export
@ -191,19 +238,27 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
 #' @inherit cve description
 #'
 #' @param X Design predictor matrix.
-#' @param Y \eqn{n}-dimensional vector of responces.
+#' @param Y \eqn{n}-dimensional vector of responses.
 #' @param h bandwidth or function to estimate bandwidth, defaults to internaly
 #'      estimated bandwidth.
 #' @param nObs parameter for choosing bandwidth \code{h} using
 #'   \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
-#' @param method specifies the CVE method variation as one of
+#' @param method This character string specifies the method of fitting. The
 #' options are
 #' \itemize{
-#'      \item "simple" exact implementation as described in the paper listed 
+#'    \item \code{"mean"} method to estimate the mean subspace, see [1].
-#'          below.
+#'    \item \code{"central"} ensemble method to estimate the central subspace,
-#'      \item "weighted" variation with addaptive weighting of slices.
+#'      see [2].
 #'    \item \code{"weighted.mean"} variation of \code{"mean"} method with
 #'      adaptive weighting of slices, see [1].
 #'    \item \code{"weighted.central"} variation of \code{"central"} method with
 #'      adaptive weighting of slices, see [2].
 #' }
-#' @param func_list a list of functions applied to \code{Y} to form the ensamble
+#' @param func_list a list of functions applied to \code{Y} used by ECVE
-#'      CVE for central sub-space estimation.
+#'      (see [2]) for central subspace estimation. The default ensemble are
 #'      indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
 #'      percent response quantiles. (only relevant if \code{method} is
 #'      \code{"central"} or \code{"weighted.central"}, ignored otherwise)
 #' @param k Dimension of lower dimensional projection, if \code{k} is given
 #'      only the specified dimension \code{B} matrix is estimated.
 #' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
@ -214,6 +269,9 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
 #' @param attempts If \code{V.init} not supplied, the optimization is carried
 #'      out \code{attempts} times with starting values drawn from the invariant
 #'      measure on the Stiefel manifold (see \code{\link{rStiefel}}).
 #' @param nr.proj The number of projection used for projective resampling for
 #'      multivariate response \eqn{Y} (under active development, ignored for
 #'      univariate response).
 #' @param momentum number of \eqn{[0, 1)} giving the ration of momentum for
 #'      eucledian gradient update with a momentum term. \code{momentum = 0}
 #'      corresponds to normal gradient descend.
@ -230,6 +288,7 @@ cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
 #'      computation).
 #'
 #' @inherit cve return
 #' @inherit cve references
 #'
 #' @examples
 #' # create B for simulation (k = 1)
@ -262,7 +321,7 @@ cve.call <- function(X, Y,
    momentum = 0.0, tau = 1.0, tol = 1e-3,
    slack = 0.0, gamma = 0.5,
    V.init = NULL,
-    max.iter = 50L, attempts = 10L, nr.proj = 500L,
+    max.iter = 50L, attempts = 10L, nr.proj = 1L,
    logger = NULL
 ) {
    # Determine method with partial matching (shortcuts: "Weight" -> "weighted")
--- a/CVE/R/coef.R
+++ b/CVE/R/coef.R
@ -14,7 +14,7 @@
 #' # set dimensions for simulation model
 #' p <- 8 # sample dimension
 #' k <- 2 # real dimension of SDR subspace
-#' n <- 200 # samplesize
+#' n <- 100 # samplesize
 #' # create B for simulation
 #' b1 <- rep(1 / sqrt(p), p)
 #' b2 <- (-1)^seq(1, p) / sqrt(p)
@ -25,10 +25,10 @@
 #' x <- matrix(rnorm(n * p), n, p)
 #' # simulate response variable
 #' #    y = f(B'x) + err
-#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
+#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
-#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
+#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.1 * rnorm(100)
-#' # calculate cve for k = 1, ..., 5
+#' # calculate cve for k = 2, 3
-#' cve.obj <- cve(y ~ x, max.dim = 5)
+#' cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
 #' # get cve-estimate for B with dimensions (p, k = 2)
 #' B2 <- coef(cve.obj, k = 2)
 #'
--- a/CVE/R/datasets.R
+++ b/CVE/R/datasets.R
@ -200,8 +200,10 @@ rlaplace <- function(n = 1, mu = 0, sd = 1) {
 #' location 0, shape-parameter 1, and the scale-parameter is chosen such that
 #' \eqn{Var(\epsilon) = 0.25}.
 #'
-#' @references Fertl, L. and Bura, E. (2019), Conditional Variance
+#' @references
-#' Estimation for Sufficient Dimension Reduction. Working Paper.
+#'    Fertl, L. and Bura, E. (2021), Conditional Variance
 #'          Estimation for Sufficient Dimension Reduction.
 #'          arXiv:2102.08782
 #'
 #' @import stats
 #' @importFrom stats rnorm rbinom
--- a/CVE/R/plot.R
+++ b/CVE/R/plot.R
@ -1,62 +0,0 @@
 #' Elbow plot of the loss function.
 #'
 #' Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
 #' \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
 #' to \eqn{L_n(V, X_i)} where \eqn{V} is the minimizer of \eqn{L_n(V)} where
 #' \eqn{V} is an element of a Stiefel manifold (see
 #' Fertl, L. and Bura, E. (2019)).
 #'
 #' @param x an object of class \code{"cve"}, usually, a result of a call to
 #'          \code{\link{cve}} or \code{\link{cve.call}}.
 #' @param ... Pass through parameters to [\code{\link{plot}}] and
 #'      [\code{\link{lines}}]
 #'
 #' @examples
 #' # create B for simulation
 #' B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
 #' 
 #' set.seed(21)
 #' # creat predictor data x ~ N(0, I_p)
 #' X <- matrix(rnorm(600), 100)
 #' 
 #' # simulate response variable
 #' #    y = f(B'x) + err
 #' # with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
 #' Y <- (X %*% B[, 1])^2 + 2 * X %*% B[, 2] + rnorm(100, 0, .1)
 #'
 #' # Create bandwidth estimation function
 #' estimate.bandwidth <- function(X, k, nObs) {
 #'     n <- nrow(X)
 #'     p <- ncol(X)
 #'     X_c <- scale(X, center = TRUE, scale = FALSE)
 #'     2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
 #' }
 #' # calculate cve with method 'simple' for k = min.dim,...,max.dim
 #' cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
 #' 
 #' # elbow plot
 #' plot(cve.obj.simple)
 #'
 #' @references Fertl, L. and Bura, E. (2019), Conditional Variance
 #' Estimation for Sufficient Dimension Reduction. Working Paper.
 #'
 #' @seealso see \code{\link{par}} for graphical parameters to pass through
 #'      as well as \code{\link{plot}}, the standard plot utility.
 #' @method plot cve
 #' @importFrom graphics plot lines points boxplot
 #' @export
 plot.cve <- function(x, ...) {
    L <- c()
    k <- c()
    for (dr.k in x$res) {
        if (class(dr.k) == 'cve.k') {
            k <- c(k, as.character(dr.k$k))
            L <- c(L, dr.k$L)
        }
    }
    L <- matrix(L, ncol = length(k)) / var(x$Y)
    boxplot(L, main = "elbow plot",
            xlab = "SDR dimension",
            ylab = "Sample loss distribution",
            names = k)
 }
--- a/CVE/R/predict.R
+++ b/CVE/R/predict.R
@ -1,7 +1,7 @@
 #' Predict method for CVE Fits.
 #'
 #' Predict response using projected data. The forward model \eqn{g(B' X)} is
-#' estimated with \code{\link{mars}} in the \code{\pkg{mda}} package.
+#' estimated with \code{\link{mars}} in the \pkg{mda} package.
 #'
 #' @param object an object of class \code{"cve"}, usually, a result of a call to
 #'          \code{\link{cve}} or \code{\link{cve.call}}.
--- a/CVE/inst/doc/CVE_paper.pdf
+++ b/CVE/inst/doc/CVE_paper.pdf
--- a/CVE/man/CVE-package.Rd
+++ b/CVE/man/CVE-package.Rd
@ -9,7 +9,7 @@
 Conditional Variance Estimation (CVE) is a novel sufficient dimension
 reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
 where \eqn{B'X} is a lower dimensional projection of the predictors and
-\eqn{Y} is a univariate responce. CVE,
+\eqn{Y} is a univariate response. CVE,
 similarly to its main competitor, the mean average variance estimation
 (MAVE), is not based on inverse regression, and does not require the
 restrictive linearity and constant variance conditions of moment based SDR
@ -17,7 +17,10 @@ methods. CVE is data-driven and applies to additive error regressions with
 continuous predictors and link function. Let \eqn{X} be a real
 \eqn{p}-dimensional covariate vector. We assume that the dependence of
 \eqn{Y} and \eqn{X} is modelled by
 }
 \details{
 \deqn{Y = g(B'X) + \epsilon}
 where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
@ -25,10 +28,27 @@ is an unknown, continuous non-constant function,
 and \eqn{B = (b_1, ..., b_k)} is
 a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}. 
 Without loss of generality \eqn{B} is assumed to be orthonormal.
 Further, the extended Ensemble Conditional Variance Estimation (ECVE) is
 implemented which is a SDR method in regressions with continuous response and
 predictors. ECVE applies to general non-additive error regression models.
 \deqn{Y = g(B'X, \epsilon)}
 It operates under the assumption that the predictors can be replaced by a
 lower dimensional projection without loss of information.It is a
 semiparametric forward regression model based exhaustive sufficient dimension
 reduction estimation method that is shown to be consistent under mild
 assumptions.
 }
 \references{
-Fertl, L. and Bura, E. (2019), Conditional Variance
+[1] Fertl, L. and Bura, E. (2021), Conditional Variance
-Estimation for Sufficient Dimension Reduction. Working Paper.
+         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.08782
   [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.13435
 }
 \author{
 Daniel Kapla, Lukas Fertl, Bura Efstathia
--- a/CVE/man/coef.cve.Rd
+++ b/CVE/man/coef.cve.Rd
@ -25,7 +25,7 @@ cve-estimate of \eqn{B} with dimension \eqn{p\times k}{p x k}.
 # set dimensions for simulation model
 p <- 8 # sample dimension
 k <- 2 # real dimension of SDR subspace
-n <- 200 # samplesize
+n <- 100 # samplesize
 # create B for simulation
 b1 <- rep(1 / sqrt(p), p)
 b2 <- (-1)^seq(1, p) / sqrt(p)
@ -36,10 +36,10 @@ set.seed(21)
 x <- matrix(rnorm(n * p), n, p)
 # simulate response variable
 #    y = f(B'x) + err
-# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
+# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.1^2)
-y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
+y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.1 * rnorm(100)
-# calculate cve for k = 1, ..., 5
+# calculate cve for k = 2, 3
-cve.obj <- cve(y ~ x, max.dim = 5)
+cve.obj <- cve(y ~ x, min.dim = 2, max.dim = 3)
 # get cve-estimate for B with dimensions (p, k = 2)
 B2 <- coef(cve.obj, k = 2)
--- a/CVE/man/cve.Rd
+++ b/CVE/man/cve.Rd
@ -4,7 +4,7 @@
 \alias{cve}
 \title{Conditional Variance Estimator (CVE).}
 \usage{
-cve(formula, data, method = "simple", max.dim = 10L, ...)
+cve(formula, data, method = "mean", max.dim = 10L, ...)
 }
 \arguments{
 \item{formula}{an object of class \code{"formula"} which is a symbolic
@ -20,42 +20,17 @@ the environment from which \code{cve} is called.}
 \item{method}{This character string specifies the method of fitting. The
 options are
 \itemize{
-     \item "simple" implementation,
+   \item "mean" method to estimate the mean subspace, see [1].
-     \item "weighted" variation with adaptive weighting of slices.
+   \item "central" ensemble method to estimate the central subspace, see [2].
-}
+   \item "weighted.mean" variation of `"mean"` method with adaptive weighting
-see Fertl, L. and Bura, E. (2019).}
+     of slices, see [1].
   \item "weighted.central" variation of `"central"` method with adaptive
     weighting of slices, see [2].
 }}
 \item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).}
-\item{...}{optional parameters passed on to \code{cve.call}.
+\item{...}{optional parameters passed on to \code{\link{cve.call}}.}
 Conditional Variance Estimation (CVE) is a sufficient dimension reduction
 (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
 expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
 function provides methods for estimating the dimension and the subspace
 spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
 rank \eqn{k} such that
 \deqn{%
    E(Y|X) = E(Y|B'X) %
 }
 or, equivalently, 
 \deqn{%
    Y = g(B'X) + \epsilon %
 }
 where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
 is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}. 
 Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The 
 CVE method makes very few assumptions.
 A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
 of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
 The primary output from this method is a set of orthonormal vectors,
 \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.}
 }
 \value{
 an S3 object of class \code{cve} with components:
@ -88,61 +63,108 @@ This is the main function in the \code{CVE} package. It creates objects of
 class \code{"cve"} to estimate the mean subspace. Helper functions that
 require a \code{"cve"} object can then be applied to the output from this
 function.
 Conditional Variance Estimation (CVE) is a sufficient dimension reduction
 (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
 expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
 function provides methods for estimating the dimension and the subspace
 spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
 rank \eqn{k} such that
 \deqn{E(Y|X) = E(Y|B'X)}
 or, equivalently,
 \deqn{Y = g(B'X) + \epsilon}
 where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
 is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
 Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The 
 CVE method makes very few assumptions.
 A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
 of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
 The primary output from this method is a set of orthonormal vectors,
 \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
 The method central implements the Ensemble Conditional Variance Estimation
 (ECVE) as described in [2]. It augments the CVE method by applying an
 ensemble of functions (parameter \code{func_list}) to the response to
 estimate the central subspace. This corresponds to the generalization
 \deqn{F(Y|X) = F(Y|B'X)}
 or, equivalently, 
 \deqn{Y = g(B'X, \epsilon)}
 where \eqn{F} is the conditional cumulative distribution function.
 }
 \examples{
 # set dimensions for simulation model
-p <- 8
+p <- 5
 k <- 2
 # create B for simulation
 b1 <- rep(1 / sqrt(p), p)
 b2 <- (-1)^seq(1, p) / sqrt(p)
 B <- cbind(b1, b2)
 # sample size
-n <- 200
+n <- 100
 set.seed(21)
 # creat predictor data x ~ N(0, I_p)
 x <- matrix(rnorm(n * p), n, p)
 # simulate response variable
 #     y = f(B'x) + err
 # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
-y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100)
+y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(n)
-# calculate cve with method 'simple' for k unknown in 1, ..., 4
+
-cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
+# calculate cve with method 'mean' for k unknown in 1, ..., 3
 cve.obj.s <- cve(y ~ x, max.dim = 2) # default method 'mean'
 # calculate cve with method 'weighed' for k = 2
 cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted.mean')
-# estimate dimension from cve.obj.s
+B2 <- coef(cve.obj.s, k = 2)
-khat <- predict_dim(cve.obj.s)$k
+
 # get cve-estimate for B with dimensions (p, k = khat)
 B2 <- coef(cve.obj.s, k = khat)
 # get projected X data (same as cve.obj.s$X \%*\% B2)
-proj.X <- directions(cve.obj.s, k = khat)
+proj.X <- directions(cve.obj.s, k = 2)
 #  plot y against projected data
 plot(proj.X[, 1], y)
 plot(proj.X[, 2], y)
 # creat 10 new x points and y according to model
 x.new <- matrix(rnorm(10 * p), 10, p)
 y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(10)
 # predict y.new
-yhat <- predict(cve.obj.s, x.new, khat)
+yhat <- predict(cve.obj.s, x.new, 2)
 plot(y.new, yhat)
 # projection matrix on span(B)
 # same as B \%*\% t(B) since B is semi-orthogonal
 PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B)
-# cve estimates for B with simple and weighted method
+# cve estimates for B with mean and weighted method
 B.s <- coef(cve.obj.s, k = 2)
 B.w <- coef(cve.obj.w, k = 2)
 # same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same vor B.w)
 PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s)
 PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w)
-# compare estimation accuracy of simple and weighted cve estimate by
+# compare estimation accuracy of mean and weighted cve estimate by
 # Frobenius norm of difference of projections.
 norm(PB - PB.s, type = 'F')
 norm(PB - PB.w, type = 'F')
 }
 \references{
-Fertl, L. and Bura, E. (2019), Conditional Variance
+[1] Fertl, L. and Bura, E. (2021), Conditional Variance
-Estimation for Sufficient Dimension Reduction. Working Paper.
+         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.08782
   [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.13435
 }
 \seealso{
 For a detailed description of \code{formula} see
--- a/CVE/man/cve.call.Rd
+++ b/CVE/man/cve.call.Rd
@ -7,8 +7,8 @@
 cve.call(
  X,
  Y,
-  method = "simple",
+  method = c("mean", "weighted.mean", "central", "weighted.central"),
-  func_list = list(function(x) x),
+  func_list = NULL,
  nObs = sqrt(nrow(X)),
  h = NULL,
  min.dim = 1L,
@ -22,23 +22,31 @@ cve.call(
  V.init = NULL,
  max.iter = 50L,
  attempts = 10L,
  nr.proj = 1L,
  logger = NULL
 )
 }
 \arguments{
 \item{X}{Design predictor matrix.}
-\item{Y}{\eqn{n}-dimensional vector of responces.}
+\item{Y}{\eqn{n}-dimensional vector of responses.}
-\item{method}{specifies the CVE method variation as one of
+\item{method}{This character string specifies the method of fitting. The
 options are
 \itemize{
-     \item "simple" exact implementation as described in the paper listed 
+   \item "mean" method to estimate the mean subspace, see [1].
-         below.
+   \item "central" ensemble method to estimate the central subspace, see [2].
-     \item "weighted" variation with addaptive weighting of slices.
+   \item "weighted.mean" variation of `"mean"` method with adaptive weighting
     of slices, see [1].
   \item "weighted.central" variation of `"central"` method with adaptive
     weighting of slices, see [2].
 }}
-\item{func_list}{a list of functions applied to `Y` to form the ensamble
+\item{func_list}{a list of functions applied to \code{Y} used by ECVE
-CVE for central sub-space estimation.}
+(see [2]) for central subspace estimation. The default ensemble are
 indicator functions of the \eqn{[0, 10], (10, 20], ..., (90, 100]}
 percent response quantiles. (only relevant if \code{method} is
 \code{"central"} or \code{"weighted.central"}, ignored otherwise)}
 \item{nObs}{parameter for choosing bandwidth \code{h} using
 \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
@ -79,6 +87,10 @@ used as starting value in the optimization. (If supplied,
 out \code{attempts} times with starting values drawn from the invariant
 measure on the Stiefel manifold (see \code{\link{rStiefel}}).}
 \item{nr.proj}{The number of projection used for projective resampling for
 multivariate response \eqn{Y} (under active development, ignored for
 univariate response).}
 \item{logger}{a logger function (only for advanced users, slows down the
 computation).}
 }
@ -113,6 +125,46 @@ This is the main function in the \code{CVE} package. It creates objects of
 class \code{"cve"} to estimate the mean subspace. Helper functions that
 require a \code{"cve"} object can then be applied to the output from this
 function.
 Conditional Variance Estimation (CVE) is a sufficient dimension reduction
 (SDR) method for regressions studying \eqn{E(Y|X)}, the conditional
 expectation of a response \eqn{Y} given a set of predictors \eqn{X}. This
 function provides methods for estimating the dimension and the subspace
 spanned by the columns of a \eqn{p\times k}{p x k} matrix \eqn{B} of minimal
 rank \eqn{k} such that
 \deqn{E(Y|X) = E(Y|B'X)}
 or, equivalently,
 \deqn{Y = g(B'X) + \epsilon}
 where \eqn{X} is independent of \eqn{\epsilon} with positive definite
 variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
 zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
 is an unknown, continuous non-constant function, and \eqn{B = (b_1,..., b_k)}
 is a real \eqn{p \times k}{p x k} matrix of rank \eqn{k \leq p}{k <= p}.
 Both the dimension \eqn{k} and the subspace \eqn{span(B)} are unknown. The 
 CVE method makes very few assumptions.
 A kernel matrix \eqn{\hat{B}}{Bhat} is estimated such that the column space
 of \eqn{\hat{B}}{Bhat} should be close to the mean subspace \eqn{span(B)}.
 The primary output from this method is a set of orthonormal vectors,
 \eqn{\hat{B}}{Bhat}, whose span estimates \eqn{span(B)}.
 The method central implements the Ensemble Conditional Variance Estimation
 (ECVE) as described in [2]. It augments the CVE method by applying an
 ensemble of functions (parameter \code{func_list}) to the response to
 estimate the central subspace. This corresponds to the generalization
 \deqn{F(Y|X) = F(Y|B'X)}
 or, equivalently, 
 \deqn{Y = g(B'X, \epsilon)}
 where \eqn{F} is the conditional cumulative distribution function.
 }
 \examples{
 # create B for simulation (k = 1)
@ -138,3 +190,12 @@ cve.obj.simple2 <- cve.call(X, Y, k = 1)
 coef(cve.obj.simple1, k = 1)
 coef(cve.obj.simple2, k = 1)
 }
 \references{
 [1] Fertl, L. and Bura, E. (2021), Conditional Variance
         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.08782
   [2] Fertl, L. and Bura, E. (2021), Ensemble Conditional Variance
         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.13435
 }
--- a/CVE/man/dataset.Rd
+++ b/CVE/man/dataset.Rd
@ -122,6 +122,7 @@ location 0, shape-parameter 1, and the scale-parameter is chosen such that
 }
 \references{
-Fertl, L. and Bura, E. (2019), Conditional Variance
+Fertl, L. and Bura, E. (2021), Conditional Variance
-Estimation for Sufficient Dimension Reduction. Working Paper.
+         Estimation for Sufficient Dimension Reduction.
         arXiv:2102.08782
 }
--- a/CVE/man/directions.cve.Rd
+++ b/CVE/man/directions.cve.Rd
@ -33,11 +33,11 @@ x <- matrix(rnorm(500), 100, 5)
 #    y = f(B'x) + err
 # with f(x1) = x1 and err ~ N(0, 0.25^2)
 y <- x \%*\% B + 0.25 * rnorm(100)
-# calculate cve with method 'simple' for k = 1
+# calculate cve with method 'mean' for k = 1
 set.seed(21)
-cve.obj.simple <- cve(y ~ x, k = 1, method = 'mean')
+cve.obj.mean <- cve(y ~ x, k = 1, method = 'mean')
 # get projected data for k = 1
-x.proj <- directions(cve.obj.simple, k = 1)
+x.proj <- directions(cve.obj.mean, k = 1)
 # plot y against projected data
 plot(x.proj, y)
--- a/CVE/man/plot.cve.Rd
+++ b/CVE/man/plot.cve.Rd
@ -1,57 +0,0 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/plot.R
 \name{plot.cve}
 \alias{plot.cve}
 \title{Elbow plot of the loss function.}
 \usage{
 \method{plot}{cve}(x, ...)
 }
 \arguments{
 \item{x}{an object of class \code{"cve"}, usually, a result of a call to
 \code{\link{cve}} or \code{\link{cve.call}}.}
 \item{...}{Pass through parameters to [\code{\link{plot}}] and
 [\code{\link{lines}}]}
 }
 \description{
 Boxplots of the output \code{L} from \code{\link{cve}} over \code{k} from
 \code{min.dim} to \code{max.dim}. For given \code{k}, \code{L} corresponds
 to \eqn{L_n(V, X_i)} where \eqn{V} is the minimizer of \eqn{L_n(V)} where
 \eqn{V} is an element of a Stiefel manifold (see
 Fertl, L. and Bura, E. (2019)).
 }
 \examples{
 # create B for simulation
 B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6)
 set.seed(21)
 # creat predictor data x ~ N(0, I_p)
 X <- matrix(rnorm(600), 100)
 # simulate response variable
 #    y = f(B'x) + err
 # with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2)
 Y <- (X \%*\% B[, 1])^2 + 2 * X \%*\% B[, 2] + rnorm(100, 0, .1)
 # Create bandwidth estimation function
 estimate.bandwidth <- function(X, k, nObs) {
    n <- nrow(X)
    p <- ncol(X)
    X_c <- scale(X, center = TRUE, scale = FALSE)
    2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
 }
 # calculate cve with method 'simple' for k = min.dim,...,max.dim
 cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X)))
 # elbow plot
 plot(cve.obj.simple)
 }
 \references{
 Fertl, L. and Bura, E. (2019), Conditional Variance
 Estimation for Sufficient Dimension Reduction. Working Paper.
 }
 \seealso{
 see \code{\link{par}} for graphical parameters to pass through
     as well as \code{\link{plot}}, the standard plot utility.
 }
--- a/CVE/man/predict.cve.Rd
+++ b/CVE/man/predict.cve.Rd
@ -21,7 +21,7 @@ prediced respone(s) for \code{newdata}.
 }
 \description{
 Predict response using projected data. The forward model \eqn{g(B' X)} is
-estimated with \code{\link{mars}} in the \code{\pkg{mda}} package.
+estimated with \code{\link{mars}} in the \pkg{mda} package.
 }
 \examples{
 # create B for simulation
--- a/CVE/man/predict_dim.Rd
+++ b/CVE/man/predict_dim.Rd
@ -51,7 +51,7 @@ x <- matrix(rnorm(500), 100)
 y <- x \%*\% B + 0.25 * rnorm(100)
 # Calculate cve for unknown k between min.dim and max.dim.
-cve.obj.simple <- cve(y ~ x) 
+cve.obj.simple <- cve(y ~ x)
 predict_dim(cve.obj.simple)
--- a/cve_tensorflow.R
+++ b/cve_tensorflow.R
@ -1,235 +0,0 @@
 library(CVE)
 library(reticulate)
 library(tensorflow)
 #' Null space basis of given matrix `V`
 #'
 #' @param V `(p, q)` matrix
 #' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`.
 #' @keywords internal
 #' @export
 null <- function(V) {
    tmp <- qr(V)
    set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank)
    return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
 }
 subspace_dist <- function(A, B) {
    P <- A %*% solve(t(A) %*% A, t(A))
    Q <- B %*% solve(t(B) %*% B, t(B))
    norm(P - Q, 'F') / sqrt(ncol(A) + ncol(B))
 }
 estimate.bandwidth <- function (X, k, nObs = sqrt(nrow(X)), version = 1L) {
    n <- nrow(X)
    p <- ncol(X)
    X_c <- scale(X, center = TRUE, scale = FALSE)
    if (version == 1) {
        (2 * sum(X_c^2) / (n * p)) * (1.2 * n^(-1 / (4 + k)))^2
    } else if (version == 2) {
        2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p)
    } else {
        stop("Unknown version.")
    }
 }
 tf_Variable <- function(obj, dtype = "float32", ...) {
    tf$Variable(obj, dtype = dtype, ...)
 }
 tf_constant <- function(obj, dtype = "float32", ...) {
    tf$constant(obj, dtype = dtype, ...)
 }
 cve.tf <- function(X, Y, k, h = estimate.bandwidth(X, k, sqrt(nrow(X))),
    V.init = NULL, optimizer_initialier = tf$optimizers$RMSprop, attempts = 10L,
    nr.projections = nrow(X)^(3 / 2),
    sd_noise = 0, method = c("simple", "weighted")
 ) {
    method <- match.arg(method)
    `-0.5` <- tf_constant(-0.5)
    `1` <- tf_constant(1)
    `2` <- tf_constant(2)
    n <- nrow(X)
    p <- ncol(X)
    k <- as.integer(k)
    q <- p - k
    if (!is.matrix(Y))
        Y <- as.matrix(Y)
    # Projective resampling.
    if (ncol(Y) > 1L) {
        R <- matrix(rnorm(ncol(Y) * nr.projections), ncol(Y))
        R <- t(t(R) / sqrt(colSums(R^2)))
        Y <- Y %*% R
    }
    X <- tf_constant(scale(X))
    Y <- tf_constant(scale(Y))
    I <- tf_constant(diag(1, p))
    h <- tf_Variable(h)
    loss <- tf_function(function(V) {
        Q <- I - tf$matmul(V, V, transpose_b = TRUE)
        if (sd_noise > 0)
            XQ <- tf$matmul(X + tf$random$normal(list(n, p), stddev = sd_noise), Q)
        else
            XQ <- tf$matmul(X, Q)
        S <- tf$matmul(XQ, XQ, transpose_b = TRUE)
        d <- tf$linalg$diag_part(S)
        D <- tf$reshape(d, list(n, 1L)) + tf$reshape(d, list(1L, n)) - `2` * S
        K <- tf$exp((`-0.5` / h) * tf$pow(D, 2L))
        w <- tf$reduce_sum(K, 1L, keepdims = TRUE)
        y1 <- tf$divide(tf$matmul(K, Y), w)
        y2 <- tf$divide(tf$matmul(K, tf$pow(Y, 2L)), w)
        if (method == "simple") {
            l <- tf$reduce_mean(y2 - tf$pow(y1, 2L))
        } else {# weighted
            w <- w - `1`
            w <- w / tf$reduce_sum(w)
            l <- tf$reduce_sum(w * (y2 - tf$pow(y1, 2L)))
            l <- l / tf$cast(tf$shape(Y)[2], "float32")
        }
        l
    })
    if (is.null(V.init))
        V.init <- qr.Q(qr(matrix(rnorm(p * q), p, q)))
    else
        attempts <- 1L
    V <- tf_Variable(V.init, constraint = function(w) { tf$linalg$qr(w)$q })
    min.loss <- Inf
    for (attempt in seq_len(attempts)) {
        optimizer = optimizer_initialier()
        out <- tf$while_loop(
            cond = tf_function(function(i, L) i < 400L),
            body = tf_function(function(i, L) {
                with(tf$GradientTape() %as% tape, {
                    tape$watch(V)
                    L <- loss(V)
                })
                grad <- tape$gradient(L, V)
                optimizer$apply_gradients(list(list(grad, V)))
                list(i + 1L, L)
            }),
            loop_vars = list(tf_constant(0L, "int32"), tf_constant(Inf))
        )
        if (as.numeric(out[[2]]) < min.loss) {
            min.loss <- as.numeric(out[[2]])
            min.V <- as.matrix(V)
        }
        V$assign(qr.Q(qr(matrix(rnorm(p * q), p, q))))
    }
    list(B = null(min.V), V = min.V, loss = min.loss)
 }
 # ds <- dataset(1)
 # out <- cve.call2(ds$X, ds$Y, ncol(ds$B))
 plot.sim <- function(sim) {
    name <- deparse(substitute(sim))
    ssd <- sapply(sim, function(s) subspace_dist(s$B.true, s$B.est))
    print(summary(ssd))
    h <- hist(ssd, freq = FALSE, breaks = seq(0, 1, 0.1), main = name,
              xlab = "Subspace Distance")
    lines(density(ssd, from = 0, to = 1))
    stat <- c(Median = median(ssd), Mean = mean(ssd))
    abline(v = stat, lty = 2)
    text(stat, max(h$density), names(stat),
         pos = if(diff(stat) > 0) c("2", "4") else c("4", "2"))
 }
 multivariate.dataset <- function(dataset = 1, n = 100, p = 6, q = 4) {
    CVE <- getNamespace('CVE')
    X <- matrix(rnorm(n * p), n, p)
    if (dataset == 1) {
        Delta <- diag(1, q, q)
        Delta[1, 2] <- Delta[2, 1] <- -0.5
        epsilon <- CVE$rmvnorm(n, sigma = Delta)
        B <- matrix(0, p, q)
        B[1, 1] <- 1
        B[2, 2] <- 2 / sqrt(5)
        B[3, 2] <- 1 / sqrt(5)
        Y <- X %*% B + epsilon
        B <- B[, 1:2]
    }
    if (dataset == 2) {
        B <- matrix(c(0.8, 0.6, 0, 0, 0, 0))
        eps <- matrix(0, n, q)
        Delta <- diag(1, q, q)
        for(i in 1:n) {
        Delta[1, 2] <- Delta[2, 1] <- sin(X[i, ] %*% B)
        eps[i, ] <- CVE$rmvnorm(1, sigma = Delta)
        }
        Y<-cbind(exp(eps[, 1]), eps[, 2:4])
    }
    list(X = X, Y = Y, B = B)
 }
 set.seed(42)
 reps <- 5L
 sim.cve.m <- vector("list", reps)
 sim.cve.c <- vector("list", reps)
 sim.cve.wm <- vector("list", reps)
 sim.cve.wc <- vector("list", reps)
 sim.tf1  <- vector("list", reps)
 sim.tf2  <- vector("list", reps)
 start <- Sys.time()
 for (i in 1:reps) {
    # ds <- dataset(1)
    ds <- multivariate.dataset(2, n = 400)
    sim.cve.m[[i]] <- list(
        B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "mean"), ncol(ds$B)),
        B.true = ds$B
    )
    sim.cve.c[[i]] <- list(
        B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "central"), ncol(ds$B)),
        B.true = ds$B
    )
    sim.cve.wm[[i]] <- list(
        B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.mean"), ncol(ds$B)),
        B.true = ds$B
    )
    sim.cve.wc[[i]] <- list(
        B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.central"), ncol(ds$B)),
        B.true = ds$B
    )
    # sim.tf1[[i]] <- list(
    #     B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
    #         optimizer_initialier = tf$optimizers$Adam)$B,
    #     B.true = ds$B
    # )
    # sim.tf2[[i]] <- list(
    #     B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
    #         optimizer_initialier = tf$optimizers$Adam,
    #         method = "weighted")$B,
    #     B.true = ds$B
    # )
    cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n')
 }
 # pdf('subspace_comp.pdf')
 plot.sim(sim.cve)
 plot.sim(sim.tf1)
 plot.sim(sim.tf2)
 par(mfrow = c(2, 2))
 plot.sim(sim.cve.m)
 plot.sim(sim.cve.c)
 plot.sim(sim.cve.wm)
 plot.sim(sim.cve.wc)
 # dev.off()