add: CVE,
add: R document files, add: CVE_paper, add: package-doc, fix: doc typo
This commit is contained in:
		
							parent
							
								
									4bc9ca2f58
								
							
						
					
					
						commit
						095e463463
					
				| @ -3,14 +3,8 @@ | |||||||
| export(cve) | export(cve) | ||||||
| export(cve_cpp) | export(cve_cpp) | ||||||
| export(dataset) | export(dataset) | ||||||
| export(estimate.bandwidth) | export(estimateBandwidth) | ||||||
| export(index_test) |  | ||||||
| export(kron_test) |  | ||||||
| export(rStiefel) | export(rStiefel) | ||||||
| export(test1) |  | ||||||
| export(test2) |  | ||||||
| export(test3) |  | ||||||
| export(test4) |  | ||||||
| import(Rcpp) | import(Rcpp) | ||||||
| importFrom(Rcpp,evalCpp) | importFrom(Rcpp,evalCpp) | ||||||
| useDynLib(CVE) | useDynLib(CVE) | ||||||
|  | |||||||
							
								
								
									
										44
									
								
								CVE/R/CVE.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								CVE/R/CVE.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | |||||||
|  | #' Conditional Variance Estimator | ||||||
|  | #' | ||||||
|  | #' Conditional Variance Estimator (CVE) is a novel sufficient dimension | ||||||
|  | #'  reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X), | ||||||
|  | #'  where B'X is a lower dimensional projection of the predictors. | ||||||
|  | #' | ||||||
|  | #' @param X A matrix of type numeric of dimensions N times dim where N is the number | ||||||
|  | #'  of entries with dim as data dimension. | ||||||
|  | #' @param Y A vector of type numeric of length N (coresponds to \code{x}). | ||||||
|  | #' @param k Guess for rank(B). | ||||||
|  | #' @param nObs As describet in the paper. | ||||||
|  | #'   | ||||||
|  | #' @param tol Tolerance for optimization stopping creteria. | ||||||
|  | #' @export | ||||||
|  | #' | ||||||
|  | #' @seealso TODO: \code{vignette("CVE_paper", package="CVE")}. | ||||||
|  | #' | ||||||
|  | #' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||||
|  | cve <- function(X, Y, k, | ||||||
|  |                 nObs = sqrt(nrow(X)), | ||||||
|  |                 tauInitial = 1.0, | ||||||
|  |                 tol = 1e-3, | ||||||
|  |                 slack = -1e-10, | ||||||
|  |                 maxIter = 50L, | ||||||
|  |                 attempts = 10L | ||||||
|  | ) { | ||||||
|  |     # check data parameter types | ||||||
|  |     stopifnot( | ||||||
|  |         is.matrix(X), | ||||||
|  |         is.vector(Y), | ||||||
|  |         typeof(X) == 'double', | ||||||
|  |         typeof(Y) == 'double' | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  |     # call CVE C++ implementation | ||||||
|  |     return(cve_cpp(X, Y, k, | ||||||
|  |                    nObs, | ||||||
|  |                    tauInitial, | ||||||
|  |                    tol, | ||||||
|  |                    slack, | ||||||
|  |                    maxIter, | ||||||
|  |                    attempts | ||||||
|  |     )) | ||||||
|  | } | ||||||
							
								
								
									
										95
									
								
								CVE/R/RcppExports.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								CVE/R/RcppExports.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,95 @@ | |||||||
|  | # Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||||||
|  | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||||||
|  | 
 | ||||||
|  | #' Stiefel Optimization. | ||||||
|  | #' | ||||||
|  | #' Stiefel Optimization for \code{V} given a dataset \code{X} and responces | ||||||
|  | #' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} | ||||||
|  | #' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% | ||||||
|  | #' span(B) = orth(span(B))}. | ||||||
|  | #' | ||||||
|  | #' @param X data points | ||||||
|  | #' @param Y response | ||||||
|  | #' @param k assumed \eqn{rank(B)} | ||||||
|  | #' @param nObs parameter for bandwidth estimation, typical value | ||||||
|  | #'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. | ||||||
|  | #' @param tau Initial step size | ||||||
|  | #' @param tol Tolerance for update error used for stopping criterion | ||||||
|  | #'     \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{% | ||||||
|  | #'          \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}. | ||||||
|  | #' @param maxIter Upper bound of optimization iterations | ||||||
|  | #' | ||||||
|  | #' @return List containing the bandwidth \code{h}, optimization objective \code{V} | ||||||
|  | #'     and the matrix \code{B} estimated for the model as a orthogonal basis of the | ||||||
|  | #'     orthogonal space spaned by \code{V}. | ||||||
|  | #' | ||||||
|  | #' @rdname optStiefel | ||||||
|  | #' @name optStiefel | ||||||
|  | #' @keywords internal | ||||||
|  | NULL | ||||||
|  | 
 | ||||||
|  | #' Estimated bandwidth for CVE. | ||||||
|  | #' | ||||||
|  | #' Estimates a propper bandwidth \code{h} according | ||||||
|  | #' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% | ||||||
|  | #'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} | ||||||
|  | #' | ||||||
|  | #' @param X data matrix of dimension (n x p) with n data points X_i of dimension | ||||||
|  | #'  q. Therefor each row represents a datapoint of dimension p. | ||||||
|  | #' @param k Guess for rank(B). | ||||||
|  | #' @param nObs Ether numeric of a function. If specified as numeric value | ||||||
|  | #'  its used in the computation of the bandwidth directly. If its a function | ||||||
|  | #'  `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not | ||||||
|  | #'  supplied at all is to use \code{nObs <- nrow(x)^0.5}. | ||||||
|  | #' | ||||||
|  | #' @seealso [qchisq()] | ||||||
|  | #' | ||||||
|  | #' @export | ||||||
|  | estimateBandwidth <- function(X, k, nObs) { | ||||||
|  |     .Call('_CVE_estimateBandwidth', PACKAGE = 'CVE', X, k, nObs) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #' Random element from Stiefel Manifold `S(p, q)`. | ||||||
|  | #' | ||||||
|  | #' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. | ||||||
|  | #' This is done by taking the Q-component of the QR decomposition | ||||||
|  | #' from a `(p, q)` Matrix with independent standart normal entries. | ||||||
|  | #' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. | ||||||
|  | #' | ||||||
|  | #' @param p Row dimension | ||||||
|  | #' @param q Column dimension | ||||||
|  | #' | ||||||
|  | #' @return Matrix of dim `(p, q)`. | ||||||
|  | #' | ||||||
|  | #' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold> | ||||||
|  | #' | ||||||
|  | #' @export | ||||||
|  | rStiefel <- function(p, q) { | ||||||
|  |     .Call('_CVE_rStiefel', PACKAGE = 'CVE', p, q) | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #' Conditional Variance Estimation (CVE) method. | ||||||
|  | #' | ||||||
|  | #' This version uses a "simple" stiefel optimization schema. | ||||||
|  | #' | ||||||
|  | #' @param X data points | ||||||
|  | #' @param Y response | ||||||
|  | #' @param k assumed \eqn{rank(B)} | ||||||
|  | #' @param nObs parameter for bandwidth estimation, typical value | ||||||
|  | #'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. | ||||||
|  | #' @param tau Initial step size (default 1) | ||||||
|  | #' @param tol Tolerance for update error used for stopping criterion (default 1e-5) | ||||||
|  | #' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) | ||||||
|  | #' @param maxIter Upper bound of optimization iterations (default 50) | ||||||
|  | #' @param attempts Number of tryes with new random optimization starting points (default 10) | ||||||
|  | #' | ||||||
|  | #' @return List containing the bandwidth \code{h}, optimization objective \code{V} | ||||||
|  | #'     and the matrix \code{B} estimated for the model as a orthogonal basis of the | ||||||
|  | #'     orthogonal space spaned by \code{V}. | ||||||
|  | #' | ||||||
|  | #' @rdname cve_cpp_V1 | ||||||
|  | #' @export | ||||||
|  | cve_cpp <- function(X, Y, k, nObs, tauInitial = 1., tol = 1e-5, slack = -1e-10, maxIter = 50L, attempts = 10L) { | ||||||
|  |     .Call('_CVE_cve_cpp', PACKAGE = 'CVE', X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts) | ||||||
|  | } | ||||||
|  | 
 | ||||||
							
								
								
									
										13
									
								
								CVE/R/package.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								CVE/R/package.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | |||||||
|  | #' Conditional Variance Estimator (CVE) | ||||||
|  | #' | ||||||
|  | #' Conditional Variance Estimator for Sufficient Dimension | ||||||
|  | #'  Reduction | ||||||
|  | #' | ||||||
|  | #' TODO: And some details | ||||||
|  | #' | ||||||
|  | #' @docType package | ||||||
|  | #' @author Loki | ||||||
|  | #' @import Rcpp | ||||||
|  | #' @importFrom Rcpp evalCpp | ||||||
|  | #' @useDynLib CVE | ||||||
|  | "_PACKAGE" | ||||||
							
								
								
									
										
											BIN
										
									
								
								CVE/inst/doc/CVE_paper.pdf
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								CVE/inst/doc/CVE_paper.pdf
									
									
									
									
									
										Executable file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										17
									
								
								CVE/man/CVE-package.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								CVE/man/CVE-package.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/package.R | ||||||
|  | \docType{package} | ||||||
|  | \name{CVE-package} | ||||||
|  | \alias{CVE} | ||||||
|  | \alias{CVE-package} | ||||||
|  | \title{Conditional Variance Estimator (CVE)} | ||||||
|  | \description{ | ||||||
|  | Conditional Variance Estimator for Sufficient Dimension | ||||||
|  |  Reduction | ||||||
|  | } | ||||||
|  | \details{ | ||||||
|  | TODO: And some details | ||||||
|  | } | ||||||
|  | \author{ | ||||||
|  | Loki | ||||||
|  | } | ||||||
							
								
								
									
										32
									
								
								CVE/man/cve.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								CVE/man/cve.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,32 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/CVE.R | ||||||
|  | \name{cve} | ||||||
|  | \alias{cve} | ||||||
|  | \title{Conditional Variance Estimator} | ||||||
|  | \usage{ | ||||||
|  | cve(X, Y, k, nObs = sqrt(nrow(X)), tauInitial = 1, tol = 0.001, | ||||||
|  |   slack = -1e-10, maxIter = 50L, attempts = 10L) | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{X}{A matrix of type numeric of dimensions N times dim where N is the number | ||||||
|  | of entries with dim as data dimension.} | ||||||
|  | 
 | ||||||
|  | \item{Y}{A vector of type numeric of length N (coresponds to \code{x}).} | ||||||
|  | 
 | ||||||
|  | \item{k}{Guess for rank(B).} | ||||||
|  | 
 | ||||||
|  | \item{nObs}{As describet in the paper.} | ||||||
|  | 
 | ||||||
|  | \item{tol}{Tolerance for optimization stopping creteria.} | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | Conditional Variance Estimator (CVE) is a novel sufficient dimension | ||||||
|  |  reduction (SDR) method for regressions satisfying E(Y|X) = E(Y|B'X), | ||||||
|  |  where B'X is a lower dimensional projection of the predictors. | ||||||
|  | } | ||||||
|  | \references{ | ||||||
|  | Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||||
|  | } | ||||||
|  | \seealso{ | ||||||
|  | TODO: \code{vignette("CVE_paper", package="CVE")}. | ||||||
|  | } | ||||||
							
								
								
									
										37
									
								
								CVE/man/cve_cpp_V1.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								CVE/man/cve_cpp_V1.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/RcppExports.R | ||||||
|  | \name{cve_cpp} | ||||||
|  | \alias{cve_cpp} | ||||||
|  | \title{Conditional Variance Estimation (CVE) method.} | ||||||
|  | \usage{ | ||||||
|  | cve_cpp(X, Y, k, nObs, tauInitial = 1, tol = 1e-05, slack = -1e-10, | ||||||
|  |   maxIter = 50L, attempts = 10L) | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{X}{data points} | ||||||
|  | 
 | ||||||
|  | \item{Y}{response} | ||||||
|  | 
 | ||||||
|  | \item{k}{assumed \eqn{rank(B)}} | ||||||
|  | 
 | ||||||
|  | \item{nObs}{parameter for bandwidth estimation, typical value | ||||||
|  | \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].} | ||||||
|  | 
 | ||||||
|  | \item{tol}{Tolerance for update error used for stopping criterion (default 1e-5)} | ||||||
|  | 
 | ||||||
|  | \item{slack}{Ratio of small negative error allowed in loss optimization (default -1e-10)} | ||||||
|  | 
 | ||||||
|  | \item{maxIter}{Upper bound of optimization iterations (default 50)} | ||||||
|  | 
 | ||||||
|  | \item{attempts}{Number of tryes with new random optimization starting points (default 10)} | ||||||
|  | 
 | ||||||
|  | \item{tau}{Initial step size (default 1)} | ||||||
|  | } | ||||||
|  | \value{ | ||||||
|  | List containing the bandwidth \code{h}, optimization objective \code{V} | ||||||
|  |     and the matrix \code{B} estimated for the model as a orthogonal basis of the | ||||||
|  |     orthogonal space spaned by \code{V}. | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | This version uses a "simple" stiefel optimization schema. | ||||||
|  | } | ||||||
							
								
								
									
										65
									
								
								CVE/man/dataset.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								CVE/man/dataset.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/datasets.R | ||||||
|  | \name{dataset} | ||||||
|  | \alias{dataset} | ||||||
|  | \title{Generates test datasets.} | ||||||
|  | \usage{ | ||||||
|  | dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} | ||||||
|  | 
 | ||||||
|  | \item{n}{nr samples} | ||||||
|  | 
 | ||||||
|  | \item{p.mix}{Only for \code{"M4"}, see: below.} | ||||||
|  | 
 | ||||||
|  | \item{lambda}{Only for \code{"M4"}, see: below.} | ||||||
|  | 
 | ||||||
|  | \item{p}{Dim. of random variable \code{X}.} | ||||||
|  | } | ||||||
|  | \value{ | ||||||
|  | List with elements | ||||||
|  | \item{X}{data} | ||||||
|  | \item{Y}{response} | ||||||
|  | \item{B}{Used dim-reduction matrix} | ||||||
|  | \item{name}{Name of the dataset (name parameter)} | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | Provides sample datasets. There are 5 different datasets named | ||||||
|  | M1, M2, M3, M4 and M5 describet in the paper references below. | ||||||
|  | The general model is given by: | ||||||
|  | \deqn{Y ~ g(B'X) + \epsilon} | ||||||
|  | } | ||||||
|  | \section{M1}{ | ||||||
|  | 
 | ||||||
|  | The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace | ||||||
|  | dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||||
|  | The link function \eqn{g} is given as | ||||||
|  | \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | \section{M2}{ | ||||||
|  | 
 | ||||||
|  | \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||||
|  | The link function \eqn{g} is given as | ||||||
|  | \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | \section{M3}{ | ||||||
|  | 
 | ||||||
|  | TODO: | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | \section{M4}{ | ||||||
|  | 
 | ||||||
|  | TODO: | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | \section{M5}{ | ||||||
|  | 
 | ||||||
|  | TODO: | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | \references{ | ||||||
|  | Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||||
|  | } | ||||||
							
								
								
									
										27
									
								
								CVE/man/estimateBandwidth.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								CVE/man/estimateBandwidth.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,27 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/RcppExports.R | ||||||
|  | \name{estimateBandwidth} | ||||||
|  | \alias{estimateBandwidth} | ||||||
|  | \title{Estimated bandwidth for CVE.} | ||||||
|  | \usage{ | ||||||
|  | estimateBandwidth(X, k, nObs) | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{X}{data matrix of dimension (n x p) with n data points X_i of dimension | ||||||
|  | q. Therefor each row represents a datapoint of dimension p.} | ||||||
|  | 
 | ||||||
|  | \item{k}{Guess for rank(B).} | ||||||
|  | 
 | ||||||
|  | \item{nObs}{Ether numeric of a function. If specified as numeric value | ||||||
|  | its used in the computation of the bandwidth directly. If its a function | ||||||
|  | `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not | ||||||
|  | supplied at all is to use \code{nObs <- nrow(x)^0.5}.} | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | Estimates a propper bandwidth \code{h} according | ||||||
|  | \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% | ||||||
|  |       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} | ||||||
|  | } | ||||||
|  | \seealso{ | ||||||
|  | [qchisq()] | ||||||
|  | } | ||||||
							
								
								
									
										35
									
								
								CVE/man/optStiefel.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								CVE/man/optStiefel.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,35 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/RcppExports.R | ||||||
|  | \name{optStiefel} | ||||||
|  | \alias{optStiefel} | ||||||
|  | \title{Stiefel Optimization.} | ||||||
|  | \arguments{ | ||||||
|  | \item{X}{data points} | ||||||
|  | 
 | ||||||
|  | \item{Y}{response} | ||||||
|  | 
 | ||||||
|  | \item{k}{assumed \eqn{rank(B)}} | ||||||
|  | 
 | ||||||
|  | \item{nObs}{parameter for bandwidth estimation, typical value | ||||||
|  | \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].} | ||||||
|  | 
 | ||||||
|  | \item{tau}{Initial step size} | ||||||
|  | 
 | ||||||
|  | \item{tol}{Tolerance for update error used for stopping criterion | ||||||
|  | \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{% | ||||||
|  |      \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.} | ||||||
|  | 
 | ||||||
|  | \item{maxIter}{Upper bound of optimization iterations} | ||||||
|  | } | ||||||
|  | \value{ | ||||||
|  | List containing the bandwidth \code{h}, optimization objective \code{V} | ||||||
|  |     and the matrix \code{B} estimated for the model as a orthogonal basis of the | ||||||
|  |     orthogonal space spaned by \code{V}. | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | Stiefel Optimization for \code{V} given a dataset \code{X} and responces | ||||||
|  | \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} | ||||||
|  | to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% | ||||||
|  | span(B) = orth(span(B))}. | ||||||
|  | } | ||||||
|  | \keyword{internal} | ||||||
							
								
								
									
										25
									
								
								CVE/man/rStiefel.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								CVE/man/rStiefel.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | |||||||
|  | % Generated by roxygen2: do not edit by hand | ||||||
|  | % Please edit documentation in R/RcppExports.R | ||||||
|  | \name{rStiefel} | ||||||
|  | \alias{rStiefel} | ||||||
|  | \title{Random element from Stiefel Manifold `S(p, q)`.} | ||||||
|  | \usage{ | ||||||
|  | rStiefel(p, q) | ||||||
|  | } | ||||||
|  | \arguments{ | ||||||
|  | \item{p}{Row dimension} | ||||||
|  | 
 | ||||||
|  | \item{q}{Column dimension} | ||||||
|  | } | ||||||
|  | \value{ | ||||||
|  | Matrix of dim `(p, q)`. | ||||||
|  | } | ||||||
|  | \description{ | ||||||
|  | Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. | ||||||
|  | This is done by taking the Q-component of the QR decomposition | ||||||
|  | from a `(p, q)` Matrix with independent standart normal entries. | ||||||
|  | As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. | ||||||
|  | } | ||||||
|  | \seealso{ | ||||||
|  | <https://en.wikipedia.org/wiki/Stiefel_manifold> | ||||||
|  | } | ||||||
							
								
								
									
										285
									
								
								CVE/src/CVE.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										285
									
								
								CVE/src/CVE.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,285 @@ | |||||||
|  | //
 | ||||||
|  | // Usage:
 | ||||||
|  | //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
 | ||||||
|  | //
 | ||||||
|  | 
 | ||||||
|  | // only `RcppArmadillo.h` which includes `Rcpp.h`
 | ||||||
|  | #include <RcppArmadillo.h> | ||||||
|  | 
 | ||||||
|  | // through the depends attribute `Rcpp` is tolled to create
 | ||||||
|  | // hooks for `RcppArmadillo` needed by the build process.
 | ||||||
|  | //
 | ||||||
|  | // [[Rcpp::depends(RcppArmadillo)]]
 | ||||||
|  | 
 | ||||||
|  | // required for `R::qchisq()` used in `estimateBandwidth()`
 | ||||||
|  | #include <Rmath.h> | ||||||
|  | 
 | ||||||
|  | //' Estimated bandwidth for CVE.
 | ||||||
|  | //'
 | ||||||
|  | //' Estimates a propper bandwidth \code{h} according
 | ||||||
|  | //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
 | ||||||
|  | //'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
 | ||||||
|  | //'
 | ||||||
|  | //' @param X data matrix of dimension (n x p) with n data points X_i of dimension
 | ||||||
|  | //'  q. Therefor each row represents a datapoint of dimension p.
 | ||||||
|  | //' @param k Guess for rank(B).
 | ||||||
|  | //' @param nObs Ether numeric of a function. If specified as numeric value
 | ||||||
|  | //'  its used in the computation of the bandwidth directly. If its a function
 | ||||||
|  | //'  `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
 | ||||||
|  | //'  supplied at all is to use \code{nObs <- nrow(x)^0.5}.
 | ||||||
|  | //'
 | ||||||
|  | //' @seealso [qchisq()]
 | ||||||
|  | //'
 | ||||||
|  | //' @export
 | ||||||
|  | // [[Rcpp::export]]
 | ||||||
|  | double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { | ||||||
|  |     using namespace arma; | ||||||
|  | 
 | ||||||
|  |     uword n = X.n_rows; // nr samples
 | ||||||
|  |     uword p = X.n_cols; // dimension of rand. var. `X`
 | ||||||
|  | 
 | ||||||
|  |     // column mean
 | ||||||
|  |     mat M = mean(X); | ||||||
|  |     // center `X`
 | ||||||
|  |     mat C = X.each_row() - M; | ||||||
|  |     // trace of covariance matrix, `traceSigma = Tr(C' C)`
 | ||||||
|  |     double traceSigma = accu(C % C); | ||||||
|  |     // compute estimated bandwidth
 | ||||||
|  |     double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0); | ||||||
|  | 
 | ||||||
|  |     return 2.0 * qchi2 * traceSigma / (p * n); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | //' Random element from Stiefel Manifold `S(p, q)`.
 | ||||||
|  | //'
 | ||||||
|  | //' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
 | ||||||
|  | //' This is done by taking the Q-component of the QR decomposition
 | ||||||
|  | //' from a `(p, q)` Matrix with independent standart normal entries.
 | ||||||
|  | //' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
 | ||||||
|  | //'
 | ||||||
|  | //' @param p Row dimension
 | ||||||
|  | //' @param q Column dimension
 | ||||||
|  | //'
 | ||||||
|  | //' @return Matrix of dim `(p, q)`.
 | ||||||
|  | //'
 | ||||||
|  | //' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
 | ||||||
|  | //'
 | ||||||
|  | //' @export
 | ||||||
|  | // [[Rcpp::export]]
 | ||||||
|  | arma::mat rStiefel(arma::uword p, arma::uword q) { | ||||||
|  |     arma::mat Q, R; | ||||||
|  |     arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q)); | ||||||
|  |     return Q; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | double gradient(const arma::mat& X, | ||||||
|  |                 const arma::mat& X_diff, | ||||||
|  |                 const arma::mat& Y, | ||||||
|  |                 const arma::mat& Y_rep, | ||||||
|  |                 const arma::mat& V, | ||||||
|  |                 const double h, | ||||||
|  |                 arma::mat* G = 0 | ||||||
|  | ) { | ||||||
|  |     using namespace arma; | ||||||
|  | 
 | ||||||
|  |     uword n = X.n_rows; | ||||||
|  |     uword p = X.n_cols; | ||||||
|  | 
 | ||||||
|  |     // orthogonal projection matrix `Q = I - VV'` for dist computation
 | ||||||
|  |     mat Q = -(V * V.t()); | ||||||
|  |     Q.diag() += 1; | ||||||
|  |     // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
 | ||||||
|  |     vec D_vec = sum(square(X_diff * Q), 1); | ||||||
|  |     mat D = reshape(D_vec, n, n); | ||||||
|  |     // calc weights as `W` with `W(i, j) = w_i(V, X_j)`
 | ||||||
|  |     mat W = exp(D / (-2.0 * h)); | ||||||
|  |     // column-wise normalization via 1-norm
 | ||||||
|  |     W = normalise(W, 1); | ||||||
|  |     vec W_vec = vectorise(W); | ||||||
|  |     // weighted `Y` means (first and second order)
 | ||||||
|  |     vec y1 = W.t() * Y; | ||||||
|  |     vec y2 = W.t() * square(Y); | ||||||
|  |     // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
 | ||||||
|  |     vec L = y2 - square(y1); | ||||||
|  |     // `loss = L_n(V)`
 | ||||||
|  |     double loss = mean(L); | ||||||
|  |     // check if gradient as output variable is set
 | ||||||
|  |     if (G != 0) { | ||||||
|  |         // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
 | ||||||
|  |         vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; | ||||||
|  |         mat X_diff_scale = X_diff.each_col() % scale; | ||||||
|  |         (*G) = X_diff_scale.t() * X_diff * V; | ||||||
|  |         (*G) *= -2.0 / (h * h * n); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return loss; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | //' Stiefel Optimization.
 | ||||||
|  | //'
 | ||||||
|  | //' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
 | ||||||
|  | //' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
 | ||||||
|  | //' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
 | ||||||
|  | //' span(B) = orth(span(B))}.
 | ||||||
|  | //'
 | ||||||
|  | //' @param X data points
 | ||||||
|  | //' @param Y response
 | ||||||
|  | //' @param k assumed \eqn{rank(B)}
 | ||||||
|  | //' @param nObs parameter for bandwidth estimation, typical value
 | ||||||
|  | //'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
 | ||||||
|  | //' @param tau Initial step size
 | ||||||
|  | //' @param tol Tolerance for update error used for stopping criterion
 | ||||||
|  | //'     \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{%
 | ||||||
|  | //'          \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.
 | ||||||
|  | //' @param maxIter Upper bound of optimization iterations
 | ||||||
|  | //'
 | ||||||
|  | //' @return List containing the bandwidth \code{h}, optimization objective \code{V}
 | ||||||
|  | //'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
 | ||||||
|  | //'     orthogonal space spaned by \code{V}.
 | ||||||
|  | //'
 | ||||||
|  | //' @rdname optStiefel
 | ||||||
|  | //' @name optStiefel
 | ||||||
|  | //' @keywords internal
 | ||||||
|  | double optStiefel( | ||||||
|  |         const arma::mat& X, | ||||||
|  |         const arma::vec& Y, | ||||||
|  |         const int k, | ||||||
|  |         const double h, | ||||||
|  |         const double tauInitial, | ||||||
|  |         const double tol, | ||||||
|  |         const double slack, | ||||||
|  |         const int maxIter, | ||||||
|  |         arma::mat& V, // out
 | ||||||
|  |         arma::vec& history // out
 | ||||||
|  | ) { | ||||||
|  |     using namespace arma; | ||||||
|  | 
 | ||||||
|  |     // get dimensions
 | ||||||
|  |     const uword n = X.n_rows; // nr samples
 | ||||||
|  |     const uword p = X.n_cols; // dim of random variable `X`
 | ||||||
|  |     const uword q = p - k;    // rank(V) e.g. dim of ortho space of span{B}
 | ||||||
|  | 
 | ||||||
|  |     // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
 | ||||||
|  |     mat X_diff(n * n, p); | ||||||
|  |     for (uword i = 0, k = 0; i < n; ++i) { | ||||||
|  |         for (uword j = 0; j < n; ++j) { | ||||||
|  |             X_diff.row(k++) = X.row(i) - X.row(j); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     const vec Y_rep = repmat(Y, n, 1); | ||||||
|  |     const mat I_p = eye<mat>(p, p); | ||||||
|  | 
 | ||||||
|  |     // initial start value for `V`
 | ||||||
|  |     V = rStiefel(p, q); | ||||||
|  | 
 | ||||||
|  |     // init optimization `loss`es, `error` and stepsize `tau`
 | ||||||
|  |     // double loss_next = datum::inf;
 | ||||||
|  |     double loss; | ||||||
|  |     double error = datum::inf; | ||||||
|  |     double tau = tauInitial; | ||||||
|  |     int count; | ||||||
|  |     // main optimization loop
 | ||||||
|  |     for (count = 0; count < maxIter && error > tol; ++count) { | ||||||
|  |         // calc gradient `G = grad_V(L)(V)`
 | ||||||
|  |         mat G; | ||||||
|  |         loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); | ||||||
|  |         // matrix `A` for colescy-transform of the gradient
 | ||||||
|  |         mat A = tau * ((G * V.t()) - (V * G.t())); | ||||||
|  |         // next iteration step of `V`
 | ||||||
|  |         mat V_tau = inv(I_p + A) * (I_p - A) * V; | ||||||
|  |         // loss after step `L(V(tau))`
 | ||||||
|  |         double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h); | ||||||
|  | 
 | ||||||
|  |         // store `loss` in `history` and increase `count`
 | ||||||
|  |         history(count) = loss; | ||||||
|  | 
 | ||||||
|  |         // validate if loss decreased
 | ||||||
|  |         if ((loss_tau - loss) > slack * loss) { | ||||||
|  |             // ignore step, retry with half the step size
 | ||||||
|  |             tau = tau / 2.; | ||||||
|  |             error = datum::inf; | ||||||
|  |         } else { | ||||||
|  |             // compute step error (break condition)
 | ||||||
|  |             error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q); | ||||||
|  |             // shift for next iteration
 | ||||||
|  |             V = V_tau; | ||||||
|  |             loss = loss_tau; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // store final `loss`
 | ||||||
|  |     history(count) = loss; | ||||||
|  | 
 | ||||||
|  |     return loss; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | //' Conditional Variance Estimation (CVE) method.
 | ||||||
|  | //'
 | ||||||
|  | //' This version uses a "simple" stiefel optimization schema.
 | ||||||
|  | //'
 | ||||||
|  | //' @param X data points
 | ||||||
|  | //' @param Y response
 | ||||||
|  | //' @param k assumed \eqn{rank(B)}
 | ||||||
|  | //' @param nObs parameter for bandwidth estimation, typical value
 | ||||||
|  | //'     \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
 | ||||||
|  | //' @param tau Initial step size (default 1)
 | ||||||
|  | //' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
 | ||||||
|  | //' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
 | ||||||
|  | //' @param maxIter Upper bound of optimization iterations (default 50)
 | ||||||
|  | //' @param attempts Number of tryes with new random optimization starting points (default 10)
 | ||||||
|  | //'
 | ||||||
|  | //' @return List containing the bandwidth \code{h}, optimization objective \code{V}
 | ||||||
|  | //'     and the matrix \code{B} estimated for the model as a orthogonal basis of the
 | ||||||
|  | //'     orthogonal space spaned by \code{V}.
 | ||||||
|  | //'
 | ||||||
|  | //' @rdname cve_cpp_V1
 | ||||||
|  | //' @export
 | ||||||
|  | // [[Rcpp::export]]
 | ||||||
|  | Rcpp::List cve_cpp( | ||||||
|  |         const arma::mat& X, | ||||||
|  |         const arma::vec& Y, | ||||||
|  |         const int k, | ||||||
|  |         const double nObs, | ||||||
|  |         const double tauInitial = 1., | ||||||
|  |         const double tol = 1e-5, | ||||||
|  |         const double slack = -1e-10, | ||||||
|  |         const int maxIter = 50, | ||||||
|  |         const int attempts = 10 | ||||||
|  | ) { | ||||||
|  |     using namespace arma; | ||||||
|  | 
 | ||||||
|  |     // tracker of current best results
 | ||||||
|  |     mat V_best; | ||||||
|  |     double loss_best = datum::inf; | ||||||
|  |     // estimate bandwidth
 | ||||||
|  |     double h = estimateBandwidth(X, k, nObs); | ||||||
|  | 
 | ||||||
|  |     // loss history for each optimization attempt
 | ||||||
|  |     // each column contaions the iteration history for the loss
 | ||||||
|  |     mat history = mat(maxIter + 1, attempts); | ||||||
|  | 
 | ||||||
|  |     // multiple stiefel optimization attempts
 | ||||||
|  |     for (int i = 0; i < attempts; ++i) { | ||||||
|  |         // declare output variables
 | ||||||
|  |         mat V; // estimated `V` space
 | ||||||
|  |         vec hist = vec(history.n_rows, fill::zeros); // optimization history
 | ||||||
|  |         double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist); | ||||||
|  |         if (loss < loss_best) { | ||||||
|  |             loss_best = loss; | ||||||
|  |             V_best = V; | ||||||
|  |         } | ||||||
|  |         // write history to history collection
 | ||||||
|  |         history.col(i) = hist; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // get `B` as kernal of `V.t()`
 | ||||||
|  |     mat B = null(V_best.t()); | ||||||
|  | 
 | ||||||
|  |     return Rcpp::List::create( | ||||||
|  |         Rcpp::Named("history") = history, | ||||||
|  |         Rcpp::Named("loss") = loss_best, | ||||||
|  |         Rcpp::Named("h") = h, | ||||||
|  |         Rcpp::Named("V") = V_best, | ||||||
|  |         Rcpp::Named("B") = B | ||||||
|  |     ); | ||||||
|  | } | ||||||
							
								
								
									
										64
									
								
								CVE/src/RcppExports.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								CVE/src/RcppExports.cpp
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | |||||||
|  | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 | ||||||
|  | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 | ||||||
|  | 
 | ||||||
|  | #include <RcppArmadillo.h> | ||||||
|  | #include <Rcpp.h> | ||||||
|  | 
 | ||||||
|  | using namespace Rcpp; | ||||||
|  | 
 | ||||||
|  | // estimateBandwidth
 | ||||||
|  | double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs); | ||||||
|  | RcppExport SEXP _CVE_estimateBandwidth(SEXP XSEXP, SEXP kSEXP, SEXP nObsSEXP) { | ||||||
|  | BEGIN_RCPP | ||||||
|  |     Rcpp::RObject rcpp_result_gen; | ||||||
|  |     Rcpp::RNGScope rcpp_rngScope_gen; | ||||||
|  |     Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< arma::uword >::type k(kSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< double >::type nObs(nObsSEXP); | ||||||
|  |     rcpp_result_gen = Rcpp::wrap(estimateBandwidth(X, k, nObs)); | ||||||
|  |     return rcpp_result_gen; | ||||||
|  | END_RCPP | ||||||
|  | } | ||||||
|  | // rStiefel
 | ||||||
|  | arma::mat rStiefel(arma::uword p, arma::uword q); | ||||||
|  | RcppExport SEXP _CVE_rStiefel(SEXP pSEXP, SEXP qSEXP) { | ||||||
|  | BEGIN_RCPP | ||||||
|  |     Rcpp::RObject rcpp_result_gen; | ||||||
|  |     Rcpp::RNGScope rcpp_rngScope_gen; | ||||||
|  |     Rcpp::traits::input_parameter< arma::uword >::type p(pSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< arma::uword >::type q(qSEXP); | ||||||
|  |     rcpp_result_gen = Rcpp::wrap(rStiefel(p, q)); | ||||||
|  |     return rcpp_result_gen; | ||||||
|  | END_RCPP | ||||||
|  | } | ||||||
|  | // cve_cpp
 | ||||||
|  | Rcpp::List cve_cpp(const arma::mat& X, const arma::vec& Y, const int k, const double nObs, const double tauInitial, const double tol, const double slack, const int maxIter, const int attempts); | ||||||
|  | RcppExport SEXP _CVE_cve_cpp(SEXP XSEXP, SEXP YSEXP, SEXP kSEXP, SEXP nObsSEXP, SEXP tauInitialSEXP, SEXP tolSEXP, SEXP slackSEXP, SEXP maxIterSEXP, SEXP attemptsSEXP) { | ||||||
|  | BEGIN_RCPP | ||||||
|  |     Rcpp::RObject rcpp_result_gen; | ||||||
|  |     Rcpp::RNGScope rcpp_rngScope_gen; | ||||||
|  |     Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const arma::vec& >::type Y(YSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const int >::type k(kSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const double >::type nObs(nObsSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const double >::type tauInitial(tauInitialSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const double >::type tol(tolSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const double >::type slack(slackSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const int >::type maxIter(maxIterSEXP); | ||||||
|  |     Rcpp::traits::input_parameter< const int >::type attempts(attemptsSEXP); | ||||||
|  |     rcpp_result_gen = Rcpp::wrap(cve_cpp(X, Y, k, nObs, tauInitial, tol, slack, maxIter, attempts)); | ||||||
|  |     return rcpp_result_gen; | ||||||
|  | END_RCPP | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static const R_CallMethodDef CallEntries[] = { | ||||||
|  |     {"_CVE_estimateBandwidth", (DL_FUNC) &_CVE_estimateBandwidth, 3}, | ||||||
|  |     {"_CVE_rStiefel", (DL_FUNC) &_CVE_rStiefel, 2}, | ||||||
|  |     {"_CVE_cve_cpp", (DL_FUNC) &_CVE_cve_cpp, 9}, | ||||||
|  |     {NULL, NULL, 0} | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | RcppExport void R_init_CVE(DllInfo *dll) { | ||||||
|  |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); | ||||||
|  |     R_useDynamicSymbols(dll, FALSE); | ||||||
|  | } | ||||||
							
								
								
									
										13
									
								
								cve_V0.R
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								cve_V0.R
									
									
									
									
									
								
							| @ -2,14 +2,14 @@ | |||||||
| #' Euclidean vector norm (2-norm) | #' Euclidean vector norm (2-norm) | ||||||
| #' | #' | ||||||
| #' @param x Numeric vector | #' @param x Numeric vector | ||||||
| #' @returns Numeric | #' @return Numeric | ||||||
| norm2 <- function(x) { return(sum(x^2)) } | norm2 <- function(x) { return(sum(x^2)) } | ||||||
| 
 | 
 | ||||||
| #' Samples uniform from the Stiefel Manifold | #' Samples uniform from the Stiefel Manifold | ||||||
| #' | #' | ||||||
| #' @param p row dim. | #' @param p row dim. | ||||||
| #' @param q col dim. | #' @param q col dim. | ||||||
| #' @returns `(p, q)` semi-orthogonal matrix | #' @return `(p, q)` semi-orthogonal matrix | ||||||
| rStiefl <- function(p, q) { | rStiefl <- function(p, q) { | ||||||
|     return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) |     return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) | ||||||
| } | } | ||||||
| @ -17,7 +17,7 @@ rStiefl <- function(p, q) { | |||||||
| #' Matrix Trace | #' Matrix Trace | ||||||
| #' | #' | ||||||
| #' @param M Square matrix | #' @param M Square matrix | ||||||
| #' @returns Trace \eqn{Tr(M)} | #' @return Trace \eqn{Tr(M)} | ||||||
| Tr <- function(M) { | Tr <- function(M) { | ||||||
|     return(sum(diag(M))) |     return(sum(diag(M))) | ||||||
| } | } | ||||||
| @ -25,7 +25,7 @@ Tr <- function(M) { | |||||||
| #' Null space basis of given matrix `B` | #' Null space basis of given matrix `B` | ||||||
| #' | #' | ||||||
| #' @param B `(p, q)` matrix | #' @param B `(p, q)` matrix | ||||||
| #' @returns Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B` | #' @return Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B` | ||||||
| null <- function(M) { | null <- function(M) { | ||||||
|     tmp <- qr(M) |     tmp <- qr(M) | ||||||
|     set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank) |     set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank) | ||||||
| @ -60,8 +60,7 @@ estimateBandwidth<-function(X, k, nObs) { | |||||||
| # if grad=T, gradient of L(V) also returned | # if grad=T, gradient of L(V) also returned | ||||||
| LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { | LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { | ||||||
|   N <- length(Y) |   N <- length(Y) | ||||||
|   if (is.vector(V)) { k <- 1 } |   k <- if (is.vector(V)) { 1 } else { ncol(V) } | ||||||
|   else { k <- length(V[1,]) } |  | ||||||
|   Xlv <- Xl %*% V |   Xlv <- Xl %*% V | ||||||
|   d <- dtemp - ((Xlv^2) %*% rep(1, k)) |   d <- dtemp - ((Xlv^2) %*% rep(1, k)) | ||||||
|   w <- dnorm(d / h) / dnorm(0) |   w <- dnorm(d / h) / dnorm(0) | ||||||
| @ -108,7 +107,7 @@ LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { | |||||||
| #aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} | #aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} | ||||||
| #count...number of iterations  | #count...number of iterations  | ||||||
| #h...bandwidth | #h...bandwidth | ||||||
| cve_legacy <- function( | cve_R <- function( | ||||||
|         X, Y, k, |         X, Y, k, | ||||||
|         nObs = sqrt(nrow(X)), |         nObs = sqrt(nrow(X)), | ||||||
|         tauInitial = 1.0, |         tauInitial = 1.0, | ||||||
|  | |||||||
| @ -1,4 +1,6 @@ | |||||||
| //
 | //
 | ||||||
|  | // Standalone implementation for development.
 | ||||||
|  | //
 | ||||||
| // Usage:
 | // Usage:
 | ||||||
| //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
 | //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
 | ||||||
| //
 | //
 | ||||||
| @ -17,7 +19,7 @@ | |||||||
| //' Estimated bandwidth for CVE.
 | //' Estimated bandwidth for CVE.
 | ||||||
| //'
 | //'
 | ||||||
| //' Estimates a propper bandwidth \code{h} according
 | //' Estimates a propper bandwidth \code{h} according
 | ||||||
| //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{%
 | //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
 | ||||||
| //'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
 | //'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
 | ||||||
| //'
 | //'
 | ||||||
| //' @param X data matrix of dimension (n x p) with n data points X_i of dimension
 | //' @param X data matrix of dimension (n x p) with n data points X_i of dimension
 | ||||||
| @ -60,7 +62,7 @@ double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { | |||||||
| //' @param p Row dimension
 | //' @param p Row dimension
 | ||||||
| //' @param q Column dimension
 | //' @param q Column dimension
 | ||||||
| //'
 | //'
 | ||||||
| //' @returns Matrix of dim `(p, q)`.
 | //' @return Matrix of dim `(p, q)`.
 | ||||||
| //'
 | //'
 | ||||||
| //' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
 | //' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
 | ||||||
| //'
 | //'
 | ||||||
| @ -138,6 +140,7 @@ double gradient(const arma::mat& X, | |||||||
| //'     orthogonal space spaned by \code{V}.
 | //'     orthogonal space spaned by \code{V}.
 | ||||||
| //'
 | //'
 | ||||||
| //' @rdname optStiefel
 | //' @rdname optStiefel
 | ||||||
|  | //' @keywords internal
 | ||||||
| double optStiefel( | double optStiefel( | ||||||
|         const arma::mat& X, |         const arma::mat& X, | ||||||
|         const arma::vec& Y, |         const arma::vec& Y, | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| // -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
 | //
 | ||||||
|  | // Standalone implementation for development.
 | ||||||
| //
 | //
 | ||||||
| // Usage:
 | // Usage:
 | ||||||
| //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
 | //  ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
 | ||||||
| @ -18,7 +19,7 @@ | |||||||
| //' Estimated bandwidth for CVE.
 | //' Estimated bandwidth for CVE.
 | ||||||
| //'
 | //'
 | ||||||
| //' Estimates a propper bandwidth \code{h} according
 | //' Estimates a propper bandwidth \code{h} according
 | ||||||
| //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{%
 | //' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{%
 | ||||||
| //'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
 | //'       h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
 | ||||||
| //'
 | //'
 | ||||||
| //' @param X data matrix of dimension (n x p) with n data points X_i of dimension
 | //' @param X data matrix of dimension (n x p) with n data points X_i of dimension
 | ||||||
| @ -61,7 +62,7 @@ double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { | |||||||
| //' @param p Row dimension
 | //' @param p Row dimension
 | ||||||
| //' @param q Column dimension
 | //' @param q Column dimension
 | ||||||
| //'
 | //'
 | ||||||
| //' @returns Matrix of dim `(p, q)`.
 | //' @return Matrix of dim `(p, q)`.
 | ||||||
| //'
 | //'
 | ||||||
| //' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
 | //' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
 | ||||||
| //'
 | //'
 | ||||||
| @ -141,6 +142,7 @@ double gradient(const arma::mat& X, | |||||||
| //'     orthogonal space spaned by \code{V}.
 | //'     orthogonal space spaned by \code{V}.
 | ||||||
| //'
 | //'
 | ||||||
| //' @rdname optStiefel
 | //' @rdname optStiefel
 | ||||||
|  | //' @keywords internal
 | ||||||
| double optStiefel( | double optStiefel( | ||||||
|         const arma::mat& X, |         const arma::mat& X, | ||||||
|         const arma::vec& Y, |         const arma::vec& Y, | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user