From 4bc9ca2f583613507ba0d804212e0f413d08f8fc Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 9 Aug 2019 23:34:37 +0200 Subject: [PATCH] init --- CVE/DESCRIPTION | 13 ++ CVE/NAMESPACE | 16 ++ CVE/R/datasets.R | 107 ++++++++++ CVE/src/Makevars | 14 ++ CVE/src/Makevars.win | 14 ++ CVE_legacy/M1.R | 66 +++++++ CVE_legacy/M2.R | 60 ++++++ CVE_legacy/M3.R | 65 ++++++ CVE_legacy/M4.R | 76 +++++++ CVE_legacy/M5.R | 64 ++++++ CVE_legacy/function_script.R | 374 +++++++++++++++++++++++++++++++++++ cve_V0.R | 174 ++++++++++++++++ cve_V1.cpp | 318 +++++++++++++++++++++++++++++ cve_V2.cpp | 365 ++++++++++++++++++++++++++++++++++ runtime_tests_grad.cpp | 368 ++++++++++++++++++++++++++++++++++ samplePackage/DESCRIPTION | 14 -- samplePackage/NAMESPACE | 5 - samplePackage/R/S3.R | 40 ---- samplePackage/R/S4.R | 58 ------ validate.R | 104 ++++++++++ 20 files changed, 2198 insertions(+), 117 deletions(-) create mode 100644 CVE/DESCRIPTION create mode 100644 CVE/NAMESPACE create mode 100644 CVE/R/datasets.R create mode 100644 CVE/src/Makevars create mode 100644 CVE/src/Makevars.win create mode 100755 CVE_legacy/M1.R create mode 100755 CVE_legacy/M2.R create mode 100755 CVE_legacy/M3.R create mode 100755 CVE_legacy/M4.R create mode 100755 CVE_legacy/M5.R create mode 100755 CVE_legacy/function_script.R create mode 100644 cve_V0.R create mode 100644 cve_V1.cpp create mode 100644 cve_V2.cpp create mode 100644 runtime_tests_grad.cpp delete mode 100644 samplePackage/DESCRIPTION delete mode 100644 samplePackage/NAMESPACE delete mode 100644 samplePackage/R/S3.R delete mode 100644 samplePackage/R/S4.R create mode 100644 validate.R diff --git a/CVE/DESCRIPTION b/CVE/DESCRIPTION new file mode 100644 index 0000000..a07883c --- /dev/null +++ b/CVE/DESCRIPTION @@ -0,0 +1,13 @@ +Package: CVE +Type: Package +Title: Conditional Variance Estimator for Sufficient Dimension Reduction +Version: 1.0 +Date: 2019-07-29 +Author: Loki +Maintainer: Loki +Description: More about what it does (maybe more than one line) +License: What license is it under? +Imports: Rcpp (>= 1.0.2) +LinkingTo: Rcpp, RcppArmadillo +Encoding: UTF-8 +RoxygenNote: 6.1.1 diff --git a/CVE/NAMESPACE b/CVE/NAMESPACE new file mode 100644 index 0000000..3c3674b --- /dev/null +++ b/CVE/NAMESPACE @@ -0,0 +1,16 @@ +# Generated by roxygen2: do not edit by hand + +export(cve) +export(cve_cpp) +export(dataset) +export(estimate.bandwidth) +export(index_test) +export(kron_test) +export(rStiefel) +export(test1) +export(test2) +export(test3) +export(test4) +import(Rcpp) +importFrom(Rcpp,evalCpp) +useDynLib(CVE) diff --git a/CVE/R/datasets.R b/CVE/R/datasets.R new file mode 100644 index 0000000..237ae8f --- /dev/null +++ b/CVE/R/datasets.R @@ -0,0 +1,107 @@ +#' Generates test datasets. +#' +#' Provides sample datasets. There are 5 different datasets named +#' M1, M2, M3, M4 and M5 describet in the paper references below. +#' The general model is given by: +#' \deqn{Y ~ g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} +#' @param n nr samples +#' @param p Dim. of random variable \code{X}. +#' @param p.mix Only for \code{"M4"}, see: below. +#' @param lambda Only for \code{"M4"}, see: below. +#' +#' @return List with elements +#' \item{X}{data} +#' \item{Y}{response} +#' \item{B}{Used dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' +#' @section M1: +#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} +#' @section M2: +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} +#' @section M3: +#' TODO: +#' @section M4: +#' TODO: +#' @section M5: +#' TODO: +#' +#' @export +#' +#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { + # validate parameters + stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) + + # set default values if not supplied + if (missing(n)) { + n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 + } + if (missing(B)) { + p <- 12 + if (name == "M1") { + B <- cbind( + c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), + c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) + ) / sqrt(6) + } else if (name == "M2") { + B <- cbind( + c(c(1, 0), rep(0, 10)), + c(c(0, 1), rep(0, 10)) + ) + } else { + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) + } + } else { + p <- dim(B)[1] + # validate col. nr to match dataset `k = dim(B)[2]` + stopifnot( + name %in% c("M1", "M2") && dim(B)[2] == 2, + name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 + ) + } + + # set link function `g` for model `Y ~ g(B'X) + epsilon` + if (name == "M1") { + g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } + } else if (name == "M2") { + g <- function(BX) { BX[1] * BX[2]^2 } + } else if (name %in% c("M3", "M4")) { + g <- function(BX) { cos(BX[1]) } + } else { # name == "M5" + g <- function(BX) { 2 * log(abs(BX[1]) + 1) } + } + + # compute X + if (name != "M4") { + # compute root of the covariance matrix according the dataset + if (name %in% c("M1", "M3")) { + # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with + # `\Sigma_{i, j} = 0.5^{|i - j|}`. + Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p) + # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X` + Sigma.root <- chol(Sigma) + } else { # name %in% c("M2", "M5") + Sigma.root <- diag(rep(1, p)) # d-dim identity + } + # data `X` as multivariate random normal variable with + # variance matrix `Sigma`. + X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root + } else { # name == "M4" + X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1))) + } + + # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)` + Y <- apply(X, 1, function(X_i) { + g(t(B) %*% X_i) + rnorm(1, 0, 0.5) + }) + + return(list(X = X, Y = Y, B = B, name = name)) +} diff --git a/CVE/src/Makevars b/CVE/src/Makevars new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE/src/Makevars @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. +## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE/src/Makevars.win b/CVE/src/Makevars.win new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE/src/Makevars.win @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. +## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE_legacy/M1.R b/CVE_legacy/M1.R new file mode 100755 index 0000000..9f2c73c --- /dev/null +++ b/CVE_legacy/M1.R @@ -0,0 +1,66 @@ +#initialize model parameterss +m<-100 #number of replications in simulation +dim<-12 #dimension of random variable X +truedim<-2 #dimension of B=b +qs<-dim-truedim # dimension of orthogonal complement of B +b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) +b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6) +b<-cbind(b1,b2) +P<-b%*%t(b) +sigma=0.5 #error standard deviation +N<-200 #sample size +K<-30 #number of arbitrary starting values for curvilinear optimization +MAXIT<-50 #maximal number of iterations in curvilinear search algorithm +##initailaize true covariancematrix of X +Sig<-mat.or.vec(dim,dim) +for (i in 1:dim){ + for (j in 1:dim){ + Sig[i,j]<-sigma^abs(i-j) + } +} +Sroot<-chol(Sig) + +M1_JASA<-mat.or.vec(m,8) +colnames(M1_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') +#link function for M1 +fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))} +for (i in 1:m){ + #generate dat according to M1 + dat<-creat_sample_nor_nonstand(b,N,fM1,t(Sroot),sigma) + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + # calculate distance between true B and estimated B + M1_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) + M1_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) + M1_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M1_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) + #csMAVE + mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') + M1_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) + #phd + test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) + M1_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) + #sir + test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) + M1_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) + #save + test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) + M1_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) + print(paste(i,paste('/',m))) +} +boxplot(M1_JASA[,]/sqrt(2*truedim),names=colnames(M1_JASA),ylab='err',main='M1') +summary(M1_JASA[,]) + diff --git a/CVE_legacy/M2.R b/CVE_legacy/M2.R new file mode 100755 index 0000000..5232dc8 --- /dev/null +++ b/CVE_legacy/M2.R @@ -0,0 +1,60 @@ +#initialize model parameterss +m<-100 #number of replications in simulation +dim<-12 #dimension of random variable X +truedim<-2 #dimension of B=b +qs<-dim-truedim # dimension of orthogonal complement of B +b1=c(1,rep(0,dim-1)) +b2=c(0,1,rep(0,dim-2)) +b<-cbind(b1,b2) #B +P<-b%*%t(b) +sigma=0.5 #error standard deviation +N<-200 #sample size +K<-30 #number of arbitrary starting values for curvilinear optimization +MAXIT<-50 #maximal number of iterations in curvilinear search algorithm + + + +M2_JASA<-mat.or.vec(m,8) +colnames(M2_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') +f_link<-function(x){return(x[1]*(x[2])^2)} +for (i in 1:m){ + #generate dat according to M2 + dat<-creat_sample(b,N,f_link,sigma) + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + #CVE + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + # calculate distance between true B and estimated B + M2_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) + M2_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) + M2_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M2_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) + #csMAVE + mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') + M2_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) + #phd + test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) + M2_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) + #sir + test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) + M2_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) + #save + test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) + M2_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) + print(paste(i,paste('/',m))) +} +boxplot(M2_JASA[,]/sqrt(2*truedim),names=colnames(M2_JASA),ylab='err',main='M2') +summary(M2_JASA[,]) + diff --git a/CVE_legacy/M3.R b/CVE_legacy/M3.R new file mode 100755 index 0000000..89735c0 --- /dev/null +++ b/CVE_legacy/M3.R @@ -0,0 +1,65 @@ +#initialize model parameterss +m<-100 #number of replications in simulation +dim<-12 #dimension of random variable X +truedim<-1#dimension of B=b +qs<-dim-truedim# dimension of orthogonal complement of B +b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) +b<-b1#B +P<-b%*%t(b) +sigma=0.5#error standard deviation +N<-100#sample size +K<-30#number of arbitrary starting values for curvilinear optimization +MAXIT<-30#maximal number of iterations in curvilinear search algorithm +##initailaize true covariancematrix of X +Sig<-mat.or.vec(dim,dim) +for (i in 1:dim){ + for (j in 1:dim){ + Sig[i,j]<-sigma^abs(i-j) + } +} +Sroot<-chol(Sig) + +M3_JASA<-mat.or.vec(m,8) +colnames(M3_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') +for (i in 1:m){ + #generate dat according to M3 + dat<-creat_sample_nor_nonstand(b,N,cos,t(Sroot),sigma) # distribution + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + #CVE + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + # calculate distance between true B and estimated B + M3_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) + M3_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) + M3_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M3_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) + #csMAVE + mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') + M3_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) + #phd + test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) + M3_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) + #sir + test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) + M3_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) + #save + test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) + M3_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) + + print(paste(i,paste('/',m))) +} +boxplot(M3_JASA[,]/sqrt(2*truedim),names=colnames(M3_JASA),ylab='err',main='M3') +summary(M3_JASA[,]) + diff --git a/CVE_legacy/M4.R b/CVE_legacy/M4.R new file mode 100755 index 0000000..23db3ad --- /dev/null +++ b/CVE_legacy/M4.R @@ -0,0 +1,76 @@ +#initialize model parameters +m<-100 #number of replications in simulation +dim<-12 #dimension of random variable X +truedim<-1#dimension of B=b +qs<-dim-truedim# dimension of orthogonal complement of B +b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) +b<-b1#B +P<-b%*%t(b) +sigma=0.5#error standard deviation +N<-100#sample size +K<-30#number of arbitrary starting values for curvilinear optimization +MAXIT<-30#maximal number of iterations in curvilinear search algorithm + +para<-seq(0,1.5,0.5)#model parameter corresponding to lambda in M4 +mix_prob<-seq(0.3,0.5,0.1) #model parameters corresponding to p_{mix} +M4_JASA<-array(data=NA,dim=c(length(para),length(mix_prob),m,5)) +for(o in 1:length(mix_prob)){ + for (u in 1:length(para)){ + colnames(M4_JASA[u,o,,])<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE') + for (i in 1:m){ + #generate dat according to M4 with p_{mix}=mix_prob[o] and lambda=para[u] + if(u==1){dat<-creat_sample(b,N,cos,sigma)} + else{dat<-creat_sample_noneliptic_gausmixture(b,N,cos,sigma,mix_prob[o],dispers_para = para[u])} + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + #CVE + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + # calculate distance between true B and estimated B + M4_JASA[u,o,i,1]<-subspace_dist(b,CVE1$est_base[,1:truedim]) + M4_JASA[u,o,i,2]<-subspace_dist(b,CVE2$est_base[,1:truedim]) + M4_JASA[u,o,i,3]<-subspace_dist(b,CVE3$est_base[,1:truedim]) + #csMAVE + mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M4_JASA[u,o,i,4]<-subspace_dist(b,mod_t$dir[[truedim]]) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') + M4_JASA[u,o,i,5]<-subspace_dist(b,mod_t2$dir[[truedim]]) + + } + print(paste(u,paste('/',length(para)))) + } +} + +par(mfrow=c(3,4)) +boxplot(Exp_cook_square[1,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','meanMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[2,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[3,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[4,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) + +boxplot(Exp_cook_square[1,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[2,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[3,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[4,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) + +boxplot(Exp_cook_square[1,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[2,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[3,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[4,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) +#boxplot(Exp_cook_square[5,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[5]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) + +boxplot(Exp_cook_square[1,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[2,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[3,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) +boxplot(Exp_cook_square[4,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) + +par(mfrow=c(1,1)) diff --git a/CVE_legacy/M5.R b/CVE_legacy/M5.R new file mode 100755 index 0000000..2dfb2f2 --- /dev/null +++ b/CVE_legacy/M5.R @@ -0,0 +1,64 @@ +#initialize model parameters +m<-500#number of replications in simulation +dim<-12#dimension of random variable X +truedim<-1#dimension of B=b +qs<-dim-truedim# dimension of orthogonal complement of B +b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) +b<-b1#B +P<-b%*%t(b) +sigma=0.5#error standard deviation +N<-42#sample size +K<-70#number of arbitrary starting values for curvilinear optimization +MAXIT<-50#maximal number of iterations in curvilinear search algorithm + +f_ln1<-function(x){return(2*log(abs(x)+1))}#link function +M5_JASA_lowN<-mat.or.vec(m,8) +colnames(M5_JASA_lowN)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') +for (i in 1:m){ + #generate dat according to M5 + dat<-creat_sample(b,N,f_ln1,sigma) + #est sample covariance matrix + Sig_est<-est_varmat(dat[,-1]) + #est trace of sample covariance matrix + tr<-var_tr(Sig_est) + #CVE + #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs + CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) + CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) + #calculate orthogonal complement of Vhat_k + #i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) + CVE1$est_base<-fill_base(CVE1$est_base) + CVE2$est_base<-fill_base(CVE2$est_base) + CVE3$est_base<-fill_base(CVE3$est_base) + # calculate distance between true B and estimated B + M5_JASA_lowN[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) + M5_JASA_lowN[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) + M5_JASA_lowN[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) + #meanMAVE + mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') + M5_JASA_lowN[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) + #csMAVE + mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') + M5_JASA_lowN[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) + #phd + test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) + M5_JASA_lowN[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) + #sir + test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) + M5_JASA_lowN[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) + #save + test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) + M5_JASA_lowN[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) + + print(paste(i,paste('/',m))) +} +boxplot(M5_JASA_lowN[,]/sqrt(2*truedim),names=colnames(M5_JASA_lowN),ylab='err',main='M5') +summary(M5_JASA_lowN[,]) + +#### + +# par(mfrow=c(1,2)) +# boxplot(M3_JASA_L[,]/sqrt(2*1),names=colnames(M3_JASA_L),ylab='err',main='M3') +# boxplot(M5_JASA_lowN[1:100,-c(4,5,6)]/sqrt(2*1),names=colnames(M5_JASA_lowN)[-c(4,5,6)],ylab='err',main='M5') +# par(mfrow=c(1,1)) diff --git a/CVE_legacy/function_script.R b/CVE_legacy/function_script.R new file mode 100755 index 0000000..e21f429 --- /dev/null +++ b/CVE_legacy/function_script.R @@ -0,0 +1,374 @@ +library("Matrix") +library("fields") +library('dr') +library('mvtnorm') +library(mgcv) +library(pracma) +library(RVAideMemoire) +library("MAVE") +library(pls) +library(LaplacesDemon) +library(earth) +library("latex2exp") + +################################################# + +############################## + +################################ + + +fsquare<-function(x){ + ret<-0 + for (h in 1:length(x)){ret<-ret+x[h]^2} + return(ret) +} +f_id<-function(x){return(x[1])} +vfun<-function(x)return(c(cos(x),sin(x))) +vfun_grad<-function(x)return(c(-sin(x),cos(x))) +f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)} +############################### +##creat sample for M2, b is B matrix of model, N..sample size, +#f...link function, sigma..standard deviation of error term +#produces sample with X standard normal with dimension to columnlength of b an +# epsilon centered normal with standard deviation sigma +creat_sample<-function(b,N,f,sigma){ + if (is.vector(b)==1){dim<-length(b)} + if (is.vector(b)==0){dim<-length(b[,1])} + + X<-mat.or.vec(N,dim) + Y<-mat.or.vec(N,1) + for (i in 1:N){ + X[i,]<-rnorm(dim,0,1) + Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) + } + dat<-cbind(Y,X) + return(dat) +} +##### +creat_b<-function(dim,n){ + temp<-mat.or.vec(dim,n) + + if (n>1){ + while (rankMatrix(temp,method = 'qr')< min(dim,n)){ + for (i in 1:n){ + temp[,i]<-rnorm(dim,0,1) + temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2)) + } + } + } + if (n==1) {temp<-rnorm(dim,0,1) + temp<-temp/sqrt(sum(temp^2))} + return(temp) +} +### + +### creat sample for M1,M3 +## b is B matrix of model, N..sample size, +#f...link function, sigma..standard deviation of error term +#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an +# epsilon centered normal with standard deviation sigma +creat_sample_nor_nonstand<-function(b,N,f,S,sigma){ + #Var(x)= SS' + if (is.vector(b)==1){dim<-length(b)} + if (is.vector(b)==0){dim<-length(b[,1])} + + X<-mat.or.vec(N,dim) + Y<-mat.or.vec(N,1) + for (i in 1:N){ + X[i,]<-rnorm(dim,0,1)%*%t(S) + Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) + } + dat<-cbind(Y,X) + return(dat) +} +#### + +#### + +### +#### + +#### +#### creat sample for M4 +##creat sample for M2, b is B matrix of model, N..sample size, +#f...link function, sigma..standard deviation of error term +#produces sample with X Gausian mixture +#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi) +#with dimension to columnlength of b an +# epsilon centered normal with standard deviation sigma +creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){ + if (is.vector(b)==1){dim<-length(b)} + if (is.vector(b)==0){dim<-length(b[,1])} + + X<-mat.or.vec(N,dim) + Y<-mat.or.vec(N,1) + for (i in 1:N){ + p<-rbinom(1,1,pi) + X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1) + Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) + } + dat<-cbind(Y,X) + return(dat) +} +### +############################### +#estimates covariance matrix by ML estimate +# X...n times dim data matrix with rows corresponding to X_i +est_varmat<-function(X){ + dim<-length(X[1,]) + Sig<-mat.or.vec(dim,dim)#Cov(X) + X<-scale(X,center = T,scale = F) + Sig<-t(X)%*%X/length(X[,1]) + return(Sig) +} +#### +# normalizes a vector x such that it sums to 1 +column_normalize<-function(x){return(x/sum(x))} +#### +#squared 2-norm of a vector x +norm2<-function(x){return(sum(x^2))} +### +# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix +fill_base<-function(estb){ + dim<-length(estb[,1]) + k<-length(estb[1,]) + P<-(diag(rep(1,dim))-estb%*%t(estb)) + rk<-0 + while (rk < dim){ + tmp1<-P%*%creat_b(dim,1) + nor<-sqrt(sum(tmp1^2)) + if(nor>10^(-4)){ + estb<-cbind(tmp1/nor,estb) + rkn<-rankMatrix(estb,method = 'qr') + if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb)) + rk<-rkn} + } + if(nor <=10^(-4)){rk<-0} + + } + + return(estb) +} +################## +###new stiefel optimization +#draws from the uniform measure of Stiefel(p,k) +#returns a p times k dimensional semiorthogonal matrix +stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))} +##### chooses shifting points +#auxiliary function for stielfel_opt +# X... N times dim data matrix +#S... sample covariance matrix +#0 < p <=1 fraction of data point used as shifting points +# if p=1 all data points used as shifting points +q_ind<-function(X,S,p){ + dim<-length(X[1,]) + N<-length(X[,1]) + if (p < 1){ + Sinv<-solve(S) + ind<-c() + bound<-qchisq(p,dim) + for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean + q<-length(ind) + if (q<=2){ + q<-N + ind<-seq(1,N)} + } + else { + q<-N + ind<-seq(1,N) + } + ret<-list(q,ind) + names(ret)<-c('q','ind') + return(ret) +} +#### +#calculates the trace of a quadratic matrix S +var_tr<-function(x){return(sum(diag(x)))} +#### +#chooses bandwith h according to formula in paper +#dim...dimension of X vector +#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model +# N...sample size +#nObs... nObs in bandwith formula +#tr...trace of sample covariance matrix of X +choose_h_2<-function(dim,k,N,nObs,tr){ + #h1<-qchisq(nObs/N,2*(dim-k)) + h2<-qchisq((nObs-1)/(N-1),(dim-k)) + return(h2*2*tr/dim) +} +#### +#auxiliary function for stiefel_opt +#X... data matrix of random variable X +#q,ind... output of function q_ind +Xl_fun<-function(X,q,ind){ + N<-length(X[,1]) + Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) + return(Xl) +} +########### +# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) +# V... (dim times q) matrix +# Xl... output of Xl_fun +# dtemp...vector with pairwise distances |X_i - X_j| +# q...output of q_ind function +# Y... vector with N Y_i values +# if grad=T, gradient of L(V) also returned +LV<-function(V,Xl,dtemp,h,q,Y,grad=T){ + N<-length(Y) + if(is.vector(V)){k<-1} + else{k<-length(V[1,])} + Xlv<-Xl%*%V + d<-dtemp-((Xlv^2)%*%rep(1,k)) + w<-dnorm(d/h)/dnorm(0) + w<-matrix(w,N,q) + w<-apply(w,2,column_normalize) + mY<-t(w)%*%Y + sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind + if(grad==T){ + grad<-mat.or.vec(dim,k) + tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) + if(k==1){ + grad_d<- -2*Xl*as.vector(Xlv) + grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1 + } + else{ + for (j in 1:(k)){ + grad_d<- -2*Xl*as.vector(Xlv[,j]) + grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1 + } + } + ret<-list(mean(sig),sig,grad) + names(ret)<-c('var','sig','grad') + } + else{ + ret<-list(mean(sig),sig) + names(ret)<-c('var','sig') + } + + return(ret) +} +#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V) +#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold +#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns +#consists of X data matrix, (i.e. dat=cbind(Y,X)) +#h... bandwidth +#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B +#k0... number of arbitrary starting values +#p...fraction of data points used as shifting point +#maxit... number of maximal iterations in curvilinear search +#nObs.. nObs parameter for choosing bandwidth if no h is supplied +#lambda_0...initial stepsize +#tol...tolerance for stoping iterations +#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced +#output: +#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix +#var...value of L_n(Vhat_k) +#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} +#count...number of iterations +#h...bandwidth +#count2...number of times sclack_para is active +stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ + Y<-dat[,1] + X<-dat[,-1] + N<-length(Y) + dim<-length(X[1,]) + S<-est_varmat(X) + if(p<1){ + + tmp1<-q_ind(X,S,p) + q<-tmp1$q + ind<-tmp1$ind + } + else{ + q<-N + ind<-1:N + } + Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) + dtemp<-apply(Xl,1,norm2) + tr<-var_tr(S) + if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)} + best<-exp(10000) + Vend<-mat.or.vec(dim,k) + sig<-mat.or.vec(q,1) + for(u in 1:k0){ + Vnew<-Vold<-stiefl_startval(dim,k) + #print(Vold) + #print(LV(Vold,Xl,dtemp,h,q,Y)$var) + Lnew<-Lold<-exp(10000) + lambda<-lambda_0 + err<-10 + count<-0 + count2<-0 + while(err>tol&count sclack_para){#/(count+1)^(0.5) + lambda=lambda/2 + err<-10 + count2<-count2+1 + count<-count-1 + Vnew<-Vold #!!!!! + + } + Vold<-Vnew + count<-count+1 + #print(count) + } + if(best>Lnew){ + best<-Lnew + Vend<-Vnew + sig<-tmp3$sig + } + } + ret<-list(Vend,best,sig,count,h,count2) + names(ret)<-c('est_base','var','aov_dat','count','h','count2') + return(ret) +} +##### calculates distance between two subspaces +#i.e. |b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2 +subspace_dist<-function(b1,b2){ + #b1<-orth(b1) + #b2<-orth(b2) + P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1) + P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2) + return(sqrt(sum((P1-P2)^2))) +} +####performs local linaer regression +#x...datapoint at where estimated function should be evaluated +#h...bandwidth +#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns +#consists of X data matrix, (i.e. dat=cbind(Y,X)) +#beta...projection matrix +local_linear<-function(x,h,dat,beta){ + Y<-dat[,1] + X<-dat[,-1] + N<-length(Y) + X<-X%*%beta + x<-x%*%beta#beta%*%x + D_mat<-cbind(rep(1,N),X) + if (is.vector(X)){ + dim<-1 + d<-abs(X-rep(x,N)) + } + else{ + dim<-length(X[1,]) + d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2)) + } + K<-diag(dnorm(d/h)/dnorm(0)) + pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y + return(pred) +} + + + diff --git a/cve_V0.R b/cve_V0.R new file mode 100644 index 0000000..192fcbb --- /dev/null +++ b/cve_V0.R @@ -0,0 +1,174 @@ + +#' Euclidean vector norm (2-norm) +#' +#' @param x Numeric vector +#' @returns Numeric +norm2 <- function(x) { return(sum(x^2)) } + +#' Samples uniform from the Stiefel Manifold +#' +#' @param p row dim. +#' @param q col dim. +#' @returns `(p, q)` semi-orthogonal matrix +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +#' Matrix Trace +#' +#' @param M Square matrix +#' @returns Trace \eqn{Tr(M)} +Tr <- function(M) { + return(sum(diag(M))) +} + +#' Null space basis of given matrix `B` +#' +#' @param B `(p, q)` matrix +#' @returns Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B` +null <- function(M) { + tmp <- qr(M) + set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank) + return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) +} + +#### +#chooses bandwith h according to formula in paper +#dim...dimension of X vector +#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model +# N...sample size +#nObs... nObs in bandwith formula +#tr...trace of sample covariance matrix of X +estimateBandwidth<-function(X, k, nObs) { + n <- nrow(X) + p <- ncol(X) + + X_centered <- scale(X, center = TRUE, scale = FALSE) + Sigma <- (1 / n) * t(X_centered) %*% X_centered + + quantil <- qchisq((nObs - 1) / (n - 1), k) + return(2 * quantil * Tr(Sigma) / p) +} + +########### +# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) +# V... (dim times q) matrix +# Xl... output of Xl_fun +# dtemp...vector with pairwise distances |X_i - X_j| +# q...output of q_ind function +# Y... vector with N Y_i values +# if grad=T, gradient of L(V) also returned +LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { + N <- length(Y) + if (is.vector(V)) { k <- 1 } + else { k <- length(V[1,]) } + Xlv <- Xl %*% V + d <- dtemp - ((Xlv^2) %*% rep(1, k)) + w <- dnorm(d / h) / dnorm(0) + w <- matrix(w, N, q) + w <- apply(w, 2, function(x) { x / sum(x) }) + y1 <- t(w) %*% Y + y2 <- t(w) %*% (Y^2) + sig <- y2 - y1^2 + + result <- list(var = mean(sig), sig = sig) + if (grad == TRUE) { + tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2) + if (k == 1) { + grad_d <- -2 * Xl * as.vector(Xlv) + grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 + } else { + grad <- matrix(0, nrow(V), ncol(V)) + for (j in 1:k) { + grad_d <- -2 * Xl * as.vector(Xlv[ ,j]) + grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 + } + } + result$grad = grad + } + + return(result) +} +#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V) +#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold +#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns +#consists of X data matrix, (i.e. dat=cbind(Y,X)) +#h... bandwidth +#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B +#k0... number of arbitrary starting values +#p...fraction of data points used as shifting point +#maxIter... number of maximal iterations in curvilinear search +#nObs.. nObs parameter for choosing bandwidth if no h is supplied +#lambda_0...initial stepsize +#tol...tolerance for stoping iterations +#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced +#output: +#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix +#var...value of L_n(Vhat_k) +#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} +#count...number of iterations +#h...bandwidth +cve_legacy <- function( + X, Y, k, + nObs = sqrt(nrow(X)), + tauInitial = 1.0, + tol = 1e-3, + slack = 0, + maxIter = 50L, + attempts = 10L +) { + # get dimensions + n <- nrow(X) + p <- ncol(X) + q <- p - k + + Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) + Xd <- apply(Xl, 1, norm2) + I_p <- diag(1, p) + # estimate bandwidth + h <- estimateBandwidth(X, k, nObs) + + Lbest <- Inf + Vend <- mat.or.vec(p, q) + + for (. in 1:attempts) { + Vnew <- Vold <- rStiefl(p, q) + Lnew <- Lold <- exp(10000) + tau <- tauInitial + + error <- Inf + count <- 0 + while (error > tol & count < maxIter) { + + tmp <- LV(Vold, Xl, Xd, h, n, Y) + G <- tmp$grad + Lold <- tmp$var + + W <- tau * (G %*% t(Vold) - Vold %*% t(G)) + Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold + + Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var + + if ((Lnew - Lold) > slack * Lold) { + tau = tau / 2 + error <- Inf + } else { + error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k) + Vold <- Vnew + } + count <- count + 1 + } + + if (Lbest > Lnew) { + Lbest <- Lnew + Vend <- Vnew + } + } + + return(list( + loss = Lbest, + V = Vend, + B = null(Vend), + h = h + )) +} diff --git a/cve_V1.cpp b/cve_V1.cpp new file mode 100644 index 0000000..51c25aa --- /dev/null +++ b/cve_V1.cpp @@ -0,0 +1,318 @@ +// +// Usage: +// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')" +// + +// only `RcppArmadillo.h` which includes `Rcpp.h` +#include + +// through the depends attribute `Rcpp` is tolled to create +// hooks for `RcppArmadillo` needed by the build process. +// +// [[Rcpp::depends(RcppArmadillo)]] + +// required for `R::qchisq()` used in `estimateBandwidth()` +#include + +//' Estimated bandwidth for CVE. +//' +//' Estimates a propper bandwidth \code{h} according +//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{% +//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +//' +//' @param X data matrix of dimension (n x p) with n data points X_i of dimension +//' q. Therefor each row represents a datapoint of dimension p. +//' @param k Guess for rank(B). +//' @param nObs Ether numeric of a function. If specified as numeric value +//' its used in the computation of the bandwidth directly. If its a function +//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +//' supplied at all is to use \code{nObs <- nrow(x)^0.5}. +//' +//' @seealso [qchisq()] +//' +//' @export +// [[Rcpp::export]] +double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { + using namespace arma; + + uword n = X.n_rows; // nr samples + uword p = X.n_cols; // dimension of rand. var. `X` + + // column mean + mat M = mean(X); + // center `X` + mat C = X.each_row() - M; + // trace of covariance matrix, `traceSigma = Tr(C' C)` + double traceSigma = accu(C % C); + // compute estimated bandwidth + double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast(k), 1, 0); + + return 2.0 * qchi2 * traceSigma / (p * n); +} + +//' Random element from Stiefel Manifold `S(p, q)`. +//' +//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. +//' This is done by taking the Q-component of the QR decomposition +//' from a `(p, q)` Matrix with independent standart normal entries. +//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. +//' +//' @param p Row dimension +//' @param q Column dimension +//' +//' @returns Matrix of dim `(p, q)`. +//' +//' @seealso +//' +//' @export +// [[Rcpp::export]] +arma::mat rStiefel(arma::uword p, arma::uword q) { + arma::mat Q, R; + arma::qr_econ(Q, R, arma::randn(p, q)); + return Q; +} + +double gradient(const arma::mat& X, + const arma::mat& X_diff, + const arma::mat& Y, + const arma::mat& Y_rep, + const arma::mat& V, + const double h, + arma::mat* G = 0 +) { + using namespace arma; + + uword n = X.n_rows; + uword p = X.n_cols; + + // orthogonal projection matrix `Q = I - VV'` for dist computation + mat Q = -(V * V.t()); + Q.diag() += 1; + // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` + vec D_vec = sum(square(X_diff * Q), 1); + mat D = reshape(D_vec, n, n); + // calc weights as `W` with `W(i, j) = w_i(V, X_j)` + mat W = exp(D / (-2.0 * h)); + // column-wise normalization via 1-norm + W = normalise(W, 1); + vec W_vec = vectorise(W); + // weighted `Y` means (first and second order) + vec y1 = W.t() * Y; + vec y2 = W.t() * square(Y); + // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)` + vec L = y2 - square(y1); + // `loss = L_n(V)` + double loss = mean(L); + // check if gradient as output variable is set + if (G != 0) { + // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V` + vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; + mat X_diff_scale = X_diff.each_col() % scale; + (*G) = X_diff_scale.t() * X_diff * V; + (*G) *= -2.0 / (h * h * n); + } + + return loss; +} + +//' Stiefel Optimization. +//' +//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces +//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% +//' span(B) = orth(span(B))}. +//' +//' @param X data points +//' @param Y response +//' @param k assumed \eqn{rank(B)} +//' @param nObs parameter for bandwidth estimation, typical value +//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. +//' @param tau Initial step size +//' @param tol Tolerance for update error used for stopping criterion +//' \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{% +//' \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}. +//' @param maxIter Upper bound of optimization iterations +//' +//' @return List containing the bandwidth \code{h}, optimization objective \code{V} +//' and the matrix \code{B} estimated for the model as a orthogonal basis of the +//' orthogonal space spaned by \code{V}. +//' +//' @rdname optStiefel +double optStiefel( + const arma::mat& X, + const arma::vec& Y, + const int k, + const double h, + const double tauInitial, + const double tol, + const double slack, + const int maxIter, + arma::mat& V, // out + arma::vec& history // out +) { + using namespace arma; + + // get dimensions + const uword n = X.n_rows; // nr samples + const uword p = X.n_cols; // dim of random variable `X` + const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} + + // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` + mat X_diff(n * n, p); + for (uword i = 0, k = 0; i < n; ++i) { + for (uword j = 0; j < n; ++j) { + X_diff.row(k++) = X.row(i) - X.row(j); + } + } + const vec Y_rep = repmat(Y, n, 1); + const mat I_p = eye(p, p); + + // initial start value for `V` + V = rStiefel(p, q); + + // init optimization `loss`es, `error` and stepsize `tau` + // double loss_next = datum::inf; + double loss; + double error = datum::inf; + double tau = tauInitial; + int count; + // main optimization loop + for (count = 0; count < maxIter && error > tol; ++count) { + // calc gradient `G = grad_V(L)(V)` + mat G; + loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); + // matrix `A` for colescy-transform of the gradient + mat A = tau * ((G * V.t()) - (V * G.t())); + // next iteration step of `V` + mat V_tau = inv(I_p + A) * (I_p - A) * V; + // loss after step `L(V(tau))` + double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h); + + // store `loss` in `history` and increase `count` + history(count) = loss; + + // validate if loss decreased + if ((loss_tau - loss) > slack * loss) { + // ignore step, retry with half the step size + tau = tau / 2.; + error = datum::inf; + } else { + // compute step error (break condition) + error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q); + // shift for next iteration + V = V_tau; + loss = loss_tau; + } + } + + // store final `loss` + history(count) = loss; + + return loss; +} + +//' Conditional Variance Estimation (CVE) method. +//' +//' This version uses a "simple" stiefel optimization schema. +//' +//' @param X data points +//' @param Y response +//' @param k assumed \eqn{rank(B)} +//' @param nObs parameter for bandwidth estimation, typical value +//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. +//' @param tau Initial step size (default 1) +//' @param tol Tolerance for update error used for stopping criterion (default 1e-5) +//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) +//' @param maxIter Upper bound of optimization iterations (default 50) +//' @param attempts Number of tryes with new random optimization starting points (default 10) +//' +//' @return List containing the bandwidth \code{h}, optimization objective \code{V} +//' and the matrix \code{B} estimated for the model as a orthogonal basis of the +//' orthogonal space spaned by \code{V}. +//' +//' @rdname cve_cpp_V1 +//' @export +// [[Rcpp::export]] +Rcpp::List cve_cpp( + const arma::mat& X, + const arma::vec& Y, + const int k, + const double nObs, + const double tauInitial = 1., + const double tol = 1e-5, + const double slack = -1e-10, + const int maxIter = 50, + const int attempts = 10 +) { + using namespace arma; + + // tracker of current best results + mat V_best; + double loss_best = datum::inf; + // estimate bandwidth + double h = estimateBandwidth(X, k, nObs); + + // loss history for each optimization attempt + // each column contaions the iteration history for the loss + mat history = mat(maxIter + 1, attempts); + + // multiple stiefel optimization attempts + for (int i = 0; i < attempts; ++i) { + // declare output variables + mat V; // estimated `V` space + vec hist = vec(history.n_rows, fill::zeros); // optimization history + double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist); + if (loss < loss_best) { + loss_best = loss; + V_best = V; + } + // write history to history collection + history.col(i) = hist; + } + + // get `B` as kernal of `V.t()` + mat B = null(V_best.t()); + + return Rcpp::List::create( + Rcpp::Named("history") = history, + Rcpp::Named("loss") = loss_best, + Rcpp::Named("h") = h, + Rcpp::Named("V") = V_best, + Rcpp::Named("B") = B + ); +} + +/*** R + +source("CVE/R/datasets.R") +ds <- dataset() + +print(system.time( + cve.res <- cve_cpp( + X = ds$X, + Y = ds$Y, + k = ncol(ds$B), + nObs = sqrt(nrow(ds$X)) + ) +)) + +pdf('plots/cve_V1_history.pdf') +H <- cve.res$history +H_i <- H[H[, 1] > 0, 1] +plot(1:length(H_i), H_i, + main = "History cve_V1", + xlab = "Iterations i", + ylab = expression(loss == L[n](V^{(i)})), + xlim = c(1, nrow(H)), + ylim = c(0, max(H)), + type = "l" +) +for (i in 2:ncol(H)) { + H_i <- H[H[, i] > 0, i] + lines(1:length(H_i), H_i) +} +x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) +y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) +points(x.ends, y.ends) + +*/ diff --git a/cve_V2.cpp b/cve_V2.cpp new file mode 100644 index 0000000..204eb3a --- /dev/null +++ b/cve_V2.cpp @@ -0,0 +1,365 @@ +// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*- +// +// Usage: +// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')" +// + +// only `RcppArmadillo.h` which includes `Rcpp.h` +#include + +// through the depends attribute `Rcpp` is tolled to create +// hooks for `RcppArmadillo` needed by the build process. +// +// [[Rcpp::depends(RcppArmadillo)]] + +// required for `R::qchisq()` used in `estimateBandwidth()` +#include + +//' Estimated bandwidth for CVE. +//' +//' Estimates a propper bandwidth \code{h} according +//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{% +//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +//' +//' @param X data matrix of dimension (n x p) with n data points X_i of dimension +//' q. Therefor each row represents a datapoint of dimension p. +//' @param k Guess for rank(B). +//' @param nObs Ether numeric of a function. If specified as numeric value +//' its used in the computation of the bandwidth directly. If its a function +//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +//' supplied at all is to use \code{nObs <- nrow(x)^0.5}. +//' +//' @seealso [qchisq()] +//' +//' @export +// [[Rcpp::export]] +double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) { + using namespace arma; + + uword n = X.n_rows; // nr samples + uword p = X.n_cols; // dimension of rand. var. `X` + + // column mean + mat M = mean(X); + // center `X` + mat C = X.each_row() - M; + // trace of covariance matrix, `traceSigma = Tr(C' C)` + double traceSigma = accu(C % C); + // compute estimated bandwidth + double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast(k), 1, 0); + + return 2.0 * qchi2 * traceSigma / (p * n); +} + +//' Random element from Stiefel Manifold `S(p, q)`. +//' +//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold. +//' This is done by taking the Q-component of the QR decomposition +//' from a `(p, q)` Matrix with independent standart normal entries. +//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}. +//' +//' @param p Row dimension +//' @param q Column dimension +//' +//' @returns Matrix of dim `(p, q)`. +//' +//' @seealso +//' +//' @export +// [[Rcpp::export]] +arma::mat rStiefel(arma::uword p, arma::uword q) { + arma::mat Q, R; + arma::qr_econ(Q, R, arma::randn(p, q)); + return Q; +} + +double gradient(const arma::mat& X, + const arma::mat& X_diff, + const arma::mat& Y, + const arma::mat& Y_rep, + const arma::mat& V, + const double h, + arma::mat* G = 0 +) { + using namespace arma; + + uword n = X.n_rows; + uword p = X.n_cols; + + // orthogonal projection matrix `Q = I - VV'` for dist computation + mat Q = -(V * V.t()); + Q.diag() += 1; + // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` + vec D_vec = sum(square(X_diff * Q), 1); + mat D = reshape(D_vec, n, n); + // calc weights as `W` with `W(i, j) = w_i(V, X_j)` + mat W = exp(D / (-2.0 * h)); + // column-wise normalization via 1-norm + W = normalise(W, 1); + vec W_vec = vectorise(W); + // weighted `Y` means (first and second order) + vec y1 = W.t() * Y; + vec y2 = W.t() * square(Y); + // loss for each `X_i`, meaning `L(i) = L_n(V, X_i)` + vec L = y2 - square(y1); + // `loss = L_n(V)` + double loss = mean(L); + // check if gradient as output variable is set + if (G != 0) { + // `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V` + vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; + mat X_diff_scale = X_diff.each_col() % scale; + (*G) = X_diff_scale.t() * X_diff * V; + (*G) *= -2.0 / (h * h * n); + } + + return loss; +} + +//' Stiefel Optimization with curvilinear linesearch. +//' +//' TODO: finish doc. comment +//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces +//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{% +//' span(B) = orth(span(B))}. +//' The curvilinear linesearch uses Armijo-Wolfe conditions: +// \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'} +//' \deqn{L(V(tau))' < rho_2 * L(V(0))'} +//' +//' @param X data points +//' @param Y response +//' @param k assumed \eqn{rank(B)} +//' @param nObs parameter for bandwidth estimation, typical value +//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. +//' @param tau Initial step size +//' @param tol Tolerance for update error used for stopping criterion +//' @param maxIter Upper bound of optimization iterations +//' +//' @return List containing the bandwidth \code{h}, optimization objective \code{V} +//' and the matrix \code{B} estimated for the model as a orthogonal basis of the +//' orthogonal space spaned by \code{V}. +//' +//' @rdname optStiefel +double optStiefel( + const arma::mat& X, + const arma::vec& Y, + const int k, + const double h, + const double tauInitial, + const double rho1, + const double rho2, + const double tol, + const int maxIter, + const int maxLineSeachIter, + arma::mat& V, // out + arma::vec& history // out +) { + using namespace arma; + + // get dimensions + const uword n = X.n_rows; // nr samples + const uword p = X.n_cols; // dim of random variable `X` + const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B} + + // all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j` + mat X_diff(n * n, p); + for (uword i = 0, k = 0; i < n; ++i) { + for (uword j = 0; j < n; ++j) { + X_diff.row(k++) = X.row(i) - X.row(j); + } + } + const vec Y_rep = repmat(Y, n, 1); + const mat I_p = eye(p, p); + const mat I_2q = eye(2 * q, 2 * q); + + // initial start value for `V` + V = rStiefel(p, q); + + // first gradient initialization + mat G; + double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G); + + // set first `loss` in history + history(0) = loss; + + // main curvilinear optimization loop + double error = datum::inf; + int iter = 0; + while (iter++ < maxIter && error > tol) { + // helper matrices `lU` (linesearch U), `lV` (linesearch V) + // as describet in [Wen, Yin] Lemma 4. + mat lU = join_rows(G, V); // linesearch "U" + mat lV = join_rows(V, -1.0 * G); // linesearch "V" + // `A = G V' - V G'` + mat A = lU * lV.t(); + + // set initial step size for curvilinear line search + double tau = tauInitial, lower = 0., upper = datum::inf; + + // check if `tau` is valid for inverting + + // set line search internal gradient and loss to cycle for next iteration + mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)` + mat G_tau; // gradient of `V` at `V(tau) = V_tau` + double loss_tau; // loss (objective) at position `V(tau)` + int lsIter = 0; // linesearch iter + // start line search + do { + mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t(); + // next step `V` + V_tau = V - tau * (lU * (HV * V)); + + double LprimeV = -trace(G.t() * A * V); + + mat lB = I_p - (tau / 2.) * lU * HV; + + loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau); + + double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau)); + + // Armijo condition + if (loss_tau > loss + (rho1 * tau * LprimeV)) { + upper = tau; + tau = (lower + upper) / 2.; + // Wolfe condition + } else if (LprimeV_tau < rho2 * LprimeV) { + lower = tau; + if (upper == datum::inf) { + tau = 2. * lower; + } else { + tau = (lower + upper) / 2.; + } + } else { + break; + } + } while (++lsIter < maxLineSeachIter); + + // compute error (break condition) + // Note: `error` is the decrease of the objective `L_n(V)` and not the + // norm of the gradient as proposed in [Wen, Yin] Algorithm 1. + error = loss - loss_tau; + + // cycle `V`, `G` and `loss` for next iteration + V = V_tau; + loss = loss_tau; + G = G_tau; + + // store final `loss` + history(iter) = loss; + + } + + return loss; +} + + +//' Conditional Variance Estimation (CVE) method. +//' +//' This version uses a curvilinear linesearch for the stiefel optimization. +//' +//' @param X data points +//' @param Y response +//' @param k assumed \eqn{rank(B)} +//' @param nObs parameter for bandwidth estimation, typical value +//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8]. +//' @param tau Initial step size (default 1) +//' @param tol Tolerance for update error used for stopping criterion (default 1e-5) +//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10) +//' @param maxIter Upper bound of optimization iterations (default 50) +//' @param attempts Number of tryes with new random optimization starting points (default 10) +//' +//' @return List containing the bandwidth \code{h}, optimization objective \code{V} +//' and the matrix \code{B} estimated for the model as a orthogonal basis of the +//' orthogonal space spaned by \code{V}. +//' +//' @rdname cve_cpp_V2 +//' @export +// [[Rcpp::export]] +Rcpp::List cve_cpp( + const arma::mat& X, + const arma::vec& Y, + const int k, + const double nObs, + const double tauInitial = 1., + const double rho1 = 0.05, + const double rho2 = 0.95, + const double tol = 1e-6, + const int maxIter = 50, + const int maxLineSeachIter = 10, + const int attempts = 10 +) { + using namespace arma; + + // tracker of current best results + mat V_best; + double loss_best = datum::inf; + // estimate bandwidth + double h = estimateBandwidth(X, k, nObs); + + // loss history for each optimization attempt + // each column contaions the iteration history for the loss + mat history = mat(maxIter + 1, attempts); + + // multiple stiefel optimization attempts + for (int i = 0; i < attempts; ++i) { + // declare output variables + mat V; // estimated `V` space + vec hist = vec(history.n_rows, fill::zeros); // optimization history + double loss = optStiefel(X, Y, k, h, + tauInitial, rho1, rho2, tol, maxIter, maxLineSeachIter, V, hist + ); + if (loss < loss_best) { + loss_best = loss; + V_best = V; + } + // write history to history collection + history.col(i) = hist; + } + + // get `B` as kernal of `V.t()` + mat B = null(V_best.t()); + + return Rcpp::List::create( + Rcpp::Named("history") = history, + Rcpp::Named("loss") = loss_best, + Rcpp::Named("h") = h, + Rcpp::Named("V") = V_best, + Rcpp::Named("B") = B + ); +} + +/*** R + +source("CVE/R/datasets.R") +ds <- dataset() + +print(system.time( + cve.res <- cve_cpp( + X = ds$X, + Y = ds$Y, + k = ncol(ds$B), + nObs = sqrt(nrow(ds$X)) + ) +)) + +pdf('plots/cve_V2_history.pdf') +H <- cve.res$history +H_i <- H[H[, 1] > 0, 1] +plot(1:length(H_i), H_i, + main = "History cve_V2", + xlab = "Iterations i", + ylab = expression(loss == L[n](V^{(i)})), + xlim = c(1, nrow(H)), + ylim = c(0, max(H)), + type = "l" +) +for (i in 2:ncol(H)) { + H_i <- H[H[, i] > 0, i] + lines(1:length(H_i), H_i) +} +x.ends <- apply(H, 2, function(h) { length(h[h > 0]) }) +y.ends <- apply(H, 2, function(h) { min(h[h > 0]) }) +points(x.ends, y.ends) + +*/ diff --git a/runtime_tests_grad.cpp b/runtime_tests_grad.cpp new file mode 100644 index 0000000..d13ea3d --- /dev/null +++ b/runtime_tests_grad.cpp @@ -0,0 +1,368 @@ +// +// Usage (bash): +// ~$ R -e "library(Rcpp); sourceCpp('runtime_tests_grad.cpp')" +// +// Usage (R): +// > library(Rcpp) +// > sourceCpp('runtime_tests_grad.cpp') +// + +// [[Rcpp::depends(RcppArmadillo)]] +#include +#include + +// [[Rcpp::export]] +arma::mat arma_grad(const arma::mat& X, + const arma::mat& X_diff, + const arma::vec& Y, + const arma::vec& Y_rep, + const arma::mat& V, + const double h) { + using namespace arma; + + uword n = X.n_rows; + uword p = X.n_cols; + + // orthogonal projection matrix `Q = I - VV'` for dist computation + mat Q = -(V * V.t()); + Q.diag() += 1; + // calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)` + vec D_vec = sum(square(X_diff * Q), 1); + mat D = reshape(D_vec, n, n); + // calc weights as `W` with `W(i, j) = w_i(V, X_j)` + mat W = exp(D / (-2.0 * h)); + W = normalise(W, 1); // colomn-wise, 1-norm + vec W_vec = vectorise(W); + // centered weighted `Y` means + vec y1 = W.t() * Y; + vec y2 = W.t() * square(Y); + // loss for each X_i, meaning `L(i) = L_n(V, X_i)` + vec L = y2 - square(y1); + // "global" loss + double loss = mean(L); + // `G = \nabla_V L_n(V)` a.k.a. gradient of `L` with respect to `V` + vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec; + mat X_diff_scale = X_diff.each_col() % scale; + mat G = X_diff_scale.t() * X_diff * V; + G *= -2.0 / (h * h * n); + + return G; +} + +// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` +// [[Rcpp::export]] +arma::mat grad(const arma::mat& X, + const arma::vec& Y, + const arma::mat& V, + const double h +) { + using namespace arma; + + // get dimensions + const uword n = X.n_cols; + const uword p = X.n_rows; + const uword q = V.n_cols; + + // compute orthogonal projection + mat Q = -(V * V.t()); + Q.diag() += 1.0; + + // distance matrix `D(i, j) = d_i(V, X_j)` + mat D(n, n, fill::zeros); + // weights matrix `W(i, j) = w_i(V, X_j)` + mat W(n, n, fill::ones); // exp(0) = 1 + + double mvm = 0.0; // Matrix Vector Mult. + double sos = 0.0; // Sum Of Squares + // double wcn = 0.0; // Weights Column Norm + for (uword j = 0; j + 1 < n; ++j) { + for (uword i = j + 1; i < n; ++i) { + sos = 0.0; + for (uword k = 0; k < p; ++k) { + mvm = 0.0; + for (uword l = 0; l < p; ++l) { + mvm += Q(k, l) * (X(l, i) - X(l, j)); + } + sos += mvm * mvm; + } + D(i, j) = D(j, i) = sos; + W(i, j) = W(j, i) = std::exp(sos / (-2. * h)); + } + } + + // column normalization of weights `W` + double col_sum; + for (uword j = 0; j < n; ++j) { + col_sum = 0.0; + for (uword i = 0; i < n; ++i) { + col_sum += W(i, j); + } + for (uword i = 0; i < n; ++i) { + W(i, j) /= col_sum; + } + } + + // weighted first, secend order responce means `y1`, `y2` + // and component wise Loss `L(i) = L_n(V, X_i)` + vec y1(n); + vec y2(n); + vec L(n); + double tmp; + double loss = 0.0; + for (uword i = 0; i < n; ++i) { + mvm = 0.0; // Matrix Vector Mult. + sos = 0.0; // Sum Of Squared (weighted) + for (uword k = 0; k < n; ++k) { + mvm += (tmp = W(k, i) * Y(k)); + sos += tmp * Y(k); // W(k, i) * Y(k) * Y(k) + } + y1(i) = mvm; + y2(i) = sos; + loss += (L(i) = sos - (mvm * mvm)); // L_n(V, X_i) = y2(i) - y1(i)^2 + } + loss /= n; + + mat S(n, n); + for (uword k = 0; k < n; ++k) { + for (uword l = 0; l < n; ++l) { + tmp = Y(k) - y1(l); + S(k, l) = (L(l) - (tmp * tmp)) * W(k, l) * D(k, l); + } + } + + // gradient + mat G(p, q); + double factor = -2. / (n * h * h); + double gij; + for (uword j = 0; j < q; ++j) { + for (uword i = 0; i < p; ++i) { + gij = 0.0; + for (uword k = 0; k < n; ++k) { + for (uword l = 0; l < n; ++l) { + mvm = 0.0; + for (uword m = 0; m < p; ++m) { + mvm += (X(m, l) - X(m, k)) * V(m, j); + } + // gij += (S(k, l) + S(l, k)) * (X(i, l) - X(i, k)); + gij += S(k, l) * (X(i, l) - X(i, k)) * mvm; + } + } + G(i, j) = factor * gij; + } + } + + return G; +} + +// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)` +// [[Rcpp::export]] +arma::mat grad_p(const arma::mat& X_ref, + const arma::vec& Y_ref, + const arma::mat& V_ref, + const double h +) { + using arma::uword; + + // get dimensions + const uword n = X_ref.n_cols; + const uword p = X_ref.n_rows; + const uword q = V_ref.n_cols; + + const double* X = X_ref.memptr(); + const double* Y = Y_ref.memptr(); + const double* V = V_ref.memptr(); + + // allocate memory for entire algorithm + // Q (p,p) D+W+S (n,n) y1+L (n) G (p,q) + uword memsize = (p * p) + (3 * n * n) + (2 * n) + (p * q); + double* mem = static_cast(malloc(sizeof(double) * memsize)); + + // assign pointer to memory associated memory area + double* Q = mem; + double* D = Q + (p * p); + double* W = D + (n * n); + double* S = W + (n * n); + double* y1 = S + (n * n); + double* L = y1 + n; + double* G = L + n; + + // compute orthogonal projection + double sum; + // compute orthogonal projection `Q = I_p - V V'` + for (uword j = 0; j < p; ++j) { + for (uword i = j; i < p; ++i) { + sum = 0.0; + for (uword k = 0; k < q; ++k) { + sum += V[k * p + i] * V[k * p + j]; + } + if (i == j) { + Q[j * p + i] = 1.0 - sum; + } else { + Q[j * p + i] = Q[i * p + j] = -sum; + } + } + } + + // set `diag(D) = 0` and `diag(W) = 1`. + for (uword i = 0; i < n * n; i += n + 1) { + D[i] = 0.0; + W[i] = 1.0; + } + // components (using symmetrie) of `D` and `W` (except `diag`) + double mvm = 0.0; // Matrix Vector Mult. + for (uword j = 0; j + 1 < n; ++j) { + for (uword i = j + 1; i < n; ++i) { + sum = 0.0; + for (uword k = 0; k < p; ++k) { + mvm = 0.0; + for (uword l = 0; l < p; ++l) { + mvm += Q[l * p + k] * (X[i * p + l] - X[j * p + l]); + } + sum += mvm * mvm; + } + D[j * n + i] = D[i * n + j] = sum; + W[j * n + i] = W[i * n + j] = std::exp(sum / (-2. * h)); + } + } + + // column normalization of weights `W` + for (uword j = 0; j < n; ++j) { + sum = 0.0; + for (uword i = 0; i < n; ++i) { + sum += W[j * n + i]; + } + for (uword i = 0; i < n; ++i) { + W[j * n + i] /= sum; + } + } + // weighted first, secend order responce means `y1`, `y2` + // and component wise Loss `L(i) = L_n(V, X_i)` + double tmp; + double loss = 0.0; + for (uword i = 0; i < n; ++i) { + mvm = 0.0; // Matrix Vector Mult. + sum = 0.0; // Sum Of (weighted) Squares + for (uword k = 0; k < n; ++k) { + mvm += (tmp = W[i * n + k] * Y[k]); + sum += tmp * Y[k]; + } + y1[i] = mvm; + loss += (L[i] = sum - (mvm * mvm)); + } + loss /= n; + + // scaling for gradient summation + for (uword k = 0; k < n; ++k) { + for (uword l = 0; l < n; ++l) { + tmp = Y[k] - y1[l]; + S[l * n + k] = (L[l] - (tmp * tmp)) * W[l * n + k] * D[l * n + k]; + } + } + + // gradient + double factor = -2. / (n * h * h); + for (uword j = 0; j < q; ++j) { + for (uword i = 0; i < p; ++i) { + sum = 0.0; + for (uword k = 0; k < n; ++k) { + for (uword l = 0; l < n; ++l) { + mvm = 0.0; + for (uword m = 0; m < p; ++m) { + mvm += (X[l * p + m] - X[k * p + m]) * V[j * p + m]; + } + sum += S[l * n + k] * (X[l * p + i] - X[k * p + i]) * mvm; + } + } + G[j * p + i] = factor * sum; + } + } + + // construct 'Armadillo' matrix from `G`s memory area + arma::mat Grad(G, p, q); + + // free entire allocated memory block + free(mem); + + return Grad; +} + + + +/*** R + +suppressMessages(library(microbenchmark)) + +cat("Start timing:\n") +time.start <- Sys.time() + +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +## compare runtimes +n <- 200L +p <- 12L +q <- p - 2L +X <- matrix(rnorm(n * p), n, p) +Xt <- t(X) +X_diff <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) +Y <- rnorm(n) +Y_rep <- kronecker(rep(1, n), Y) # repmat(Y, n, 1) +h <- 1. / 4.; +V <- rStiefl(p, q) + +# A <- arma_grad(X, X_diff, Y, Y_rep, V, h) +# G1 <- grad(Xt, Y, V, h) +# G2 <- grad_p(Xt, Y, V, h) +# +# print(round(A[1:6, 1:6], 3)) +# print(round(G1[1:6, 1:6], 3)) +# print(round(G2[1:6, 1:6], 3)) +# print(round(abs(A - G1), 9)) +# print(round(abs(A - G2), 9)) +# +# q() + + +comp <- function (A, B, tol = sqrt(.Machine$double.eps)) { + max(abs(A - B)) < tol +} +comp.all <- function (res) { + if (length(res) < 2) { + return(TRUE) + } + res.one = res[[1]] + for (i in 2:length(res)) { + if (!comp(res.one, res[[i]])) { + return(FALSE) + } + } + return(TRUE) +} +counter <- 0 +setup.tests <- function () { + if ((counter %% 3) == 0) { + X <<- matrix(rnorm(n * p), n, p) + Xt <<- t(X) + X_diff <<- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) + Y <<- rnorm(n) + Y_rep <<- kronecker(rep(1, n), Y) # arma::repmat(Y, n, 1) + h <<- 1. / 4.; + V <<- rStiefl(p, q) + } + counter <<- counter + 1 +} +(mbm <- microbenchmark( + arma = arma_grad(X, X_diff, Y, Y_rep, V, h), + grad = grad(Xt, Y, V, h), + grad_p = grad_p(Xt, Y, V, h), + check = comp.all, + setup = setup.tests(), + times = 100L +)) + +cat("Total time:", format(Sys.time() - time.start), '\n') + +boxplot(mbm, las = 2, xlab = NULL) + +*/ diff --git a/samplePackage/DESCRIPTION b/samplePackage/DESCRIPTION deleted file mode 100644 index 9d8025a..0000000 --- a/samplePackage/DESCRIPTION +++ /dev/null @@ -1,14 +0,0 @@ -Package: samplePackage -Title: A Sample for creating R Packages -Version: 0.0.0.0001 -Authors@R: - person(given = "First", - family = "Last", - role = c("aut", "cre"), - email = "first.last@example.com", - comment = structure("YOUR-ORCID-ID", .Names = "ORCID")) -Description: What the package does (one paragraph). -License: What license it uses -Encoding: UTF-8 -LazyData: true -RoxygenNote: 6.1.1 diff --git a/samplePackage/NAMESPACE b/samplePackage/NAMESPACE deleted file mode 100644 index 0131a61..0000000 --- a/samplePackage/NAMESPACE +++ /dev/null @@ -1,5 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -export(area.circle) -exportClasses(circleS4) -exportClasses(rectangleS4) diff --git a/samplePackage/R/S3.R b/samplePackage/R/S3.R deleted file mode 100644 index 118d12d..0000000 --- a/samplePackage/R/S3.R +++ /dev/null @@ -1,40 +0,0 @@ - -# Constructors -circle <- function(r) structure(list(r=r), class="circle") -rectangle <- function(a, b) { - # equivalent to > structure(list(a=a, b=b), class="rectangle") - x <- list(a=a, b=b) - class(x) <- "rectangle" - x # return -} - -# generics (custom) -area <- function(shape) UseMethod("area") -diam <- function(shape) UseMethod("diam") - -# methods (implementation) -print.circle <- function(circle, ...) with(circle, cat("< circle r =", r, ">\n")) -#' Computes area of a circle object -#' -#' @param circle Instance of a circle. -#' -#' @returns Area of the given \code{circle}. -#' @export -area.circle <- function(circle) with(circle, pi * r^2) -diam.circle <- function(circle) with(circle, 2 * r) - -print.rectangle <- function(rect, ...) { - cat("\n", set="") -} -area.rectangle <- function(rect) rect$a * rect$b -diam.rectangle <- function(rect) sqrt(rect$a^2 + rect$b^2) - -# usage -circ <- circle(2) -rect <- rectangle(a = 1, b = 2) - -print(area(circ)) -print(diam(rect)) - -print(circ) -print(rect) diff --git a/samplePackage/R/S4.R b/samplePackage/R/S4.R deleted file mode 100644 index e2c3607..0000000 --- a/samplePackage/R/S4.R +++ /dev/null @@ -1,58 +0,0 @@ -library(methods) - -## Class definitions - -#' Represents a circle shape -#' -#' @param r radius of the circle -#' -#' @returns S4 object -#' @export -circleS4 <- setClass("circleS4", slots = list(r = "numeric")) - -#' Represents a rectangle shape -#' -#' @param w width of the rectangle -#' @param h height of the rectangle -#' -#' @returns S4 object -#' @export -rectangleS4 <- setClass("rectangleS4", slots = list(w = "numeric", h = "numeric")) - - -## setting class methods -# redefine generic methods -setMethod("show", "circleS4", function(object) { - cat("< circle r =", object@r, ">\n") -}) -setMethod("show", signature="rectangleS4", function(object) { - cat("\n", set="") -}) - -## define new generics for class assignement -# create a method to assign the value of a coordinate -setGeneric("area", def = function(object) standardGeneric("area") ) -setGeneric(name = "diam", def = function(object) { - standardGeneric("diam") -}) - -## assigne (custom) generics implementation to classes -setMethod("area", "circleS4", function(object) pi * object@r^2) -setMethod("diam", "circleS4", function(object) 2 * object@r) - -setMethod("area", signature=list(object = "rectangleS4"), function(object) { - object@w * object@h -}) -setMethod("diam", signature=list(object = "rectangleS4"), function(object) { - sqrt(rect@w^2 + rect@h^2) -}) - -# usage -circ <- circleS4(r = 2) -rect <- rectangleS4(w = 1, h = 2) - -print(area(circ)) -print(diam(rect)) - -print(circ) -print(rect) diff --git a/validate.R b/validate.R new file mode 100644 index 0000000..3d7c8d8 --- /dev/null +++ b/validate.R @@ -0,0 +1,104 @@ +# +# Usage: +# ~$ Rscript validate.R + +# load MAVE package for comparison +library(MAVE) +# load (and compile) cve and dataset source +library(Rcpp) +cat("Compiling source 'cve_V1.cpp'\n") +Rcpp::sourceCpp('cve_V1.cpp', embeddedR = FALSE) +# load dataset sampler +source('CVE/R/datasets.R') + +# set default nr of simulations +nr.sim <- 25 + +#' Orthogonal projection to sub-space spanned by `B` +#' +#' @param B Matrix +#' @return Orthogonal Projection Matrix +proj <- function(B) { + B %*% solve(t(B) %*% B) %*% t(B) +} + +#' Compute nObs given dataset dimension \code{n}. +#' +#' @param n Number of samples +#' @return Numeric estimate of \code{nObs} +nObs <- function (n) { n^0.5 } + +# dataset names +dataset.names <- c("M1", "M2", "M3", "M4", "M5") # M4 not implemented jet + +## prepare "logging" +# result error, time, ... data.frame's +error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names)) +time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names)) +# convert to data.frames +error <- as.data.frame(error) +time <- as.data.frame(time) +# set names +names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0) +names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0) + +# get current time +start.time <- Sys.time() +## main comparison loop (iterate `nr.sim` times for each dataset) +for (i in seq_along(dataset.names)) { + for (j in 1:nr.sim) { + name <- dataset.names[i] + # reporting progress + cat("\rRunning Test (", name, j , "):", + (i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim, + " - Time since start:", format(Sys.time() - start.time), "\033[K") + # create new dataset + ds <- dataset(name) + k <- ncol(ds$B) # real dim + # call CVE + cve.time <- system.time( + cve.res <- cve_cpp(ds$X, ds$Y, + k = k, + nObs = nObs(nrow(ds$X)), + verbose = FALSE) + ) + # call MAVE + mave.time <- system.time( + mave.res <- mave(Y ~ ., + data = data.frame(X = ds$X, Y = ds$Y), + method = "meanMAVE") + ) + # compute real and approximated sub-space projections + P <- proj(ds$B) # real + P.cve <- proj(cve.res$B) + P.mave <- proj(mave.res$dir[[k]]) + # compute (and store) errors + error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k) + error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k) + # store run-times + time[j, paste0("CVE.", name)] <- cve.time["elapsed"] + time[j, paste0("MAVE.", name)] <- mave.time["elapsed"] + } +} + +cat("\n\n## Time [sec] Means:\n") +print(colMeans(time)) +cat("\n## Error Means:\n") +print(colMeans(error)) + +len <- length(dataset.names) +pdf("plots/Rplots_validate.pdf") +boxplot(as.matrix(error), + main = paste0("Error (nr.sim = ", nr.sim, ")"), + ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)), + las = 2, + at = c(1:len, 1:len + len + 1) +) +boxplot(as.matrix(time), + main = paste0("Time (nr.sim = ", nr.sim, ")"), + ylab = "time [sec]", + las = 2, + at = c(1:len, 1:len + len + 1) +) +cat("Plot saved to 'plots/Rplots_validate.pdf'\n") +suppressMessages(dev.off())