init
This commit is contained in:
parent
0a7f13920e
commit
4bc9ca2f58
|
@ -0,0 +1,13 @@
|
||||||
|
Package: CVE
|
||||||
|
Type: Package
|
||||||
|
Title: Conditional Variance Estimator for Sufficient Dimension Reduction
|
||||||
|
Version: 1.0
|
||||||
|
Date: 2019-07-29
|
||||||
|
Author: Loki
|
||||||
|
Maintainer: Loki <loki@no.mail>
|
||||||
|
Description: More about what it does (maybe more than one line)
|
||||||
|
License: What license is it under?
|
||||||
|
Imports: Rcpp (>= 1.0.2)
|
||||||
|
LinkingTo: Rcpp, RcppArmadillo
|
||||||
|
Encoding: UTF-8
|
||||||
|
RoxygenNote: 6.1.1
|
|
@ -0,0 +1,16 @@
|
||||||
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
export(cve)
|
||||||
|
export(cve_cpp)
|
||||||
|
export(dataset)
|
||||||
|
export(estimate.bandwidth)
|
||||||
|
export(index_test)
|
||||||
|
export(kron_test)
|
||||||
|
export(rStiefel)
|
||||||
|
export(test1)
|
||||||
|
export(test2)
|
||||||
|
export(test3)
|
||||||
|
export(test4)
|
||||||
|
import(Rcpp)
|
||||||
|
importFrom(Rcpp,evalCpp)
|
||||||
|
useDynLib(CVE)
|
|
@ -0,0 +1,107 @@
|
||||||
|
#' Generates test datasets.
|
||||||
|
#'
|
||||||
|
#' Provides sample datasets. There are 5 different datasets named
|
||||||
|
#' M1, M2, M3, M4 and M5 describet in the paper references below.
|
||||||
|
#' The general model is given by:
|
||||||
|
#' \deqn{Y ~ g(B'X) + \epsilon}
|
||||||
|
#'
|
||||||
|
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
||||||
|
#' @param n nr samples
|
||||||
|
#' @param p Dim. of random variable \code{X}.
|
||||||
|
#' @param p.mix Only for \code{"M4"}, see: below.
|
||||||
|
#' @param lambda Only for \code{"M4"}, see: below.
|
||||||
|
#'
|
||||||
|
#' @return List with elements
|
||||||
|
#' \item{X}{data}
|
||||||
|
#' \item{Y}{response}
|
||||||
|
#' \item{B}{Used dim-reduction matrix}
|
||||||
|
#' \item{name}{Name of the dataset (name parameter)}
|
||||||
|
#'
|
||||||
|
#' @section M1:
|
||||||
|
#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace
|
||||||
|
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
#' The link function \eqn{g} is given as
|
||||||
|
#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon}
|
||||||
|
#' @section M2:
|
||||||
|
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
||||||
|
#' The link function \eqn{g} is given as
|
||||||
|
#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon}
|
||||||
|
#' @section M3:
|
||||||
|
#' TODO:
|
||||||
|
#' @section M4:
|
||||||
|
#' TODO:
|
||||||
|
#' @section M5:
|
||||||
|
#' TODO:
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
#'
|
||||||
|
#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019
|
||||||
|
dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
|
||||||
|
# validate parameters
|
||||||
|
stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5"))
|
||||||
|
|
||||||
|
# set default values if not supplied
|
||||||
|
if (missing(n)) {
|
||||||
|
n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42
|
||||||
|
}
|
||||||
|
if (missing(B)) {
|
||||||
|
p <- 12
|
||||||
|
if (name == "M1") {
|
||||||
|
B <- cbind(
|
||||||
|
c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0),
|
||||||
|
c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0)
|
||||||
|
) / sqrt(6)
|
||||||
|
} else if (name == "M2") {
|
||||||
|
B <- cbind(
|
||||||
|
c(c(1, 0), rep(0, 10)),
|
||||||
|
c(c(0, 1), rep(0, 10))
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p <- dim(B)[1]
|
||||||
|
# validate col. nr to match dataset `k = dim(B)[2]`
|
||||||
|
stopifnot(
|
||||||
|
name %in% c("M1", "M2") && dim(B)[2] == 2,
|
||||||
|
name %in% c("M3", "M4", "M5") && dim(B)[2] == 1
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# set link function `g` for model `Y ~ g(B'X) + epsilon`
|
||||||
|
if (name == "M1") {
|
||||||
|
g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) }
|
||||||
|
} else if (name == "M2") {
|
||||||
|
g <- function(BX) { BX[1] * BX[2]^2 }
|
||||||
|
} else if (name %in% c("M3", "M4")) {
|
||||||
|
g <- function(BX) { cos(BX[1]) }
|
||||||
|
} else { # name == "M5"
|
||||||
|
g <- function(BX) { 2 * log(abs(BX[1]) + 1) }
|
||||||
|
}
|
||||||
|
|
||||||
|
# compute X
|
||||||
|
if (name != "M4") {
|
||||||
|
# compute root of the covariance matrix according the dataset
|
||||||
|
if (name %in% c("M1", "M3")) {
|
||||||
|
# Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with
|
||||||
|
# `\Sigma_{i, j} = 0.5^{|i - j|}`.
|
||||||
|
Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p)
|
||||||
|
# decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X`
|
||||||
|
Sigma.root <- chol(Sigma)
|
||||||
|
} else { # name %in% c("M2", "M5")
|
||||||
|
Sigma.root <- diag(rep(1, p)) # d-dim identity
|
||||||
|
}
|
||||||
|
# data `X` as multivariate random normal variable with
|
||||||
|
# variance matrix `Sigma`.
|
||||||
|
X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root
|
||||||
|
} else { # name == "M4"
|
||||||
|
X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)`
|
||||||
|
Y <- apply(X, 1, function(X_i) {
|
||||||
|
g(t(B) %*% X_i) + rnorm(1, 0, 0.5)
|
||||||
|
})
|
||||||
|
|
||||||
|
return(list(X = X, Y = Y, B = B, name = name))
|
||||||
|
}
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
## With R 3.1.0 or later, you can uncomment the following line to tell R to
|
||||||
|
## enable compilation with C++11 (where available)
|
||||||
|
##
|
||||||
|
## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
|
||||||
|
## availability of the package we do not yet enforce this here. It is however
|
||||||
|
## recommended for client packages to set it.
|
||||||
|
##
|
||||||
|
## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
|
||||||
|
## support within Armadillo prefers / requires it
|
||||||
|
CXX_STD = CXX11
|
||||||
|
|
||||||
|
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
|
||||||
|
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
|
|
@ -0,0 +1,14 @@
|
||||||
|
|
||||||
|
## With R 3.1.0 or later, you can uncomment the following line to tell R to
|
||||||
|
## enable compilation with C++11 (where available)
|
||||||
|
##
|
||||||
|
## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider
|
||||||
|
## availability of the package we do not yet enforce this here. It is however
|
||||||
|
## recommended for client packages to set it.
|
||||||
|
##
|
||||||
|
## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP
|
||||||
|
## support within Armadillo prefers / requires it
|
||||||
|
CXX_STD = CXX11
|
||||||
|
|
||||||
|
PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
|
||||||
|
PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
|
|
@ -0,0 +1,66 @@
|
||||||
|
#initialize model parameterss
|
||||||
|
m<-100 #number of replications in simulation
|
||||||
|
dim<-12 #dimension of random variable X
|
||||||
|
truedim<-2 #dimension of B=b
|
||||||
|
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||||
|
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||||
|
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
|
||||||
|
b<-cbind(b1,b2)
|
||||||
|
P<-b%*%t(b)
|
||||||
|
sigma=0.5 #error standard deviation
|
||||||
|
N<-200 #sample size
|
||||||
|
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||||
|
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
|
||||||
|
##initailaize true covariancematrix of X
|
||||||
|
Sig<-mat.or.vec(dim,dim)
|
||||||
|
for (i in 1:dim){
|
||||||
|
for (j in 1:dim){
|
||||||
|
Sig[i,j]<-sigma^abs(i-j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Sroot<-chol(Sig)
|
||||||
|
|
||||||
|
M1_JASA<-mat.or.vec(m,8)
|
||||||
|
colnames(M1_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||||
|
#link function for M1
|
||||||
|
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
|
||||||
|
for (i in 1:m){
|
||||||
|
#generate dat according to M1
|
||||||
|
dat<-creat_sample_nor_nonstand(b,N,fM1,t(Sroot),sigma)
|
||||||
|
#est sample covariance matrix
|
||||||
|
Sig_est<-est_varmat(dat[,-1])
|
||||||
|
#est trace of sample covariance matrix
|
||||||
|
tr<-var_tr(Sig_est)
|
||||||
|
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||||
|
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
#calculate orthogonal complement of Vhat_k
|
||||||
|
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||||
|
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||||
|
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||||
|
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||||
|
# calculate distance between true B and estimated B
|
||||||
|
M1_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||||
|
M1_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||||
|
M1_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||||
|
#meanMAVE
|
||||||
|
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||||
|
M1_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||||
|
#csMAVE
|
||||||
|
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||||
|
M1_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||||
|
#phd
|
||||||
|
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||||
|
M1_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||||
|
#sir
|
||||||
|
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||||
|
M1_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||||
|
#save
|
||||||
|
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||||
|
M1_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||||
|
print(paste(i,paste('/',m)))
|
||||||
|
}
|
||||||
|
boxplot(M1_JASA[,]/sqrt(2*truedim),names=colnames(M1_JASA),ylab='err',main='M1')
|
||||||
|
summary(M1_JASA[,])
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
#initialize model parameterss
|
||||||
|
m<-100 #number of replications in simulation
|
||||||
|
dim<-12 #dimension of random variable X
|
||||||
|
truedim<-2 #dimension of B=b
|
||||||
|
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||||
|
b1=c(1,rep(0,dim-1))
|
||||||
|
b2=c(0,1,rep(0,dim-2))
|
||||||
|
b<-cbind(b1,b2) #B
|
||||||
|
P<-b%*%t(b)
|
||||||
|
sigma=0.5 #error standard deviation
|
||||||
|
N<-200 #sample size
|
||||||
|
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||||
|
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
M2_JASA<-mat.or.vec(m,8)
|
||||||
|
colnames(M2_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||||
|
f_link<-function(x){return(x[1]*(x[2])^2)}
|
||||||
|
for (i in 1:m){
|
||||||
|
#generate dat according to M2
|
||||||
|
dat<-creat_sample(b,N,f_link,sigma)
|
||||||
|
#est sample covariance matrix
|
||||||
|
Sig_est<-est_varmat(dat[,-1])
|
||||||
|
#est trace of sample covariance matrix
|
||||||
|
tr<-var_tr(Sig_est)
|
||||||
|
#CVE
|
||||||
|
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||||
|
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
#calculate orthogonal complement of Vhat_k
|
||||||
|
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||||
|
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||||
|
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||||
|
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||||
|
# calculate distance between true B and estimated B
|
||||||
|
M2_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||||
|
M2_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||||
|
M2_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||||
|
#meanMAVE
|
||||||
|
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||||
|
M2_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||||
|
#csMAVE
|
||||||
|
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||||
|
M2_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||||
|
#phd
|
||||||
|
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||||
|
M2_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||||
|
#sir
|
||||||
|
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||||
|
M2_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||||
|
#save
|
||||||
|
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||||
|
M2_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||||
|
print(paste(i,paste('/',m)))
|
||||||
|
}
|
||||||
|
boxplot(M2_JASA[,]/sqrt(2*truedim),names=colnames(M2_JASA),ylab='err',main='M2')
|
||||||
|
summary(M2_JASA[,])
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
#initialize model parameterss
|
||||||
|
m<-100 #number of replications in simulation
|
||||||
|
dim<-12 #dimension of random variable X
|
||||||
|
truedim<-1#dimension of B=b
|
||||||
|
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||||
|
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||||
|
b<-b1#B
|
||||||
|
P<-b%*%t(b)
|
||||||
|
sigma=0.5#error standard deviation
|
||||||
|
N<-100#sample size
|
||||||
|
K<-30#number of arbitrary starting values for curvilinear optimization
|
||||||
|
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
|
||||||
|
##initailaize true covariancematrix of X
|
||||||
|
Sig<-mat.or.vec(dim,dim)
|
||||||
|
for (i in 1:dim){
|
||||||
|
for (j in 1:dim){
|
||||||
|
Sig[i,j]<-sigma^abs(i-j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Sroot<-chol(Sig)
|
||||||
|
|
||||||
|
M3_JASA<-mat.or.vec(m,8)
|
||||||
|
colnames(M3_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||||
|
for (i in 1:m){
|
||||||
|
#generate dat according to M3
|
||||||
|
dat<-creat_sample_nor_nonstand(b,N,cos,t(Sroot),sigma) # distribution
|
||||||
|
#est sample covariance matrix
|
||||||
|
Sig_est<-est_varmat(dat[,-1])
|
||||||
|
#est trace of sample covariance matrix
|
||||||
|
tr<-var_tr(Sig_est)
|
||||||
|
#CVE
|
||||||
|
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||||
|
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
#calculate orthogonal complement of Vhat_k
|
||||||
|
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||||
|
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||||
|
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||||
|
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||||
|
# calculate distance between true B and estimated B
|
||||||
|
M3_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||||
|
M3_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||||
|
M3_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||||
|
#meanMAVE
|
||||||
|
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||||
|
M3_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||||
|
#csMAVE
|
||||||
|
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||||
|
M3_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||||
|
#phd
|
||||||
|
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||||
|
M3_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||||
|
#sir
|
||||||
|
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||||
|
M3_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||||
|
#save
|
||||||
|
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||||
|
M3_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||||
|
|
||||||
|
print(paste(i,paste('/',m)))
|
||||||
|
}
|
||||||
|
boxplot(M3_JASA[,]/sqrt(2*truedim),names=colnames(M3_JASA),ylab='err',main='M3')
|
||||||
|
summary(M3_JASA[,])
|
||||||
|
|
|
@ -0,0 +1,76 @@
|
||||||
|
#initialize model parameters
|
||||||
|
m<-100 #number of replications in simulation
|
||||||
|
dim<-12 #dimension of random variable X
|
||||||
|
truedim<-1#dimension of B=b
|
||||||
|
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||||
|
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||||
|
b<-b1#B
|
||||||
|
P<-b%*%t(b)
|
||||||
|
sigma=0.5#error standard deviation
|
||||||
|
N<-100#sample size
|
||||||
|
K<-30#number of arbitrary starting values for curvilinear optimization
|
||||||
|
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
|
||||||
|
|
||||||
|
para<-seq(0,1.5,0.5)#model parameter corresponding to lambda in M4
|
||||||
|
mix_prob<-seq(0.3,0.5,0.1) #model parameters corresponding to p_{mix}
|
||||||
|
M4_JASA<-array(data=NA,dim=c(length(para),length(mix_prob),m,5))
|
||||||
|
for(o in 1:length(mix_prob)){
|
||||||
|
for (u in 1:length(para)){
|
||||||
|
colnames(M4_JASA[u,o,,])<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE')
|
||||||
|
for (i in 1:m){
|
||||||
|
#generate dat according to M4 with p_{mix}=mix_prob[o] and lambda=para[u]
|
||||||
|
if(u==1){dat<-creat_sample(b,N,cos,sigma)}
|
||||||
|
else{dat<-creat_sample_noneliptic_gausmixture(b,N,cos,sigma,mix_prob[o],dispers_para = para[u])}
|
||||||
|
#est sample covariance matrix
|
||||||
|
Sig_est<-est_varmat(dat[,-1])
|
||||||
|
#est trace of sample covariance matrix
|
||||||
|
tr<-var_tr(Sig_est)
|
||||||
|
#CVE
|
||||||
|
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||||
|
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
#calculate orthogonal complement of Vhat_k
|
||||||
|
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||||
|
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||||
|
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||||
|
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||||
|
# calculate distance between true B and estimated B
|
||||||
|
M4_JASA[u,o,i,1]<-subspace_dist(b,CVE1$est_base[,1:truedim])
|
||||||
|
M4_JASA[u,o,i,2]<-subspace_dist(b,CVE2$est_base[,1:truedim])
|
||||||
|
M4_JASA[u,o,i,3]<-subspace_dist(b,CVE3$est_base[,1:truedim])
|
||||||
|
#csMAVE
|
||||||
|
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||||
|
M4_JASA[u,o,i,4]<-subspace_dist(b,mod_t$dir[[truedim]])
|
||||||
|
#meanMAVE
|
||||||
|
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||||
|
M4_JASA[u,o,i,5]<-subspace_dist(b,mod_t2$dir[[truedim]])
|
||||||
|
|
||||||
|
}
|
||||||
|
print(paste(u,paste('/',length(para))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
par(mfrow=c(3,4))
|
||||||
|
boxplot(Exp_cook_square[1,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','meanMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[2,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[3,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[4,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||||
|
|
||||||
|
boxplot(Exp_cook_square[1,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[2,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[3,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[4,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||||
|
|
||||||
|
boxplot(Exp_cook_square[1,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[2,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[3,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[4,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||||
|
#boxplot(Exp_cook_square[5,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[5]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||||
|
|
||||||
|
boxplot(Exp_cook_square[1,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[2,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[3,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||||
|
boxplot(Exp_cook_square[4,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||||
|
|
||||||
|
par(mfrow=c(1,1))
|
|
@ -0,0 +1,64 @@
|
||||||
|
#initialize model parameters
|
||||||
|
m<-500#number of replications in simulation
|
||||||
|
dim<-12#dimension of random variable X
|
||||||
|
truedim<-1#dimension of B=b
|
||||||
|
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||||
|
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||||
|
b<-b1#B
|
||||||
|
P<-b%*%t(b)
|
||||||
|
sigma=0.5#error standard deviation
|
||||||
|
N<-42#sample size
|
||||||
|
K<-70#number of arbitrary starting values for curvilinear optimization
|
||||||
|
MAXIT<-50#maximal number of iterations in curvilinear search algorithm
|
||||||
|
|
||||||
|
f_ln1<-function(x){return(2*log(abs(x)+1))}#link function
|
||||||
|
M5_JASA_lowN<-mat.or.vec(m,8)
|
||||||
|
colnames(M5_JASA_lowN)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||||
|
for (i in 1:m){
|
||||||
|
#generate dat according to M5
|
||||||
|
dat<-creat_sample(b,N,f_ln1,sigma)
|
||||||
|
#est sample covariance matrix
|
||||||
|
Sig_est<-est_varmat(dat[,-1])
|
||||||
|
#est trace of sample covariance matrix
|
||||||
|
tr<-var_tr(Sig_est)
|
||||||
|
#CVE
|
||||||
|
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||||
|
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||||
|
#calculate orthogonal complement of Vhat_k
|
||||||
|
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||||
|
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||||
|
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||||
|
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||||
|
# calculate distance between true B and estimated B
|
||||||
|
M5_JASA_lowN[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||||
|
M5_JASA_lowN[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||||
|
M5_JASA_lowN[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||||
|
#meanMAVE
|
||||||
|
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||||
|
M5_JASA_lowN[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||||
|
#csMAVE
|
||||||
|
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||||
|
M5_JASA_lowN[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||||
|
#phd
|
||||||
|
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||||
|
M5_JASA_lowN[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||||
|
#sir
|
||||||
|
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||||
|
M5_JASA_lowN[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||||
|
#save
|
||||||
|
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||||
|
M5_JASA_lowN[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||||
|
|
||||||
|
print(paste(i,paste('/',m)))
|
||||||
|
}
|
||||||
|
boxplot(M5_JASA_lowN[,]/sqrt(2*truedim),names=colnames(M5_JASA_lowN),ylab='err',main='M5')
|
||||||
|
summary(M5_JASA_lowN[,])
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
|
# par(mfrow=c(1,2))
|
||||||
|
# boxplot(M3_JASA_L[,]/sqrt(2*1),names=colnames(M3_JASA_L),ylab='err',main='M3')
|
||||||
|
# boxplot(M5_JASA_lowN[1:100,-c(4,5,6)]/sqrt(2*1),names=colnames(M5_JASA_lowN)[-c(4,5,6)],ylab='err',main='M5')
|
||||||
|
# par(mfrow=c(1,1))
|
|
@ -0,0 +1,374 @@
|
||||||
|
library("Matrix")
|
||||||
|
library("fields")
|
||||||
|
library('dr')
|
||||||
|
library('mvtnorm')
|
||||||
|
library(mgcv)
|
||||||
|
library(pracma)
|
||||||
|
library(RVAideMemoire)
|
||||||
|
library("MAVE")
|
||||||
|
library(pls)
|
||||||
|
library(LaplacesDemon)
|
||||||
|
library(earth)
|
||||||
|
library("latex2exp")
|
||||||
|
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
##############################
|
||||||
|
|
||||||
|
################################
|
||||||
|
|
||||||
|
|
||||||
|
fsquare<-function(x){
|
||||||
|
ret<-0
|
||||||
|
for (h in 1:length(x)){ret<-ret+x[h]^2}
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
f_id<-function(x){return(x[1])}
|
||||||
|
vfun<-function(x)return(c(cos(x),sin(x)))
|
||||||
|
vfun_grad<-function(x)return(c(-sin(x),cos(x)))
|
||||||
|
f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)}
|
||||||
|
###############################
|
||||||
|
##creat sample for M2, b is B matrix of model, N..sample size,
|
||||||
|
#f...link function, sigma..standard deviation of error term
|
||||||
|
#produces sample with X standard normal with dimension to columnlength of b an
|
||||||
|
# epsilon centered normal with standard deviation sigma
|
||||||
|
creat_sample<-function(b,N,f,sigma){
|
||||||
|
if (is.vector(b)==1){dim<-length(b)}
|
||||||
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||||
|
|
||||||
|
X<-mat.or.vec(N,dim)
|
||||||
|
Y<-mat.or.vec(N,1)
|
||||||
|
for (i in 1:N){
|
||||||
|
X[i,]<-rnorm(dim,0,1)
|
||||||
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||||
|
}
|
||||||
|
dat<-cbind(Y,X)
|
||||||
|
return(dat)
|
||||||
|
}
|
||||||
|
#####
|
||||||
|
creat_b<-function(dim,n){
|
||||||
|
temp<-mat.or.vec(dim,n)
|
||||||
|
|
||||||
|
if (n>1){
|
||||||
|
while (rankMatrix(temp,method = 'qr')< min(dim,n)){
|
||||||
|
for (i in 1:n){
|
||||||
|
temp[,i]<-rnorm(dim,0,1)
|
||||||
|
temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (n==1) {temp<-rnorm(dim,0,1)
|
||||||
|
temp<-temp/sqrt(sum(temp^2))}
|
||||||
|
return(temp)
|
||||||
|
}
|
||||||
|
###
|
||||||
|
|
||||||
|
### creat sample for M1,M3
|
||||||
|
## b is B matrix of model, N..sample size,
|
||||||
|
#f...link function, sigma..standard deviation of error term
|
||||||
|
#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an
|
||||||
|
# epsilon centered normal with standard deviation sigma
|
||||||
|
creat_sample_nor_nonstand<-function(b,N,f,S,sigma){
|
||||||
|
#Var(x)= SS'
|
||||||
|
if (is.vector(b)==1){dim<-length(b)}
|
||||||
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||||
|
|
||||||
|
X<-mat.or.vec(N,dim)
|
||||||
|
Y<-mat.or.vec(N,1)
|
||||||
|
for (i in 1:N){
|
||||||
|
X[i,]<-rnorm(dim,0,1)%*%t(S)
|
||||||
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||||
|
}
|
||||||
|
dat<-cbind(Y,X)
|
||||||
|
return(dat)
|
||||||
|
}
|
||||||
|
####
|
||||||
|
|
||||||
|
####
|
||||||
|
|
||||||
|
###
|
||||||
|
####
|
||||||
|
|
||||||
|
####
|
||||||
|
#### creat sample for M4
|
||||||
|
##creat sample for M2, b is B matrix of model, N..sample size,
|
||||||
|
#f...link function, sigma..standard deviation of error term
|
||||||
|
#produces sample with X Gausian mixture
|
||||||
|
#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi)
|
||||||
|
#with dimension to columnlength of b an
|
||||||
|
# epsilon centered normal with standard deviation sigma
|
||||||
|
creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){
|
||||||
|
if (is.vector(b)==1){dim<-length(b)}
|
||||||
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||||
|
|
||||||
|
X<-mat.or.vec(N,dim)
|
||||||
|
Y<-mat.or.vec(N,1)
|
||||||
|
for (i in 1:N){
|
||||||
|
p<-rbinom(1,1,pi)
|
||||||
|
X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1)
|
||||||
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||||
|
}
|
||||||
|
dat<-cbind(Y,X)
|
||||||
|
return(dat)
|
||||||
|
}
|
||||||
|
###
|
||||||
|
###############################
|
||||||
|
#estimates covariance matrix by ML estimate
|
||||||
|
# X...n times dim data matrix with rows corresponding to X_i
|
||||||
|
est_varmat<-function(X){
|
||||||
|
dim<-length(X[1,])
|
||||||
|
Sig<-mat.or.vec(dim,dim)#Cov(X)
|
||||||
|
X<-scale(X,center = T,scale = F)
|
||||||
|
Sig<-t(X)%*%X/length(X[,1])
|
||||||
|
return(Sig)
|
||||||
|
}
|
||||||
|
####
|
||||||
|
# normalizes a vector x such that it sums to 1
|
||||||
|
column_normalize<-function(x){return(x/sum(x))}
|
||||||
|
####
|
||||||
|
#squared 2-norm of a vector x
|
||||||
|
norm2<-function(x){return(sum(x^2))}
|
||||||
|
###
|
||||||
|
# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix
|
||||||
|
fill_base<-function(estb){
|
||||||
|
dim<-length(estb[,1])
|
||||||
|
k<-length(estb[1,])
|
||||||
|
P<-(diag(rep(1,dim))-estb%*%t(estb))
|
||||||
|
rk<-0
|
||||||
|
while (rk < dim){
|
||||||
|
tmp1<-P%*%creat_b(dim,1)
|
||||||
|
nor<-sqrt(sum(tmp1^2))
|
||||||
|
if(nor>10^(-4)){
|
||||||
|
estb<-cbind(tmp1/nor,estb)
|
||||||
|
rkn<-rankMatrix(estb,method = 'qr')
|
||||||
|
if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb))
|
||||||
|
rk<-rkn}
|
||||||
|
}
|
||||||
|
if(nor <=10^(-4)){rk<-0}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return(estb)
|
||||||
|
}
|
||||||
|
##################
|
||||||
|
###new stiefel optimization
|
||||||
|
#draws from the uniform measure of Stiefel(p,k)
|
||||||
|
#returns a p times k dimensional semiorthogonal matrix
|
||||||
|
stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))}
|
||||||
|
##### chooses shifting points
|
||||||
|
#auxiliary function for stielfel_opt
|
||||||
|
# X... N times dim data matrix
|
||||||
|
#S... sample covariance matrix
|
||||||
|
#0 < p <=1 fraction of data point used as shifting points
|
||||||
|
# if p=1 all data points used as shifting points
|
||||||
|
q_ind<-function(X,S,p){
|
||||||
|
dim<-length(X[1,])
|
||||||
|
N<-length(X[,1])
|
||||||
|
if (p < 1){
|
||||||
|
Sinv<-solve(S)
|
||||||
|
ind<-c()
|
||||||
|
bound<-qchisq(p,dim)
|
||||||
|
for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean
|
||||||
|
q<-length(ind)
|
||||||
|
if (q<=2){
|
||||||
|
q<-N
|
||||||
|
ind<-seq(1,N)}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
q<-N
|
||||||
|
ind<-seq(1,N)
|
||||||
|
}
|
||||||
|
ret<-list(q,ind)
|
||||||
|
names(ret)<-c('q','ind')
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
####
|
||||||
|
#calculates the trace of a quadratic matrix S
|
||||||
|
var_tr<-function(x){return(sum(diag(x)))}
|
||||||
|
####
|
||||||
|
#chooses bandwith h according to formula in paper
|
||||||
|
#dim...dimension of X vector
|
||||||
|
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
||||||
|
# N...sample size
|
||||||
|
#nObs... nObs in bandwith formula
|
||||||
|
#tr...trace of sample covariance matrix of X
|
||||||
|
choose_h_2<-function(dim,k,N,nObs,tr){
|
||||||
|
#h1<-qchisq(nObs/N,2*(dim-k))
|
||||||
|
h2<-qchisq((nObs-1)/(N-1),(dim-k))
|
||||||
|
return(h2*2*tr/dim)
|
||||||
|
}
|
||||||
|
####
|
||||||
|
#auxiliary function for stiefel_opt
|
||||||
|
#X... data matrix of random variable X
|
||||||
|
#q,ind... output of function q_ind
|
||||||
|
Xl_fun<-function(X,q,ind){
|
||||||
|
N<-length(X[,1])
|
||||||
|
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||||
|
return(Xl)
|
||||||
|
}
|
||||||
|
###########
|
||||||
|
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
||||||
|
# V... (dim times q) matrix
|
||||||
|
# Xl... output of Xl_fun
|
||||||
|
# dtemp...vector with pairwise distances |X_i - X_j|
|
||||||
|
# q...output of q_ind function
|
||||||
|
# Y... vector with N Y_i values
|
||||||
|
# if grad=T, gradient of L(V) also returned
|
||||||
|
LV<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||||
|
N<-length(Y)
|
||||||
|
if(is.vector(V)){k<-1}
|
||||||
|
else{k<-length(V[1,])}
|
||||||
|
Xlv<-Xl%*%V
|
||||||
|
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||||
|
w<-dnorm(d/h)/dnorm(0)
|
||||||
|
w<-matrix(w,N,q)
|
||||||
|
w<-apply(w,2,column_normalize)
|
||||||
|
mY<-t(w)%*%Y
|
||||||
|
sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind
|
||||||
|
if(grad==T){
|
||||||
|
grad<-mat.or.vec(dim,k)
|
||||||
|
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||||
|
if(k==1){
|
||||||
|
grad_d<- -2*Xl*as.vector(Xlv)
|
||||||
|
grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
for (j in 1:(k)){
|
||||||
|
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||||
|
grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret<-list(mean(sig),sig,grad)
|
||||||
|
names(ret)<-c('var','sig','grad')
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
ret<-list(mean(sig),sig)
|
||||||
|
names(ret)<-c('var','sig')
|
||||||
|
}
|
||||||
|
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
||||||
|
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
||||||
|
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||||||
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||||||
|
#h... bandwidth
|
||||||
|
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
||||||
|
#k0... number of arbitrary starting values
|
||||||
|
#p...fraction of data points used as shifting point
|
||||||
|
#maxit... number of maximal iterations in curvilinear search
|
||||||
|
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
||||||
|
#lambda_0...initial stepsize
|
||||||
|
#tol...tolerance for stoping iterations
|
||||||
|
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
||||||
|
#output:
|
||||||
|
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
||||||
|
#var...value of L_n(Vhat_k)
|
||||||
|
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
||||||
|
#count...number of iterations
|
||||||
|
#h...bandwidth
|
||||||
|
#count2...number of times sclack_para is active
|
||||||
|
stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||||
|
Y<-dat[,1]
|
||||||
|
X<-dat[,-1]
|
||||||
|
N<-length(Y)
|
||||||
|
dim<-length(X[1,])
|
||||||
|
S<-est_varmat(X)
|
||||||
|
if(p<1){
|
||||||
|
|
||||||
|
tmp1<-q_ind(X,S,p)
|
||||||
|
q<-tmp1$q
|
||||||
|
ind<-tmp1$ind
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
q<-N
|
||||||
|
ind<-1:N
|
||||||
|
}
|
||||||
|
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||||
|
dtemp<-apply(Xl,1,norm2)
|
||||||
|
tr<-var_tr(S)
|
||||||
|
if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)}
|
||||||
|
best<-exp(10000)
|
||||||
|
Vend<-mat.or.vec(dim,k)
|
||||||
|
sig<-mat.or.vec(q,1)
|
||||||
|
for(u in 1:k0){
|
||||||
|
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||||
|
#print(Vold)
|
||||||
|
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||||
|
Lnew<-Lold<-exp(10000)
|
||||||
|
lambda<-lambda_0
|
||||||
|
err<-10
|
||||||
|
count<-0
|
||||||
|
count2<-0
|
||||||
|
while(err>tol&count<maxit){
|
||||||
|
#print(Vold)
|
||||||
|
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
|
||||||
|
G<-tmp2$grad
|
||||||
|
Lold<-tmp2$var
|
||||||
|
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||||
|
stepsize<-lambda#/(2*sqrt(count+1))
|
||||||
|
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||||
|
# print(Vnew)
|
||||||
|
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||||
|
Lnew<-tmp3$var
|
||||||
|
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||||
|
#print(err)
|
||||||
|
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||||
|
lambda=lambda/2
|
||||||
|
err<-10
|
||||||
|
count2<-count2+1
|
||||||
|
count<-count-1
|
||||||
|
Vnew<-Vold #!!!!!
|
||||||
|
|
||||||
|
}
|
||||||
|
Vold<-Vnew
|
||||||
|
count<-count+1
|
||||||
|
#print(count)
|
||||||
|
}
|
||||||
|
if(best>Lnew){
|
||||||
|
best<-Lnew
|
||||||
|
Vend<-Vnew
|
||||||
|
sig<-tmp3$sig
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret<-list(Vend,best,sig,count,h,count2)
|
||||||
|
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||||
|
return(ret)
|
||||||
|
}
|
||||||
|
##### calculates distance between two subspaces
|
||||||
|
#i.e. |b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2
|
||||||
|
subspace_dist<-function(b1,b2){
|
||||||
|
#b1<-orth(b1)
|
||||||
|
#b2<-orth(b2)
|
||||||
|
P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1)
|
||||||
|
P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2)
|
||||||
|
return(sqrt(sum((P1-P2)^2)))
|
||||||
|
}
|
||||||
|
####performs local linaer regression
|
||||||
|
#x...datapoint at where estimated function should be evaluated
|
||||||
|
#h...bandwidth
|
||||||
|
#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||||||
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||||||
|
#beta...projection matrix
|
||||||
|
local_linear<-function(x,h,dat,beta){
|
||||||
|
Y<-dat[,1]
|
||||||
|
X<-dat[,-1]
|
||||||
|
N<-length(Y)
|
||||||
|
X<-X%*%beta
|
||||||
|
x<-x%*%beta#beta%*%x
|
||||||
|
D_mat<-cbind(rep(1,N),X)
|
||||||
|
if (is.vector(X)){
|
||||||
|
dim<-1
|
||||||
|
d<-abs(X-rep(x,N))
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
dim<-length(X[1,])
|
||||||
|
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
|
||||||
|
}
|
||||||
|
K<-diag(dnorm(d/h)/dnorm(0))
|
||||||
|
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
|
||||||
|
return(pred)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,174 @@
|
||||||
|
|
||||||
|
#' Euclidean vector norm (2-norm)
|
||||||
|
#'
|
||||||
|
#' @param x Numeric vector
|
||||||
|
#' @returns Numeric
|
||||||
|
norm2 <- function(x) { return(sum(x^2)) }
|
||||||
|
|
||||||
|
#' Samples uniform from the Stiefel Manifold
|
||||||
|
#'
|
||||||
|
#' @param p row dim.
|
||||||
|
#' @param q col dim.
|
||||||
|
#' @returns `(p, q)` semi-orthogonal matrix
|
||||||
|
rStiefl <- function(p, q) {
|
||||||
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Matrix Trace
|
||||||
|
#'
|
||||||
|
#' @param M Square matrix
|
||||||
|
#' @returns Trace \eqn{Tr(M)}
|
||||||
|
Tr <- function(M) {
|
||||||
|
return(sum(diag(M)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Null space basis of given matrix `B`
|
||||||
|
#'
|
||||||
|
#' @param B `(p, q)` matrix
|
||||||
|
#' @returns Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B`
|
||||||
|
null <- function(M) {
|
||||||
|
tmp <- qr(M)
|
||||||
|
set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank)
|
||||||
|
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
||||||
|
}
|
||||||
|
|
||||||
|
####
|
||||||
|
#chooses bandwith h according to formula in paper
|
||||||
|
#dim...dimension of X vector
|
||||||
|
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
||||||
|
# N...sample size
|
||||||
|
#nObs... nObs in bandwith formula
|
||||||
|
#tr...trace of sample covariance matrix of X
|
||||||
|
estimateBandwidth<-function(X, k, nObs) {
|
||||||
|
n <- nrow(X)
|
||||||
|
p <- ncol(X)
|
||||||
|
|
||||||
|
X_centered <- scale(X, center = TRUE, scale = FALSE)
|
||||||
|
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
||||||
|
|
||||||
|
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
||||||
|
return(2 * quantil * Tr(Sigma) / p)
|
||||||
|
}
|
||||||
|
|
||||||
|
###########
|
||||||
|
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
||||||
|
# V... (dim times q) matrix
|
||||||
|
# Xl... output of Xl_fun
|
||||||
|
# dtemp...vector with pairwise distances |X_i - X_j|
|
||||||
|
# q...output of q_ind function
|
||||||
|
# Y... vector with N Y_i values
|
||||||
|
# if grad=T, gradient of L(V) also returned
|
||||||
|
LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) {
|
||||||
|
N <- length(Y)
|
||||||
|
if (is.vector(V)) { k <- 1 }
|
||||||
|
else { k <- length(V[1,]) }
|
||||||
|
Xlv <- Xl %*% V
|
||||||
|
d <- dtemp - ((Xlv^2) %*% rep(1, k))
|
||||||
|
w <- dnorm(d / h) / dnorm(0)
|
||||||
|
w <- matrix(w, N, q)
|
||||||
|
w <- apply(w, 2, function(x) { x / sum(x) })
|
||||||
|
y1 <- t(w) %*% Y
|
||||||
|
y2 <- t(w) %*% (Y^2)
|
||||||
|
sig <- y2 - y1^2
|
||||||
|
|
||||||
|
result <- list(var = mean(sig), sig = sig)
|
||||||
|
if (grad == TRUE) {
|
||||||
|
tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2)
|
||||||
|
if (k == 1) {
|
||||||
|
grad_d <- -2 * Xl * as.vector(Xlv)
|
||||||
|
grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
||||||
|
} else {
|
||||||
|
grad <- matrix(0, nrow(V), ncol(V))
|
||||||
|
for (j in 1:k) {
|
||||||
|
grad_d <- -2 * Xl * as.vector(Xlv[ ,j])
|
||||||
|
grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result$grad = grad
|
||||||
|
}
|
||||||
|
|
||||||
|
return(result)
|
||||||
|
}
|
||||||
|
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
||||||
|
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
||||||
|
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||||||
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||||||
|
#h... bandwidth
|
||||||
|
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
||||||
|
#k0... number of arbitrary starting values
|
||||||
|
#p...fraction of data points used as shifting point
|
||||||
|
#maxIter... number of maximal iterations in curvilinear search
|
||||||
|
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
||||||
|
#lambda_0...initial stepsize
|
||||||
|
#tol...tolerance for stoping iterations
|
||||||
|
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
||||||
|
#output:
|
||||||
|
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
||||||
|
#var...value of L_n(Vhat_k)
|
||||||
|
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
||||||
|
#count...number of iterations
|
||||||
|
#h...bandwidth
|
||||||
|
cve_legacy <- function(
|
||||||
|
X, Y, k,
|
||||||
|
nObs = sqrt(nrow(X)),
|
||||||
|
tauInitial = 1.0,
|
||||||
|
tol = 1e-3,
|
||||||
|
slack = 0,
|
||||||
|
maxIter = 50L,
|
||||||
|
attempts = 10L
|
||||||
|
) {
|
||||||
|
# get dimensions
|
||||||
|
n <- nrow(X)
|
||||||
|
p <- ncol(X)
|
||||||
|
q <- p - k
|
||||||
|
|
||||||
|
Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
|
||||||
|
Xd <- apply(Xl, 1, norm2)
|
||||||
|
I_p <- diag(1, p)
|
||||||
|
# estimate bandwidth
|
||||||
|
h <- estimateBandwidth(X, k, nObs)
|
||||||
|
|
||||||
|
Lbest <- Inf
|
||||||
|
Vend <- mat.or.vec(p, q)
|
||||||
|
|
||||||
|
for (. in 1:attempts) {
|
||||||
|
Vnew <- Vold <- rStiefl(p, q)
|
||||||
|
Lnew <- Lold <- exp(10000)
|
||||||
|
tau <- tauInitial
|
||||||
|
|
||||||
|
error <- Inf
|
||||||
|
count <- 0
|
||||||
|
while (error > tol & count < maxIter) {
|
||||||
|
|
||||||
|
tmp <- LV(Vold, Xl, Xd, h, n, Y)
|
||||||
|
G <- tmp$grad
|
||||||
|
Lold <- tmp$var
|
||||||
|
|
||||||
|
W <- tau * (G %*% t(Vold) - Vold %*% t(G))
|
||||||
|
Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold
|
||||||
|
|
||||||
|
Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var
|
||||||
|
|
||||||
|
if ((Lnew - Lold) > slack * Lold) {
|
||||||
|
tau = tau / 2
|
||||||
|
error <- Inf
|
||||||
|
} else {
|
||||||
|
error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k)
|
||||||
|
Vold <- Vnew
|
||||||
|
}
|
||||||
|
count <- count + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Lbest > Lnew) {
|
||||||
|
Lbest <- Lnew
|
||||||
|
Vend <- Vnew
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
loss = Lbest,
|
||||||
|
V = Vend,
|
||||||
|
B = null(Vend),
|
||||||
|
h = h
|
||||||
|
))
|
||||||
|
}
|
|
@ -0,0 +1,318 @@
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V1.cpp')"
|
||||||
|
//
|
||||||
|
|
||||||
|
// only `RcppArmadillo.h` which includes `Rcpp.h`
|
||||||
|
#include <RcppArmadillo.h>
|
||||||
|
|
||||||
|
// through the depends attribute `Rcpp` is tolled to create
|
||||||
|
// hooks for `RcppArmadillo` needed by the build process.
|
||||||
|
//
|
||||||
|
// [[Rcpp::depends(RcppArmadillo)]]
|
||||||
|
|
||||||
|
// required for `R::qchisq()` used in `estimateBandwidth()`
|
||||||
|
#include <Rmath.h>
|
||||||
|
|
||||||
|
//' Estimated bandwidth for CVE.
|
||||||
|
//'
|
||||||
|
//' Estimates a propper bandwidth \code{h} according
|
||||||
|
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{%
|
||||||
|
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||||
|
//'
|
||||||
|
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
|
||||||
|
//' q. Therefor each row represents a datapoint of dimension p.
|
||||||
|
//' @param k Guess for rank(B).
|
||||||
|
//' @param nObs Ether numeric of a function. If specified as numeric value
|
||||||
|
//' its used in the computation of the bandwidth directly. If its a function
|
||||||
|
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
||||||
|
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
||||||
|
//'
|
||||||
|
//' @seealso [qchisq()]
|
||||||
|
//'
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
uword n = X.n_rows; // nr samples
|
||||||
|
uword p = X.n_cols; // dimension of rand. var. `X`
|
||||||
|
|
||||||
|
// column mean
|
||||||
|
mat M = mean(X);
|
||||||
|
// center `X`
|
||||||
|
mat C = X.each_row() - M;
|
||||||
|
// trace of covariance matrix, `traceSigma = Tr(C' C)`
|
||||||
|
double traceSigma = accu(C % C);
|
||||||
|
// compute estimated bandwidth
|
||||||
|
double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
|
||||||
|
|
||||||
|
return 2.0 * qchi2 * traceSigma / (p * n);
|
||||||
|
}
|
||||||
|
|
||||||
|
//' Random element from Stiefel Manifold `S(p, q)`.
|
||||||
|
//'
|
||||||
|
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
|
||||||
|
//' This is done by taking the Q-component of the QR decomposition
|
||||||
|
//' from a `(p, q)` Matrix with independent standart normal entries.
|
||||||
|
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
|
||||||
|
//'
|
||||||
|
//' @param p Row dimension
|
||||||
|
//' @param q Column dimension
|
||||||
|
//'
|
||||||
|
//' @returns Matrix of dim `(p, q)`.
|
||||||
|
//'
|
||||||
|
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
|
||||||
|
//'
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
arma::mat rStiefel(arma::uword p, arma::uword q) {
|
||||||
|
arma::mat Q, R;
|
||||||
|
arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
|
||||||
|
return Q;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gradient(const arma::mat& X,
|
||||||
|
const arma::mat& X_diff,
|
||||||
|
const arma::mat& Y,
|
||||||
|
const arma::mat& Y_rep,
|
||||||
|
const arma::mat& V,
|
||||||
|
const double h,
|
||||||
|
arma::mat* G = 0
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
uword n = X.n_rows;
|
||||||
|
uword p = X.n_cols;
|
||||||
|
|
||||||
|
// orthogonal projection matrix `Q = I - VV'` for dist computation
|
||||||
|
mat Q = -(V * V.t());
|
||||||
|
Q.diag() += 1;
|
||||||
|
// calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
|
||||||
|
vec D_vec = sum(square(X_diff * Q), 1);
|
||||||
|
mat D = reshape(D_vec, n, n);
|
||||||
|
// calc weights as `W` with `W(i, j) = w_i(V, X_j)`
|
||||||
|
mat W = exp(D / (-2.0 * h));
|
||||||
|
// column-wise normalization via 1-norm
|
||||||
|
W = normalise(W, 1);
|
||||||
|
vec W_vec = vectorise(W);
|
||||||
|
// weighted `Y` means (first and second order)
|
||||||
|
vec y1 = W.t() * Y;
|
||||||
|
vec y2 = W.t() * square(Y);
|
||||||
|
// loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
|
||||||
|
vec L = y2 - square(y1);
|
||||||
|
// `loss = L_n(V)`
|
||||||
|
double loss = mean(L);
|
||||||
|
// check if gradient as output variable is set
|
||||||
|
if (G != 0) {
|
||||||
|
// `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
|
||||||
|
vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
|
||||||
|
mat X_diff_scale = X_diff.each_col() % scale;
|
||||||
|
(*G) = X_diff_scale.t() * X_diff * V;
|
||||||
|
(*G) *= -2.0 / (h * h * n);
|
||||||
|
}
|
||||||
|
|
||||||
|
return loss;
|
||||||
|
}
|
||||||
|
|
||||||
|
//' Stiefel Optimization.
|
||||||
|
//'
|
||||||
|
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
|
||||||
|
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||||
|
//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
|
||||||
|
//' span(B) = orth(span(B))}.
|
||||||
|
//'
|
||||||
|
//' @param X data points
|
||||||
|
//' @param Y response
|
||||||
|
//' @param k assumed \eqn{rank(B)}
|
||||||
|
//' @param nObs parameter for bandwidth estimation, typical value
|
||||||
|
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||||
|
//' @param tau Initial step size
|
||||||
|
//' @param tol Tolerance for update error used for stopping criterion
|
||||||
|
//' \eqn{|| V(j) V(j)' - V(j+1) V(j+1)' ||_2 < tol}{%
|
||||||
|
//' \| V_j V_j' - V_{j+1} V_{j+1}' \|_2 < tol}.
|
||||||
|
//' @param maxIter Upper bound of optimization iterations
|
||||||
|
//'
|
||||||
|
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
||||||
|
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
||||||
|
//' orthogonal space spaned by \code{V}.
|
||||||
|
//'
|
||||||
|
//' @rdname optStiefel
|
||||||
|
double optStiefel(
|
||||||
|
const arma::mat& X,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const int k,
|
||||||
|
const double h,
|
||||||
|
const double tauInitial,
|
||||||
|
const double tol,
|
||||||
|
const double slack,
|
||||||
|
const int maxIter,
|
||||||
|
arma::mat& V, // out
|
||||||
|
arma::vec& history // out
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
// get dimensions
|
||||||
|
const uword n = X.n_rows; // nr samples
|
||||||
|
const uword p = X.n_cols; // dim of random variable `X`
|
||||||
|
const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B}
|
||||||
|
|
||||||
|
// all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
|
||||||
|
mat X_diff(n * n, p);
|
||||||
|
for (uword i = 0, k = 0; i < n; ++i) {
|
||||||
|
for (uword j = 0; j < n; ++j) {
|
||||||
|
X_diff.row(k++) = X.row(i) - X.row(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const vec Y_rep = repmat(Y, n, 1);
|
||||||
|
const mat I_p = eye<mat>(p, p);
|
||||||
|
|
||||||
|
// initial start value for `V`
|
||||||
|
V = rStiefel(p, q);
|
||||||
|
|
||||||
|
// init optimization `loss`es, `error` and stepsize `tau`
|
||||||
|
// double loss_next = datum::inf;
|
||||||
|
double loss;
|
||||||
|
double error = datum::inf;
|
||||||
|
double tau = tauInitial;
|
||||||
|
int count;
|
||||||
|
// main optimization loop
|
||||||
|
for (count = 0; count < maxIter && error > tol; ++count) {
|
||||||
|
// calc gradient `G = grad_V(L)(V)`
|
||||||
|
mat G;
|
||||||
|
loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
|
||||||
|
// matrix `A` for colescy-transform of the gradient
|
||||||
|
mat A = tau * ((G * V.t()) - (V * G.t()));
|
||||||
|
// next iteration step of `V`
|
||||||
|
mat V_tau = inv(I_p + A) * (I_p - A) * V;
|
||||||
|
// loss after step `L(V(tau))`
|
||||||
|
double loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h);
|
||||||
|
|
||||||
|
// store `loss` in `history` and increase `count`
|
||||||
|
history(count) = loss;
|
||||||
|
|
||||||
|
// validate if loss decreased
|
||||||
|
if ((loss_tau - loss) > slack * loss) {
|
||||||
|
// ignore step, retry with half the step size
|
||||||
|
tau = tau / 2.;
|
||||||
|
error = datum::inf;
|
||||||
|
} else {
|
||||||
|
// compute step error (break condition)
|
||||||
|
error = norm((V * V.t()) - (V_tau * V_tau.t()), 2) / (2 * q);
|
||||||
|
// shift for next iteration
|
||||||
|
V = V_tau;
|
||||||
|
loss = loss_tau;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// store final `loss`
|
||||||
|
history(count) = loss;
|
||||||
|
|
||||||
|
return loss;
|
||||||
|
}
|
||||||
|
|
||||||
|
//' Conditional Variance Estimation (CVE) method.
|
||||||
|
//'
|
||||||
|
//' This version uses a "simple" stiefel optimization schema.
|
||||||
|
//'
|
||||||
|
//' @param X data points
|
||||||
|
//' @param Y response
|
||||||
|
//' @param k assumed \eqn{rank(B)}
|
||||||
|
//' @param nObs parameter for bandwidth estimation, typical value
|
||||||
|
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||||
|
//' @param tau Initial step size (default 1)
|
||||||
|
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
||||||
|
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
||||||
|
//' @param maxIter Upper bound of optimization iterations (default 50)
|
||||||
|
//' @param attempts Number of tryes with new random optimization starting points (default 10)
|
||||||
|
//'
|
||||||
|
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
||||||
|
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
||||||
|
//' orthogonal space spaned by \code{V}.
|
||||||
|
//'
|
||||||
|
//' @rdname cve_cpp_V1
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
Rcpp::List cve_cpp(
|
||||||
|
const arma::mat& X,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const int k,
|
||||||
|
const double nObs,
|
||||||
|
const double tauInitial = 1.,
|
||||||
|
const double tol = 1e-5,
|
||||||
|
const double slack = -1e-10,
|
||||||
|
const int maxIter = 50,
|
||||||
|
const int attempts = 10
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
// tracker of current best results
|
||||||
|
mat V_best;
|
||||||
|
double loss_best = datum::inf;
|
||||||
|
// estimate bandwidth
|
||||||
|
double h = estimateBandwidth(X, k, nObs);
|
||||||
|
|
||||||
|
// loss history for each optimization attempt
|
||||||
|
// each column contaions the iteration history for the loss
|
||||||
|
mat history = mat(maxIter + 1, attempts);
|
||||||
|
|
||||||
|
// multiple stiefel optimization attempts
|
||||||
|
for (int i = 0; i < attempts; ++i) {
|
||||||
|
// declare output variables
|
||||||
|
mat V; // estimated `V` space
|
||||||
|
vec hist = vec(history.n_rows, fill::zeros); // optimization history
|
||||||
|
double loss = optStiefel(X, Y, k, h, tauInitial, tol, slack, maxIter, V, hist);
|
||||||
|
if (loss < loss_best) {
|
||||||
|
loss_best = loss;
|
||||||
|
V_best = V;
|
||||||
|
}
|
||||||
|
// write history to history collection
|
||||||
|
history.col(i) = hist;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get `B` as kernal of `V.t()`
|
||||||
|
mat B = null(V_best.t());
|
||||||
|
|
||||||
|
return Rcpp::List::create(
|
||||||
|
Rcpp::Named("history") = history,
|
||||||
|
Rcpp::Named("loss") = loss_best,
|
||||||
|
Rcpp::Named("h") = h,
|
||||||
|
Rcpp::Named("V") = V_best,
|
||||||
|
Rcpp::Named("B") = B
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*** R
|
||||||
|
|
||||||
|
source("CVE/R/datasets.R")
|
||||||
|
ds <- dataset()
|
||||||
|
|
||||||
|
print(system.time(
|
||||||
|
cve.res <- cve_cpp(
|
||||||
|
X = ds$X,
|
||||||
|
Y = ds$Y,
|
||||||
|
k = ncol(ds$B),
|
||||||
|
nObs = sqrt(nrow(ds$X))
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
pdf('plots/cve_V1_history.pdf')
|
||||||
|
H <- cve.res$history
|
||||||
|
H_i <- H[H[, 1] > 0, 1]
|
||||||
|
plot(1:length(H_i), H_i,
|
||||||
|
main = "History cve_V1",
|
||||||
|
xlab = "Iterations i",
|
||||||
|
ylab = expression(loss == L[n](V^{(i)})),
|
||||||
|
xlim = c(1, nrow(H)),
|
||||||
|
ylim = c(0, max(H)),
|
||||||
|
type = "l"
|
||||||
|
)
|
||||||
|
for (i in 2:ncol(H)) {
|
||||||
|
H_i <- H[H[, i] > 0, i]
|
||||||
|
lines(1:length(H_i), H_i)
|
||||||
|
}
|
||||||
|
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
|
||||||
|
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
|
||||||
|
points(x.ends, y.ends)
|
||||||
|
|
||||||
|
*/
|
|
@ -0,0 +1,365 @@
|
||||||
|
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
// ~$ R -e "library(Rcpp); Rcpp::sourceCpp('cve_V2.cpp')"
|
||||||
|
//
|
||||||
|
|
||||||
|
// only `RcppArmadillo.h` which includes `Rcpp.h`
|
||||||
|
#include <RcppArmadillo.h>
|
||||||
|
|
||||||
|
// through the depends attribute `Rcpp` is tolled to create
|
||||||
|
// hooks for `RcppArmadillo` needed by the build process.
|
||||||
|
//
|
||||||
|
// [[Rcpp::depends(RcppArmadillo)]]
|
||||||
|
|
||||||
|
// required for `R::qchisq()` used in `estimateBandwidth()`
|
||||||
|
#include <Rmath.h>
|
||||||
|
|
||||||
|
//' Estimated bandwidth for CVE.
|
||||||
|
//'
|
||||||
|
//' Estimates a propper bandwidth \code{h} according
|
||||||
|
//' \deqn{h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)}\frac{2 tr(\Sigma)}{p}}{%
|
||||||
|
//' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p}
|
||||||
|
//'
|
||||||
|
//' @param X data matrix of dimension (n x p) with n data points X_i of dimension
|
||||||
|
//' q. Therefor each row represents a datapoint of dimension p.
|
||||||
|
//' @param k Guess for rank(B).
|
||||||
|
//' @param nObs Ether numeric of a function. If specified as numeric value
|
||||||
|
//' its used in the computation of the bandwidth directly. If its a function
|
||||||
|
//' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not
|
||||||
|
//' supplied at all is to use \code{nObs <- nrow(x)^0.5}.
|
||||||
|
//'
|
||||||
|
//' @seealso [qchisq()]
|
||||||
|
//'
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
double estimateBandwidth(const arma::mat& X, arma::uword k, double nObs) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
uword n = X.n_rows; // nr samples
|
||||||
|
uword p = X.n_cols; // dimension of rand. var. `X`
|
||||||
|
|
||||||
|
// column mean
|
||||||
|
mat M = mean(X);
|
||||||
|
// center `X`
|
||||||
|
mat C = X.each_row() - M;
|
||||||
|
// trace of covariance matrix, `traceSigma = Tr(C' C)`
|
||||||
|
double traceSigma = accu(C % C);
|
||||||
|
// compute estimated bandwidth
|
||||||
|
double qchi2 = R::qchisq((nObs - 1.0) / (n - 1), static_cast<double>(k), 1, 0);
|
||||||
|
|
||||||
|
return 2.0 * qchi2 * traceSigma / (p * n);
|
||||||
|
}
|
||||||
|
|
||||||
|
//' Random element from Stiefel Manifold `S(p, q)`.
|
||||||
|
//'
|
||||||
|
//' Draws an element of \eqn{S(p, q)} which is the Stiefel Manifold.
|
||||||
|
//' This is done by taking the Q-component of the QR decomposition
|
||||||
|
//' from a `(p, q)` Matrix with independent standart normal entries.
|
||||||
|
//' As a semi-orthogonal Matrix the result `V` satisfies \eqn{V'V = I_q}.
|
||||||
|
//'
|
||||||
|
//' @param p Row dimension
|
||||||
|
//' @param q Column dimension
|
||||||
|
//'
|
||||||
|
//' @returns Matrix of dim `(p, q)`.
|
||||||
|
//'
|
||||||
|
//' @seealso <https://en.wikipedia.org/wiki/Stiefel_manifold>
|
||||||
|
//'
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
arma::mat rStiefel(arma::uword p, arma::uword q) {
|
||||||
|
arma::mat Q, R;
|
||||||
|
arma::qr_econ(Q, R, arma::randn<arma::mat>(p, q));
|
||||||
|
return Q;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gradient(const arma::mat& X,
|
||||||
|
const arma::mat& X_diff,
|
||||||
|
const arma::mat& Y,
|
||||||
|
const arma::mat& Y_rep,
|
||||||
|
const arma::mat& V,
|
||||||
|
const double h,
|
||||||
|
arma::mat* G = 0
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
uword n = X.n_rows;
|
||||||
|
uword p = X.n_cols;
|
||||||
|
|
||||||
|
// orthogonal projection matrix `Q = I - VV'` for dist computation
|
||||||
|
mat Q = -(V * V.t());
|
||||||
|
Q.diag() += 1;
|
||||||
|
// calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
|
||||||
|
vec D_vec = sum(square(X_diff * Q), 1);
|
||||||
|
mat D = reshape(D_vec, n, n);
|
||||||
|
// calc weights as `W` with `W(i, j) = w_i(V, X_j)`
|
||||||
|
mat W = exp(D / (-2.0 * h));
|
||||||
|
// column-wise normalization via 1-norm
|
||||||
|
W = normalise(W, 1);
|
||||||
|
vec W_vec = vectorise(W);
|
||||||
|
// weighted `Y` means (first and second order)
|
||||||
|
vec y1 = W.t() * Y;
|
||||||
|
vec y2 = W.t() * square(Y);
|
||||||
|
// loss for each `X_i`, meaning `L(i) = L_n(V, X_i)`
|
||||||
|
vec L = y2 - square(y1);
|
||||||
|
// `loss = L_n(V)`
|
||||||
|
double loss = mean(L);
|
||||||
|
// check if gradient as output variable is set
|
||||||
|
if (G != 0) {
|
||||||
|
// `G = grad(L_n(V))` a.k.a. gradient of `L` with respect to `V`
|
||||||
|
vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
|
||||||
|
mat X_diff_scale = X_diff.each_col() % scale;
|
||||||
|
(*G) = X_diff_scale.t() * X_diff * V;
|
||||||
|
(*G) *= -2.0 / (h * h * n);
|
||||||
|
}
|
||||||
|
|
||||||
|
return loss;
|
||||||
|
}
|
||||||
|
|
||||||
|
//' Stiefel Optimization with curvilinear linesearch.
|
||||||
|
//'
|
||||||
|
//' TODO: finish doc. comment
|
||||||
|
//' Stiefel Optimization for \code{V} given a dataset \code{X} and responces
|
||||||
|
//' \code{Y} for the model \deqn{Y\sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon}
|
||||||
|
//' to compute the matrix `B` such that \eqn{span{B} = span(V)^{\bot}}{%
|
||||||
|
//' span(B) = orth(span(B))}.
|
||||||
|
//' The curvilinear linesearch uses Armijo-Wolfe conditions:
|
||||||
|
// \deqn{L(V(tau)) > L(V(0)) + rho_1 * tau * L(V(0))'}
|
||||||
|
//' \deqn{L(V(tau))' < rho_2 * L(V(0))'}
|
||||||
|
//'
|
||||||
|
//' @param X data points
|
||||||
|
//' @param Y response
|
||||||
|
//' @param k assumed \eqn{rank(B)}
|
||||||
|
//' @param nObs parameter for bandwidth estimation, typical value
|
||||||
|
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||||
|
//' @param tau Initial step size
|
||||||
|
//' @param tol Tolerance for update error used for stopping criterion
|
||||||
|
//' @param maxIter Upper bound of optimization iterations
|
||||||
|
//'
|
||||||
|
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
||||||
|
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
||||||
|
//' orthogonal space spaned by \code{V}.
|
||||||
|
//'
|
||||||
|
//' @rdname optStiefel
|
||||||
|
double optStiefel(
|
||||||
|
const arma::mat& X,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const int k,
|
||||||
|
const double h,
|
||||||
|
const double tauInitial,
|
||||||
|
const double rho1,
|
||||||
|
const double rho2,
|
||||||
|
const double tol,
|
||||||
|
const int maxIter,
|
||||||
|
const int maxLineSeachIter,
|
||||||
|
arma::mat& V, // out
|
||||||
|
arma::vec& history // out
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
// get dimensions
|
||||||
|
const uword n = X.n_rows; // nr samples
|
||||||
|
const uword p = X.n_cols; // dim of random variable `X`
|
||||||
|
const uword q = p - k; // rank(V) e.g. dim of ortho space of span{B}
|
||||||
|
|
||||||
|
// all `X_i - X_j` differences, `X_diff.row(i * n + j) = X_i - X_j`
|
||||||
|
mat X_diff(n * n, p);
|
||||||
|
for (uword i = 0, k = 0; i < n; ++i) {
|
||||||
|
for (uword j = 0; j < n; ++j) {
|
||||||
|
X_diff.row(k++) = X.row(i) - X.row(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const vec Y_rep = repmat(Y, n, 1);
|
||||||
|
const mat I_p = eye<mat>(p, p);
|
||||||
|
const mat I_2q = eye<mat>(2 * q, 2 * q);
|
||||||
|
|
||||||
|
// initial start value for `V`
|
||||||
|
V = rStiefel(p, q);
|
||||||
|
|
||||||
|
// first gradient initialization
|
||||||
|
mat G;
|
||||||
|
double loss = gradient(X, X_diff, Y, Y_rep, V, h, &G);
|
||||||
|
|
||||||
|
// set first `loss` in history
|
||||||
|
history(0) = loss;
|
||||||
|
|
||||||
|
// main curvilinear optimization loop
|
||||||
|
double error = datum::inf;
|
||||||
|
int iter = 0;
|
||||||
|
while (iter++ < maxIter && error > tol) {
|
||||||
|
// helper matrices `lU` (linesearch U), `lV` (linesearch V)
|
||||||
|
// as describet in [Wen, Yin] Lemma 4.
|
||||||
|
mat lU = join_rows(G, V); // linesearch "U"
|
||||||
|
mat lV = join_rows(V, -1.0 * G); // linesearch "V"
|
||||||
|
// `A = G V' - V G'`
|
||||||
|
mat A = lU * lV.t();
|
||||||
|
|
||||||
|
// set initial step size for curvilinear line search
|
||||||
|
double tau = tauInitial, lower = 0., upper = datum::inf;
|
||||||
|
|
||||||
|
// check if `tau` is valid for inverting
|
||||||
|
|
||||||
|
// set line search internal gradient and loss to cycle for next iteration
|
||||||
|
mat V_tau; // next position after a step of size `tau`, a.k.a. `V(tau)`
|
||||||
|
mat G_tau; // gradient of `V` at `V(tau) = V_tau`
|
||||||
|
double loss_tau; // loss (objective) at position `V(tau)`
|
||||||
|
int lsIter = 0; // linesearch iter
|
||||||
|
// start line search
|
||||||
|
do {
|
||||||
|
mat HV = inv(I_2q + (tau/2.) * lV.t() * lU) * lV.t();
|
||||||
|
// next step `V`
|
||||||
|
V_tau = V - tau * (lU * (HV * V));
|
||||||
|
|
||||||
|
double LprimeV = -trace(G.t() * A * V);
|
||||||
|
|
||||||
|
mat lB = I_p - (tau / 2.) * lU * HV;
|
||||||
|
|
||||||
|
loss_tau = gradient(X, X_diff, Y, Y_rep, V_tau, h, &G_tau);
|
||||||
|
|
||||||
|
double LprimeV_tau = -2. * trace(G_tau.t() * lB * A * (V + V_tau));
|
||||||
|
|
||||||
|
// Armijo condition
|
||||||
|
if (loss_tau > loss + (rho1 * tau * LprimeV)) {
|
||||||
|
upper = tau;
|
||||||
|
tau = (lower + upper) / 2.;
|
||||||
|
// Wolfe condition
|
||||||
|
} else if (LprimeV_tau < rho2 * LprimeV) {
|
||||||
|
lower = tau;
|
||||||
|
if (upper == datum::inf) {
|
||||||
|
tau = 2. * lower;
|
||||||
|
} else {
|
||||||
|
tau = (lower + upper) / 2.;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (++lsIter < maxLineSeachIter);
|
||||||
|
|
||||||
|
// compute error (break condition)
|
||||||
|
// Note: `error` is the decrease of the objective `L_n(V)` and not the
|
||||||
|
// norm of the gradient as proposed in [Wen, Yin] Algorithm 1.
|
||||||
|
error = loss - loss_tau;
|
||||||
|
|
||||||
|
// cycle `V`, `G` and `loss` for next iteration
|
||||||
|
V = V_tau;
|
||||||
|
loss = loss_tau;
|
||||||
|
G = G_tau;
|
||||||
|
|
||||||
|
// store final `loss`
|
||||||
|
history(iter) = loss;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return loss;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//' Conditional Variance Estimation (CVE) method.
|
||||||
|
//'
|
||||||
|
//' This version uses a curvilinear linesearch for the stiefel optimization.
|
||||||
|
//'
|
||||||
|
//' @param X data points
|
||||||
|
//' @param Y response
|
||||||
|
//' @param k assumed \eqn{rank(B)}
|
||||||
|
//' @param nObs parameter for bandwidth estimation, typical value
|
||||||
|
//' \code{nObs = nrow(X)^lambda} with \code{lambda} in the range [0.3, 0.8].
|
||||||
|
//' @param tau Initial step size (default 1)
|
||||||
|
//' @param tol Tolerance for update error used for stopping criterion (default 1e-5)
|
||||||
|
//' @param slack Ratio of small negative error allowed in loss optimization (default -1e-10)
|
||||||
|
//' @param maxIter Upper bound of optimization iterations (default 50)
|
||||||
|
//' @param attempts Number of tryes with new random optimization starting points (default 10)
|
||||||
|
//'
|
||||||
|
//' @return List containing the bandwidth \code{h}, optimization objective \code{V}
|
||||||
|
//' and the matrix \code{B} estimated for the model as a orthogonal basis of the
|
||||||
|
//' orthogonal space spaned by \code{V}.
|
||||||
|
//'
|
||||||
|
//' @rdname cve_cpp_V2
|
||||||
|
//' @export
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
Rcpp::List cve_cpp(
|
||||||
|
const arma::mat& X,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const int k,
|
||||||
|
const double nObs,
|
||||||
|
const double tauInitial = 1.,
|
||||||
|
const double rho1 = 0.05,
|
||||||
|
const double rho2 = 0.95,
|
||||||
|
const double tol = 1e-6,
|
||||||
|
const int maxIter = 50,
|
||||||
|
const int maxLineSeachIter = 10,
|
||||||
|
const int attempts = 10
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
// tracker of current best results
|
||||||
|
mat V_best;
|
||||||
|
double loss_best = datum::inf;
|
||||||
|
// estimate bandwidth
|
||||||
|
double h = estimateBandwidth(X, k, nObs);
|
||||||
|
|
||||||
|
// loss history for each optimization attempt
|
||||||
|
// each column contaions the iteration history for the loss
|
||||||
|
mat history = mat(maxIter + 1, attempts);
|
||||||
|
|
||||||
|
// multiple stiefel optimization attempts
|
||||||
|
for (int i = 0; i < attempts; ++i) {
|
||||||
|
// declare output variables
|
||||||
|
mat V; // estimated `V` space
|
||||||
|
vec hist = vec(history.n_rows, fill::zeros); // optimization history
|
||||||
|
double loss = optStiefel(X, Y, k, h,
|
||||||
|
tauInitial, rho1, rho2, tol, maxIter, maxLineSeachIter, V, hist
|
||||||
|
);
|
||||||
|
if (loss < loss_best) {
|
||||||
|
loss_best = loss;
|
||||||
|
V_best = V;
|
||||||
|
}
|
||||||
|
// write history to history collection
|
||||||
|
history.col(i) = hist;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get `B` as kernal of `V.t()`
|
||||||
|
mat B = null(V_best.t());
|
||||||
|
|
||||||
|
return Rcpp::List::create(
|
||||||
|
Rcpp::Named("history") = history,
|
||||||
|
Rcpp::Named("loss") = loss_best,
|
||||||
|
Rcpp::Named("h") = h,
|
||||||
|
Rcpp::Named("V") = V_best,
|
||||||
|
Rcpp::Named("B") = B
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*** R
|
||||||
|
|
||||||
|
source("CVE/R/datasets.R")
|
||||||
|
ds <- dataset()
|
||||||
|
|
||||||
|
print(system.time(
|
||||||
|
cve.res <- cve_cpp(
|
||||||
|
X = ds$X,
|
||||||
|
Y = ds$Y,
|
||||||
|
k = ncol(ds$B),
|
||||||
|
nObs = sqrt(nrow(ds$X))
|
||||||
|
)
|
||||||
|
))
|
||||||
|
|
||||||
|
pdf('plots/cve_V2_history.pdf')
|
||||||
|
H <- cve.res$history
|
||||||
|
H_i <- H[H[, 1] > 0, 1]
|
||||||
|
plot(1:length(H_i), H_i,
|
||||||
|
main = "History cve_V2",
|
||||||
|
xlab = "Iterations i",
|
||||||
|
ylab = expression(loss == L[n](V^{(i)})),
|
||||||
|
xlim = c(1, nrow(H)),
|
||||||
|
ylim = c(0, max(H)),
|
||||||
|
type = "l"
|
||||||
|
)
|
||||||
|
for (i in 2:ncol(H)) {
|
||||||
|
H_i <- H[H[, i] > 0, i]
|
||||||
|
lines(1:length(H_i), H_i)
|
||||||
|
}
|
||||||
|
x.ends <- apply(H, 2, function(h) { length(h[h > 0]) })
|
||||||
|
y.ends <- apply(H, 2, function(h) { min(h[h > 0]) })
|
||||||
|
points(x.ends, y.ends)
|
||||||
|
|
||||||
|
*/
|
|
@ -0,0 +1,368 @@
|
||||||
|
//
|
||||||
|
// Usage (bash):
|
||||||
|
// ~$ R -e "library(Rcpp); sourceCpp('runtime_tests_grad.cpp')"
|
||||||
|
//
|
||||||
|
// Usage (R):
|
||||||
|
// > library(Rcpp)
|
||||||
|
// > sourceCpp('runtime_tests_grad.cpp')
|
||||||
|
//
|
||||||
|
|
||||||
|
// [[Rcpp::depends(RcppArmadillo)]]
|
||||||
|
#include <RcppArmadillo.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
arma::mat arma_grad(const arma::mat& X,
|
||||||
|
const arma::mat& X_diff,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const arma::vec& Y_rep,
|
||||||
|
const arma::mat& V,
|
||||||
|
const double h) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
uword n = X.n_rows;
|
||||||
|
uword p = X.n_cols;
|
||||||
|
|
||||||
|
// orthogonal projection matrix `Q = I - VV'` for dist computation
|
||||||
|
mat Q = -(V * V.t());
|
||||||
|
Q.diag() += 1;
|
||||||
|
// calc pairwise distances as `D` with `D(i, j) = d_i(V, X_j)`
|
||||||
|
vec D_vec = sum(square(X_diff * Q), 1);
|
||||||
|
mat D = reshape(D_vec, n, n);
|
||||||
|
// calc weights as `W` with `W(i, j) = w_i(V, X_j)`
|
||||||
|
mat W = exp(D / (-2.0 * h));
|
||||||
|
W = normalise(W, 1); // colomn-wise, 1-norm
|
||||||
|
vec W_vec = vectorise(W);
|
||||||
|
// centered weighted `Y` means
|
||||||
|
vec y1 = W.t() * Y;
|
||||||
|
vec y2 = W.t() * square(Y);
|
||||||
|
// loss for each X_i, meaning `L(i) = L_n(V, X_i)`
|
||||||
|
vec L = y2 - square(y1);
|
||||||
|
// "global" loss
|
||||||
|
double loss = mean(L);
|
||||||
|
// `G = \nabla_V L_n(V)` a.k.a. gradient of `L` with respect to `V`
|
||||||
|
vec scale = (repelem(L, n, 1) - square(Y_rep - repelem(y1, n, 1))) % W_vec % D_vec;
|
||||||
|
mat X_diff_scale = X_diff.each_col() % scale;
|
||||||
|
mat G = X_diff_scale.t() * X_diff * V;
|
||||||
|
G *= -2.0 / (h * h * n);
|
||||||
|
|
||||||
|
return G;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)`
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
arma::mat grad(const arma::mat& X,
|
||||||
|
const arma::vec& Y,
|
||||||
|
const arma::mat& V,
|
||||||
|
const double h
|
||||||
|
) {
|
||||||
|
using namespace arma;
|
||||||
|
|
||||||
|
// get dimensions
|
||||||
|
const uword n = X.n_cols;
|
||||||
|
const uword p = X.n_rows;
|
||||||
|
const uword q = V.n_cols;
|
||||||
|
|
||||||
|
// compute orthogonal projection
|
||||||
|
mat Q = -(V * V.t());
|
||||||
|
Q.diag() += 1.0;
|
||||||
|
|
||||||
|
// distance matrix `D(i, j) = d_i(V, X_j)`
|
||||||
|
mat D(n, n, fill::zeros);
|
||||||
|
// weights matrix `W(i, j) = w_i(V, X_j)`
|
||||||
|
mat W(n, n, fill::ones); // exp(0) = 1
|
||||||
|
|
||||||
|
double mvm = 0.0; // Matrix Vector Mult.
|
||||||
|
double sos = 0.0; // Sum Of Squares
|
||||||
|
// double wcn = 0.0; // Weights Column Norm
|
||||||
|
for (uword j = 0; j + 1 < n; ++j) {
|
||||||
|
for (uword i = j + 1; i < n; ++i) {
|
||||||
|
sos = 0.0;
|
||||||
|
for (uword k = 0; k < p; ++k) {
|
||||||
|
mvm = 0.0;
|
||||||
|
for (uword l = 0; l < p; ++l) {
|
||||||
|
mvm += Q(k, l) * (X(l, i) - X(l, j));
|
||||||
|
}
|
||||||
|
sos += mvm * mvm;
|
||||||
|
}
|
||||||
|
D(i, j) = D(j, i) = sos;
|
||||||
|
W(i, j) = W(j, i) = std::exp(sos / (-2. * h));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// column normalization of weights `W`
|
||||||
|
double col_sum;
|
||||||
|
for (uword j = 0; j < n; ++j) {
|
||||||
|
col_sum = 0.0;
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
col_sum += W(i, j);
|
||||||
|
}
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
W(i, j) /= col_sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// weighted first, secend order responce means `y1`, `y2`
|
||||||
|
// and component wise Loss `L(i) = L_n(V, X_i)`
|
||||||
|
vec y1(n);
|
||||||
|
vec y2(n);
|
||||||
|
vec L(n);
|
||||||
|
double tmp;
|
||||||
|
double loss = 0.0;
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
mvm = 0.0; // Matrix Vector Mult.
|
||||||
|
sos = 0.0; // Sum Of Squared (weighted)
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
mvm += (tmp = W(k, i) * Y(k));
|
||||||
|
sos += tmp * Y(k); // W(k, i) * Y(k) * Y(k)
|
||||||
|
}
|
||||||
|
y1(i) = mvm;
|
||||||
|
y2(i) = sos;
|
||||||
|
loss += (L(i) = sos - (mvm * mvm)); // L_n(V, X_i) = y2(i) - y1(i)^2
|
||||||
|
}
|
||||||
|
loss /= n;
|
||||||
|
|
||||||
|
mat S(n, n);
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
for (uword l = 0; l < n; ++l) {
|
||||||
|
tmp = Y(k) - y1(l);
|
||||||
|
S(k, l) = (L(l) - (tmp * tmp)) * W(k, l) * D(k, l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// gradient
|
||||||
|
mat G(p, q);
|
||||||
|
double factor = -2. / (n * h * h);
|
||||||
|
double gij;
|
||||||
|
for (uword j = 0; j < q; ++j) {
|
||||||
|
for (uword i = 0; i < p; ++i) {
|
||||||
|
gij = 0.0;
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
for (uword l = 0; l < n; ++l) {
|
||||||
|
mvm = 0.0;
|
||||||
|
for (uword m = 0; m < p; ++m) {
|
||||||
|
mvm += (X(m, l) - X(m, k)) * V(m, j);
|
||||||
|
}
|
||||||
|
// gij += (S(k, l) + S(l, k)) * (X(i, l) - X(i, k));
|
||||||
|
gij += S(k, l) * (X(i, l) - X(i, k)) * mvm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
G(i, j) = factor * gij;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return G;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ATTENTION: assumed `X` stores X_i's column wise, `X = cbind(X_1, X_2, ..., X_n)`
|
||||||
|
// [[Rcpp::export]]
|
||||||
|
arma::mat grad_p(const arma::mat& X_ref,
|
||||||
|
const arma::vec& Y_ref,
|
||||||
|
const arma::mat& V_ref,
|
||||||
|
const double h
|
||||||
|
) {
|
||||||
|
using arma::uword;
|
||||||
|
|
||||||
|
// get dimensions
|
||||||
|
const uword n = X_ref.n_cols;
|
||||||
|
const uword p = X_ref.n_rows;
|
||||||
|
const uword q = V_ref.n_cols;
|
||||||
|
|
||||||
|
const double* X = X_ref.memptr();
|
||||||
|
const double* Y = Y_ref.memptr();
|
||||||
|
const double* V = V_ref.memptr();
|
||||||
|
|
||||||
|
// allocate memory for entire algorithm
|
||||||
|
// Q (p,p) D+W+S (n,n) y1+L (n) G (p,q)
|
||||||
|
uword memsize = (p * p) + (3 * n * n) + (2 * n) + (p * q);
|
||||||
|
double* mem = static_cast<double*>(malloc(sizeof(double) * memsize));
|
||||||
|
|
||||||
|
// assign pointer to memory associated memory area
|
||||||
|
double* Q = mem;
|
||||||
|
double* D = Q + (p * p);
|
||||||
|
double* W = D + (n * n);
|
||||||
|
double* S = W + (n * n);
|
||||||
|
double* y1 = S + (n * n);
|
||||||
|
double* L = y1 + n;
|
||||||
|
double* G = L + n;
|
||||||
|
|
||||||
|
// compute orthogonal projection
|
||||||
|
double sum;
|
||||||
|
// compute orthogonal projection `Q = I_p - V V'`
|
||||||
|
for (uword j = 0; j < p; ++j) {
|
||||||
|
for (uword i = j; i < p; ++i) {
|
||||||
|
sum = 0.0;
|
||||||
|
for (uword k = 0; k < q; ++k) {
|
||||||
|
sum += V[k * p + i] * V[k * p + j];
|
||||||
|
}
|
||||||
|
if (i == j) {
|
||||||
|
Q[j * p + i] = 1.0 - sum;
|
||||||
|
} else {
|
||||||
|
Q[j * p + i] = Q[i * p + j] = -sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set `diag(D) = 0` and `diag(W) = 1`.
|
||||||
|
for (uword i = 0; i < n * n; i += n + 1) {
|
||||||
|
D[i] = 0.0;
|
||||||
|
W[i] = 1.0;
|
||||||
|
}
|
||||||
|
// components (using symmetrie) of `D` and `W` (except `diag`)
|
||||||
|
double mvm = 0.0; // Matrix Vector Mult.
|
||||||
|
for (uword j = 0; j + 1 < n; ++j) {
|
||||||
|
for (uword i = j + 1; i < n; ++i) {
|
||||||
|
sum = 0.0;
|
||||||
|
for (uword k = 0; k < p; ++k) {
|
||||||
|
mvm = 0.0;
|
||||||
|
for (uword l = 0; l < p; ++l) {
|
||||||
|
mvm += Q[l * p + k] * (X[i * p + l] - X[j * p + l]);
|
||||||
|
}
|
||||||
|
sum += mvm * mvm;
|
||||||
|
}
|
||||||
|
D[j * n + i] = D[i * n + j] = sum;
|
||||||
|
W[j * n + i] = W[i * n + j] = std::exp(sum / (-2. * h));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// column normalization of weights `W`
|
||||||
|
for (uword j = 0; j < n; ++j) {
|
||||||
|
sum = 0.0;
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
sum += W[j * n + i];
|
||||||
|
}
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
W[j * n + i] /= sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// weighted first, secend order responce means `y1`, `y2`
|
||||||
|
// and component wise Loss `L(i) = L_n(V, X_i)`
|
||||||
|
double tmp;
|
||||||
|
double loss = 0.0;
|
||||||
|
for (uword i = 0; i < n; ++i) {
|
||||||
|
mvm = 0.0; // Matrix Vector Mult.
|
||||||
|
sum = 0.0; // Sum Of (weighted) Squares
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
mvm += (tmp = W[i * n + k] * Y[k]);
|
||||||
|
sum += tmp * Y[k];
|
||||||
|
}
|
||||||
|
y1[i] = mvm;
|
||||||
|
loss += (L[i] = sum - (mvm * mvm));
|
||||||
|
}
|
||||||
|
loss /= n;
|
||||||
|
|
||||||
|
// scaling for gradient summation
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
for (uword l = 0; l < n; ++l) {
|
||||||
|
tmp = Y[k] - y1[l];
|
||||||
|
S[l * n + k] = (L[l] - (tmp * tmp)) * W[l * n + k] * D[l * n + k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// gradient
|
||||||
|
double factor = -2. / (n * h * h);
|
||||||
|
for (uword j = 0; j < q; ++j) {
|
||||||
|
for (uword i = 0; i < p; ++i) {
|
||||||
|
sum = 0.0;
|
||||||
|
for (uword k = 0; k < n; ++k) {
|
||||||
|
for (uword l = 0; l < n; ++l) {
|
||||||
|
mvm = 0.0;
|
||||||
|
for (uword m = 0; m < p; ++m) {
|
||||||
|
mvm += (X[l * p + m] - X[k * p + m]) * V[j * p + m];
|
||||||
|
}
|
||||||
|
sum += S[l * n + k] * (X[l * p + i] - X[k * p + i]) * mvm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
G[j * p + i] = factor * sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// construct 'Armadillo' matrix from `G`s memory area
|
||||||
|
arma::mat Grad(G, p, q);
|
||||||
|
|
||||||
|
// free entire allocated memory block
|
||||||
|
free(mem);
|
||||||
|
|
||||||
|
return Grad;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*** R
|
||||||
|
|
||||||
|
suppressMessages(library(microbenchmark))
|
||||||
|
|
||||||
|
cat("Start timing:\n")
|
||||||
|
time.start <- Sys.time()
|
||||||
|
|
||||||
|
rStiefl <- function(p, q) {
|
||||||
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||||||
|
}
|
||||||
|
|
||||||
|
## compare runtimes
|
||||||
|
n <- 200L
|
||||||
|
p <- 12L
|
||||||
|
q <- p - 2L
|
||||||
|
X <- matrix(rnorm(n * p), n, p)
|
||||||
|
Xt <- t(X)
|
||||||
|
X_diff <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
|
||||||
|
Y <- rnorm(n)
|
||||||
|
Y_rep <- kronecker(rep(1, n), Y) # repmat(Y, n, 1)
|
||||||
|
h <- 1. / 4.;
|
||||||
|
V <- rStiefl(p, q)
|
||||||
|
|
||||||
|
# A <- arma_grad(X, X_diff, Y, Y_rep, V, h)
|
||||||
|
# G1 <- grad(Xt, Y, V, h)
|
||||||
|
# G2 <- grad_p(Xt, Y, V, h)
|
||||||
|
#
|
||||||
|
# print(round(A[1:6, 1:6], 3))
|
||||||
|
# print(round(G1[1:6, 1:6], 3))
|
||||||
|
# print(round(G2[1:6, 1:6], 3))
|
||||||
|
# print(round(abs(A - G1), 9))
|
||||||
|
# print(round(abs(A - G2), 9))
|
||||||
|
#
|
||||||
|
# q()
|
||||||
|
|
||||||
|
|
||||||
|
comp <- function (A, B, tol = sqrt(.Machine$double.eps)) {
|
||||||
|
max(abs(A - B)) < tol
|
||||||
|
}
|
||||||
|
comp.all <- function (res) {
|
||||||
|
if (length(res) < 2) {
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
res.one = res[[1]]
|
||||||
|
for (i in 2:length(res)) {
|
||||||
|
if (!comp(res.one, res[[i]])) {
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
counter <- 0
|
||||||
|
setup.tests <- function () {
|
||||||
|
if ((counter %% 3) == 0) {
|
||||||
|
X <<- matrix(rnorm(n * p), n, p)
|
||||||
|
Xt <<- t(X)
|
||||||
|
X_diff <<- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
|
||||||
|
Y <<- rnorm(n)
|
||||||
|
Y_rep <<- kronecker(rep(1, n), Y) # arma::repmat(Y, n, 1)
|
||||||
|
h <<- 1. / 4.;
|
||||||
|
V <<- rStiefl(p, q)
|
||||||
|
}
|
||||||
|
counter <<- counter + 1
|
||||||
|
}
|
||||||
|
(mbm <- microbenchmark(
|
||||||
|
arma = arma_grad(X, X_diff, Y, Y_rep, V, h),
|
||||||
|
grad = grad(Xt, Y, V, h),
|
||||||
|
grad_p = grad_p(Xt, Y, V, h),
|
||||||
|
check = comp.all,
|
||||||
|
setup = setup.tests(),
|
||||||
|
times = 100L
|
||||||
|
))
|
||||||
|
|
||||||
|
cat("Total time:", format(Sys.time() - time.start), '\n')
|
||||||
|
|
||||||
|
boxplot(mbm, las = 2, xlab = NULL)
|
||||||
|
|
||||||
|
*/
|
|
@ -1,14 +0,0 @@
|
||||||
Package: samplePackage
|
|
||||||
Title: A Sample for creating R Packages
|
|
||||||
Version: 0.0.0.0001
|
|
||||||
Authors@R:
|
|
||||||
person(given = "First",
|
|
||||||
family = "Last",
|
|
||||||
role = c("aut", "cre"),
|
|
||||||
email = "first.last@example.com",
|
|
||||||
comment = structure("YOUR-ORCID-ID", .Names = "ORCID"))
|
|
||||||
Description: What the package does (one paragraph).
|
|
||||||
License: What license it uses
|
|
||||||
Encoding: UTF-8
|
|
||||||
LazyData: true
|
|
||||||
RoxygenNote: 6.1.1
|
|
|
@ -1,5 +0,0 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
|
||||||
|
|
||||||
export(area.circle)
|
|
||||||
exportClasses(circleS4)
|
|
||||||
exportClasses(rectangleS4)
|
|
|
@ -1,40 +0,0 @@
|
||||||
|
|
||||||
# Constructors
|
|
||||||
circle <- function(r) structure(list(r=r), class="circle")
|
|
||||||
rectangle <- function(a, b) {
|
|
||||||
# equivalent to > structure(list(a=a, b=b), class="rectangle")
|
|
||||||
x <- list(a=a, b=b)
|
|
||||||
class(x) <- "rectangle"
|
|
||||||
x # return
|
|
||||||
}
|
|
||||||
|
|
||||||
# generics (custom)
|
|
||||||
area <- function(shape) UseMethod("area")
|
|
||||||
diam <- function(shape) UseMethod("diam")
|
|
||||||
|
|
||||||
# methods (implementation)
|
|
||||||
print.circle <- function(circle, ...) with(circle, cat("< circle r =", r, ">\n"))
|
|
||||||
#' Computes area of a circle object
|
|
||||||
#'
|
|
||||||
#' @param circle Instance of a circle.
|
|
||||||
#'
|
|
||||||
#' @returns Area of the given \code{circle}.
|
|
||||||
#' @export
|
|
||||||
area.circle <- function(circle) with(circle, pi * r^2)
|
|
||||||
diam.circle <- function(circle) with(circle, 2 * r)
|
|
||||||
|
|
||||||
print.rectangle <- function(rect, ...) {
|
|
||||||
cat("<rectangle a =", rect$a, ", b =", rect$b, ">\n", set="")
|
|
||||||
}
|
|
||||||
area.rectangle <- function(rect) rect$a * rect$b
|
|
||||||
diam.rectangle <- function(rect) sqrt(rect$a^2 + rect$b^2)
|
|
||||||
|
|
||||||
# usage
|
|
||||||
circ <- circle(2)
|
|
||||||
rect <- rectangle(a = 1, b = 2)
|
|
||||||
|
|
||||||
print(area(circ))
|
|
||||||
print(diam(rect))
|
|
||||||
|
|
||||||
print(circ)
|
|
||||||
print(rect)
|
|
|
@ -1,58 +0,0 @@
|
||||||
library(methods)
|
|
||||||
|
|
||||||
## Class definitions
|
|
||||||
|
|
||||||
#' Represents a circle shape
|
|
||||||
#'
|
|
||||||
#' @param r radius of the circle
|
|
||||||
#'
|
|
||||||
#' @returns S4 object
|
|
||||||
#' @export
|
|
||||||
circleS4 <- setClass("circleS4", slots = list(r = "numeric"))
|
|
||||||
|
|
||||||
#' Represents a rectangle shape
|
|
||||||
#'
|
|
||||||
#' @param w width of the rectangle
|
|
||||||
#' @param h height of the rectangle
|
|
||||||
#'
|
|
||||||
#' @returns S4 object
|
|
||||||
#' @export
|
|
||||||
rectangleS4 <- setClass("rectangleS4", slots = list(w = "numeric", h = "numeric"))
|
|
||||||
|
|
||||||
|
|
||||||
## setting class methods
|
|
||||||
# redefine generic methods
|
|
||||||
setMethod("show", "circleS4", function(object) {
|
|
||||||
cat("< circle r =", object@r, ">\n")
|
|
||||||
})
|
|
||||||
setMethod("show", signature="rectangleS4", function(object) {
|
|
||||||
cat("<rectangle w =", rect@w, ", h =", rect@h, ">\n", set="")
|
|
||||||
})
|
|
||||||
|
|
||||||
## define new generics for class assignement
|
|
||||||
# create a method to assign the value of a coordinate
|
|
||||||
setGeneric("area", def = function(object) standardGeneric("area") )
|
|
||||||
setGeneric(name = "diam", def = function(object) {
|
|
||||||
standardGeneric("diam")
|
|
||||||
})
|
|
||||||
|
|
||||||
## assigne (custom) generics implementation to classes
|
|
||||||
setMethod("area", "circleS4", function(object) pi * object@r^2)
|
|
||||||
setMethod("diam", "circleS4", function(object) 2 * object@r)
|
|
||||||
|
|
||||||
setMethod("area", signature=list(object = "rectangleS4"), function(object) {
|
|
||||||
object@w * object@h
|
|
||||||
})
|
|
||||||
setMethod("diam", signature=list(object = "rectangleS4"), function(object) {
|
|
||||||
sqrt(rect@w^2 + rect@h^2)
|
|
||||||
})
|
|
||||||
|
|
||||||
# usage
|
|
||||||
circ <- circleS4(r = 2)
|
|
||||||
rect <- rectangleS4(w = 1, h = 2)
|
|
||||||
|
|
||||||
print(area(circ))
|
|
||||||
print(diam(rect))
|
|
||||||
|
|
||||||
print(circ)
|
|
||||||
print(rect)
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ~$ Rscript validate.R
|
||||||
|
|
||||||
|
# load MAVE package for comparison
|
||||||
|
library(MAVE)
|
||||||
|
# load (and compile) cve and dataset source
|
||||||
|
library(Rcpp)
|
||||||
|
cat("Compiling source 'cve_V1.cpp'\n")
|
||||||
|
Rcpp::sourceCpp('cve_V1.cpp', embeddedR = FALSE)
|
||||||
|
# load dataset sampler
|
||||||
|
source('CVE/R/datasets.R')
|
||||||
|
|
||||||
|
# set default nr of simulations
|
||||||
|
nr.sim <- 25
|
||||||
|
|
||||||
|
#' Orthogonal projection to sub-space spanned by `B`
|
||||||
|
#'
|
||||||
|
#' @param B Matrix
|
||||||
|
#' @return Orthogonal Projection Matrix
|
||||||
|
proj <- function(B) {
|
||||||
|
B %*% solve(t(B) %*% B) %*% t(B)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Compute nObs given dataset dimension \code{n}.
|
||||||
|
#'
|
||||||
|
#' @param n Number of samples
|
||||||
|
#' @return Numeric estimate of \code{nObs}
|
||||||
|
nObs <- function (n) { n^0.5 }
|
||||||
|
|
||||||
|
# dataset names
|
||||||
|
dataset.names <- c("M1", "M2", "M3", "M4", "M5") # M4 not implemented jet
|
||||||
|
|
||||||
|
## prepare "logging"
|
||||||
|
# result error, time, ... data.frame's
|
||||||
|
error <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
|
||||||
|
time <- matrix(nrow = nr.sim, ncol = 2 * length(dataset.names))
|
||||||
|
# convert to data.frames
|
||||||
|
error <- as.data.frame(error)
|
||||||
|
time <- as.data.frame(time)
|
||||||
|
# set names
|
||||||
|
names(error) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
|
||||||
|
names(time) <- kronecker(c("CVE.", "MAVE."), dataset.names, paste0)
|
||||||
|
|
||||||
|
# get current time
|
||||||
|
start.time <- Sys.time()
|
||||||
|
## main comparison loop (iterate `nr.sim` times for each dataset)
|
||||||
|
for (i in seq_along(dataset.names)) {
|
||||||
|
for (j in 1:nr.sim) {
|
||||||
|
name <- dataset.names[i]
|
||||||
|
# reporting progress
|
||||||
|
cat("\rRunning Test (", name, j , "):",
|
||||||
|
(i - 1) * nr.sim + j, "/", length(dataset.names) * nr.sim,
|
||||||
|
" - Time since start:", format(Sys.time() - start.time), "\033[K")
|
||||||
|
# create new dataset
|
||||||
|
ds <- dataset(name)
|
||||||
|
k <- ncol(ds$B) # real dim
|
||||||
|
# call CVE
|
||||||
|
cve.time <- system.time(
|
||||||
|
cve.res <- cve_cpp(ds$X, ds$Y,
|
||||||
|
k = k,
|
||||||
|
nObs = nObs(nrow(ds$X)),
|
||||||
|
verbose = FALSE)
|
||||||
|
)
|
||||||
|
# call MAVE
|
||||||
|
mave.time <- system.time(
|
||||||
|
mave.res <- mave(Y ~ .,
|
||||||
|
data = data.frame(X = ds$X, Y = ds$Y),
|
||||||
|
method = "meanMAVE")
|
||||||
|
)
|
||||||
|
# compute real and approximated sub-space projections
|
||||||
|
P <- proj(ds$B) # real
|
||||||
|
P.cve <- proj(cve.res$B)
|
||||||
|
P.mave <- proj(mave.res$dir[[k]])
|
||||||
|
# compute (and store) errors
|
||||||
|
error[j, paste0("CVE.", name)] <- norm(P - P.cve, 'F') / sqrt(2 * k)
|
||||||
|
error[j, paste0("MAVE.", name)] <- norm(P - P.mave, 'F') / sqrt(2 * k)
|
||||||
|
# store run-times
|
||||||
|
time[j, paste0("CVE.", name)] <- cve.time["elapsed"]
|
||||||
|
time[j, paste0("MAVE.", name)] <- mave.time["elapsed"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n\n## Time [sec] Means:\n")
|
||||||
|
print(colMeans(time))
|
||||||
|
cat("\n## Error Means:\n")
|
||||||
|
print(colMeans(error))
|
||||||
|
|
||||||
|
len <- length(dataset.names)
|
||||||
|
pdf("plots/Rplots_validate.pdf")
|
||||||
|
boxplot(as.matrix(error),
|
||||||
|
main = paste0("Error (nr.sim = ", nr.sim, ")"),
|
||||||
|
ylab = expression(error == group("||", P[B] - P[hat(B)], "||")[F] / sqrt(2*k)),
|
||||||
|
las = 2,
|
||||||
|
at = c(1:len, 1:len + len + 1)
|
||||||
|
)
|
||||||
|
boxplot(as.matrix(time),
|
||||||
|
main = paste0("Time (nr.sim = ", nr.sim, ")"),
|
||||||
|
ylab = "time [sec]",
|
||||||
|
las = 2,
|
||||||
|
at = c(1:len, 1:len + len + 1)
|
||||||
|
)
|
||||||
|
cat("Plot saved to 'plots/Rplots_validate.pdf'\n")
|
||||||
|
suppressMessages(dev.off())
|
Loading…
Reference in New Issue