2
0
Fork 0

cleanup: removed old and legacy code

This commit is contained in:
Daniel Kapla 2019-12-16 17:40:30 +01:00
parent 9edefe994d
commit d5cd6075a9
13 changed files with 0 additions and 2036 deletions

View File

@ -1,66 +0,0 @@
#initialize model parameterss
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-2 #dimension of B=b
qs<-dim-truedim # dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
b<-cbind(b1,b2)
P<-b%*%t(b)
sigma=0.5 #error standard deviation
N<-200 #sample size
K<-30 #number of arbitrary starting values for curvilinear optimization
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
##initailaize true covariancematrix of X
Sig<-mat.or.vec(dim,dim)
for (i in 1:dim){
for (j in 1:dim){
Sig[i,j]<-sigma^abs(i-j)
}
}
Sroot<-chol(Sig)
M1_JASA<-mat.or.vec(m,8)
colnames(M1_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
#link function for M1
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
for (i in 1:m){
#generate dat according to M1
dat<-creat_sample_nor_nonstand(b,N,fM1,t(Sroot),sigma)
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
# calculate distance between true B and estimated B
M1_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M1_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M1_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M1_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
#csMAVE
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
M1_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
#phd
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
M1_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
#sir
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
M1_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
#save
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
M1_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
print(paste(i,paste('/',m)))
}
boxplot(M1_JASA[,]/sqrt(2*truedim),names=colnames(M1_JASA),ylab='err',main='M1')
summary(M1_JASA[,])

View File

@ -1,60 +0,0 @@
#initialize model parameterss
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-2 #dimension of B=b
qs<-dim-truedim # dimension of orthogonal complement of B
b1=c(1,rep(0,dim-1))
b2=c(0,1,rep(0,dim-2))
b<-cbind(b1,b2) #B
P<-b%*%t(b)
sigma=0.5 #error standard deviation
N<-200 #sample size
K<-30 #number of arbitrary starting values for curvilinear optimization
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
M2_JASA<-mat.or.vec(m,8)
colnames(M2_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
f_link<-function(x){return(x[1]*(x[2])^2)}
for (i in 1:m){
#generate dat according to M2
dat<-creat_sample(b,N,f_link,sigma)
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#CVE
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
# calculate distance between true B and estimated B
M2_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M2_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M2_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M2_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
#csMAVE
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
M2_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
#phd
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
M2_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
#sir
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
M2_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
#save
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
M2_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
print(paste(i,paste('/',m)))
}
boxplot(M2_JASA[,]/sqrt(2*truedim),names=colnames(M2_JASA),ylab='err',main='M2')
summary(M2_JASA[,])

View File

@ -1,65 +0,0 @@
#initialize model parameterss
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-1#dimension of B=b
qs<-dim-truedim# dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b<-b1#B
P<-b%*%t(b)
sigma=0.5#error standard deviation
N<-100#sample size
K<-30#number of arbitrary starting values for curvilinear optimization
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
##initailaize true covariancematrix of X
Sig<-mat.or.vec(dim,dim)
for (i in 1:dim){
for (j in 1:dim){
Sig[i,j]<-sigma^abs(i-j)
}
}
Sroot<-chol(Sig)
M3_JASA<-mat.or.vec(m,8)
colnames(M3_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
for (i in 1:m){
#generate dat according to M3
dat<-creat_sample_nor_nonstand(b,N,cos,t(Sroot),sigma) # distribution
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#CVE
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
# calculate distance between true B and estimated B
M3_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M3_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M3_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M3_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
#csMAVE
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
M3_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
#phd
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
M3_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
#sir
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
M3_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
#save
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
M3_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
print(paste(i,paste('/',m)))
}
boxplot(M3_JASA[,]/sqrt(2*truedim),names=colnames(M3_JASA),ylab='err',main='M3')
summary(M3_JASA[,])

View File

@ -1,76 +0,0 @@
#initialize model parameters
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-1#dimension of B=b
qs<-dim-truedim# dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b<-b1#B
P<-b%*%t(b)
sigma=0.5#error standard deviation
N<-100#sample size
K<-30#number of arbitrary starting values for curvilinear optimization
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
para<-seq(0,1.5,0.5)#model parameter corresponding to lambda in M4
mix_prob<-seq(0.3,0.5,0.1) #model parameters corresponding to p_{mix}
M4_JASA<-array(data=NA,dim=c(length(para),length(mix_prob),m,5))
for(o in 1:length(mix_prob)){
for (u in 1:length(para)){
colnames(M4_JASA[u,o,,])<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE')
for (i in 1:m){
#generate dat according to M4 with p_{mix}=mix_prob[o] and lambda=para[u]
if(u==1){dat<-creat_sample(b,N,cos,sigma)}
else{dat<-creat_sample_noneliptic_gausmixture(b,N,cos,sigma,mix_prob[o],dispers_para = para[u])}
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#CVE
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
# calculate distance between true B and estimated B
M4_JASA[u,o,i,1]<-subspace_dist(b,CVE1$est_base[,1:truedim])
M4_JASA[u,o,i,2]<-subspace_dist(b,CVE2$est_base[,1:truedim])
M4_JASA[u,o,i,3]<-subspace_dist(b,CVE3$est_base[,1:truedim])
#csMAVE
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M4_JASA[u,o,i,4]<-subspace_dist(b,mod_t$dir[[truedim]])
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
M4_JASA[u,o,i,5]<-subspace_dist(b,mod_t2$dir[[truedim]])
}
print(paste(u,paste('/',length(para))))
}
}
par(mfrow=c(3,4))
boxplot(Exp_cook_square[1,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','meanMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[2,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[3,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[4,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[1,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[2,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[3,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[4,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[1,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[2,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[3,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[4,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
#boxplot(Exp_cook_square[5,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[5]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[1,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[2,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[3,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
boxplot(Exp_cook_square[4,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
par(mfrow=c(1,1))

View File

@ -1,64 +0,0 @@
#initialize model parameters
m<-500#number of replications in simulation
dim<-12#dimension of random variable X
truedim<-1#dimension of B=b
qs<-dim-truedim# dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b<-b1#B
P<-b%*%t(b)
sigma=0.5#error standard deviation
N<-42#sample size
K<-70#number of arbitrary starting values for curvilinear optimization
MAXIT<-50#maximal number of iterations in curvilinear search algorithm
f_ln1<-function(x){return(2*log(abs(x)+1))}#link function
M5_JASA_lowN<-mat.or.vec(m,8)
colnames(M5_JASA_lowN)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
for (i in 1:m){
#generate dat according to M5
dat<-creat_sample(b,N,f_ln1,sigma)
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#CVE
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
# calculate distance between true B and estimated B
M5_JASA_lowN[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M5_JASA_lowN[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M5_JASA_lowN[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M5_JASA_lowN[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
#csMAVE
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
M5_JASA_lowN[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
#phd
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
M5_JASA_lowN[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
#sir
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
M5_JASA_lowN[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
#save
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
M5_JASA_lowN[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
print(paste(i,paste('/',m)))
}
boxplot(M5_JASA_lowN[,]/sqrt(2*truedim),names=colnames(M5_JASA_lowN),ylab='err',main='M5')
summary(M5_JASA_lowN[,])
####
# par(mfrow=c(1,2))
# boxplot(M3_JASA_L[,]/sqrt(2*1),names=colnames(M3_JASA_L),ylab='err',main='M3')
# boxplot(M5_JASA_lowN[1:100,-c(4,5,6)]/sqrt(2*1),names=colnames(M5_JASA_lowN)[-c(4,5,6)],ylab='err',main='M5')
# par(mfrow=c(1,1))

View File

@ -1,113 +0,0 @@
#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source")
install.packages(file.choose(), repos=NULL, type="source")
dim<-12
N<-100
s<-0.5
dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
test<-cve(Y~.,data=as.data.frame(dat),k=1)
##############
#initialize model parameterss
m<-100 #number of replications in simulation
dim<-12 #dimension of random variable X
truedim<-2 #dimension of B=b
qs<-dim-truedim # dimension of orthogonal complement of B
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
b<-cbind(b1,b2)
#b<-b1
P<-b%*%t(b)
sigma=0.5 #error standard deviation
N<-70 #sample size
K<-30 #number of arbitrary starting values for curvilinear optimization
MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
var_vec<-mat.or.vec(m,12)
M1_weight<-mat.or.vec(m,13)
#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
#link function for M1
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
for (i in 1:m){
#generate dat according to M1
dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma)
#est sample covariance matrix
Sig_est<-est_varmat(dat[,-1])
#est trace of sample covariance matrix
tr<-var_tr(Sig_est)
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#
CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
#
#
var_vec[i,1]<-CVE1$var
var_vec[i,2]<-CVE2$var
var_vec[i,3]<-CVE3$var
var_vec[i,4]<-CVE4$var
var_vec[i,5]<-CVE5$var
var_vec[i,6]<-CVE6$var
#
var_vec[i,7]<-CVE7$var
var_vec[i,8]<-CVE8$var
var_vec[i,9]<-CVE9$var
CVE1$est_base<-fill_base(CVE1$est_base)
CVE2$est_base<-fill_base(CVE2$est_base)
CVE3$est_base<-fill_base(CVE3$est_base)
CVE4$est_base<-fill_base(CVE4$est_base)
CVE5$est_base<-fill_base(CVE5$est_base)
CVE6$est_base<-fill_base(CVE6$est_base)
CVE7$est_base<-fill_base(CVE7$est_base)
CVE8$est_base<-fill_base(CVE8$est_base)
CVE9$est_base<-fill_base(CVE9$est_base)
# calculate distance between true B and estimated B
M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
# CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
#M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
var_vec[i,10]<-CVE1_Rcpp$loss
var_vec[i,11]<-CVE2_Rcpp$loss
var_vec[i,12]<-CVE3_Rcpp$loss
#calculate orthogonal complement of Vhat_k
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
#meanMAVE
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
print(paste(i,paste('/',m)))
}
boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
summary(M1_weight[1:(i-1),])

View File

@ -1,189 +0,0 @@
library("mda") #library for mars
local_linear<-function(x,h,dat,beta){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
X<-X%*%beta
x<-x%*%beta#beta%*%x
D_mat<-cbind(rep(1,N),X)
if (is.vector(X)){
dim<-1
d<-abs(X-rep(x,N))
}
else{
dim<-length(X[1,])
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
}
K<-diag(dnorm(d/h)/dnorm(0))
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
return(pred)
}
##### performs estimation of small dimesnion by CV with local linear forward regression
est_dim_CV<-function(Blist,dat,h_loclin=NULL,dim.max,median_use=F,method_mars=F){
#standardize regressors by symmetric root of inverse covariance mat
Sig<-est_varmat(dat[,-1])
eig_dec<-eigen(Sig)
Sroot_inv<-eig_dec$vectors%*%((diag(eig_dec$values^(-1/2))))%*%t(eig_dec$vectors)
dat[,-1]<-as.matrix(dat[,-1])%*%Sroot_inv
N<-length(dat[,1])
dim<-length(dat[1,-1])
MSE<-mat.or.vec(N,dim.max)
for(u in 1:dim.max){
beta<-Blist[[u]]
if(is.null(h_loclin)){
#h_loclin<-(1/N)^(1/(3+2*u))
h_loclin<-1.2*N^(-1/(4+u))#(1/N)^(1/(3+2*j))
}
for(i in 1:N){
x<-dat[i,-1]
if(method_mars==F){MSE[i,u]<-(dat[i,1]-local_linear(x,h_loclin,as.matrix(dat[-i,]),beta))^2} #predict with local linear
if(method_mars==T){
dat_fit<-dat[-i,-1]%*%beta
X_new<-dat[i,-1]%*%beta
mars_mod<-mars(dat_fit,dat[-i,1]) #fit mars model
MSE[i,u]<-(dat[i,1]-predict(mars_mod,X_new))^2 #predict with mars model
}#predict with mars
}
}
if(median_use){MSE_ave<-apply(MSE,2,median)}
else{MSE_ave<-colMeans(MSE)}
#apply(MSE,2,median)
est_dim<-which.min(MSE_ave)
ret<-list(est_dim = est_dim, MSE_ave = MSE_ave, MSE = MSE)
return(ret)
}
###########
test_for_dim<-function(Lmat,dim.max=NULL,alpha=0.1,method='greater'){
#Lmat... matrix with dimension N times dim.max with columns corresponding to aov_dat for dim 1,2,3,....,max.dim
if(is.null(dim.max)){dim.max<-length(Lmat[1,])}
pval<-mat.or.vec(dim.max-1,1)
est_dim<-dim.max
# for (j in 1:(dim.max-1)){
j<-1
while(est_dim==dim.max&j<dim.max){
if (method=='greater'){
pval[(j)]<-t.test(Lmat[,(j)],Lmat[,(j+1)],alternative = 'greater',paired=T)$p.value#mod[[1]][,5][1]
if(pval[j]<alpha){est_dim<-j}
}
if (method=='lower'){
pval[(j)]<-t.test(Lmat[,(j)],Lmat[,(j+1)],alternative = 'less',paired=F)$p.value#mod[[1]][,5][1]
if(pval[j]>alpha){est_dim<-j}
}
j<-j+1
}
ret<-list(est_dim,pval)
names(ret)<-c('estdim','pval')
return(ret)
}
####
##
test_for_dim_elbow<-function(Lmat){
dim.max<-length(Lmat[1,])
ave<-colMeans(Lmat)
boxplot(Lmat,xlab='k')
lines(seq(1,dim.max),ave,col='red')
# tmp<-cbind(ave,seq(1,dim.max))
# colnames(tmp)<-c('response','k')
# diff<-lm(response~k,data=as.data.frame(tmp))$coefficients[2]
#
# est_dim<-dim.max
# i<-1
# while(i<dim.max&est_dim==dim.max){
# if(ave[i+1]-ave[i]>diff){est_dim<-i}
# i<-i+1
# }
return(which.min(ave))
}
######
#Small simulation example for truedim =1
set.seed(21)
dim<-6
truedim<-1
N<-50
b<-c(1,0,0,0,0,0)
m<-20
est_dim<-mat.or.vec(m,7)
dim.max<-4
for(i in 1:m){
dat<-creat_sample(b,N,fsquare,0.5)
Blist<-list()
Lmat<-mat.or.vec(N,dim.max)
for(u in 1:dim.max){ #calculate B for different possible truedim's
m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection rule used that controlls number of points in a slice!!!!!!, also choose_h_2
Blist[[u]]<-fill_base(m1$est_base)[,1:u]
Lmat[,u]<-m1$aov_dat
}
#estimate truedim with different methods
est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim
est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim
est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F,method_mars = T)$est_dim
est_dim[i,4]<-test_for_dim(Lmat)$estdim
est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim
est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
est_dim[i,7]<-which.min(mave.dim(mod_t)$cv)
print(i)
}
length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with median)
#0.5
length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mean)
#0.9
length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mars and mean)
#0.8
length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='greater')
#0.0
length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='lower')
#0.05
length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow)
#1
length(which(est_dim[,7]==truedim))/m #fraction of where dimension is estimated correctly with mave
#0.95
##########
#Small simulation example for truedim =2
set.seed(21)
dim<-6
truedim<-2
N<-100
b<-cbind(c(1,rep(0,dim-1)),c(0,1,rep(0,dim-2)))
m<-20
est_dim<-mat.or.vec(m,7)
dim.max<-4
for(i in 1:m){
dat<-creat_sample(b,N,function(x){return(x[1]*x[2])},0.5)
Blist<-list()
Lmat<-mat.or.vec(N,dim.max)
for(u in 1:dim.max){
m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection used !!!!!!!!!!!!!!!!!!!!!, also choose_h_2
Blist[[u]]<-fill_base(m1$est_base)[,1:u]
Lmat[,u]<-m1$aov_dat
}
est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim
est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim
est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F,method_mars = T)$est_dim
est_dim[i,4]<-test_for_dim(Lmat)$estdim
est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim
est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
est_dim[i,7]<-which.min(mave.dim(mod_t)$cv)
print(i)
}
length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with median)
length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mean)
length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mars and mean)
length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='greater')
length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='lower')
length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow)

View File

@ -1,382 +0,0 @@
library("Matrix")
# library("fields")
# library('dr')
# library('mvtnorm')
# library(mgcv)
# library(pracma)
# library(RVAideMemoire)
library("MAVE")
# library(pls)
# library(LaplacesDemon)
# library(earth)
# library("latex2exp")
#################################################
##############################
################################
fsquare<-function(x){
ret<-0
for (h in 1:length(x)){ret<-ret+x[h]^2}
return(ret)
}
f_id<-function(x){return(x[1])}
vfun<-function(x)return(c(cos(x),sin(x)))
vfun_grad<-function(x)return(c(-sin(x),cos(x)))
f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)}
###############################
##creat sample for M2, b is B matrix of model, N..sample size,
#f...link function, sigma..standard deviation of error term
#produces sample with X standard normal with dimension to columnlength of b an
# epsilon centered normal with standard deviation sigma
creat_sample<-function(b,N,f,sigma){
if (is.vector(b)==1){dim<-length(b)}
if (is.vector(b)==0){dim<-length(b[,1])}
X<-mat.or.vec(N,dim)
Y<-mat.or.vec(N,1)
for (i in 1:N){
X[i,]<-rnorm(dim,0,1)
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
}
dat<-cbind(Y,X)
return(dat)
}
#####
creat_b<-function(dim,n){
temp<-mat.or.vec(dim,n)
if (n>1){
while (rankMatrix(temp,method = 'qr')< min(dim,n)){
for (i in 1:n){
temp[,i]<-rnorm(dim,0,1)
temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2))
}
}
}
if (n==1) {temp<-rnorm(dim,0,1)
temp<-temp/sqrt(sum(temp^2))}
return(temp)
}
###
### creat sample for M1,M3
## b is B matrix of model, N..sample size,
#f...link function, sigma..standard deviation of error term
#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an
# epsilon centered normal with standard deviation sigma
creat_sample_nor_nonstand<-function(b,N,f,S,sigma){
#Var(x)= SS'
if (is.vector(b)==1){dim<-length(b)}
if (is.vector(b)==0){dim<-length(b[,1])}
X<-mat.or.vec(N,dim)
Y<-mat.or.vec(N,1)
for (i in 1:N){
X[i,]<-rnorm(dim,0,1)%*%t(S)
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
}
dat<-cbind(Y,X)
return(dat)
}
####
####
###
####
####
#### creat sample for M4
##creat sample for M2, b is B matrix of model, N..sample size,
#f...link function, sigma..standard deviation of error term
#produces sample with X Gausian mixture
#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi)
#with dimension to columnlength of b an
# epsilon centered normal with standard deviation sigma
creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){
if (is.vector(b)==1){dim<-length(b)}
if (is.vector(b)==0){dim<-length(b[,1])}
X<-mat.or.vec(N,dim)
Y<-mat.or.vec(N,1)
for (i in 1:N){
p<-rbinom(1,1,pi)
X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1)
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
}
dat<-cbind(Y,X)
return(dat)
}
###
###############################
#estimates covariance matrix by ML estimate
# X...n times dim data matrix with rows corresponding to X_i
est_varmat<-function(X){
dim<-length(X[1,])
Sig<-mat.or.vec(dim,dim)#Cov(X)
X<-scale(X,center = T,scale = F)
Sig<-t(X)%*%X/length(X[,1])
return(Sig)
}
####
# normalizes a vector x such that it sums to 1
column_normalize<-function(x){return(x/sum(x))}
####
#squared 2-norm of a vector x
norm2<-function(x){return(sum(x^2))}
###
# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix
fill_base<-function(estb){
dim<-length(estb[,1])
k<-length(estb[1,])
P<-(diag(rep(1,dim))-estb%*%t(estb))
rk<-0
while (rk < dim){
tmp1<-P%*%creat_b(dim,1)
nor<-sqrt(sum(tmp1^2))
if(nor>10^(-4)){
estb<-cbind(tmp1/nor,estb)
rkn<-rankMatrix(estb,method = 'qr')
if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb))
rk<-rkn}
}
if(nor <=10^(-4)){rk<-0}
}
return(estb)
}
##################
###new stiefel optimization
#draws from the uniform measure of Stiefel(p,k)
#returns a p times k dimensional semiorthogonal matrix
stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))}
##### chooses shifting points
#auxiliary function for stielfel_opt
# X... N times dim data matrix
#S... sample covariance matrix
#0 < p <=1 fraction of data point used as shifting points
# if p=1 all data points used as shifting points
q_ind<-function(X,S,p){
dim<-length(X[1,])
N<-length(X[,1])
if (p < 1){
Sinv<-solve(S)
ind<-c()
bound<-qchisq(p,dim)
for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean
q<-length(ind)
if (q<=2){
q<-N
ind<-seq(1,N)}
}
else {
q<-N
ind<-seq(1,N)
}
ret<-list(q,ind)
names(ret)<-c('q','ind')
return(ret)
}
####
#calculates the trace of a quadratic matrix S
var_tr<-function(x){return(sum(diag(x)))}
####
#chooses bandwith h according to formula in paper
#dim...dimension of X vector
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
# N...sample size
#nObs... nObs in bandwith formula
#tr...trace of sample covariance matrix of X
choose_h_2<-function(dim,k,N,nObs,tr){
#h1<-qchisq(nObs/N,2*(dim-k))
h2<-qchisq((nObs-1)/(N-1),(dim-k))
return(h2*2*tr/dim)
}
####
#auxiliary function for stiefel_opt
#X... data matrix of random variable X
#q,ind... output of function q_ind
Xl_fun<-function(X,q,ind){
N<-length(X[,1])
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
return(Xl)
}
###########
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
# V... (dim times q) matrix
# Xl... output of Xl_fun
# dtemp...vector with pairwise distances |X_i - X_j|
# q...output of q_ind function
# Y... vector with N Y_i values
# if grad=T, gradient of L(V) also returned
LV<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-dnorm(d/h)/dnorm(0)
w<-matrix(w,N,q)
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
}
}
ret<-list(mean(sig),sig,grad)
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(mean(sig),sig)
names(ret)<-c('var','sig')
}
return(ret)
}
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
#consists of X data matrix, (i.e. dat=cbind(Y,X))
#h... bandwidth
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
#k0... number of arbitrary starting values
#p...fraction of data points used as shifting point
#maxit... number of maximal iterations in curvilinear search
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
#lambda_0...initial stepsize
#tol...tolerance for stoping iterations
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
#output:
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
#var...value of L_n(Vhat_k)
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
#count...number of iterations
#h...bandwidth
#count2...number of times sclack_para is active
stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
history <- matrix(NA, maxit, k0)
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
S<-est_varmat(X)
if(p<1){
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
tr<-var_tr(S)
if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
# Write to history
history[count, u] <- Lnew
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2, history)
names(ret)<-c('est_base','var','aov_dat','count','h','count2', 'history')
return(ret)
}
##### calculates distance between two subspaces
#i.e. |b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2
subspace_dist<-function(b1,b2){
#b1<-orth(b1)
#b2<-orth(b2)
P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1)
P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2)
return(sqrt(sum((P1-P2)^2)))
}
####performs local linaer regression
#x...datapoint at where estimated function should be evaluated
#h...bandwidth
#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns
#consists of X data matrix, (i.e. dat=cbind(Y,X))
#beta...projection matrix
local_linear<-function(x,h,dat,beta){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
X<-X%*%beta
x<-x%*%beta#beta%*%x
D_mat<-cbind(rep(1,N),X)
if (is.vector(X)){
dim<-1
d<-abs(X-rep(x,N))
}
else{
dim<-length(X[1,])
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
}
K<-diag(dnorm(d/h)/dnorm(0))
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
return(pred)
}

View File

@ -1,236 +0,0 @@
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)-rep(1,q)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-(kronecker(t(wn),rep(1,N)))##new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
}
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
names(ret)<-c('var','sig')
}
return(ret)
}
################
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}
#################MAVE, OPG, rMAVE, rOPG from Bing Li book
opg=function(x,y,d){
p=dim(x)[2];
n=dim(x)[1]
c0=2.34;
p0=max(p,3);
rn=n^(-1/(2*(p0+6)));
h=c0*n^(-(1/(p0+6)))
sig=diag(var(x));
x=apply(x,2,standvec)
kmat=kern(x,h);
bmat=numeric()
for(i in 1:dim(x)[1]){
wi=kmat[,i];
xi=cbind(1,t(t(x)-x[i,]))
bmat=cbind(bmat,wls(xi,y,wi)$b)}
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
return(diag(sig^(-1/2))%*%beta)
}
######################
wls=function(x,y,w){
n=dim(x)[1];
p=dim(x)[2]-1
out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean))
return(list(a=out[1],b=out[2:(p+1)]))
}
#################
kern=function(x,h){
x=as.matrix(x);
n=dim(x)[1]
k2=x%*%t(x);
k1=t(matrix(diag(k2),n,n));
k3=t(k1);
k=k1-2*k2+k3
return(exp(-(1/(2*h^2))*(k1-2*k2+k3)))
}
###############
standvec=function(x) return((x-mean(x))/sd(x))
##############
mave2=function(x,y,h,d,nit){
sig=diag(var(x));
n=dim(x)[1];
p=dim(x)[2]
x=apply(x,2,standvec);
beta=opg(x,y,d);#beta=opg(x,y,h,d);
kermat=kern(x,h)
for(iit in 1:nit){
b=numeric();
a=numeric();
for(i in 1:n){
wi=kermat[,i]/(apply(kermat,2,mean)[i])
ui=cbind(1,t(t(x)-x[i,])%*%beta)
out=wls(ui,y,wi);
a=c(a,out$a);b=cbind(b,out$b)}
out=0;out1=0;
for(i in 1:n){
xi=kronecker(t(t(x)-x[i,]),t(b[,i]))
yi=y-a[i];
wi=kermat[,i]/apply(kermat,2,mean)[i]
out=out+apply(xi*yi*wi,2,mean)
out1=out1+t(xi*wi)%*%xi/n}
beta=t(matrix(solve(out1)%*%out,d,p))
}
return(diag(sig^(-1/2))%*%beta)
}
######################
rmave=function(x,y,d,nit){
sig=diag(var(x));
n=dim(x)[1];
p=dim(x)[2]
x=apply(x,2,standvec)
c0=2.34;
p0=max(p,3);
h=c0*n^(-(1/(p0+6)));
rn=n^(-1/(2*(p0+6)))
beta=opg(x,y,d)
for(iit in 1:nit){
kermat=kern(x%*%beta,h);
mkermat=apply(kermat,2,mean)
b=numeric();a=numeric()
for(i in 1:n){
wi=kermat[,i]/mkermat[i];
ui=cbind(1,t(t(x)-x[i,])%*%beta)
out=wls(ui,y,wi);
a=c(a,out$a);b=cbind(b,out$b)
}
out=0;
out1=0
for(i in 1:n) {
xi=kronecker(t(t(x)-x[i,]),t(b[,i]));
yi=y-a[i]
wi=kermat[,i]/mkermat[i]
out=out+apply(xi*yi*wi,2,mean)
out1=out1+t(xi*wi)%*%xi/n}
beta=t(matrix(solve(out1)%*%out,d,p))
h=max(rn*h,c0*n^((-1/(d+4))))
}
return(diag(sig^(-1/2))%*%beta)
}
#########################
ropg=function(x,y,d,nit){
sig=diag(var(x));
x=apply(x,2,standvec);
p=dim(x)[2];
n=dim(x)[1]
c0=2.34;
p0=max(p,3);
rn=n^(-1/(2*(p0+6)));
h=c0*n^(-(1/(p0+6)))
beta=diag(p)
for(iit in 1:nit){
kmat=kern(x%*%beta,h);
bmat=numeric()
for(i in 1:dim(x)[1]){
wi=kmat[,i];
xi=cbind(1,t(t(x)-x[i,]))
bmat=cbind(bmat,wls(xi,y,wi)$b)
}
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
h=max(rn*h,c0*n^((-1/(d+4))))
}
beta.final=diag(sig^(-1/2))%*%beta
return(beta.final)
}

View File

@ -1,315 +0,0 @@
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)-rep(1,q)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-(kronecker(t(wn),rep(1,N)))##new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
}
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
names(ret)<-c('var','sig')
}
return(ret)
}
################
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}
#################MAVE, OPG, rMAVE, rOPG from Bing Li book
opg=function(x,y,d){
p=dim(x)[2];
n=dim(x)[1]
c0=2.34;
p0=max(p,3);
rn=n^(-1/(2*(p0+6)));
h=c0*n^(-(1/(p0+6)))
sig=diag(var(x));
x=apply(x,2,standvec)
kmat=kern(x,h);
bmat=numeric()
for(i in 1:dim(x)[1]){
wi=kmat[,i];
xi=cbind(1,t(t(x)-x[i,]))
bmat=cbind(bmat,wls(xi,y,wi)$b)}
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
return(diag(sig^(-1/2))%*%beta)
}
#######################
stiefl_opt_momentum<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0,momentum_para=0.8){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
Lnew<-LV(Vold,Xl,dtemp,h,q,Y)$var
#print(Lnew)
if(best>Lnew){
best<-Lnew
Vend<-Vold
#sig<-tmp3$sig
}
}
Vnew<-Vold<-Vend
G<-matrix(rep(0,dim*k),dim,k)
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
#G<-tmp2$grad
G<-(1-momentum_para)*G + momentum_para*tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
Lnew<-Lold
}
Vold<-Vnew
count<-count+1
#print(count)
}
ret<-list(Vnew,Lnew,count,h,count2)
names(ret)<-c('est_base','var','count','h','count2')
return(ret)
}
######################
wls=function(x,y,w){
n=dim(x)[1];
p=dim(x)[2]-1
out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean))
return(list(a=out[1],b=out[2:(p+1)]))
}
#################
kern=function(x,h){
x=as.matrix(x);
n=dim(x)[1]
k2=x%*%t(x);
k1=t(matrix(diag(k2),n,n));
k3=t(k1);
k=k1-2*k2+k3
return(exp(-(1/(2*h^2))*(k1-2*k2+k3)))
}
###############
standvec=function(x) return((x-mean(x))/sd(x))
##############
mave2=function(x,y,h,d,nit){
sig=diag(var(x));
n=dim(x)[1];
p=dim(x)[2]
x=apply(x,2,standvec);
beta=opg(x,y,d);#beta=opg(x,y,h,d);
kermat=kern(x,h)
for(iit in 1:nit){
b=numeric();
a=numeric();
for(i in 1:n){
wi=kermat[,i]/(apply(kermat,2,mean)[i])
ui=cbind(1,t(t(x)-x[i,])%*%beta)
out=wls(ui,y,wi);
a=c(a,out$a);b=cbind(b,out$b)}
out=0;out1=0;
for(i in 1:n){
xi=kronecker(t(t(x)-x[i,]),t(b[,i]))
yi=y-a[i];
wi=kermat[,i]/apply(kermat,2,mean)[i]
out=out+apply(xi*yi*wi,2,mean)
out1=out1+t(xi*wi)%*%xi/n}
beta=t(matrix(solve(out1)%*%out,d,p))
}
return(diag(sig^(-1/2))%*%beta)
}
######################
rmave=function(x,y,d,nit){
sig=diag(var(x));
n=dim(x)[1];
p=dim(x)[2]
x=apply(x,2,standvec)
c0=2.34;
p0=max(p,3);
h=c0*n^(-(1/(p0+6)));
rn=n^(-1/(2*(p0+6)))
beta=opg(x,y,d)
for(iit in 1:nit){
kermat=kern(x%*%beta,h);
mkermat=apply(kermat,2,mean)
b=numeric();a=numeric()
for(i in 1:n){
wi=kermat[,i]/mkermat[i];
ui=cbind(1,t(t(x)-x[i,])%*%beta)
out=wls(ui,y,wi);
a=c(a,out$a);b=cbind(b,out$b)
}
out=0;
out1=0
for(i in 1:n) {
xi=kronecker(t(t(x)-x[i,]),t(b[,i]));
yi=y-a[i]
wi=kermat[,i]/mkermat[i]
out=out+apply(xi*yi*wi,2,mean)
out1=out1+t(xi*wi)%*%xi/n}
beta=t(matrix(solve(out1)%*%out,d,p))
h=max(rn*h,c0*n^((-1/(d+4))))
}
return(diag(sig^(-1/2))%*%beta)
}
#########################
ropg=function(x,y,d,nit){
sig=diag(var(x));
x=apply(x,2,standvec);
p=dim(x)[2];
n=dim(x)[1]
c0=2.34;
p0=max(p,3);
rn=n^(-1/(2*(p0+6)));
h=c0*n^(-(1/(p0+6)))
beta=diag(p)
for(iit in 1:nit){
kmat=kern(x%*%beta,h);
bmat=numeric()
for(i in 1:dim(x)[1]){
wi=kmat[,i];
xi=cbind(1,t(t(x)-x[i,]))
bmat=cbind(bmat,wls(xi,y,wi)$b)
}
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
h=max(rn*h,c0*n^((-1/(d+4))))
}
beta.final=diag(sig^(-1/2))%*%beta
return(beta.final)
}

View File

@ -1,222 +0,0 @@
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-diag(kronecker(t(wn),rep(1,N)))##new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
}
}
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
names(ret)<-c('var','sig')
}
return(ret)
}
######
LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
N<-length(Y)
if(is.vector(V)){k<-1}
else{k<-length(V[1,])}
Xlv<-Xl%*%V
d<-dtemp-((Xlv^2)%*%rep(1,k))
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
w<-matrix(w,N,q)
wn<-apply(w,2,sum)#new
w<-apply(w,2,column_normalize)
mY<-t(w)%*%Y
sig<-t(w)%*%(Y^2)-(mY)^2
W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N)))
var<-t(wn)%*%sig/sum(wn)#new
if(grad==T){
grad<-mat.or.vec(dim,k)
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
if(k==1){
grad_d<- -2*Xl*as.vector(Xlv)
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new
}
else{
for (j in 1:(k)){
grad_d<- -2*Xl*as.vector(Xlv[,j])
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new
}
}
ret<-list(var,sig,grad)
names(ret)<-c('var','sig','grad')
}
else{
ret<-list(var,sig)
names(ret)<-c('var','sig')
}
return(ret)
}
####
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}
###########
stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
Y<-dat[,1]
X<-dat[,-1]
N<-length(Y)
dim<-length(X[1,])
if(p<1){
S<-est_varmat(X)
tmp1<-q_ind(X,S,p)
q<-tmp1$q
ind<-tmp1$ind
}
else{
q<-N
ind<-1:N
}
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
dtemp<-apply(Xl,1,norm2)
if(is.null(h)){
S<-est_varmat(X)
tr<-var_tr(S)
h<-choose_h_2(dim,k,N,nObs,tr)
}
best<-exp(10000)
Vend<-mat.or.vec(dim,k)
sig<-mat.or.vec(q,1)
for(u in 1:k0){
Vnew<-Vold<-stiefl_startval(dim,k)
#print(Vold)
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
Lnew<-Lold<-exp(10000)
lambda<-lambda_0
err<-10
count<-0
count2<-0
while(err>tol&count<maxit){
#print(Vold)
tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
G<-tmp2$grad
Lold<-tmp2$var
W<-G%*%t(Vold)-Vold%*%t(G)
stepsize<-lambda#/(2*sqrt(count+1))
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
# print(Vnew)
tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
Lnew<-tmp3$var
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
#print(err)
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
lambda=lambda/2
err<-10
count2<-count2+1
count<-count-1
Vnew<-Vold #!!!!!
}
Vold<-Vnew
count<-count+1
#print(count)
}
if(best>Lnew){
best<-Lnew
Vend<-Vnew
sig<-tmp3$sig
}
}
ret<-list(Vend,best,sig,count,h,count2)
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
return(ret)
}

View File

@ -1,128 +0,0 @@
# Usage:
# ~$ Rscript runtime_test.R
textplot <- function(...) {
text <- unlist(list(...))
if (length(text) > 20) {
text <- c(text[1:17],
' ...... (skipped, text too long) ......',
text[c(-1, 0) + length(text)])
}
plot(NA, xlim = c(0, 1), ylim = c(0, 1),
bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
for (i in seq_along(text)) {
text(0, 1 - (i / 20),
text[[i]], pos = 4)
}
}
# library(CVEpureR) # load CVE's pure R implementation
library(CVE) # load CVE
#' Writes log information to console. (to not get bored^^)
tell.user <- function(name, start, i, length) {
cat("\rRunning Test (", name, "):",
i, "/", length,
" - elapsed:", format(Sys.time() - start), "\033[K")
}
#' Computes "distance" of spanned subspaces.
#' @param B1 Semi-orthonormal basis matrix
#' @param B2 Semi-orthonormal basis matrix
#' @return Frobenius norm of subspace projection matrix diff.
subspace.dist <- function(B1, B2){
P1 <- tcrossprod(B1, B1)
P2 <- tcrossprod(B2, B2)
return(norm(P1 - P2, type = 'F'))
}
# Set random seed
set.seed(437)
# Number of simulations
SIM.NR <- 50L
# maximal number of iterations in curvilinear search algorithm
MAXIT <- 50L
# number of arbitrary starting values for curvilinear optimization
ATTEMPTS <- 10L
# set names of datasets
ds.names <- paste0("M", seq(7))
# Set used CVE method
methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd")
# Setup error and time tracking variables
error <- matrix(NA, SIM.NR, length(methods) * length(ds.names))
time <- matrix(NA, SIM.NR, ncol(error))
colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0)
colnames(time) <- colnames(error)
# Create new log file and write CSV (actualy TSV) header.
# (do not overwrite existing logs)
log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
file <- file.path('tmp', paste0('test', log.nr, '.log'))
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
# Open a new pdf device for plotting into (do not overwrite existing ones)
path <- paste0('test', log.nr, '.pdf')
pdf(file.path('tmp', path))
cat('Plotting to file:', path, '\n')
# only for telling user (to stdout)
count <- 0
start <- Sys.time()
# Start simulation loop.
for (sim in 1:SIM.NR) {
# Repeat for each dataset.
for (name in ds.names) {
tell.user(name, start, (count <- count + 1), SIM.NR * length(ds.names))
# Create a new dataset
ds <- dataset(name)
# Prepare X, Y and combine to data matrix
Y <- ds$Y
X <- ds$X
data <- cbind(Y, X)
# get dimensions
k <- ncol(ds$B)
for (method in methods) {
dr.time <- system.time(
dr <- cve.call(X, Y,
method = method,
k = k,
attempts = ATTEMPTS
)
)
dr$B <- coef(dr, k)
key <- paste0(name, '-', method)
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k)
time[sim, key] <- dr.time["elapsed"]
# Log results to file (mostly for long running simulations)
cat(paste0(
c(name, method, error[sim, key], time[sim, key]),
collapse = '\t'
), '\n',
sep = '', file = file, append = TRUE
)
}
}
}
cat("\n\n## Time [sec] Summary:\n")
print(summary(time))
cat("\n## Error Summary:\n")
print(summary(error))
boxplot(error,
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
ylab = "Error",
las = 2
)
boxplot(time,
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
ylab = "Time [sec]",
las = 2
)

120
test.R
View File

@ -1,120 +0,0 @@
textplot <- function(...) {
text <- unlist(list(...))
if (length(text) > 20) {
text <- c(text[1:17],
' ...... (skipped, text too long) ......',
text[c(-1, 0) + length(text)])
}
plot(NA, xlim = c(0, 1), ylim = c(0, 1),
bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
for (i in seq_along(text)) {
text(0, 1 - (i / 20),
text[[i]], pos = 4)
}
}
args <- commandArgs(TRUE)
if (length(args) > 0L) {
method <- args[1]
} else {
method <- "simple"
}
if (length(args) > 1L) {
momentum <- as.double(args[2])
} else {
momentum <- 0.0
}
seed <- 42
max.iter <- 50L
attempts <- 25L
library(CVE)
path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf')
# Define logger for `cve()` method.
logger <- function(attempt, iter, data) {
# Note the `<<-` assignement!
loss.history[iter + 1, attempt] <<- data$loss
error.history[iter + 1, attempt] <<- data$err
tau.history[iter + 1, attempt] <<- data$tau
# Compute true error by comparing to the true `B`
B.est <- null(data$V) # Function provided by CVE
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
true.error.history[iter + 1, attempt] <<- true.error
}
pdf(path, width = 8.27, height = 11.7) # width, height unit is inces -> A4
layout(matrix(c(1, 1,
2, 3,
4, 5), nrow = 3, byrow = TRUE))
for (name in paste0("M", seq(7))) {
# Seed random number generator
set.seed(seed)
# Create a dataset
ds <- dataset(name)
X <- ds$X
Y <- ds$Y
B <- ds$B # the true `B`
k <- ncol(ds$B)
# True projection matrix.
P <- B %*% solve(t(B) %*% B) %*% t(B)
# Setup histories.
V_last <- NULL
loss.history <- matrix(NA, max.iter + 1, attempts)
error.history <- matrix(NA, max.iter + 1, attempts)
tau.history <- matrix(NA, max.iter + 1, attempts)
true.error.history <- matrix(NA, max.iter + 1, attempts)
time <- system.time(
dr <- cve(Y ~ X, k = k, method = method,
momentum = momentum,
max.iter = max.iter, attempts = attempts,
logger = logger)
)["elapsed"]
# Extract finaly selected values:
B.est <- coef(dr, k)
true.error <- norm(tcrossprod(B.est) - tcrossprod(B), 'F') / sqrt(2 * k)
loss <- dr$res[[as.character(k)]]$loss
# Write metadata.
textplot(
paste0("Seed value: ", seed),
"",
paste0("Dataset Name: ", ds$name),
paste0("dim(X) = (", nrow(X), ", ", ncol(X), ")"),
paste0("dim(B) = (", nrow(B), ", ", ncol(B), ")"),
"",
paste0("CVE method: ", dr$method),
paste0("Max Iterations: ", max.iter),
paste0("Attempts: ", attempts),
paste0("Momentum: ", momentum),
"CVE call:",
paste0(" > ", format(dr$call)),
"",
paste0("True Error: ", round(true.error, 3)),
paste0("loss: ", round(loss, 3)),
paste0("time: ", round(time, 3), " s")
)
# Plot history's
matplot(loss.history, type = 'l', log = 'y', xlab = 'i (iteration)',
main = paste('loss', name),
ylab = expression(L(V[i])))
matplot(true.error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
main = paste('true error', name),
ylab = expression(group('|', B*B^T - B[i]*B[i]^T, '|')[F] / sqrt(2*k)))
matplot(error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
main = paste('error', name),
ylab = expression(group('|', V[i-1]*V[i-1]^T - V[i]*V[i]^T, '|')[F]))
matplot(tau.history, type = 'l', log = 'y', xlab = 'i (iteration)',
main = paste('learning rate', name),
ylab = expression(tau[i]))
}
cat("Created plot:", path, "\n")