cleanup: removed old and legacy code
This commit is contained in:
parent
9edefe994d
commit
d5cd6075a9
|
@ -1,66 +0,0 @@
|
|||
#initialize model parameterss
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-2 #dimension of B=b
|
||||
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-cbind(b1,b2)
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5 #error standard deviation
|
||||
N<-200 #sample size
|
||||
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
|
||||
##initailaize true covariancematrix of X
|
||||
Sig<-mat.or.vec(dim,dim)
|
||||
for (i in 1:dim){
|
||||
for (j in 1:dim){
|
||||
Sig[i,j]<-sigma^abs(i-j)
|
||||
}
|
||||
}
|
||||
Sroot<-chol(Sig)
|
||||
|
||||
M1_JASA<-mat.or.vec(m,8)
|
||||
colnames(M1_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||
#link function for M1
|
||||
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
|
||||
for (i in 1:m){
|
||||
#generate dat according to M1
|
||||
dat<-creat_sample_nor_nonstand(b,N,fM1,t(Sroot),sigma)
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
# calculate distance between true B and estimated B
|
||||
M1_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M1_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M1_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M1_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
#csMAVE
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||
M1_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||
#phd
|
||||
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||
M1_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||
#sir
|
||||
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||
M1_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||
#save
|
||||
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||
M1_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M1_JASA[,]/sqrt(2*truedim),names=colnames(M1_JASA),ylab='err',main='M1')
|
||||
summary(M1_JASA[,])
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
#initialize model parameterss
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-2 #dimension of B=b
|
||||
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||
b1=c(1,rep(0,dim-1))
|
||||
b2=c(0,1,rep(0,dim-2))
|
||||
b<-cbind(b1,b2) #B
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5 #error standard deviation
|
||||
N<-200 #sample size
|
||||
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-50 #maximal number of iterations in curvilinear search algorithm
|
||||
|
||||
|
||||
|
||||
M2_JASA<-mat.or.vec(m,8)
|
||||
colnames(M2_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||
f_link<-function(x){return(x[1]*(x[2])^2)}
|
||||
for (i in 1:m){
|
||||
#generate dat according to M2
|
||||
dat<-creat_sample(b,N,f_link,sigma)
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
#CVE
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
# calculate distance between true B and estimated B
|
||||
M2_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M2_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M2_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M2_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
#csMAVE
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||
M2_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||
#phd
|
||||
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||
M2_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||
#sir
|
||||
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||
M2_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||
#save
|
||||
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||
M2_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M2_JASA[,]/sqrt(2*truedim),names=colnames(M2_JASA),ylab='err',main='M2')
|
||||
summary(M2_JASA[,])
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
#initialize model parameterss
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-1#dimension of B=b
|
||||
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-b1#B
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5#error standard deviation
|
||||
N<-100#sample size
|
||||
K<-30#number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
|
||||
##initailaize true covariancematrix of X
|
||||
Sig<-mat.or.vec(dim,dim)
|
||||
for (i in 1:dim){
|
||||
for (j in 1:dim){
|
||||
Sig[i,j]<-sigma^abs(i-j)
|
||||
}
|
||||
}
|
||||
Sroot<-chol(Sig)
|
||||
|
||||
M3_JASA<-mat.or.vec(m,8)
|
||||
colnames(M3_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||
for (i in 1:m){
|
||||
#generate dat according to M3
|
||||
dat<-creat_sample_nor_nonstand(b,N,cos,t(Sroot),sigma) # distribution
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
#CVE
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
# calculate distance between true B and estimated B
|
||||
M3_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M3_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M3_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M3_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
#csMAVE
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||
M3_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||
#phd
|
||||
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||
M3_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||
#sir
|
||||
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||
M3_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||
#save
|
||||
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||
M3_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M3_JASA[,]/sqrt(2*truedim),names=colnames(M3_JASA),ylab='err',main='M3')
|
||||
summary(M3_JASA[,])
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
#initialize model parameters
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-1#dimension of B=b
|
||||
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-b1#B
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5#error standard deviation
|
||||
N<-100#sample size
|
||||
K<-30#number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-30#maximal number of iterations in curvilinear search algorithm
|
||||
|
||||
para<-seq(0,1.5,0.5)#model parameter corresponding to lambda in M4
|
||||
mix_prob<-seq(0.3,0.5,0.1) #model parameters corresponding to p_{mix}
|
||||
M4_JASA<-array(data=NA,dim=c(length(para),length(mix_prob),m,5))
|
||||
for(o in 1:length(mix_prob)){
|
||||
for (u in 1:length(para)){
|
||||
colnames(M4_JASA[u,o,,])<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE')
|
||||
for (i in 1:m){
|
||||
#generate dat according to M4 with p_{mix}=mix_prob[o] and lambda=para[u]
|
||||
if(u==1){dat<-creat_sample(b,N,cos,sigma)}
|
||||
else{dat<-creat_sample_noneliptic_gausmixture(b,N,cos,sigma,mix_prob[o],dispers_para = para[u])}
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
#CVE
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
# calculate distance between true B and estimated B
|
||||
M4_JASA[u,o,i,1]<-subspace_dist(b,CVE1$est_base[,1:truedim])
|
||||
M4_JASA[u,o,i,2]<-subspace_dist(b,CVE2$est_base[,1:truedim])
|
||||
M4_JASA[u,o,i,3]<-subspace_dist(b,CVE3$est_base[,1:truedim])
|
||||
#csMAVE
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M4_JASA[u,o,i,4]<-subspace_dist(b,mod_t$dir[[truedim]])
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||
M4_JASA[u,o,i,5]<-subspace_dist(b,mod_t2$dir[[truedim]])
|
||||
|
||||
}
|
||||
print(paste(u,paste('/',length(para))))
|
||||
}
|
||||
}
|
||||
|
||||
par(mfrow=c(3,4))
|
||||
boxplot(Exp_cook_square[1,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','meanMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[2,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[3,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[4,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1))
|
||||
|
||||
boxplot(Exp_cook_square[1,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[2,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[3,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[4,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1))
|
||||
|
||||
boxplot(Exp_cook_square[1,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[2,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[3,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[4,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||
#boxplot(Exp_cook_square[5,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[5]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1))
|
||||
|
||||
boxplot(Exp_cook_square[1,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[2,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[3,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||
boxplot(Exp_cook_square[4,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1))
|
||||
|
||||
par(mfrow=c(1,1))
|
|
@ -1,64 +0,0 @@
|
|||
#initialize model parameters
|
||||
m<-500#number of replications in simulation
|
||||
dim<-12#dimension of random variable X
|
||||
truedim<-1#dimension of B=b
|
||||
qs<-dim-truedim# dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-b1#B
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5#error standard deviation
|
||||
N<-42#sample size
|
||||
K<-70#number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-50#maximal number of iterations in curvilinear search algorithm
|
||||
|
||||
f_ln1<-function(x){return(2*log(abs(x)+1))}#link function
|
||||
M5_JASA_lowN<-mat.or.vec(m,8)
|
||||
colnames(M5_JASA_lowN)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save')
|
||||
for (i in 1:m){
|
||||
#generate dat according to M5
|
||||
dat<-creat_sample(b,N,f_ln1,sigma)
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
#CVE
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
# calculate distance between true B and estimated B
|
||||
M5_JASA_lowN[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M5_JASA_lowN[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M5_JASA_lowN[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M5_JASA_lowN[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
#csMAVE
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE')
|
||||
M5_JASA_lowN[i,5]<-subspace_dist(mod_t$dir[[truedim]],b)
|
||||
#phd
|
||||
test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1))
|
||||
M5_JASA_lowN[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b)
|
||||
#sir
|
||||
test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1))
|
||||
M5_JASA_lowN[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b)
|
||||
#save
|
||||
test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1))
|
||||
M5_JASA_lowN[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b)
|
||||
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M5_JASA_lowN[,]/sqrt(2*truedim),names=colnames(M5_JASA_lowN),ylab='err',main='M5')
|
||||
summary(M5_JASA_lowN[,])
|
||||
|
||||
####
|
||||
|
||||
# par(mfrow=c(1,2))
|
||||
# boxplot(M3_JASA_L[,]/sqrt(2*1),names=colnames(M3_JASA_L),ylab='err',main='M3')
|
||||
# boxplot(M5_JASA_lowN[1:100,-c(4,5,6)]/sqrt(2*1),names=colnames(M5_JASA_lowN)[-c(4,5,6)],ylab='err',main='M5')
|
||||
# par(mfrow=c(1,1))
|
|
@ -1,113 +0,0 @@
|
|||
|
||||
#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE
|
||||
install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source")
|
||||
install.packages(file.choose(), repos=NULL, type="source")
|
||||
|
||||
dim<-12
|
||||
N<-100
|
||||
s<-0.5
|
||||
dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5)
|
||||
test<-cve(Y~.,data=as.data.frame(dat),k=1)
|
||||
##############
|
||||
#initialize model parameterss
|
||||
m<-100 #number of replications in simulation
|
||||
dim<-12 #dimension of random variable X
|
||||
truedim<-2 #dimension of B=b
|
||||
qs<-dim-truedim # dimension of orthogonal complement of B
|
||||
b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6)
|
||||
b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6)
|
||||
b<-cbind(b1,b2)
|
||||
#b<-b1
|
||||
P<-b%*%t(b)
|
||||
sigma=0.5 #error standard deviation
|
||||
N<-70 #sample size
|
||||
K<-30 #number of arbitrary starting values for curvilinear optimization
|
||||
MAXIT<-30 #maximal number of iterations in curvilinear search algorithm
|
||||
var_vec<-mat.or.vec(m,12)
|
||||
M1_weight<-mat.or.vec(m,13)
|
||||
#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4')
|
||||
#link function for M1
|
||||
fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))}
|
||||
for (i in 1:m){
|
||||
#generate dat according to M1
|
||||
dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma)
|
||||
#est sample covariance matrix
|
||||
Sig_est<-est_varmat(dat[,-1])
|
||||
#est trace of sample covariance matrix
|
||||
tr<-var_tr(Sig_est)
|
||||
|
||||
#calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs
|
||||
CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
|
||||
CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#
|
||||
CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0)
|
||||
#
|
||||
#
|
||||
var_vec[i,1]<-CVE1$var
|
||||
var_vec[i,2]<-CVE2$var
|
||||
var_vec[i,3]<-CVE3$var
|
||||
|
||||
var_vec[i,4]<-CVE4$var
|
||||
var_vec[i,5]<-CVE5$var
|
||||
var_vec[i,6]<-CVE6$var
|
||||
#
|
||||
var_vec[i,7]<-CVE7$var
|
||||
var_vec[i,8]<-CVE8$var
|
||||
var_vec[i,9]<-CVE9$var
|
||||
|
||||
CVE1$est_base<-fill_base(CVE1$est_base)
|
||||
CVE2$est_base<-fill_base(CVE2$est_base)
|
||||
CVE3$est_base<-fill_base(CVE3$est_base)
|
||||
CVE4$est_base<-fill_base(CVE4$est_base)
|
||||
CVE5$est_base<-fill_base(CVE5$est_base)
|
||||
CVE6$est_base<-fill_base(CVE6$est_base)
|
||||
CVE7$est_base<-fill_base(CVE7$est_base)
|
||||
CVE8$est_base<-fill_base(CVE8$est_base)
|
||||
CVE9$est_base<-fill_base(CVE9$est_base)
|
||||
|
||||
# calculate distance between true B and estimated B
|
||||
M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b)
|
||||
M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b)
|
||||
M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b)
|
||||
M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b)
|
||||
M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b)
|
||||
M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b)
|
||||
M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b)
|
||||
M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b)
|
||||
M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b)
|
||||
|
||||
|
||||
|
||||
CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]]
|
||||
|
||||
# CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]]
|
||||
#M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b)
|
||||
|
||||
var_vec[i,10]<-CVE1_Rcpp$loss
|
||||
var_vec[i,11]<-CVE2_Rcpp$loss
|
||||
var_vec[i,12]<-CVE3_Rcpp$loss
|
||||
|
||||
#calculate orthogonal complement of Vhat_k
|
||||
#i.e. CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs))
|
||||
|
||||
|
||||
M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b)
|
||||
M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b)
|
||||
M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b)
|
||||
#meanMAVE
|
||||
mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b)
|
||||
|
||||
print(paste(i,paste('/',m)))
|
||||
}
|
||||
boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1')
|
||||
summary(M1_weight[1:(i-1),])
|
|
@ -1,189 +0,0 @@
|
|||
library("mda") #library for mars
|
||||
|
||||
local_linear<-function(x,h,dat,beta){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
X<-X%*%beta
|
||||
x<-x%*%beta#beta%*%x
|
||||
D_mat<-cbind(rep(1,N),X)
|
||||
if (is.vector(X)){
|
||||
dim<-1
|
||||
d<-abs(X-rep(x,N))
|
||||
}
|
||||
else{
|
||||
dim<-length(X[1,])
|
||||
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
|
||||
}
|
||||
K<-diag(dnorm(d/h)/dnorm(0))
|
||||
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
|
||||
return(pred)
|
||||
}
|
||||
##### performs estimation of small dimesnion by CV with local linear forward regression
|
||||
est_dim_CV<-function(Blist,dat,h_loclin=NULL,dim.max,median_use=F,method_mars=F){
|
||||
#standardize regressors by symmetric root of inverse covariance mat
|
||||
Sig<-est_varmat(dat[,-1])
|
||||
eig_dec<-eigen(Sig)
|
||||
Sroot_inv<-eig_dec$vectors%*%((diag(eig_dec$values^(-1/2))))%*%t(eig_dec$vectors)
|
||||
dat[,-1]<-as.matrix(dat[,-1])%*%Sroot_inv
|
||||
|
||||
N<-length(dat[,1])
|
||||
dim<-length(dat[1,-1])
|
||||
|
||||
MSE<-mat.or.vec(N,dim.max)
|
||||
for(u in 1:dim.max){
|
||||
beta<-Blist[[u]]
|
||||
if(is.null(h_loclin)){
|
||||
#h_loclin<-(1/N)^(1/(3+2*u))
|
||||
h_loclin<-1.2*N^(-1/(4+u))#(1/N)^(1/(3+2*j))
|
||||
|
||||
}
|
||||
|
||||
for(i in 1:N){
|
||||
x<-dat[i,-1]
|
||||
if(method_mars==F){MSE[i,u]<-(dat[i,1]-local_linear(x,h_loclin,as.matrix(dat[-i,]),beta))^2} #predict with local linear
|
||||
if(method_mars==T){
|
||||
dat_fit<-dat[-i,-1]%*%beta
|
||||
X_new<-dat[i,-1]%*%beta
|
||||
mars_mod<-mars(dat_fit,dat[-i,1]) #fit mars model
|
||||
MSE[i,u]<-(dat[i,1]-predict(mars_mod,X_new))^2 #predict with mars model
|
||||
}#predict with mars
|
||||
}
|
||||
}
|
||||
if(median_use){MSE_ave<-apply(MSE,2,median)}
|
||||
else{MSE_ave<-colMeans(MSE)}
|
||||
#apply(MSE,2,median)
|
||||
est_dim<-which.min(MSE_ave)
|
||||
ret<-list(est_dim = est_dim, MSE_ave = MSE_ave, MSE = MSE)
|
||||
return(ret)
|
||||
}
|
||||
###########
|
||||
test_for_dim<-function(Lmat,dim.max=NULL,alpha=0.1,method='greater'){
|
||||
#Lmat... matrix with dimension N times dim.max with columns corresponding to aov_dat for dim 1,2,3,....,max.dim
|
||||
if(is.null(dim.max)){dim.max<-length(Lmat[1,])}
|
||||
|
||||
pval<-mat.or.vec(dim.max-1,1)
|
||||
est_dim<-dim.max
|
||||
# for (j in 1:(dim.max-1)){
|
||||
j<-1
|
||||
while(est_dim==dim.max&j<dim.max){
|
||||
if (method=='greater'){
|
||||
pval[(j)]<-t.test(Lmat[,(j)],Lmat[,(j+1)],alternative = 'greater',paired=T)$p.value#mod[[1]][,5][1]
|
||||
if(pval[j]<alpha){est_dim<-j}
|
||||
}
|
||||
if (method=='lower'){
|
||||
pval[(j)]<-t.test(Lmat[,(j)],Lmat[,(j+1)],alternative = 'less',paired=F)$p.value#mod[[1]][,5][1]
|
||||
if(pval[j]>alpha){est_dim<-j}
|
||||
}
|
||||
j<-j+1
|
||||
|
||||
}
|
||||
ret<-list(est_dim,pval)
|
||||
names(ret)<-c('estdim','pval')
|
||||
return(ret)
|
||||
}
|
||||
####
|
||||
|
||||
##
|
||||
test_for_dim_elbow<-function(Lmat){
|
||||
dim.max<-length(Lmat[1,])
|
||||
ave<-colMeans(Lmat)
|
||||
boxplot(Lmat,xlab='k')
|
||||
lines(seq(1,dim.max),ave,col='red')
|
||||
# tmp<-cbind(ave,seq(1,dim.max))
|
||||
# colnames(tmp)<-c('response','k')
|
||||
# diff<-lm(response~k,data=as.data.frame(tmp))$coefficients[2]
|
||||
#
|
||||
# est_dim<-dim.max
|
||||
# i<-1
|
||||
# while(i<dim.max&est_dim==dim.max){
|
||||
# if(ave[i+1]-ave[i]>diff){est_dim<-i}
|
||||
# i<-i+1
|
||||
# }
|
||||
return(which.min(ave))
|
||||
}
|
||||
######
|
||||
#Small simulation example for truedim =1
|
||||
|
||||
set.seed(21)
|
||||
dim<-6
|
||||
truedim<-1
|
||||
N<-50
|
||||
b<-c(1,0,0,0,0,0)
|
||||
m<-20
|
||||
est_dim<-mat.or.vec(m,7)
|
||||
dim.max<-4
|
||||
for(i in 1:m){
|
||||
dat<-creat_sample(b,N,fsquare,0.5)
|
||||
Blist<-list()
|
||||
Lmat<-mat.or.vec(N,dim.max)
|
||||
for(u in 1:dim.max){ #calculate B for different possible truedim's
|
||||
m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection rule used that controlls number of points in a slice!!!!!!, also choose_h_2
|
||||
Blist[[u]]<-fill_base(m1$est_base)[,1:u]
|
||||
Lmat[,u]<-m1$aov_dat
|
||||
}
|
||||
#estimate truedim with different methods
|
||||
est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim
|
||||
est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim
|
||||
est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F,method_mars = T)$est_dim
|
||||
est_dim[i,4]<-test_for_dim(Lmat)$estdim
|
||||
est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim
|
||||
est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
est_dim[i,7]<-which.min(mave.dim(mod_t)$cv)
|
||||
|
||||
print(i)
|
||||
}
|
||||
|
||||
length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with median)
|
||||
#0.5
|
||||
length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mean)
|
||||
#0.9
|
||||
length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mars and mean)
|
||||
#0.8
|
||||
length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='greater')
|
||||
#0.0
|
||||
length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='lower')
|
||||
#0.05
|
||||
length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow)
|
||||
#1
|
||||
length(which(est_dim[,7]==truedim))/m #fraction of where dimension is estimated correctly with mave
|
||||
#0.95
|
||||
##########
|
||||
#Small simulation example for truedim =2
|
||||
|
||||
set.seed(21)
|
||||
dim<-6
|
||||
truedim<-2
|
||||
N<-100
|
||||
b<-cbind(c(1,rep(0,dim-1)),c(0,1,rep(0,dim-2)))
|
||||
m<-20
|
||||
est_dim<-mat.or.vec(m,7)
|
||||
dim.max<-4
|
||||
for(i in 1:m){
|
||||
dat<-creat_sample(b,N,function(x){return(x[1]*x[2])},0.5)
|
||||
Blist<-list()
|
||||
Lmat<-mat.or.vec(N,dim.max)
|
||||
for(u in 1:dim.max){
|
||||
m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection used !!!!!!!!!!!!!!!!!!!!!, also choose_h_2
|
||||
Blist[[u]]<-fill_base(m1$est_base)[,1:u]
|
||||
Lmat[,u]<-m1$aov_dat
|
||||
}
|
||||
est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim
|
||||
est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim
|
||||
est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F,method_mars = T)$est_dim
|
||||
est_dim[i,4]<-test_for_dim(Lmat)$estdim
|
||||
est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim
|
||||
est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1
|
||||
mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE')
|
||||
est_dim[i,7]<-which.min(mave.dim(mod_t)$cv)
|
||||
|
||||
print(i)
|
||||
}
|
||||
|
||||
length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with median)
|
||||
length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mean)
|
||||
length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mars and mean)
|
||||
length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='greater')
|
||||
length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='lower')
|
||||
length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow)
|
|
@ -1,382 +0,0 @@
|
|||
library("Matrix")
|
||||
# library("fields")
|
||||
# library('dr')
|
||||
# library('mvtnorm')
|
||||
# library(mgcv)
|
||||
# library(pracma)
|
||||
# library(RVAideMemoire)
|
||||
library("MAVE")
|
||||
# library(pls)
|
||||
# library(LaplacesDemon)
|
||||
# library(earth)
|
||||
# library("latex2exp")
|
||||
|
||||
#################################################
|
||||
|
||||
##############################
|
||||
|
||||
################################
|
||||
|
||||
|
||||
fsquare<-function(x){
|
||||
ret<-0
|
||||
for (h in 1:length(x)){ret<-ret+x[h]^2}
|
||||
return(ret)
|
||||
}
|
||||
f_id<-function(x){return(x[1])}
|
||||
vfun<-function(x)return(c(cos(x),sin(x)))
|
||||
vfun_grad<-function(x)return(c(-sin(x),cos(x)))
|
||||
f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)}
|
||||
###############################
|
||||
##creat sample for M2, b is B matrix of model, N..sample size,
|
||||
#f...link function, sigma..standard deviation of error term
|
||||
#produces sample with X standard normal with dimension to columnlength of b an
|
||||
# epsilon centered normal with standard deviation sigma
|
||||
creat_sample<-function(b,N,f,sigma){
|
||||
if (is.vector(b)==1){dim<-length(b)}
|
||||
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||
|
||||
X<-mat.or.vec(N,dim)
|
||||
Y<-mat.or.vec(N,1)
|
||||
for (i in 1:N){
|
||||
X[i,]<-rnorm(dim,0,1)
|
||||
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||
}
|
||||
dat<-cbind(Y,X)
|
||||
return(dat)
|
||||
}
|
||||
#####
|
||||
creat_b<-function(dim,n){
|
||||
temp<-mat.or.vec(dim,n)
|
||||
|
||||
if (n>1){
|
||||
while (rankMatrix(temp,method = 'qr')< min(dim,n)){
|
||||
for (i in 1:n){
|
||||
temp[,i]<-rnorm(dim,0,1)
|
||||
temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2))
|
||||
}
|
||||
}
|
||||
}
|
||||
if (n==1) {temp<-rnorm(dim,0,1)
|
||||
temp<-temp/sqrt(sum(temp^2))}
|
||||
return(temp)
|
||||
}
|
||||
###
|
||||
|
||||
### creat sample for M1,M3
|
||||
## b is B matrix of model, N..sample size,
|
||||
#f...link function, sigma..standard deviation of error term
|
||||
#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an
|
||||
# epsilon centered normal with standard deviation sigma
|
||||
creat_sample_nor_nonstand<-function(b,N,f,S,sigma){
|
||||
#Var(x)= SS'
|
||||
if (is.vector(b)==1){dim<-length(b)}
|
||||
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||
|
||||
X<-mat.or.vec(N,dim)
|
||||
Y<-mat.or.vec(N,1)
|
||||
for (i in 1:N){
|
||||
X[i,]<-rnorm(dim,0,1)%*%t(S)
|
||||
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||
}
|
||||
dat<-cbind(Y,X)
|
||||
return(dat)
|
||||
}
|
||||
####
|
||||
|
||||
####
|
||||
|
||||
###
|
||||
####
|
||||
|
||||
####
|
||||
#### creat sample for M4
|
||||
##creat sample for M2, b is B matrix of model, N..sample size,
|
||||
#f...link function, sigma..standard deviation of error term
|
||||
#produces sample with X Gausian mixture
|
||||
#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi)
|
||||
#with dimension to columnlength of b an
|
||||
# epsilon centered normal with standard deviation sigma
|
||||
creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){
|
||||
if (is.vector(b)==1){dim<-length(b)}
|
||||
if (is.vector(b)==0){dim<-length(b[,1])}
|
||||
|
||||
X<-mat.or.vec(N,dim)
|
||||
Y<-mat.or.vec(N,1)
|
||||
for (i in 1:N){
|
||||
p<-rbinom(1,1,pi)
|
||||
X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1)
|
||||
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
||||
}
|
||||
dat<-cbind(Y,X)
|
||||
return(dat)
|
||||
}
|
||||
###
|
||||
###############################
|
||||
#estimates covariance matrix by ML estimate
|
||||
# X...n times dim data matrix with rows corresponding to X_i
|
||||
est_varmat<-function(X){
|
||||
dim<-length(X[1,])
|
||||
Sig<-mat.or.vec(dim,dim)#Cov(X)
|
||||
X<-scale(X,center = T,scale = F)
|
||||
Sig<-t(X)%*%X/length(X[,1])
|
||||
return(Sig)
|
||||
}
|
||||
####
|
||||
# normalizes a vector x such that it sums to 1
|
||||
column_normalize<-function(x){return(x/sum(x))}
|
||||
####
|
||||
#squared 2-norm of a vector x
|
||||
norm2<-function(x){return(sum(x^2))}
|
||||
###
|
||||
# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix
|
||||
fill_base<-function(estb){
|
||||
dim<-length(estb[,1])
|
||||
k<-length(estb[1,])
|
||||
P<-(diag(rep(1,dim))-estb%*%t(estb))
|
||||
rk<-0
|
||||
while (rk < dim){
|
||||
tmp1<-P%*%creat_b(dim,1)
|
||||
nor<-sqrt(sum(tmp1^2))
|
||||
if(nor>10^(-4)){
|
||||
estb<-cbind(tmp1/nor,estb)
|
||||
rkn<-rankMatrix(estb,method = 'qr')
|
||||
if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb))
|
||||
rk<-rkn}
|
||||
}
|
||||
if(nor <=10^(-4)){rk<-0}
|
||||
|
||||
}
|
||||
|
||||
return(estb)
|
||||
}
|
||||
##################
|
||||
###new stiefel optimization
|
||||
#draws from the uniform measure of Stiefel(p,k)
|
||||
#returns a p times k dimensional semiorthogonal matrix
|
||||
stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))}
|
||||
##### chooses shifting points
|
||||
#auxiliary function for stielfel_opt
|
||||
# X... N times dim data matrix
|
||||
#S... sample covariance matrix
|
||||
#0 < p <=1 fraction of data point used as shifting points
|
||||
# if p=1 all data points used as shifting points
|
||||
q_ind<-function(X,S,p){
|
||||
dim<-length(X[1,])
|
||||
N<-length(X[,1])
|
||||
if (p < 1){
|
||||
Sinv<-solve(S)
|
||||
ind<-c()
|
||||
bound<-qchisq(p,dim)
|
||||
for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean
|
||||
q<-length(ind)
|
||||
if (q<=2){
|
||||
q<-N
|
||||
ind<-seq(1,N)}
|
||||
}
|
||||
else {
|
||||
q<-N
|
||||
ind<-seq(1,N)
|
||||
}
|
||||
ret<-list(q,ind)
|
||||
names(ret)<-c('q','ind')
|
||||
return(ret)
|
||||
}
|
||||
####
|
||||
#calculates the trace of a quadratic matrix S
|
||||
var_tr<-function(x){return(sum(diag(x)))}
|
||||
####
|
||||
#chooses bandwith h according to formula in paper
|
||||
#dim...dimension of X vector
|
||||
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
||||
# N...sample size
|
||||
#nObs... nObs in bandwith formula
|
||||
#tr...trace of sample covariance matrix of X
|
||||
choose_h_2<-function(dim,k,N,nObs,tr){
|
||||
#h1<-qchisq(nObs/N,2*(dim-k))
|
||||
h2<-qchisq((nObs-1)/(N-1),(dim-k))
|
||||
return(h2*2*tr/dim)
|
||||
}
|
||||
####
|
||||
#auxiliary function for stiefel_opt
|
||||
#X... data matrix of random variable X
|
||||
#q,ind... output of function q_ind
|
||||
Xl_fun<-function(X,q,ind){
|
||||
N<-length(X[,1])
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
return(Xl)
|
||||
}
|
||||
###########
|
||||
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
||||
# V... (dim times q) matrix
|
||||
# Xl... output of Xl_fun
|
||||
# dtemp...vector with pairwise distances |X_i - X_j|
|
||||
# q...output of q_ind function
|
||||
# Y... vector with N Y_i values
|
||||
# if grad=T, gradient of L(V) also returned
|
||||
LV<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-dnorm(d/h)/dnorm(0)
|
||||
w<-matrix(w,N,q)
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
||||
}
|
||||
}
|
||||
ret<-list(mean(sig),sig,grad)
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(mean(sig),sig)
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
||||
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
||||
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||||
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||||
#h... bandwidth
|
||||
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
||||
#k0... number of arbitrary starting values
|
||||
#p...fraction of data points used as shifting point
|
||||
#maxit... number of maximal iterations in curvilinear search
|
||||
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
||||
#lambda_0...initial stepsize
|
||||
#tol...tolerance for stoping iterations
|
||||
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
||||
#output:
|
||||
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
||||
#var...value of L_n(Vhat_k)
|
||||
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
||||
#count...number of iterations
|
||||
#h...bandwidth
|
||||
#count2...number of times sclack_para is active
|
||||
stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
|
||||
history <- matrix(NA, maxit, k0)
|
||||
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
S<-est_varmat(X)
|
||||
if(p<1){
|
||||
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
tr<-var_tr(S)
|
||||
if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
|
||||
# Write to history
|
||||
history[count, u] <- Lnew
|
||||
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2, history)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2', 'history')
|
||||
return(ret)
|
||||
}
|
||||
##### calculates distance between two subspaces
|
||||
#i.e. |b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2
|
||||
subspace_dist<-function(b1,b2){
|
||||
#b1<-orth(b1)
|
||||
#b2<-orth(b2)
|
||||
P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1)
|
||||
P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2)
|
||||
return(sqrt(sum((P1-P2)^2)))
|
||||
}
|
||||
####performs local linaer regression
|
||||
#x...datapoint at where estimated function should be evaluated
|
||||
#h...bandwidth
|
||||
#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||||
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||||
#beta...projection matrix
|
||||
local_linear<-function(x,h,dat,beta){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
X<-X%*%beta
|
||||
x<-x%*%beta#beta%*%x
|
||||
D_mat<-cbind(rep(1,N),X)
|
||||
if (is.vector(X)){
|
||||
dim<-1
|
||||
d<-abs(X-rep(x,N))
|
||||
}
|
||||
else{
|
||||
dim<-length(X[1,])
|
||||
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
|
||||
}
|
||||
K<-diag(dnorm(d/h)/dnorm(0))
|
||||
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
|
||||
return(pred)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1,236 +0,0 @@
|
|||
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)-rep(1,q)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-(kronecker(t(wn),rep(1,N)))##new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
}
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
################
|
||||
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
||||
|
||||
#################MAVE, OPG, rMAVE, rOPG from Bing Li book
|
||||
opg=function(x,y,d){
|
||||
p=dim(x)[2];
|
||||
n=dim(x)[1]
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
rn=n^(-1/(2*(p0+6)));
|
||||
h=c0*n^(-(1/(p0+6)))
|
||||
sig=diag(var(x));
|
||||
x=apply(x,2,standvec)
|
||||
kmat=kern(x,h);
|
||||
bmat=numeric()
|
||||
for(i in 1:dim(x)[1]){
|
||||
wi=kmat[,i];
|
||||
xi=cbind(1,t(t(x)-x[i,]))
|
||||
bmat=cbind(bmat,wls(xi,y,wi)$b)}
|
||||
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
######################
|
||||
wls=function(x,y,w){
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]-1
|
||||
out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean))
|
||||
return(list(a=out[1],b=out[2:(p+1)]))
|
||||
}
|
||||
#################
|
||||
kern=function(x,h){
|
||||
x=as.matrix(x);
|
||||
n=dim(x)[1]
|
||||
k2=x%*%t(x);
|
||||
k1=t(matrix(diag(k2),n,n));
|
||||
k3=t(k1);
|
||||
k=k1-2*k2+k3
|
||||
return(exp(-(1/(2*h^2))*(k1-2*k2+k3)))
|
||||
}
|
||||
###############
|
||||
standvec=function(x) return((x-mean(x))/sd(x))
|
||||
##############
|
||||
mave2=function(x,y,h,d,nit){
|
||||
sig=diag(var(x));
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]
|
||||
x=apply(x,2,standvec);
|
||||
beta=opg(x,y,d);#beta=opg(x,y,h,d);
|
||||
kermat=kern(x,h)
|
||||
for(iit in 1:nit){
|
||||
b=numeric();
|
||||
a=numeric();
|
||||
for(i in 1:n){
|
||||
wi=kermat[,i]/(apply(kermat,2,mean)[i])
|
||||
ui=cbind(1,t(t(x)-x[i,])%*%beta)
|
||||
out=wls(ui,y,wi);
|
||||
a=c(a,out$a);b=cbind(b,out$b)}
|
||||
out=0;out1=0;
|
||||
for(i in 1:n){
|
||||
xi=kronecker(t(t(x)-x[i,]),t(b[,i]))
|
||||
yi=y-a[i];
|
||||
wi=kermat[,i]/apply(kermat,2,mean)[i]
|
||||
out=out+apply(xi*yi*wi,2,mean)
|
||||
out1=out1+t(xi*wi)%*%xi/n}
|
||||
beta=t(matrix(solve(out1)%*%out,d,p))
|
||||
}
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
######################
|
||||
rmave=function(x,y,d,nit){
|
||||
sig=diag(var(x));
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]
|
||||
x=apply(x,2,standvec)
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
h=c0*n^(-(1/(p0+6)));
|
||||
rn=n^(-1/(2*(p0+6)))
|
||||
beta=opg(x,y,d)
|
||||
for(iit in 1:nit){
|
||||
kermat=kern(x%*%beta,h);
|
||||
mkermat=apply(kermat,2,mean)
|
||||
b=numeric();a=numeric()
|
||||
for(i in 1:n){
|
||||
wi=kermat[,i]/mkermat[i];
|
||||
ui=cbind(1,t(t(x)-x[i,])%*%beta)
|
||||
out=wls(ui,y,wi);
|
||||
a=c(a,out$a);b=cbind(b,out$b)
|
||||
}
|
||||
out=0;
|
||||
out1=0
|
||||
for(i in 1:n) {
|
||||
xi=kronecker(t(t(x)-x[i,]),t(b[,i]));
|
||||
yi=y-a[i]
|
||||
wi=kermat[,i]/mkermat[i]
|
||||
out=out+apply(xi*yi*wi,2,mean)
|
||||
out1=out1+t(xi*wi)%*%xi/n}
|
||||
beta=t(matrix(solve(out1)%*%out,d,p))
|
||||
h=max(rn*h,c0*n^((-1/(d+4))))
|
||||
}
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
#########################
|
||||
ropg=function(x,y,d,nit){
|
||||
sig=diag(var(x));
|
||||
x=apply(x,2,standvec);
|
||||
p=dim(x)[2];
|
||||
n=dim(x)[1]
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
rn=n^(-1/(2*(p0+6)));
|
||||
h=c0*n^(-(1/(p0+6)))
|
||||
beta=diag(p)
|
||||
for(iit in 1:nit){
|
||||
kmat=kern(x%*%beta,h);
|
||||
bmat=numeric()
|
||||
for(i in 1:dim(x)[1]){
|
||||
wi=kmat[,i];
|
||||
xi=cbind(1,t(t(x)-x[i,]))
|
||||
bmat=cbind(bmat,wls(xi,y,wi)$b)
|
||||
}
|
||||
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
|
||||
h=max(rn*h,c0*n^((-1/(d+4))))
|
||||
}
|
||||
|
||||
beta.final=diag(sig^(-1/2))%*%beta
|
||||
return(beta.final)
|
||||
}
|
|
@ -1,315 +0,0 @@
|
|||
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)-rep(1,q)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-(kronecker(t(wn),rep(1,N)))##new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
}
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
################
|
||||
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
||||
|
||||
#################MAVE, OPG, rMAVE, rOPG from Bing Li book
|
||||
opg=function(x,y,d){
|
||||
p=dim(x)[2];
|
||||
n=dim(x)[1]
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
rn=n^(-1/(2*(p0+6)));
|
||||
h=c0*n^(-(1/(p0+6)))
|
||||
sig=diag(var(x));
|
||||
x=apply(x,2,standvec)
|
||||
kmat=kern(x,h);
|
||||
bmat=numeric()
|
||||
for(i in 1:dim(x)[1]){
|
||||
wi=kmat[,i];
|
||||
xi=cbind(1,t(t(x)-x[i,]))
|
||||
bmat=cbind(bmat,wls(xi,y,wi)$b)}
|
||||
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
#######################
|
||||
stiefl_opt_momentum<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0,momentum_para=0.8){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
Lnew<-LV(Vold,Xl,dtemp,h,q,Y)$var
|
||||
#print(Lnew)
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vold
|
||||
#sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
Vnew<-Vold<-Vend
|
||||
|
||||
G<-matrix(rep(0,dim*k),dim,k)
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
|
||||
#G<-tmp2$grad
|
||||
G<-(1-momentum_para)*G + momentum_para*tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
Lnew<-Lold
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
|
||||
#print(count)
|
||||
}
|
||||
|
||||
ret<-list(Vnew,Lnew,count,h,count2)
|
||||
names(ret)<-c('est_base','var','count','h','count2')
|
||||
return(ret)
|
||||
}
|
||||
######################
|
||||
wls=function(x,y,w){
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]-1
|
||||
out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean))
|
||||
return(list(a=out[1],b=out[2:(p+1)]))
|
||||
}
|
||||
#################
|
||||
kern=function(x,h){
|
||||
x=as.matrix(x);
|
||||
n=dim(x)[1]
|
||||
k2=x%*%t(x);
|
||||
k1=t(matrix(diag(k2),n,n));
|
||||
k3=t(k1);
|
||||
k=k1-2*k2+k3
|
||||
return(exp(-(1/(2*h^2))*(k1-2*k2+k3)))
|
||||
}
|
||||
###############
|
||||
standvec=function(x) return((x-mean(x))/sd(x))
|
||||
##############
|
||||
mave2=function(x,y,h,d,nit){
|
||||
sig=diag(var(x));
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]
|
||||
x=apply(x,2,standvec);
|
||||
beta=opg(x,y,d);#beta=opg(x,y,h,d);
|
||||
kermat=kern(x,h)
|
||||
for(iit in 1:nit){
|
||||
b=numeric();
|
||||
a=numeric();
|
||||
for(i in 1:n){
|
||||
wi=kermat[,i]/(apply(kermat,2,mean)[i])
|
||||
ui=cbind(1,t(t(x)-x[i,])%*%beta)
|
||||
out=wls(ui,y,wi);
|
||||
a=c(a,out$a);b=cbind(b,out$b)}
|
||||
out=0;out1=0;
|
||||
for(i in 1:n){
|
||||
xi=kronecker(t(t(x)-x[i,]),t(b[,i]))
|
||||
yi=y-a[i];
|
||||
wi=kermat[,i]/apply(kermat,2,mean)[i]
|
||||
out=out+apply(xi*yi*wi,2,mean)
|
||||
out1=out1+t(xi*wi)%*%xi/n}
|
||||
beta=t(matrix(solve(out1)%*%out,d,p))
|
||||
}
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
######################
|
||||
rmave=function(x,y,d,nit){
|
||||
sig=diag(var(x));
|
||||
n=dim(x)[1];
|
||||
p=dim(x)[2]
|
||||
x=apply(x,2,standvec)
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
h=c0*n^(-(1/(p0+6)));
|
||||
rn=n^(-1/(2*(p0+6)))
|
||||
beta=opg(x,y,d)
|
||||
for(iit in 1:nit){
|
||||
kermat=kern(x%*%beta,h);
|
||||
mkermat=apply(kermat,2,mean)
|
||||
b=numeric();a=numeric()
|
||||
for(i in 1:n){
|
||||
wi=kermat[,i]/mkermat[i];
|
||||
ui=cbind(1,t(t(x)-x[i,])%*%beta)
|
||||
out=wls(ui,y,wi);
|
||||
a=c(a,out$a);b=cbind(b,out$b)
|
||||
}
|
||||
out=0;
|
||||
out1=0
|
||||
for(i in 1:n) {
|
||||
xi=kronecker(t(t(x)-x[i,]),t(b[,i]));
|
||||
yi=y-a[i]
|
||||
wi=kermat[,i]/mkermat[i]
|
||||
out=out+apply(xi*yi*wi,2,mean)
|
||||
out1=out1+t(xi*wi)%*%xi/n}
|
||||
beta=t(matrix(solve(out1)%*%out,d,p))
|
||||
h=max(rn*h,c0*n^((-1/(d+4))))
|
||||
}
|
||||
return(diag(sig^(-1/2))%*%beta)
|
||||
}
|
||||
#########################
|
||||
ropg=function(x,y,d,nit){
|
||||
sig=diag(var(x));
|
||||
x=apply(x,2,standvec);
|
||||
p=dim(x)[2];
|
||||
n=dim(x)[1]
|
||||
c0=2.34;
|
||||
p0=max(p,3);
|
||||
rn=n^(-1/(2*(p0+6)));
|
||||
h=c0*n^(-(1/(p0+6)))
|
||||
beta=diag(p)
|
||||
for(iit in 1:nit){
|
||||
kmat=kern(x%*%beta,h);
|
||||
bmat=numeric()
|
||||
for(i in 1:dim(x)[1]){
|
||||
wi=kmat[,i];
|
||||
xi=cbind(1,t(t(x)-x[i,]))
|
||||
bmat=cbind(bmat,wls(xi,y,wi)$b)
|
||||
}
|
||||
beta=eigen(bmat%*%t(bmat))$vectors[,1:d]
|
||||
h=max(rn*h,c0*n^((-1/(d+4))))
|
||||
}
|
||||
|
||||
beta.final=diag(sig^(-1/2))%*%beta
|
||||
return(beta.final)
|
||||
}
|
|
@ -1,222 +0,0 @@
|
|||
LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-diag(kronecker(t(wn),rep(1,N)))##new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
# wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))
|
||||
# grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad
|
||||
}
|
||||
}
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(t(wn)%*%sig/sum(wn),sig)#new
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
######
|
||||
LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
||||
N<-length(Y)
|
||||
if(is.vector(V)){k<-1}
|
||||
else{k<-length(V[1,])}
|
||||
Xlv<-Xl%*%V
|
||||
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
||||
w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0)
|
||||
w<-matrix(w,N,q)
|
||||
wn<-apply(w,2,sum)#new
|
||||
w<-apply(w,2,column_normalize)
|
||||
mY<-t(w)%*%Y
|
||||
sig<-t(w)%*%(Y^2)-(mY)^2
|
||||
W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N)))
|
||||
var<-t(wn)%*%sig/sum(wn)#new
|
||||
if(grad==T){
|
||||
grad<-mat.or.vec(dim,k)
|
||||
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
||||
if(k==1){
|
||||
grad_d<- -2*Xl*as.vector(Xlv)
|
||||
grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
|
||||
grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new
|
||||
}
|
||||
else{
|
||||
for (j in 1:(k)){
|
||||
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
||||
grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new
|
||||
wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new
|
||||
grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new
|
||||
}
|
||||
}
|
||||
ret<-list(var,sig,grad)
|
||||
names(ret)<-c('var','sig','grad')
|
||||
}
|
||||
else{
|
||||
ret<-list(var,sig)
|
||||
names(ret)<-c('var','sig')
|
||||
}
|
||||
|
||||
return(ret)
|
||||
}
|
||||
####
|
||||
stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_partial(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_partial(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
||||
###########
|
||||
stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
||||
Y<-dat[,1]
|
||||
X<-dat[,-1]
|
||||
N<-length(Y)
|
||||
dim<-length(X[1,])
|
||||
if(p<1){
|
||||
S<-est_varmat(X)
|
||||
tmp1<-q_ind(X,S,p)
|
||||
q<-tmp1$q
|
||||
ind<-tmp1$ind
|
||||
}
|
||||
else{
|
||||
q<-N
|
||||
ind<-1:N
|
||||
}
|
||||
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
||||
dtemp<-apply(Xl,1,norm2)
|
||||
if(is.null(h)){
|
||||
S<-est_varmat(X)
|
||||
tr<-var_tr(S)
|
||||
h<-choose_h_2(dim,k,N,nObs,tr)
|
||||
}
|
||||
best<-exp(10000)
|
||||
Vend<-mat.or.vec(dim,k)
|
||||
sig<-mat.or.vec(q,1)
|
||||
for(u in 1:k0){
|
||||
Vnew<-Vold<-stiefl_startval(dim,k)
|
||||
#print(Vold)
|
||||
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
||||
Lnew<-Lold<-exp(10000)
|
||||
lambda<-lambda_0
|
||||
err<-10
|
||||
count<-0
|
||||
count2<-0
|
||||
while(err>tol&count<maxit){
|
||||
#print(Vold)
|
||||
tmp2<-LV_weight_full(Vold,Xl,dtemp,h,q,Y)
|
||||
G<-tmp2$grad
|
||||
Lold<-tmp2$var
|
||||
W<-G%*%t(Vold)-Vold%*%t(G)
|
||||
stepsize<-lambda#/(2*sqrt(count+1))
|
||||
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
||||
# print(Vnew)
|
||||
tmp3<-LV_weight_full(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
||||
Lnew<-tmp3$var
|
||||
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
||||
#print(err)
|
||||
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
||||
lambda=lambda/2
|
||||
err<-10
|
||||
count2<-count2+1
|
||||
count<-count-1
|
||||
Vnew<-Vold #!!!!!
|
||||
|
||||
}
|
||||
Vold<-Vnew
|
||||
count<-count+1
|
||||
#print(count)
|
||||
}
|
||||
if(best>Lnew){
|
||||
best<-Lnew
|
||||
Vend<-Vnew
|
||||
sig<-tmp3$sig
|
||||
}
|
||||
}
|
||||
ret<-list(Vend,best,sig,count,h,count2)
|
||||
names(ret)<-c('est_base','var','aov_dat','count','h','count2')
|
||||
return(ret)
|
||||
}
|
128
runtime_test.R
128
runtime_test.R
|
@ -1,128 +0,0 @@
|
|||
# Usage:
|
||||
# ~$ Rscript runtime_test.R
|
||||
|
||||
textplot <- function(...) {
|
||||
text <- unlist(list(...))
|
||||
if (length(text) > 20) {
|
||||
text <- c(text[1:17],
|
||||
' ...... (skipped, text too long) ......',
|
||||
text[c(-1, 0) + length(text)])
|
||||
}
|
||||
|
||||
plot(NA, xlim = c(0, 1), ylim = c(0, 1),
|
||||
bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
|
||||
|
||||
for (i in seq_along(text)) {
|
||||
text(0, 1 - (i / 20),
|
||||
text[[i]], pos = 4)
|
||||
}
|
||||
}
|
||||
|
||||
# library(CVEpureR) # load CVE's pure R implementation
|
||||
library(CVE) # load CVE
|
||||
|
||||
#' Writes log information to console. (to not get bored^^)
|
||||
tell.user <- function(name, start, i, length) {
|
||||
cat("\rRunning Test (", name, "):",
|
||||
i, "/", length,
|
||||
" - elapsed:", format(Sys.time() - start), "\033[K")
|
||||
}
|
||||
#' Computes "distance" of spanned subspaces.
|
||||
#' @param B1 Semi-orthonormal basis matrix
|
||||
#' @param B2 Semi-orthonormal basis matrix
|
||||
#' @return Frobenius norm of subspace projection matrix diff.
|
||||
subspace.dist <- function(B1, B2){
|
||||
P1 <- tcrossprod(B1, B1)
|
||||
P2 <- tcrossprod(B2, B2)
|
||||
return(norm(P1 - P2, type = 'F'))
|
||||
}
|
||||
|
||||
# Set random seed
|
||||
set.seed(437)
|
||||
|
||||
# Number of simulations
|
||||
SIM.NR <- 50L
|
||||
# maximal number of iterations in curvilinear search algorithm
|
||||
MAXIT <- 50L
|
||||
# number of arbitrary starting values for curvilinear optimization
|
||||
ATTEMPTS <- 10L
|
||||
# set names of datasets
|
||||
ds.names <- paste0("M", seq(7))
|
||||
# Set used CVE method
|
||||
methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd")
|
||||
|
||||
# Setup error and time tracking variables
|
||||
error <- matrix(NA, SIM.NR, length(methods) * length(ds.names))
|
||||
time <- matrix(NA, SIM.NR, ncol(error))
|
||||
colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0)
|
||||
colnames(time) <- colnames(error)
|
||||
|
||||
# Create new log file and write CSV (actualy TSV) header.
|
||||
# (do not overwrite existing logs)
|
||||
log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
|
||||
file <- file.path('tmp', paste0('test', log.nr, '.log'))
|
||||
cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
|
||||
# Open a new pdf device for plotting into (do not overwrite existing ones)
|
||||
path <- paste0('test', log.nr, '.pdf')
|
||||
pdf(file.path('tmp', path))
|
||||
cat('Plotting to file:', path, '\n')
|
||||
|
||||
# only for telling user (to stdout)
|
||||
count <- 0
|
||||
start <- Sys.time()
|
||||
# Start simulation loop.
|
||||
for (sim in 1:SIM.NR) {
|
||||
# Repeat for each dataset.
|
||||
for (name in ds.names) {
|
||||
tell.user(name, start, (count <- count + 1), SIM.NR * length(ds.names))
|
||||
|
||||
# Create a new dataset
|
||||
ds <- dataset(name)
|
||||
# Prepare X, Y and combine to data matrix
|
||||
Y <- ds$Y
|
||||
X <- ds$X
|
||||
data <- cbind(Y, X)
|
||||
# get dimensions
|
||||
k <- ncol(ds$B)
|
||||
|
||||
for (method in methods) {
|
||||
dr.time <- system.time(
|
||||
dr <- cve.call(X, Y,
|
||||
method = method,
|
||||
k = k,
|
||||
attempts = ATTEMPTS
|
||||
)
|
||||
)
|
||||
dr$B <- coef(dr, k)
|
||||
|
||||
key <- paste0(name, '-', method)
|
||||
error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k)
|
||||
time[sim, key] <- dr.time["elapsed"]
|
||||
|
||||
# Log results to file (mostly for long running simulations)
|
||||
cat(paste0(
|
||||
c(name, method, error[sim, key], time[sim, key]),
|
||||
collapse = '\t'
|
||||
), '\n',
|
||||
sep = '', file = file, append = TRUE
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cat("\n\n## Time [sec] Summary:\n")
|
||||
print(summary(time))
|
||||
cat("\n## Error Summary:\n")
|
||||
print(summary(error))
|
||||
|
||||
boxplot(error,
|
||||
main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Error",
|
||||
las = 2
|
||||
)
|
||||
boxplot(time,
|
||||
main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
|
||||
ylab = "Time [sec]",
|
||||
las = 2
|
||||
)
|
||||
|
120
test.R
120
test.R
|
@ -1,120 +0,0 @@
|
|||
textplot <- function(...) {
|
||||
text <- unlist(list(...))
|
||||
if (length(text) > 20) {
|
||||
text <- c(text[1:17],
|
||||
' ...... (skipped, text too long) ......',
|
||||
text[c(-1, 0) + length(text)])
|
||||
}
|
||||
|
||||
plot(NA, xlim = c(0, 1), ylim = c(0, 1),
|
||||
bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
|
||||
|
||||
for (i in seq_along(text)) {
|
||||
text(0, 1 - (i / 20),
|
||||
text[[i]], pos = 4)
|
||||
}
|
||||
}
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
if (length(args) > 0L) {
|
||||
method <- args[1]
|
||||
} else {
|
||||
method <- "simple"
|
||||
}
|
||||
if (length(args) > 1L) {
|
||||
momentum <- as.double(args[2])
|
||||
} else {
|
||||
momentum <- 0.0
|
||||
}
|
||||
seed <- 42
|
||||
max.iter <- 50L
|
||||
attempts <- 25L
|
||||
|
||||
library(CVE)
|
||||
path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf')
|
||||
|
||||
# Define logger for `cve()` method.
|
||||
logger <- function(attempt, iter, data) {
|
||||
# Note the `<<-` assignement!
|
||||
loss.history[iter + 1, attempt] <<- data$loss
|
||||
error.history[iter + 1, attempt] <<- data$err
|
||||
tau.history[iter + 1, attempt] <<- data$tau
|
||||
# Compute true error by comparing to the true `B`
|
||||
B.est <- null(data$V) # Function provided by CVE
|
||||
P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
|
||||
true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
|
||||
true.error.history[iter + 1, attempt] <<- true.error
|
||||
}
|
||||
|
||||
pdf(path, width = 8.27, height = 11.7) # width, height unit is inces -> A4
|
||||
layout(matrix(c(1, 1,
|
||||
2, 3,
|
||||
4, 5), nrow = 3, byrow = TRUE))
|
||||
|
||||
for (name in paste0("M", seq(7))) {
|
||||
# Seed random number generator
|
||||
set.seed(seed)
|
||||
|
||||
# Create a dataset
|
||||
ds <- dataset(name)
|
||||
X <- ds$X
|
||||
Y <- ds$Y
|
||||
B <- ds$B # the true `B`
|
||||
k <- ncol(ds$B)
|
||||
# True projection matrix.
|
||||
P <- B %*% solve(t(B) %*% B) %*% t(B)
|
||||
|
||||
# Setup histories.
|
||||
V_last <- NULL
|
||||
loss.history <- matrix(NA, max.iter + 1, attempts)
|
||||
error.history <- matrix(NA, max.iter + 1, attempts)
|
||||
tau.history <- matrix(NA, max.iter + 1, attempts)
|
||||
true.error.history <- matrix(NA, max.iter + 1, attempts)
|
||||
|
||||
time <- system.time(
|
||||
dr <- cve(Y ~ X, k = k, method = method,
|
||||
momentum = momentum,
|
||||
max.iter = max.iter, attempts = attempts,
|
||||
logger = logger)
|
||||
)["elapsed"]
|
||||
|
||||
# Extract finaly selected values:
|
||||
B.est <- coef(dr, k)
|
||||
true.error <- norm(tcrossprod(B.est) - tcrossprod(B), 'F') / sqrt(2 * k)
|
||||
loss <- dr$res[[as.character(k)]]$loss
|
||||
|
||||
# Write metadata.
|
||||
textplot(
|
||||
paste0("Seed value: ", seed),
|
||||
"",
|
||||
paste0("Dataset Name: ", ds$name),
|
||||
paste0("dim(X) = (", nrow(X), ", ", ncol(X), ")"),
|
||||
paste0("dim(B) = (", nrow(B), ", ", ncol(B), ")"),
|
||||
"",
|
||||
paste0("CVE method: ", dr$method),
|
||||
paste0("Max Iterations: ", max.iter),
|
||||
paste0("Attempts: ", attempts),
|
||||
paste0("Momentum: ", momentum),
|
||||
"CVE call:",
|
||||
paste0(" > ", format(dr$call)),
|
||||
"",
|
||||
paste0("True Error: ", round(true.error, 3)),
|
||||
paste0("loss: ", round(loss, 3)),
|
||||
paste0("time: ", round(time, 3), " s")
|
||||
)
|
||||
# Plot history's
|
||||
matplot(loss.history, type = 'l', log = 'y', xlab = 'i (iteration)',
|
||||
main = paste('loss', name),
|
||||
ylab = expression(L(V[i])))
|
||||
matplot(true.error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
|
||||
main = paste('true error', name),
|
||||
ylab = expression(group('|', B*B^T - B[i]*B[i]^T, '|')[F] / sqrt(2*k)))
|
||||
matplot(error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
|
||||
main = paste('error', name),
|
||||
ylab = expression(group('|', V[i-1]*V[i-1]^T - V[i]*V[i]^T, '|')[F]))
|
||||
matplot(tau.history, type = 'l', log = 'y', xlab = 'i (iteration)',
|
||||
main = paste('learning rate', name),
|
||||
ylab = expression(tau[i]))
|
||||
}
|
||||
|
||||
cat("Created plot:", path, "\n")
|
Loading…
Reference in New Issue