From d5cd6075a99d5e682db378d10e2f3543eaf25f5c Mon Sep 17 00:00:00 2001 From: daniel Date: Mon, 16 Dec 2019 17:40:30 +0100 Subject: [PATCH] cleanup: removed old and legacy code --- CVE_legacy/M1.R | 66 ----- CVE_legacy/M2.R | 60 ----- CVE_legacy/M3.R | 65 ----- CVE_legacy/M4.R | 76 ------ CVE_legacy/M5.R | 64 ----- CVE_legacy/Test_weigthed_cve.R | 113 -------- CVE_legacy/estimation_of_dimension.R | 189 ------------- CVE_legacy/function_script.R | 382 --------------------------- CVE_legacy/function_script_2.R | 236 ----------------- CVE_legacy/function_script_3.R | 315 ---------------------- CVE_legacy/stiefel_weight_package.R | 222 ---------------- runtime_test.R | 128 --------- test.R | 120 --------- 13 files changed, 2036 deletions(-) delete mode 100755 CVE_legacy/M1.R delete mode 100755 CVE_legacy/M2.R delete mode 100755 CVE_legacy/M3.R delete mode 100755 CVE_legacy/M4.R delete mode 100755 CVE_legacy/M5.R delete mode 100644 CVE_legacy/Test_weigthed_cve.R delete mode 100644 CVE_legacy/estimation_of_dimension.R delete mode 100755 CVE_legacy/function_script.R delete mode 100644 CVE_legacy/function_script_2.R delete mode 100644 CVE_legacy/function_script_3.R delete mode 100644 CVE_legacy/stiefel_weight_package.R delete mode 100644 runtime_test.R delete mode 100644 test.R diff --git a/CVE_legacy/M1.R b/CVE_legacy/M1.R deleted file mode 100755 index 9f2c73c..0000000 --- a/CVE_legacy/M1.R +++ /dev/null @@ -1,66 +0,0 @@ -#initialize model parameterss -m<-100 #number of replications in simulation -dim<-12 #dimension of random variable X -truedim<-2 #dimension of B=b -qs<-dim-truedim # dimension of orthogonal complement of B -b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) -b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6) -b<-cbind(b1,b2) -P<-b%*%t(b) -sigma=0.5 #error standard deviation -N<-200 #sample size -K<-30 #number of arbitrary starting values for curvilinear optimization -MAXIT<-50 #maximal number of iterations in curvilinear search algorithm -##initailaize true covariancematrix of X -Sig<-mat.or.vec(dim,dim) -for (i in 1:dim){ - for (j in 1:dim){ - Sig[i,j]<-sigma^abs(i-j) - } -} -Sroot<-chol(Sig) - -M1_JASA<-mat.or.vec(m,8) -colnames(M1_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') -#link function for M1 -fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))} -for (i in 1:m){ - #generate dat according to M1 - dat<-creat_sample_nor_nonstand(b,N,fM1,t(Sroot),sigma) - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - # calculate distance between true B and estimated B - M1_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) - M1_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) - M1_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M1_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) - #csMAVE - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') - M1_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) - #phd - test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) - M1_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) - #sir - test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) - M1_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) - #save - test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) - M1_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) - print(paste(i,paste('/',m))) -} -boxplot(M1_JASA[,]/sqrt(2*truedim),names=colnames(M1_JASA),ylab='err',main='M1') -summary(M1_JASA[,]) - diff --git a/CVE_legacy/M2.R b/CVE_legacy/M2.R deleted file mode 100755 index 5232dc8..0000000 --- a/CVE_legacy/M2.R +++ /dev/null @@ -1,60 +0,0 @@ -#initialize model parameterss -m<-100 #number of replications in simulation -dim<-12 #dimension of random variable X -truedim<-2 #dimension of B=b -qs<-dim-truedim # dimension of orthogonal complement of B -b1=c(1,rep(0,dim-1)) -b2=c(0,1,rep(0,dim-2)) -b<-cbind(b1,b2) #B -P<-b%*%t(b) -sigma=0.5 #error standard deviation -N<-200 #sample size -K<-30 #number of arbitrary starting values for curvilinear optimization -MAXIT<-50 #maximal number of iterations in curvilinear search algorithm - - - -M2_JASA<-mat.or.vec(m,8) -colnames(M2_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') -f_link<-function(x){return(x[1]*(x[2])^2)} -for (i in 1:m){ - #generate dat according to M2 - dat<-creat_sample(b,N,f_link,sigma) - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - #CVE - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - # calculate distance between true B and estimated B - M2_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) - M2_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) - M2_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M2_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) - #csMAVE - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') - M2_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) - #phd - test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) - M2_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) - #sir - test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) - M2_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) - #save - test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) - M2_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) - print(paste(i,paste('/',m))) -} -boxplot(M2_JASA[,]/sqrt(2*truedim),names=colnames(M2_JASA),ylab='err',main='M2') -summary(M2_JASA[,]) - diff --git a/CVE_legacy/M3.R b/CVE_legacy/M3.R deleted file mode 100755 index 89735c0..0000000 --- a/CVE_legacy/M3.R +++ /dev/null @@ -1,65 +0,0 @@ -#initialize model parameterss -m<-100 #number of replications in simulation -dim<-12 #dimension of random variable X -truedim<-1#dimension of B=b -qs<-dim-truedim# dimension of orthogonal complement of B -b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) -b<-b1#B -P<-b%*%t(b) -sigma=0.5#error standard deviation -N<-100#sample size -K<-30#number of arbitrary starting values for curvilinear optimization -MAXIT<-30#maximal number of iterations in curvilinear search algorithm -##initailaize true covariancematrix of X -Sig<-mat.or.vec(dim,dim) -for (i in 1:dim){ - for (j in 1:dim){ - Sig[i,j]<-sigma^abs(i-j) - } -} -Sroot<-chol(Sig) - -M3_JASA<-mat.or.vec(m,8) -colnames(M3_JASA)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') -for (i in 1:m){ - #generate dat according to M3 - dat<-creat_sample_nor_nonstand(b,N,cos,t(Sroot),sigma) # distribution - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - #CVE - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - # calculate distance between true B and estimated B - M3_JASA[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) - M3_JASA[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) - M3_JASA[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M3_JASA[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) - #csMAVE - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') - M3_JASA[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) - #phd - test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) - M3_JASA[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) - #sir - test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) - M3_JASA[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) - #save - test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) - M3_JASA[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) - - print(paste(i,paste('/',m))) -} -boxplot(M3_JASA[,]/sqrt(2*truedim),names=colnames(M3_JASA),ylab='err',main='M3') -summary(M3_JASA[,]) - diff --git a/CVE_legacy/M4.R b/CVE_legacy/M4.R deleted file mode 100755 index 23db3ad..0000000 --- a/CVE_legacy/M4.R +++ /dev/null @@ -1,76 +0,0 @@ -#initialize model parameters -m<-100 #number of replications in simulation -dim<-12 #dimension of random variable X -truedim<-1#dimension of B=b -qs<-dim-truedim# dimension of orthogonal complement of B -b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) -b<-b1#B -P<-b%*%t(b) -sigma=0.5#error standard deviation -N<-100#sample size -K<-30#number of arbitrary starting values for curvilinear optimization -MAXIT<-30#maximal number of iterations in curvilinear search algorithm - -para<-seq(0,1.5,0.5)#model parameter corresponding to lambda in M4 -mix_prob<-seq(0.3,0.5,0.1) #model parameters corresponding to p_{mix} -M4_JASA<-array(data=NA,dim=c(length(para),length(mix_prob),m,5)) -for(o in 1:length(mix_prob)){ - for (u in 1:length(para)){ - colnames(M4_JASA[u,o,,])<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE') - for (i in 1:m){ - #generate dat according to M4 with p_{mix}=mix_prob[o] and lambda=para[u] - if(u==1){dat<-creat_sample(b,N,cos,sigma)} - else{dat<-creat_sample_noneliptic_gausmixture(b,N,cos,sigma,mix_prob[o],dispers_para = para[u])} - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - #CVE - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - # calculate distance between true B and estimated B - M4_JASA[u,o,i,1]<-subspace_dist(b,CVE1$est_base[,1:truedim]) - M4_JASA[u,o,i,2]<-subspace_dist(b,CVE2$est_base[,1:truedim]) - M4_JASA[u,o,i,3]<-subspace_dist(b,CVE3$est_base[,1:truedim]) - #csMAVE - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M4_JASA[u,o,i,4]<-subspace_dist(b,mod_t$dir[[truedim]]) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') - M4_JASA[u,o,i,5]<-subspace_dist(b,mod_t2$dir[[truedim]]) - - } - print(paste(u,paste('/',length(para)))) - } -} - -par(mfrow=c(3,4)) -boxplot(Exp_cook_square[1,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','meanMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[2,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[3,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[4,1,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[1]),sep=', '),ylim=c(0,1)) - -boxplot(Exp_cook_square[1,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[2,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[3,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[4,2,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[2]),sep=', '),ylim=c(0,1)) - -boxplot(Exp_cook_square[1,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[2,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[3,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[4,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) -#boxplot(Exp_cook_square[5,3,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[5]),paste('mixprob =',mix_prob[3]),sep=', '),ylim=c(0,1)) - -boxplot(Exp_cook_square[1,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[1]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) 
-boxplot(Exp_cook_square[2,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[2]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[3,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[3]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) -boxplot(Exp_cook_square[4,4,,]/sqrt(2*truedim),names=c('CVE1','CVE2','CVE3','mMAVE','csMAVE'),main=paste(paste('dispersion =',para[4]),paste('mixprob =',mix_prob[4]),sep=', '),ylim=c(0,1)) - -par(mfrow=c(1,1)) diff --git a/CVE_legacy/M5.R b/CVE_legacy/M5.R deleted file mode 100755 index 2dfb2f2..0000000 --- a/CVE_legacy/M5.R +++ /dev/null @@ -1,64 +0,0 @@ -#initialize model parameters -m<-500#number of replications in simulation -dim<-12#dimension of random variable X -truedim<-1#dimension of B=b -qs<-dim-truedim# dimension of orthogonal complement of B -b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) -b<-b1#B -P<-b%*%t(b) -sigma=0.5#error standard deviation -N<-42#sample size -K<-70#number of arbitrary starting values for curvilinear optimization -MAXIT<-50#maximal number of iterations in curvilinear search algorithm - -f_ln1<-function(x){return(2*log(abs(x)+1))}#link function -M5_JASA_lowN<-mat.or.vec(m,8) -colnames(M5_JASA_lowN)<-c('CVE1','CVE2','CVE3','meanMAVE','csMAVE','phd','sir','save') -for (i in 1:m){ - #generate dat according to M5 - dat<-creat_sample(b,N,f_ln1,sigma) - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - #CVE - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - # calculate distance between true B and estimated B - M5_JASA_lowN[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) - M5_JASA_lowN[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) - M5_JASA_lowN[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M5_JASA_lowN[i,4]<-subspace_dist(mod_t2$dir[[truedim]],b) - #csMAVE - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'csMAVE') - M5_JASA_lowN[i,5]<-subspace_dist(mod_t$dir[[truedim]],b) - #phd - test4<-summary(dr(Y~.,data=as.data.frame(dat),method='phdy',numdir=truedim+1)) - M5_JASA_lowN[i,6]<-subspace_dist(orth(test4$evectors[,1:truedim]),b) - #sir - test5<-summary(dr(Y~.,data=as.data.frame(dat),method='sir',numdir=truedim+1)) - M5_JASA_lowN[i,7]<-subspace_dist(orth(test5$evectors[,1:truedim]),b) - #save - test3<-summary(dr(Y~.,data=as.data.frame(dat),method='save',numdir=truedim+1)) - M5_JASA_lowN[i,8]<-subspace_dist(orth(test3$evectors[,1:truedim]),b) - - print(paste(i,paste('/',m))) -} -boxplot(M5_JASA_lowN[,]/sqrt(2*truedim),names=colnames(M5_JASA_lowN),ylab='err',main='M5') -summary(M5_JASA_lowN[,]) - -#### - -# par(mfrow=c(1,2)) -# boxplot(M3_JASA_L[,]/sqrt(2*1),names=colnames(M3_JASA_L),ylab='err',main='M3') -# boxplot(M5_JASA_lowN[1:100,-c(4,5,6)]/sqrt(2*1),names=colnames(M5_JASA_lowN)[-c(4,5,6)],ylab='err',main='M5') -# par(mfrow=c(1,1)) diff --git a/CVE_legacy/Test_weigthed_cve.R b/CVE_legacy/Test_weigthed_cve.R deleted file mode 100644 index c5fd51c..0000000 --- a/CVE_legacy/Test_weigthed_cve.R +++ /dev/null @@ -1,113 +0,0 @@ - -#C:\Users\Lukas\Desktop\owncloud\Shared\Lukas\CVE -install.packages("C:/Users/Lukas/Desktop/owncloud/Shared/Lukas/CVE_1.0.tar.gz", repos=NULL, type="source") -install.packages(file.choose(), repos=NULL, type="source") - -dim<-12 -N<-100 -s<-0.5 -dat<-creat_sample(rep(1,dim)/sqrt(dim),N,fsquare,0.5) -test<-cve(Y~.,data=as.data.frame(dat),k=1) -############## -#initialize model parameterss -m<-100 #number of replications in simulation -dim<-12 #dimension of random variable X -truedim<-2 #dimension of B=b -qs<-dim-truedim # dimension of orthogonal complement of B -b1=c(1,1,1,1,1,1,0,0,0,0,0,0)/sqrt(6) -b2=c(1,-1,1,-1,1,-1,0,0,0,0,0,0)/sqrt(6) -b<-cbind(b1,b2) -#b<-b1 -P<-b%*%t(b) -sigma=0.5 #error standard deviation -N<-70 #sample size -K<-30 #number of arbitrary starting values for curvilinear optimization -MAXIT<-30 #maximal number of iterations in curvilinear search algorithm -var_vec<-mat.or.vec(m,12) -M1_weight<-mat.or.vec(m,13) -#colnames(M1_weight)<-c('CVE1','CVE2','CVE3','CVE1_Rcpp','CVE2_Rcpp','CVE3_Rcpp','meanMAVE','csMAVE','phd','sir','save','CVE4') -#link function for M1 -fM1<-function(x){return(x[1]/(0.5+(x[2]+1.5)^2))} -for (i in 1:m){ - #generate dat according to M1 - dat<-creat_sample_nor_nonstand(b,N,fsquare,diag(rep(1,dim)),sigma) - #est sample covariance matrix - Sig_est<-est_varmat(dat[,-1]) - #est trace of sample covariance matrix - tr<-var_tr(Sig_est) - - #calculates Vhat_k for CVE1,CVE2, CVE3 for k=qs - CVE1<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE2<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - 
CVE3<-stiefl_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - - CVE4<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE5<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE6<-stiefl_weight_partial_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - # - CVE7<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.8),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE8<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(2/3),tr=tr),maxit = MAXIT,sclack_para = 0) - CVE9<-stiefl_weight_full_opt(dat,k=qs,k0=K,h=choose_h_2(dim,k=dim-truedim,N=N,nObs=(N)^(0.5),tr=tr),maxit = MAXIT,sclack_para = 0) - # - # - var_vec[i,1]<-CVE1$var - var_vec[i,2]<-CVE2$var - var_vec[i,3]<-CVE3$var - - var_vec[i,4]<-CVE4$var - var_vec[i,5]<-CVE5$var - var_vec[i,6]<-CVE6$var - # - var_vec[i,7]<-CVE7$var - var_vec[i,8]<-CVE8$var - var_vec[i,9]<-CVE9$var - - CVE1$est_base<-fill_base(CVE1$est_base) - CVE2$est_base<-fill_base(CVE2$est_base) - CVE3$est_base<-fill_base(CVE3$est_base) - CVE4$est_base<-fill_base(CVE4$est_base) - CVE5$est_base<-fill_base(CVE5$est_base) - CVE6$est_base<-fill_base(CVE6$est_base) - CVE7$est_base<-fill_base(CVE7$est_base) - CVE8$est_base<-fill_base(CVE8$est_base) - CVE9$est_base<-fill_base(CVE9$est_base) - - # calculate distance between true B and estimated B - M1_weight[i,1]<-subspace_dist(CVE1$est_base[,1:truedim],b) - M1_weight[i,2]<-subspace_dist(CVE2$est_base[,1:truedim],b) - M1_weight[i,3]<-subspace_dist(CVE3$est_base[,1:truedim],b) - M1_weight[i,4]<-subspace_dist(CVE4$est_base[,1:truedim],b) - M1_weight[i,5]<-subspace_dist(CVE5$est_base[,1:truedim],b) - M1_weight[i,6]<-subspace_dist(CVE6$est_base[,1:truedim],b) - M1_weight[i,7]<-subspace_dist(CVE7$est_base[,1:truedim],b) - M1_weight[i,8]<-subspace_dist(CVE8$est_base[,1:truedim],b) - M1_weight[i,9]<-subspace_dist(CVE9$est_base[,1:truedim],b) - - - - CVE1_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.8,attempts=K,tol=10^(-3),slack=0)[[2]] - CVE2_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^(2/3),attempts=K,tol=10^(-3),slack=0)[[2]] - CVE3_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,nObs=N^0.5,attempts=K,tol=10^(-3),slack=0)[[2]] - - # CVE4_Rcpp<-cve(Y~.,data=as.data.frame(dat),k=truedim,h=h_opt,attempts=K,tol=10^(-3))[[2]] - #M1_Rcpp[i,12]<-subspace_dist(CVE4_Rcpp$B,b) - - var_vec[i,10]<-CVE1_Rcpp$loss - var_vec[i,11]<-CVE2_Rcpp$loss - var_vec[i,12]<-CVE3_Rcpp$loss - - #calculate orthogonal complement of Vhat_k - #i.e. 
CVE1$est_base[,1:truedim] is estimator for B with dimension (dim times (dim-qs)) - - - M1_weight[i,10]<-subspace_dist(CVE1_Rcpp$B,b) - M1_weight[i,11]<-subspace_dist(CVE2_Rcpp$B,b) - M1_weight[i,12]<-subspace_dist(CVE3_Rcpp$B,b) - #meanMAVE - mod_t2<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - M1_weight[i,13]<-subspace_dist(mod_t2$dir[[truedim]],b) - - print(paste(i,paste('/',m))) -} -boxplot(M1_weight[1:(i-1),]/sqrt(2*truedim),names=colnames(M1_weight),ylab='err',main='M1') -summary(M1_weight[1:(i-1),]) diff --git a/CVE_legacy/estimation_of_dimension.R b/CVE_legacy/estimation_of_dimension.R deleted file mode 100644 index 60a10b3..0000000 --- a/CVE_legacy/estimation_of_dimension.R +++ /dev/null @@ -1,189 +0,0 @@ -library("mda") #library for mars - -local_linear<-function(x,h,dat,beta){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - X<-X%*%beta - x<-x%*%beta#beta%*%x - D_mat<-cbind(rep(1,N),X) - if (is.vector(X)){ - dim<-1 - d<-abs(X-rep(x,N)) - } - else{ - dim<-length(X[1,]) - d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2)) - } - K<-diag(dnorm(d/h)/dnorm(0)) - pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y - return(pred) -} -##### performs estimation of small dimesnion by CV with local linear forward regression -est_dim_CV<-function(Blist,dat,h_loclin=NULL,dim.max,median_use=F,method_mars=F){ - #standardize regressors by symmetric root of inverse covariance mat - Sig<-est_varmat(dat[,-1]) - eig_dec<-eigen(Sig) - Sroot_inv<-eig_dec$vectors%*%((diag(eig_dec$values^(-1/2))))%*%t(eig_dec$vectors) - dat[,-1]<-as.matrix(dat[,-1])%*%Sroot_inv - - N<-length(dat[,1]) - dim<-length(dat[1,-1]) - - MSE<-mat.or.vec(N,dim.max) - for(u in 1:dim.max){ - beta<-Blist[[u]] - if(is.null(h_loclin)){ - #h_loclin<-(1/N)^(1/(3+2*u)) - h_loclin<-1.2*N^(-1/(4+u))#(1/N)^(1/(3+2*j)) - - } - - for(i in 1:N){ - x<-dat[i,-1] - if(method_mars==F){MSE[i,u]<-(dat[i,1]-local_linear(x,h_loclin,as.matrix(dat[-i,]),beta))^2} #predict with local linear - if(method_mars==T){ - dat_fit<-dat[-i,-1]%*%beta - X_new<-dat[i,-1]%*%beta - mars_mod<-mars(dat_fit,dat[-i,1]) #fit mars model - MSE[i,u]<-(dat[i,1]-predict(mars_mod,X_new))^2 #predict with mars model - }#predict with mars - } - } - if(median_use){MSE_ave<-apply(MSE,2,median)} - else{MSE_ave<-colMeans(MSE)} - #apply(MSE,2,median) - est_dim<-which.min(MSE_ave) - ret<-list(est_dim = est_dim, MSE_ave = MSE_ave, MSE = MSE) - return(ret) -} -########### -test_for_dim<-function(Lmat,dim.max=NULL,alpha=0.1,method='greater'){ - #Lmat... 
matrix with dimension N times dim.max with columns corresponding to aov_dat for dim 1,2,3,....,max.dim - if(is.null(dim.max)){dim.max<-length(Lmat[1,])} - - pval<-mat.or.vec(dim.max-1,1) - est_dim<-dim.max - # for (j in 1:(dim.max-1)){ - j<-1 - while(est_dim==dim.max&jalpha){est_dim<-j} - } - j<-j+1 - - } - ret<-list(est_dim,pval) - names(ret)<-c('estdim','pval') - return(ret) -} -#### - -## -test_for_dim_elbow<-function(Lmat){ - dim.max<-length(Lmat[1,]) - ave<-colMeans(Lmat) - boxplot(Lmat,xlab='k') - lines(seq(1,dim.max),ave,col='red') - # tmp<-cbind(ave,seq(1,dim.max)) - # colnames(tmp)<-c('response','k') - # diff<-lm(response~k,data=as.data.frame(tmp))$coefficients[2] - # - # est_dim<-dim.max - # i<-1 - # while(idiff){est_dim<-i} - # i<-i+1 - # } - return(which.min(ave)) -} -###### -#Small simulation example for truedim =1 - -set.seed(21) -dim<-6 -truedim<-1 -N<-50 -b<-c(1,0,0,0,0,0) -m<-20 -est_dim<-mat.or.vec(m,7) -dim.max<-4 -for(i in 1:m){ - dat<-creat_sample(b,N,fsquare,0.5) - Blist<-list() - Lmat<-mat.or.vec(N,dim.max) - for(u in 1:dim.max){ #calculate B for different possible truedim's - m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection rule used that controlls number of points in a slice!!!!!!, also choose_h_2 - Blist[[u]]<-fill_base(m1$est_base)[,1:u] - Lmat[,u]<-m1$aov_dat - } - #estimate truedim with different methods - est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim - est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim - est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F,method_mars = T)$est_dim - est_dim[i,4]<-test_for_dim(Lmat)$estdim - est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim - est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1 - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - est_dim[i,7]<-which.min(mave.dim(mod_t)$cv) - - print(i) -} - -length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with median) -#0.5 -length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mean) -#0.9 -length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (CV with mars and mean) -#0.8 -length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='greater') -#0.0 -length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod 1 (t.test method='lower') -#0.05 -length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow) -#1 -length(which(est_dim[,7]==truedim))/m #fraction of where dimension is estimated correctly with mave -#0.95 -########## -#Small simulation example for truedim =2 - -set.seed(21) -dim<-6 -truedim<-2 -N<-100 -b<-cbind(c(1,rep(0,dim-1)),c(0,1,rep(0,dim-2))) -m<-20 -est_dim<-mat.or.vec(m,7) -dim.max<-4 -for(i in 1:m){ - dat<-creat_sample(b,N,function(x){return(x[1]*x[2])},0.5) - Blist<-list() - Lmat<-mat.or.vec(N,dim.max) - for(u in 1:dim.max){ - m1<-stiefl_opt(dat,k=(dim-u)) #original bandwidth selection used !!!!!!!!!!!!!!!!!!!!!, also choose_h_2 - Blist[[u]]<-fill_base(m1$est_base)[,1:u] - Lmat[,u]<-m1$aov_dat - } - est_dim[i,1]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = T)$est_dim - est_dim[i,2]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = F)$est_dim - est_dim[i,3]<-est_dim_CV(Blist,dat,dim.max = dim.max,median_use = 
F,method_mars = T)$est_dim - est_dim[i,4]<-test_for_dim(Lmat)$estdim - est_dim[i,5]<-test_for_dim(Lmat,method = 'lower')$estdim - est_dim[i,6]<-test_for_dim_elbow(cbind((dat[,1]-mean(dat[,1]))^2,Lmat))-1 - mod_t<-mave(Y~.,data=as.data.frame(dat),method = 'meanMAVE') - est_dim[i,7]<-which.min(mave.dim(mod_t)$cv) - - print(i) -} - -length(which(est_dim[,1]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with median) -length(which(est_dim[,2]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mean) -length(which(est_dim[,3]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (CV with mars and mean) -length(which(est_dim[,4]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='greater') -length(which(est_dim[,5]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (t.test method='lower') -length(which(est_dim[,6]==truedim))/m #fraction of where dimension is estimated correctly with mwthod (elbow) diff --git a/CVE_legacy/function_script.R b/CVE_legacy/function_script.R deleted file mode 100755 index 12ff6ce..0000000 --- a/CVE_legacy/function_script.R +++ /dev/null @@ -1,382 +0,0 @@ -library("Matrix") -# library("fields") -# library('dr') -# library('mvtnorm') -# library(mgcv) -# library(pracma) -# library(RVAideMemoire) -library("MAVE") -# library(pls) -# library(LaplacesDemon) -# library(earth) -# library("latex2exp") - -################################################# - -############################## - -################################ - - -fsquare<-function(x){ - ret<-0 - for (h in 1:length(x)){ret<-ret+x[h]^2} - return(ret) -} -f_id<-function(x){return(x[1])} -vfun<-function(x)return(c(cos(x),sin(x))) -vfun_grad<-function(x)return(c(-sin(x),cos(x))) -f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)} -############################### -##creat sample for M2, b is B matrix of model, N..sample size, -#f...link function, sigma..standard deviation of error term -#produces sample with X standard normal with dimension to columnlength of b an -# epsilon centered normal with standard deviation sigma -creat_sample<-function(b,N,f,sigma){ - if (is.vector(b)==1){dim<-length(b)} - if (is.vector(b)==0){dim<-length(b[,1])} - - X<-mat.or.vec(N,dim) - Y<-mat.or.vec(N,1) - for (i in 1:N){ - X[i,]<-rnorm(dim,0,1) - Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) - } - dat<-cbind(Y,X) - return(dat) -} -##### -creat_b<-function(dim,n){ - temp<-mat.or.vec(dim,n) - - if (n>1){ - while (rankMatrix(temp,method = 'qr')< min(dim,n)){ - for (i in 1:n){ - temp[,i]<-rnorm(dim,0,1) - temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2)) - } - } - } - if (n==1) {temp<-rnorm(dim,0,1) - temp<-temp/sqrt(sum(temp^2))} - return(temp) -} -### - -### creat sample for M1,M3 -## b is B matrix of model, N..sample size, -#f...link function, sigma..standard deviation of error term -#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an -# epsilon centered normal with standard deviation sigma -creat_sample_nor_nonstand<-function(b,N,f,S,sigma){ - #Var(x)= SS' - if (is.vector(b)==1){dim<-length(b)} - if (is.vector(b)==0){dim<-length(b[,1])} - - X<-mat.or.vec(N,dim) - Y<-mat.or.vec(N,1) - for (i in 1:N){ - X[i,]<-rnorm(dim,0,1)%*%t(S) - Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) - } - dat<-cbind(Y,X) - return(dat) -} -#### - -#### - -### -#### - -#### -#### creat sample for M4 -##creat sample for M2, b is B matrix of model, N..sample size, -#f...link function, 
sigma..standard deviation of error term -#produces sample with X Gausian mixture -#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi) -#with dimension to columnlength of b an -# epsilon centered normal with standard deviation sigma -creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){ - if (is.vector(b)==1){dim<-length(b)} - if (is.vector(b)==0){dim<-length(b[,1])} - - X<-mat.or.vec(N,dim) - Y<-mat.or.vec(N,1) - for (i in 1:N){ - p<-rbinom(1,1,pi) - X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1) - Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma) - } - dat<-cbind(Y,X) - return(dat) -} -### -############################### -#estimates covariance matrix by ML estimate -# X...n times dim data matrix with rows corresponding to X_i -est_varmat<-function(X){ - dim<-length(X[1,]) - Sig<-mat.or.vec(dim,dim)#Cov(X) - X<-scale(X,center = T,scale = F) - Sig<-t(X)%*%X/length(X[,1]) - return(Sig) -} -#### -# normalizes a vector x such that it sums to 1 -column_normalize<-function(x){return(x/sum(x))} -#### -#squared 2-norm of a vector x -norm2<-function(x){return(sum(x^2))} -### -# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix -fill_base<-function(estb){ - dim<-length(estb[,1]) - k<-length(estb[1,]) - P<-(diag(rep(1,dim))-estb%*%t(estb)) - rk<-0 - while (rk < dim){ - tmp1<-P%*%creat_b(dim,1) - nor<-sqrt(sum(tmp1^2)) - if(nor>10^(-4)){ - estb<-cbind(tmp1/nor,estb) - rkn<-rankMatrix(estb,method = 'qr') - if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb)) - rk<-rkn} - } - if(nor <=10^(-4)){rk<-0} - - } - - return(estb) -} -################## -###new stiefel optimization -#draws from the uniform measure of Stiefel(p,k) -#returns a p times k dimensional semiorthogonal matrix -stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))} -##### chooses shifting points -#auxiliary function for stielfel_opt -# X... N times dim data matrix -#S... sample covariance matrix -#0 < p <=1 fraction of data point used as shifting points -# if p=1 all data points used as shifting points -q_ind<-function(X,S,p){ - dim<-length(X[1,]) - N<-length(X[,1]) - if (p < 1){ - Sinv<-solve(S) - ind<-c() - bound<-qchisq(p,dim) - for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean - q<-length(ind) - if (q<=2){ - q<-N - ind<-seq(1,N)} - } - else { - q<-N - ind<-seq(1,N) - } - ret<-list(q,ind) - names(ret)<-c('q','ind') - return(ret) -} -#### -#calculates the trace of a quadratic matrix S -var_tr<-function(x){return(sum(diag(x)))} -#### -#chooses bandwith h according to formula in paper -#dim...dimension of X vector -#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model -# N...sample size -#nObs... nObs in bandwith formula -#tr...trace of sample covariance matrix of X -choose_h_2<-function(dim,k,N,nObs,tr){ - #h1<-qchisq(nObs/N,2*(dim-k)) - h2<-qchisq((nObs-1)/(N-1),(dim-k)) - return(h2*2*tr/dim) -} -#### -#auxiliary function for stiefel_opt -#X... data matrix of random variable X -#q,ind... output of function q_ind -Xl_fun<-function(X,q,ind){ - N<-length(X[,1]) - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - return(Xl) -} -########### -# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) -# V... (dim times q) matrix -# Xl... output of Xl_fun -# dtemp...vector with pairwise distances |X_i - X_j| -# q...output of q_ind function -# Y... 
vector with N Y_i values -# if grad=T, gradient of L(V) also returned -LV<-function(V,Xl,dtemp,h,q,Y,grad=T){ - N<-length(Y) - if(is.vector(V)){k<-1} - else{k<-length(V[1,])} - Xlv<-Xl%*%V - d<-dtemp-((Xlv^2)%*%rep(1,k)) - w<-dnorm(d/h)/dnorm(0) - w<-matrix(w,N,q) - w<-apply(w,2,column_normalize) - mY<-t(w)%*%Y - sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind - if(grad==T){ - grad<-mat.or.vec(dim,k) - tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) - if(k==1){ - grad_d<- -2*Xl*as.vector(Xlv) - grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1 - } - else{ - for (j in 1:(k)){ - grad_d<- -2*Xl*as.vector(Xlv[,j]) - grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1 - } - } - ret<-list(mean(sig),sig,grad) - names(ret)<-c('var','sig','grad') - } - else{ - ret<-list(mean(sig),sig) - names(ret)<-c('var','sig') - } - - return(ret) -} -#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V) -#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold -#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns -#consists of X data matrix, (i.e. dat=cbind(Y,X)) -#h... bandwidth -#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B -#k0... number of arbitrary starting values -#p...fraction of data points used as shifting point -#maxit... number of maximal iterations in curvilinear search -#nObs.. nObs parameter for choosing bandwidth if no h is supplied -#lambda_0...initial stepsize -#tol...tolerance for stoping iterations -#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced -#output: -#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix -#var...value of L_n(Vhat_k) -#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} -#count...number of iterations -#h...bandwidth -#count2...number of times sclack_para is active -stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ - - history <- matrix(NA, maxit, k0) - - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - S<-est_varmat(X) - if(p<1){ - - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - tr<-var_tr(S) - if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)} - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vnew<-Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! - - } - Vold<-Vnew - count<-count+1 - #print(count) - - # Write to history - history[count, u] <- Lnew - - } - if(best>Lnew){ - best<-Lnew - Vend<-Vnew - sig<-tmp3$sig - } - - } - ret<-list(Vend,best,sig,count,h,count2, history) - names(ret)<-c('est_base','var','aov_dat','count','h','count2', 'history') - return(ret) -} -##### calculates distance between two subspaces -#i.e. 
|b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2 -subspace_dist<-function(b1,b2){ - #b1<-orth(b1) - #b2<-orth(b2) - P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1) - P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2) - return(sqrt(sum((P1-P2)^2))) -} -####performs local linaer regression -#x...datapoint at where estimated function should be evaluated -#h...bandwidth -#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns -#consists of X data matrix, (i.e. dat=cbind(Y,X)) -#beta...projection matrix -local_linear<-function(x,h,dat,beta){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - X<-X%*%beta - x<-x%*%beta#beta%*%x - D_mat<-cbind(rep(1,N),X) - if (is.vector(X)){ - dim<-1 - d<-abs(X-rep(x,N)) - } - else{ - dim<-length(X[1,]) - d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2)) - } - K<-diag(dnorm(d/h)/dnorm(0)) - pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y - return(pred) -} - - - diff --git a/CVE_legacy/function_script_2.R b/CVE_legacy/function_script_2.R deleted file mode 100644 index 92cc4c2..0000000 --- a/CVE_legacy/function_script_2.R +++ /dev/null @@ -1,236 +0,0 @@ -LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){ - N<-length(Y) - if(is.vector(V)){k<-1} - else{k<-length(V[1,])} - Xlv<-Xl%*%V - d<-dtemp-((Xlv^2)%*%rep(1,k)) - w<-exp(-0.5*(d/h)^2) - w<-matrix(w,N,q) - wn<-apply(w,2,sum)-rep(1,q)#new - w<-apply(w,2,column_normalize) - mY<-t(w)%*%Y - sig<-t(w)%*%(Y^2)-(mY)^2 - W<-(kronecker(t(wn),rep(1,N)))##new - if(grad==T){ - grad<-mat.or.vec(dim,k) - tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) - if(k==1){ - grad_d<- -2*Xl*as.vector(Xlv) - grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - else{ - for (j in 1:(k)){ - grad_d<- -2*Xl*as.vector(Xlv[,j]) - grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - } - ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new - names(ret)<-c('var','sig','grad') - } - else{ - ret<-list(t(wn)%*%sig/sum(wn),sig)#new - names(ret)<-c('var','sig') - } - - return(ret) -} -################ -stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - if(p<1){ - S<-est_varmat(X) - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - if(is.null(h)){ - S<-est_varmat(X) - tr<-var_tr(S) - h<-choose_h_2(dim,k,N,nObs,tr) - } - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vnew<-Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! 
- - } - Vold<-Vnew - count<-count+1 - #print(count) - } - if(best>Lnew){ - best<-Lnew - Vend<-Vnew - sig<-tmp3$sig - } - } - ret<-list(Vend,best,sig,count,h,count2) - names(ret)<-c('est_base','var','aov_dat','count','h','count2') - return(ret) -} - -#################MAVE, OPG, rMAVE, rOPG from Bing Li book -opg=function(x,y,d){ - p=dim(x)[2]; - n=dim(x)[1] - c0=2.34; - p0=max(p,3); - rn=n^(-1/(2*(p0+6))); - h=c0*n^(-(1/(p0+6))) - sig=diag(var(x)); - x=apply(x,2,standvec) - kmat=kern(x,h); - bmat=numeric() - for(i in 1:dim(x)[1]){ - wi=kmat[,i]; - xi=cbind(1,t(t(x)-x[i,])) - bmat=cbind(bmat,wls(xi,y,wi)$b)} - beta=eigen(bmat%*%t(bmat))$vectors[,1:d] - return(diag(sig^(-1/2))%*%beta) -} -###################### -wls=function(x,y,w){ - n=dim(x)[1]; - p=dim(x)[2]-1 - out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean)) - return(list(a=out[1],b=out[2:(p+1)])) -} -################# -kern=function(x,h){ - x=as.matrix(x); - n=dim(x)[1] - k2=x%*%t(x); - k1=t(matrix(diag(k2),n,n)); - k3=t(k1); - k=k1-2*k2+k3 - return(exp(-(1/(2*h^2))*(k1-2*k2+k3))) -} -############### -standvec=function(x) return((x-mean(x))/sd(x)) -############## -mave2=function(x,y,h,d,nit){ - sig=diag(var(x)); - n=dim(x)[1]; - p=dim(x)[2] - x=apply(x,2,standvec); - beta=opg(x,y,d);#beta=opg(x,y,h,d); - kermat=kern(x,h) - for(iit in 1:nit){ - b=numeric(); - a=numeric(); - for(i in 1:n){ - wi=kermat[,i]/(apply(kermat,2,mean)[i]) - ui=cbind(1,t(t(x)-x[i,])%*%beta) - out=wls(ui,y,wi); - a=c(a,out$a);b=cbind(b,out$b)} - out=0;out1=0; - for(i in 1:n){ - xi=kronecker(t(t(x)-x[i,]),t(b[,i])) - yi=y-a[i]; - wi=kermat[,i]/apply(kermat,2,mean)[i] - out=out+apply(xi*yi*wi,2,mean) - out1=out1+t(xi*wi)%*%xi/n} - beta=t(matrix(solve(out1)%*%out,d,p)) - } - return(diag(sig^(-1/2))%*%beta) -} -###################### -rmave=function(x,y,d,nit){ - sig=diag(var(x)); - n=dim(x)[1]; - p=dim(x)[2] - x=apply(x,2,standvec) - c0=2.34; - p0=max(p,3); - h=c0*n^(-(1/(p0+6))); - rn=n^(-1/(2*(p0+6))) - beta=opg(x,y,d) - for(iit in 1:nit){ - kermat=kern(x%*%beta,h); - mkermat=apply(kermat,2,mean) - b=numeric();a=numeric() - for(i in 1:n){ - wi=kermat[,i]/mkermat[i]; - ui=cbind(1,t(t(x)-x[i,])%*%beta) - out=wls(ui,y,wi); - a=c(a,out$a);b=cbind(b,out$b) - } - out=0; - out1=0 - for(i in 1:n) { - xi=kronecker(t(t(x)-x[i,]),t(b[,i])); - yi=y-a[i] - wi=kermat[,i]/mkermat[i] - out=out+apply(xi*yi*wi,2,mean) - out1=out1+t(xi*wi)%*%xi/n} - beta=t(matrix(solve(out1)%*%out,d,p)) - h=max(rn*h,c0*n^((-1/(d+4)))) - } - return(diag(sig^(-1/2))%*%beta) -} -######################### -ropg=function(x,y,d,nit){ - sig=diag(var(x)); - x=apply(x,2,standvec); - p=dim(x)[2]; - n=dim(x)[1] - c0=2.34; - p0=max(p,3); - rn=n^(-1/(2*(p0+6))); - h=c0*n^(-(1/(p0+6))) - beta=diag(p) - for(iit in 1:nit){ - kmat=kern(x%*%beta,h); - bmat=numeric() - for(i in 1:dim(x)[1]){ - wi=kmat[,i]; - xi=cbind(1,t(t(x)-x[i,])) - bmat=cbind(bmat,wls(xi,y,wi)$b) - } - beta=eigen(bmat%*%t(bmat))$vectors[,1:d] - h=max(rn*h,c0*n^((-1/(d+4)))) - } - - beta.final=diag(sig^(-1/2))%*%beta - return(beta.final) -} diff --git a/CVE_legacy/function_script_3.R b/CVE_legacy/function_script_3.R deleted file mode 100644 index 013c40e..0000000 --- a/CVE_legacy/function_script_3.R +++ /dev/null @@ -1,315 +0,0 @@ -LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){ - N<-length(Y) - if(is.vector(V)){k<-1} - else{k<-length(V[1,])} - Xlv<-Xl%*%V - d<-dtemp-((Xlv^2)%*%rep(1,k)) - w<-exp(-0.5*(d/h)^2) - w<-matrix(w,N,q) - wn<-apply(w,2,sum)-rep(1,q)#new - w<-apply(w,2,column_normalize) - mY<-t(w)%*%Y - sig<-t(w)%*%(Y^2)-(mY)^2 - 
W<-(kronecker(t(wn),rep(1,N)))##new - if(grad==T){ - grad<-mat.or.vec(dim,k) - tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) - if(k==1){ - grad_d<- -2*Xl*as.vector(Xlv) - grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - else{ - for (j in 1:(k)){ - grad_d<- -2*Xl*as.vector(Xlv[,j]) - grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - } - ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new - names(ret)<-c('var','sig','grad') - } - else{ - ret<-list(t(wn)%*%sig/sum(wn),sig)#new - names(ret)<-c('var','sig') - } - - return(ret) -} -################ -stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - if(p<1){ - S<-est_varmat(X) - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - if(is.null(h)){ - S<-est_varmat(X) - tr<-var_tr(S) - h<-choose_h_2(dim,k,N,nObs,tr) - } - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vnew<-Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! 
- - } - Vold<-Vnew - count<-count+1 - #print(count) - } - if(best>Lnew){ - best<-Lnew - Vend<-Vnew - sig<-tmp3$sig - } - } - ret<-list(Vend,best,sig,count,h,count2) - names(ret)<-c('est_base','var','aov_dat','count','h','count2') - return(ret) -} - -#################MAVE, OPG, rMAVE, rOPG from Bing Li book -opg=function(x,y,d){ - p=dim(x)[2]; - n=dim(x)[1] - c0=2.34; - p0=max(p,3); - rn=n^(-1/(2*(p0+6))); - h=c0*n^(-(1/(p0+6))) - sig=diag(var(x)); - x=apply(x,2,standvec) - kmat=kern(x,h); - bmat=numeric() - for(i in 1:dim(x)[1]){ - wi=kmat[,i]; - xi=cbind(1,t(t(x)-x[i,])) - bmat=cbind(bmat,wls(xi,y,wi)$b)} - beta=eigen(bmat%*%t(bmat))$vectors[,1:d] - return(diag(sig^(-1/2))%*%beta) -} -####################### -stiefl_opt_momentum<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0,momentum_para=0.8){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - if(p<1){ - S<-est_varmat(X) - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - if(is.null(h)){ - S<-est_varmat(X) - tr<-var_tr(S) - h<-choose_h_2(dim,k,N,nObs,tr) - } - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - Lnew<-LV(Vold,Xl,dtemp,h,q,Y)$var - #print(Lnew) - if(best>Lnew){ - best<-Lnew - Vend<-Vold - #sig<-tmp3$sig - } - } - Vnew<-Vold<-Vend - - G<-matrix(rep(0,dim*k),dim,k) - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! - Lnew<-Lold - - } - Vold<-Vnew - count<-count+1 - - #print(count) - } - - ret<-list(Vnew,Lnew,count,h,count2) - names(ret)<-c('est_base','var','count','h','count2') - return(ret) -} -###################### -wls=function(x,y,w){ - n=dim(x)[1]; - p=dim(x)[2]-1 - out=c(solve(t(x*w)%*%x/n)%*%apply(x*y*w,2,mean)) - return(list(a=out[1],b=out[2:(p+1)])) -} -################# -kern=function(x,h){ - x=as.matrix(x); - n=dim(x)[1] - k2=x%*%t(x); - k1=t(matrix(diag(k2),n,n)); - k3=t(k1); - k=k1-2*k2+k3 - return(exp(-(1/(2*h^2))*(k1-2*k2+k3))) -} -############### -standvec=function(x) return((x-mean(x))/sd(x)) -############## -mave2=function(x,y,h,d,nit){ - sig=diag(var(x)); - n=dim(x)[1]; - p=dim(x)[2] - x=apply(x,2,standvec); - beta=opg(x,y,d);#beta=opg(x,y,h,d); - kermat=kern(x,h) - for(iit in 1:nit){ - b=numeric(); - a=numeric(); - for(i in 1:n){ - wi=kermat[,i]/(apply(kermat,2,mean)[i]) - ui=cbind(1,t(t(x)-x[i,])%*%beta) - out=wls(ui,y,wi); - a=c(a,out$a);b=cbind(b,out$b)} - out=0;out1=0; - for(i in 1:n){ - xi=kronecker(t(t(x)-x[i,]),t(b[,i])) - yi=y-a[i]; - wi=kermat[,i]/apply(kermat,2,mean)[i] - out=out+apply(xi*yi*wi,2,mean) - out1=out1+t(xi*wi)%*%xi/n} - beta=t(matrix(solve(out1)%*%out,d,p)) - } - return(diag(sig^(-1/2))%*%beta) -} -###################### -rmave=function(x,y,d,nit){ - sig=diag(var(x)); - n=dim(x)[1]; - p=dim(x)[2] - x=apply(x,2,standvec) - c0=2.34; - p0=max(p,3); - h=c0*n^(-(1/(p0+6))); - rn=n^(-1/(2*(p0+6))) - beta=opg(x,y,d) - for(iit in 1:nit){ - kermat=kern(x%*%beta,h); - mkermat=apply(kermat,2,mean) - b=numeric();a=numeric() - for(i in 1:n){ - wi=kermat[,i]/mkermat[i]; - ui=cbind(1,t(t(x)-x[i,])%*%beta) - out=wls(ui,y,wi); - a=c(a,out$a);b=cbind(b,out$b) - } - out=0; - out1=0 - for(i in 1:n) { - 
xi=kronecker(t(t(x)-x[i,]),t(b[,i])); - yi=y-a[i] - wi=kermat[,i]/mkermat[i] - out=out+apply(xi*yi*wi,2,mean) - out1=out1+t(xi*wi)%*%xi/n} - beta=t(matrix(solve(out1)%*%out,d,p)) - h=max(rn*h,c0*n^((-1/(d+4)))) - } - return(diag(sig^(-1/2))%*%beta) -} -######################### -ropg=function(x,y,d,nit){ - sig=diag(var(x)); - x=apply(x,2,standvec); - p=dim(x)[2]; - n=dim(x)[1] - c0=2.34; - p0=max(p,3); - rn=n^(-1/(2*(p0+6))); - h=c0*n^(-(1/(p0+6))) - beta=diag(p) - for(iit in 1:nit){ - kmat=kern(x%*%beta,h); - bmat=numeric() - for(i in 1:dim(x)[1]){ - wi=kmat[,i]; - xi=cbind(1,t(t(x)-x[i,])) - bmat=cbind(bmat,wls(xi,y,wi)$b) - } - beta=eigen(bmat%*%t(bmat))$vectors[,1:d] - h=max(rn*h,c0*n^((-1/(d+4)))) - } - - beta.final=diag(sig^(-1/2))%*%beta - return(beta.final) -} diff --git a/CVE_legacy/stiefel_weight_package.R b/CVE_legacy/stiefel_weight_package.R deleted file mode 100644 index 5209133..0000000 --- a/CVE_legacy/stiefel_weight_package.R +++ /dev/null @@ -1,222 +0,0 @@ -LV_weight_partial<-function(V,Xl,dtemp,h,q,Y,grad=T){ - N<-length(Y) - if(is.vector(V)){k<-1} - else{k<-length(V[1,])} - Xlv<-Xl%*%V - d<-dtemp-((Xlv^2)%*%rep(1,k)) - w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0) - w<-matrix(w,N,q) - wn<-apply(w,2,sum)#new - w<-apply(w,2,column_normalize) - mY<-t(w)%*%Y - sig<-t(w)%*%(Y^2)-(mY)^2 - W<-diag(kronecker(t(wn),rep(1,N)))##new - if(grad==T){ - grad<-mat.or.vec(dim,k) - tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) - if(k==1){ - grad_d<- -2*Xl*as.vector(Xlv) - grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1 #new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - else{ - for (j in 1:(k)){ - grad_d<- -2*Xl*as.vector(Xlv[,j]) - grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new - # wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N)) - # grad[,j]<- wn_grad%*%(sig-rep(var1[2],q))/(sum(wn))+grad - } - } - ret<-list(t(wn)%*%sig/sum(wn),sig,grad)#new - names(ret)<-c('var','sig','grad') - } - else{ - ret<-list(t(wn)%*%sig/sum(wn),sig)#new - names(ret)<-c('var','sig') - } - - return(ret) -} -###### -LV_weight_full<-function(V,Xl,dtemp,h,q,Y,grad=T){ - N<-length(Y) - if(is.vector(V)){k<-1} - else{k<-length(V[1,])} - Xlv<-Xl%*%V - d<-dtemp-((Xlv^2)%*%rep(1,k)) - w<-exp(-0.5*(d/h)^2)#dnorm(d/h)/dnorm(0) - w<-matrix(w,N,q) - wn<-apply(w,2,sum)#new - w<-apply(w,2,column_normalize) - mY<-t(w)%*%Y - sig<-t(w)%*%(Y^2)-(mY)^2 - W<-(kronecker(t(wn),rep(1,N)))#new or ###diag(kronecker(t(wn),rep(1,N))) - var<-t(wn)%*%sig/sum(wn)#new - if(grad==T){ - grad<-mat.or.vec(dim,k) - tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2) - if(k==1){ - grad_d<- -2*Xl*as.vector(Xlv) - grad<-(1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new - wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new - grad<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad#new - } - else{ - for (j in 1:(k)){ - grad_d<- -2*Xl*as.vector(Xlv[,j]) - grad[,j]<- (1/h^2)*(1/sum(wn))*t(grad_d*as.vector(d)*as.vector(w)*as.vector(W))%*%tmp1#new - wn_grad<-(-1/h^2)*t(grad_d*as.vector(d)*as.vector(w))%*%kronecker(diag(rep(1,q)),rep(1,N))#new - grad[,j]<- wn_grad%*%(sig-rep(var,q))/(sum(wn))+grad[,j]#new - } - } - ret<-list(var,sig,grad) - names(ret)<-c('var','sig','grad') - } - else{ - 
ret<-list(var,sig) - names(ret)<-c('var','sig') - } - - return(ret) -} -#### -stiefl_weight_partial_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - if(p<1){ - S<-est_varmat(X) - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - if(is.null(h)){ - S<-est_varmat(X) - tr<-var_tr(S) - h<-choose_h_2(dim,k,N,nObs,tr) - } - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vnew<-Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! - - } - Vold<-Vnew - count<-count+1 - #print(count) - } - if(best>Lnew){ - best<-Lnew - Vend<-Vnew - sig<-tmp3$sig - } - } - ret<-list(Vend,best,sig,count,h,count2) - names(ret)<-c('est_base','var','aov_dat','count','h','count2') - return(ret) -} -########### -stiefl_weight_full_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){ - Y<-dat[,1] - X<-dat[,-1] - N<-length(Y) - dim<-length(X[1,]) - if(p<1){ - S<-est_varmat(X) - tmp1<-q_ind(X,S,p) - q<-tmp1$q - ind<-tmp1$ind - } - else{ - q<-N - ind<-1:N - } - Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N))) - dtemp<-apply(Xl,1,norm2) - if(is.null(h)){ - S<-est_varmat(X) - tr<-var_tr(S) - h<-choose_h_2(dim,k,N,nObs,tr) - } - best<-exp(10000) - Vend<-mat.or.vec(dim,k) - sig<-mat.or.vec(q,1) - for(u in 1:k0){ - Vnew<-Vold<-stiefl_startval(dim,k) - #print(Vold) - #print(LV(Vold,Xl,dtemp,h,q,Y)$var) - Lnew<-Lold<-exp(10000) - lambda<-lambda_0 - err<-10 - count<-0 - count2<-0 - while(err>tol&count sclack_para){#/(count+1)^(0.5) - lambda=lambda/2 - err<-10 - count2<-count2+1 - count<-count-1 - Vnew<-Vold #!!!!! - - } - Vold<-Vnew - count<-count+1 - #print(count) - } - if(best>Lnew){ - best<-Lnew - Vend<-Vnew - sig<-tmp3$sig - } - } - ret<-list(Vend,best,sig,count,h,count2) - names(ret)<-c('est_base','var','aov_dat','count','h','count2') - return(ret) -} diff --git a/runtime_test.R b/runtime_test.R deleted file mode 100644 index c73d9a6..0000000 --- a/runtime_test.R +++ /dev/null @@ -1,128 +0,0 @@ -# Usage: -# ~$ Rscript runtime_test.R - -textplot <- function(...) { - text <- unlist(list(...)) - if (length(text) > 20) { - text <- c(text[1:17], - ' ...... (skipped, text too long) ......', - text[c(-1, 0) + length(text)]) - } - - plot(NA, xlim = c(0, 1), ylim = c(0, 1), - bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '') - - for (i in seq_along(text)) { - text(0, 1 - (i / 20), - text[[i]], pos = 4) - } -} - -# library(CVEpureR) # load CVE's pure R implementation -library(CVE) # load CVE - -#' Writes log information to console. (to not get bored^^) -tell.user <- function(name, start, i, length) { - cat("\rRunning Test (", name, "):", - i, "/", length, - " - elapsed:", format(Sys.time() - start), "\033[K") -} -#' Computes "distance" of spanned subspaces. -#' @param B1 Semi-orthonormal basis matrix -#' @param B2 Semi-orthonormal basis matrix -#' @return Frobenius norm of subspace projection matrix diff. 
-subspace.dist <- function(B1, B2){
-    P1 <- tcrossprod(B1, B1)
-    P2 <- tcrossprod(B2, B2)
-    return(norm(P1 - P2, type = 'F'))
-}
-
-# Set random seed
-set.seed(437)
-
-# Number of simulations
-SIM.NR <- 50L
-# Maximal number of iterations in curvilinear search algorithm
-MAXIT <- 50L
-# Number of arbitrary starting values for curvilinear optimization
-ATTEMPTS <- 10L
-# Set names of datasets
-ds.names <- paste0("M", seq(7))
-# Set used CVE methods
-methods <- c("simple", "weighted") # c("legacy", "simple", "linesearch", "sgd")
-
-# Setup error and time tracking variables
-error <- matrix(NA, SIM.NR, length(methods) * length(ds.names))
-time <- matrix(NA, SIM.NR, ncol(error))
-colnames(error) <- kronecker(paste0(ds.names, '-'), methods, paste0)
-colnames(time) <- colnames(error)
-
-# Create new log file and write CSV (actually TSV) header.
-# (do not overwrite existing logs)
-log.nr <- length(list.files('tmp/', pattern = '.*\\.log'))
-file <- file.path('tmp', paste0('test', log.nr, '.log'))
-cat('dataset\tmethod\terror\ttime\n', sep = '', file = file)
-# Open a new pdf device for plotting into (do not overwrite existing ones)
-path <- paste0('test', log.nr, '.pdf')
-pdf(file.path('tmp', path))
-cat('Plotting to file:', path, '\n')
-
-# Only used for progress reporting (to stdout)
-count <- 0
-start <- Sys.time()
-# Start simulation loop.
-for (sim in 1:SIM.NR) {
-    # Repeat for each dataset.
-    for (name in ds.names) {
-        tell.user(name, start, (count <- count + 1), SIM.NR * length(ds.names))
-
-        # Create a new dataset
-        ds <- dataset(name)
-        # Prepare X, Y and combine to data matrix
-        Y <- ds$Y
-        X <- ds$X
-        data <- cbind(Y, X)
-        # Get dimensions
-        k <- ncol(ds$B)
-
-        for (method in methods) {
-            dr.time <- system.time(
-                dr <- cve.call(X, Y,
-                               method = method,
-                               k = k,
-                               attempts = ATTEMPTS
-                )
-            )
-            dr$B <- coef(dr, k)
-
-            key <- paste0(name, '-', method)
-            error[sim, key] <- subspace.dist(dr$B, ds$B) / sqrt(2 * k)
-            time[sim, key] <- dr.time["elapsed"]
-
-            # Log results to file (mostly for long-running simulations)
-            cat(paste0(
-                    c(name, method, error[sim, key], time[sim, key]),
-                    collapse = '\t'
-                ), '\n',
-                sep = '', file = file, append = TRUE
-            )
-        }
-    }
-}
-
-cat("\n\n## Time [sec] Summary:\n")
-print(summary(time))
-cat("\n## Error Summary:\n")
-print(summary(error))
-
-boxplot(error,
-        main = paste0("Error (Nr of simulations ", SIM.NR, ")"),
-        ylab = "Error",
-        las = 2
-)
-boxplot(time,
-        main = paste0("Time (Nr of simulations ", SIM.NR, ")"),
-        ylab = "Time [sec]",
-        las = 2
-)
-
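With runtime_test.R removed, a condensed sketch of its core error check is kept here for reference only. It assumes the CVE package continues to export dataset(), cve.call() and coef() with the signatures used in the deleted script, and it reuses the same projection-matrix distance normalized by sqrt(2k); everything else is illustrative.

library(CVE)

# Subspace distance as above: Frobenius norm of the projection matrix difference.
subspace.dist <- function(B1, B2) {
    norm(tcrossprod(B1) - tcrossprod(B2), type = 'F')
}

set.seed(437)
ds <- dataset("M1")        # one of the simulated datasets M1, ..., M7
k  <- ncol(ds$B)           # dimension of the true reduction space
dr <- cve.call(ds$X, ds$Y, method = "simple", k = k, attempts = 10L)
# Normalized error in [0, 1]; 0 means the true subspace was recovered exactly.
err <- subspace.dist(coef(dr, k), ds$B) / sqrt(2 * k)
cat("subspace error:", round(err, 3), "\n")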
diff --git a/test.R b/test.R
deleted file mode 100644
index 364ac37..0000000
--- a/test.R
+++ /dev/null
@@ -1,120 +0,0 @@
-textplot <- function(...) {
-    text <- unlist(list(...))
-    if (length(text) > 20) {
-        text <- c(text[1:17],
-                  '  ...... (skipped, text too long) ......',
-                  text[c(-1, 0) + length(text)])
-    }
-
-    plot(NA, xlim = c(0, 1), ylim = c(0, 1),
-         bty = 'n', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
-
-    for (i in seq_along(text)) {
-        text(0, 1 - (i / 20),
-             text[[i]], pos = 4)
-    }
-}
-
-args <- commandArgs(TRUE)
-if (length(args) > 0L) {
-    method <- args[1]
-} else {
-    method <- "simple"
-}
-if (length(args) > 1L) {
-    momentum <- as.double(args[2])
-} else {
-    momentum <- 0.0
-}
-seed <- 42
-max.iter <- 50L
-attempts <- 25L
-
-library(CVE)
-path <- paste0('~/Projects/CVE/tmp/logger_', method, '.C.pdf')
-
-# Define logger for the `cve()` method.
-logger <- function(attempt, iter, data) {
-    # Note the `<<-` assignment!
-    loss.history[iter + 1, attempt] <<- data$loss
-    error.history[iter + 1, attempt] <<- data$err
-    tau.history[iter + 1, attempt] <<- data$tau
-    # Compute the true error by comparing to the true `B`
-    B.est <- null(data$V) # Function provided by CVE
-    P.est <- B.est %*% solve(t(B.est) %*% B.est) %*% t(B.est)
-    true.error <- norm(P - P.est, 'F') / sqrt(2 * k)
-    true.error.history[iter + 1, attempt] <<- true.error
-}
-
-pdf(path, width = 8.27, height = 11.7) # width, height unit is inches -> A4
-layout(matrix(c(1, 1,
-                2, 3,
-                4, 5), nrow = 3, byrow = TRUE))
-
-for (name in paste0("M", seq(7))) {
-    # Seed the random number generator
-    set.seed(seed)
-
-    # Create a dataset
-    ds <- dataset(name)
-    X <- ds$X
-    Y <- ds$Y
-    B <- ds$B # the true `B`
-    k <- ncol(ds$B)
-    # True projection matrix.
-    P <- B %*% solve(t(B) %*% B) %*% t(B)
-
-    # Set up histories.
-    V_last <- NULL
-    loss.history <- matrix(NA, max.iter + 1, attempts)
-    error.history <- matrix(NA, max.iter + 1, attempts)
-    tau.history <- matrix(NA, max.iter + 1, attempts)
-    true.error.history <- matrix(NA, max.iter + 1, attempts)
-
-    time <- system.time(
-        dr <- cve(Y ~ X, k = k, method = method,
-                  momentum = momentum,
-                  max.iter = max.iter, attempts = attempts,
-                  logger = logger)
-    )["elapsed"]
-
-    # Extract the finally selected values:
-    B.est <- coef(dr, k)
-    true.error <- norm(tcrossprod(B.est) - tcrossprod(B), 'F') / sqrt(2 * k)
-    loss <- dr$res[[as.character(k)]]$loss
-
-    # Write metadata.
-    textplot(
-        paste0("Seed value: ", seed),
-        "",
-        paste0("Dataset Name: ", ds$name),
-        paste0("dim(X) = (", nrow(X), ", ", ncol(X), ")"),
-        paste0("dim(B) = (", nrow(B), ", ", ncol(B), ")"),
-        "",
-        paste0("CVE method: ", dr$method),
-        paste0("Max Iterations: ", max.iter),
-        paste0("Attempts: ", attempts),
-        paste0("Momentum: ", momentum),
-        "CVE call:",
-        paste0("   > ", format(dr$call)),
-        "",
-        paste0("True Error: ", round(true.error, 3)),
-        paste0("loss: ", round(loss, 3)),
-        paste0("time: ", round(time, 3), " s")
-    )
-    # Plot histories
-    matplot(loss.history, type = 'l', log = 'y', xlab = 'i (iteration)',
-            main = paste('loss', name),
-            ylab = expression(L(V[i])))
-    matplot(true.error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
-            main = paste('true error', name),
-            ylab = expression(group('|', B*B^T - B[i]*B[i]^T, '|')[F] / sqrt(2*k)))
-    matplot(error.history, type = 'l', log = 'y', xlab = 'i (iteration)',
-            main = paste('error', name),
-            ylab = expression(group('|', V[i-1]*V[i-1]^T - V[i]*V[i]^T, '|')[F]))
-    matplot(tau.history, type = 'l', log = 'y', xlab = 'i (iteration)',
-            main = paste('learning rate', name),
-            ylab = expression(tau[i]))
-}
-
-cat("Created plot:", path, "\n")
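The deleted test.R also served as the only worked example of the logger interface; a trimmed-down sketch is kept below for reference. It assumes cve(), dataset() and the logger callback behave as in the script above, in particular that data$loss carries the loss at the current iteration; the history matrix and the plot are illustrative only.

library(CVE)

max.iter <- 50L
attempts <- 25L

ds <- dataset("M1")
X  <- ds$X
Y  <- ds$Y
k  <- ncol(ds$B)

# One row per iteration (plus the initial value), one column per attempt.
loss.history <- matrix(NA_real_, max.iter + 1, attempts)
logger <- function(attempt, iter, data) {
    # `<<-` writes into loss.history defined in the enclosing environment.
    loss.history[iter + 1, attempt] <<- data$loss
}

dr <- cve(Y ~ X, k = k, method = "simple",
          max.iter = max.iter, attempts = attempts,
          logger = logger)

# Loss trajectories of all attempts on a log scale, as in the deleted script.
matplot(loss.history, type = 'l', log = 'y',
        xlab = 'i (iteration)', ylab = 'loss L(V)')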