383 lines
11 KiB
R
Executable File
383 lines
11 KiB
R
Executable File
library("Matrix")
|
|
# library("fields")
|
|
# library('dr')
|
|
# library('mvtnorm')
|
|
# library(mgcv)
|
|
# library(pracma)
|
|
# library(RVAideMemoire)
|
|
library("MAVE")
|
|
# library(pls)
|
|
# library(LaplacesDemon)
|
|
# library(earth)
|
|
# library("latex2exp")
|
|
|
|
#################################################
|
|
|
|
##############################
|
|
|
|
################################
|
|
|
|
|
|
fsquare<-function(x){
|
|
ret<-0
|
|
for (h in 1:length(x)){ret<-ret+x[h]^2}
|
|
return(ret)
|
|
}
|
|
f_id<-function(x){return(x[1])}
|
|
vfun<-function(x)return(c(cos(x),sin(x)))
|
|
vfun_grad<-function(x)return(c(-sin(x),cos(x)))
|
|
f_true<-function(x,sigma){return(sigma^2 + cos(x)^2)}
|
|
###############################
|
|
##creat sample for M2, b is B matrix of model, N..sample size,
|
|
#f...link function, sigma..standard deviation of error term
|
|
#produces sample with X standard normal with dimension to columnlength of b an
|
|
# epsilon centered normal with standard deviation sigma
|
|
creat_sample<-function(b,N,f,sigma){
|
|
if (is.vector(b)==1){dim<-length(b)}
|
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
|
|
|
X<-mat.or.vec(N,dim)
|
|
Y<-mat.or.vec(N,1)
|
|
for (i in 1:N){
|
|
X[i,]<-rnorm(dim,0,1)
|
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
|
}
|
|
dat<-cbind(Y,X)
|
|
return(dat)
|
|
}
|
|
#####
|
|
creat_b<-function(dim,n){
|
|
temp<-mat.or.vec(dim,n)
|
|
|
|
if (n>1){
|
|
while (rankMatrix(temp,method = 'qr')< min(dim,n)){
|
|
for (i in 1:n){
|
|
temp[,i]<-rnorm(dim,0,1)
|
|
temp[,i]<-temp[,i]/sqrt(sum(temp[,i]^2))
|
|
}
|
|
}
|
|
}
|
|
if (n==1) {temp<-rnorm(dim,0,1)
|
|
temp<-temp/sqrt(sum(temp^2))}
|
|
return(temp)
|
|
}
|
|
###
|
|
|
|
### creat sample for M1,M3
|
|
## b is B matrix of model, N..sample size,
|
|
#f...link function, sigma..standard deviation of error term
|
|
#produces sample with X normal with Covariancematrix SS' with dimension to columnlength of b an
|
|
# epsilon centered normal with standard deviation sigma
|
|
creat_sample_nor_nonstand<-function(b,N,f,S,sigma){
|
|
#Var(x)= SS'
|
|
if (is.vector(b)==1){dim<-length(b)}
|
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
|
|
|
X<-mat.or.vec(N,dim)
|
|
Y<-mat.or.vec(N,1)
|
|
for (i in 1:N){
|
|
X[i,]<-rnorm(dim,0,1)%*%t(S)
|
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
|
}
|
|
dat<-cbind(Y,X)
|
|
return(dat)
|
|
}
|
|
####
|
|
|
|
####
|
|
|
|
###
|
|
####
|
|
|
|
####
|
|
#### creat sample for M4
|
|
##creat sample for M2, b is B matrix of model, N..sample size,
|
|
#f...link function, sigma..standard deviation of error term
|
|
#produces sample with X Gausian mixture
|
|
#(i.e. X ~ (Zrep(-1,dim) + (1_Z)rep(1,dim))*dispers_para + N(0, I_p) and Z~B(pi)
|
|
#with dimension to columnlength of b an
|
|
# epsilon centered normal with standard deviation sigma
|
|
creat_sample_noneliptic_gausmixture<-function(b,N,f,sigma,pi,dispers_para=1){
|
|
if (is.vector(b)==1){dim<-length(b)}
|
|
if (is.vector(b)==0){dim<-length(b[,1])}
|
|
|
|
X<-mat.or.vec(N,dim)
|
|
Y<-mat.or.vec(N,1)
|
|
for (i in 1:N){
|
|
p<-rbinom(1,1,pi)
|
|
X[i,]<-(-rep(1,dim)*p+rep(1,dim)*(1-p))*dispers_para+rnorm(dim,0,1)
|
|
Y[i]<-f(t(b)%*%X[i,])+rnorm(1,0,sigma)
|
|
}
|
|
dat<-cbind(Y,X)
|
|
return(dat)
|
|
}
|
|
###
|
|
###############################
|
|
#estimates covariance matrix by ML estimate
|
|
# X...n times dim data matrix with rows corresponding to X_i
|
|
est_varmat<-function(X){
|
|
dim<-length(X[1,])
|
|
Sig<-mat.or.vec(dim,dim)#Cov(X)
|
|
X<-scale(X,center = T,scale = F)
|
|
Sig<-t(X)%*%X/length(X[,1])
|
|
return(Sig)
|
|
}
|
|
####
|
|
# normalizes a vector x such that it sums to 1
|
|
column_normalize<-function(x){return(x/sum(x))}
|
|
####
|
|
#squared 2-norm of a vector x
|
|
norm2<-function(x){return(sum(x^2))}
|
|
###
|
|
# fills up the matrix estb (dim times q) (dim > q) to a dim times dim orthonormal matrix
|
|
fill_base<-function(estb){
|
|
dim<-length(estb[,1])
|
|
k<-length(estb[1,])
|
|
P<-(diag(rep(1,dim))-estb%*%t(estb))
|
|
rk<-0
|
|
while (rk < dim){
|
|
tmp1<-P%*%creat_b(dim,1)
|
|
nor<-sqrt(sum(tmp1^2))
|
|
if(nor>10^(-4)){
|
|
estb<-cbind(tmp1/nor,estb)
|
|
rkn<-rankMatrix(estb,method = 'qr')
|
|
if (rkn > rk){P<-(diag(rep(1,dim))-estb%*%t(estb))
|
|
rk<-rkn}
|
|
}
|
|
if(nor <=10^(-4)){rk<-0}
|
|
|
|
}
|
|
|
|
return(estb)
|
|
}
|
|
##################
|
|
###new stiefel optimization
|
|
#draws from the uniform measure of Stiefel(p,k)
|
|
#returns a p times k dimensional semiorthogonal matrix
|
|
stiefl_startval<-function(p,k){return(qr.Q(qr(matrix(rnorm(p*k,0,1),p,k))))}
|
|
##### chooses shifting points
|
|
#auxiliary function for stielfel_opt
|
|
# X... N times dim data matrix
|
|
#S... sample covariance matrix
|
|
#0 < p <=1 fraction of data point used as shifting points
|
|
# if p=1 all data points used as shifting points
|
|
q_ind<-function(X,S,p){
|
|
dim<-length(X[1,])
|
|
N<-length(X[,1])
|
|
if (p < 1){
|
|
Sinv<-solve(S)
|
|
ind<-c()
|
|
bound<-qchisq(p,dim)
|
|
for (j in 1:N){if(t(X[j,])%*%Sinv%*%X[j,]<=bound){ind<-c(ind,j)}}#maybe demean
|
|
q<-length(ind)
|
|
if (q<=2){
|
|
q<-N
|
|
ind<-seq(1,N)}
|
|
}
|
|
else {
|
|
q<-N
|
|
ind<-seq(1,N)
|
|
}
|
|
ret<-list(q,ind)
|
|
names(ret)<-c('q','ind')
|
|
return(ret)
|
|
}
|
|
####
|
|
#calculates the trace of a quadratic matrix S
|
|
var_tr<-function(x){return(sum(diag(x)))}
|
|
####
|
|
#chooses bandwith h according to formula in paper
|
|
#dim...dimension of X vector
|
|
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
|
# N...sample size
|
|
#nObs... nObs in bandwith formula
|
|
#tr...trace of sample covariance matrix of X
|
|
choose_h_2<-function(dim,k,N,nObs,tr){
|
|
#h1<-qchisq(nObs/N,2*(dim-k))
|
|
h2<-qchisq((nObs-1)/(N-1),(dim-k))
|
|
return(h2*2*tr/dim)
|
|
}
|
|
####
|
|
#auxiliary function for stiefel_opt
|
|
#X... data matrix of random variable X
|
|
#q,ind... output of function q_ind
|
|
Xl_fun<-function(X,q,ind){
|
|
N<-length(X[,1])
|
|
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
|
return(Xl)
|
|
}
|
|
###########
|
|
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
|
# V... (dim times q) matrix
|
|
# Xl... output of Xl_fun
|
|
# dtemp...vector with pairwise distances |X_i - X_j|
|
|
# q...output of q_ind function
|
|
# Y... vector with N Y_i values
|
|
# if grad=T, gradient of L(V) also returned
|
|
LV<-function(V,Xl,dtemp,h,q,Y,grad=T){
|
|
N<-length(Y)
|
|
if(is.vector(V)){k<-1}
|
|
else{k<-length(V[1,])}
|
|
Xlv<-Xl%*%V
|
|
d<-dtemp-((Xlv^2)%*%rep(1,k))
|
|
w<-dnorm(d/h)/dnorm(0)
|
|
w<-matrix(w,N,q)
|
|
w<-apply(w,2,column_normalize)
|
|
mY<-t(w)%*%Y
|
|
sig<-t(w)%*%(Y^2)-(mY)^2 # \tilde{L}_n(V, s_0) mit s_0 coresponds to q_ind
|
|
if(grad==T){
|
|
grad<-mat.or.vec(dim,k)
|
|
tmp1<-(kronecker(sig,rep(1,N))-(as.vector(kronecker(rep(1,q),Y))-kronecker(mY,rep(1,N)))^2)
|
|
if(k==1){
|
|
grad_d<- -2*Xl*as.vector(Xlv)
|
|
grad<-(1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
|
}
|
|
else{
|
|
for (j in 1:(k)){
|
|
grad_d<- -2*Xl*as.vector(Xlv[,j])
|
|
grad[,j]<- (1/h^2)*(1/q)*t(grad_d*as.vector(d)*as.vector(w))%*%tmp1
|
|
}
|
|
}
|
|
ret<-list(mean(sig),sig,grad)
|
|
names(ret)<-c('var','sig','grad')
|
|
}
|
|
else{
|
|
ret<-list(mean(sig),sig)
|
|
names(ret)<-c('var','sig')
|
|
}
|
|
|
|
return(ret)
|
|
}
|
|
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
|
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
|
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
|
#h... bandwidth
|
|
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
|
#k0... number of arbitrary starting values
|
|
#p...fraction of data points used as shifting point
|
|
#maxit... number of maximal iterations in curvilinear search
|
|
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
|
#lambda_0...initial stepsize
|
|
#tol...tolerance for stoping iterations
|
|
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
|
#output:
|
|
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
|
#var...value of L_n(Vhat_k)
|
|
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
|
#count...number of iterations
|
|
#h...bandwidth
|
|
#count2...number of times sclack_para is active
|
|
stiefl_opt<-function(dat,h=NULL,k,k0=30,p=1,maxit=50,nObs=sqrt(length(dat[,1])),lambda_0=1,tol=10^(-3),sclack_para=0){
|
|
|
|
history <- matrix(NA, maxit, k0)
|
|
|
|
Y<-dat[,1]
|
|
X<-dat[,-1]
|
|
N<-length(Y)
|
|
dim<-length(X[1,])
|
|
S<-est_varmat(X)
|
|
if(p<1){
|
|
|
|
tmp1<-q_ind(X,S,p)
|
|
q<-tmp1$q
|
|
ind<-tmp1$ind
|
|
}
|
|
else{
|
|
q<-N
|
|
ind<-1:N
|
|
}
|
|
Xl<-(kronecker(rep(1,q),X)-kronecker(X[ind,],rep(1,N)))
|
|
dtemp<-apply(Xl,1,norm2)
|
|
tr<-var_tr(S)
|
|
if(is.null(h)){h<-choose_h(dim,k,N,nObs,tr)}
|
|
best<-exp(10000)
|
|
Vend<-mat.or.vec(dim,k)
|
|
sig<-mat.or.vec(q,1)
|
|
for(u in 1:k0){
|
|
Vnew<-Vold<-stiefl_startval(dim,k)
|
|
#print(Vold)
|
|
#print(LV(Vold,Xl,dtemp,h,q,Y)$var)
|
|
Lnew<-Lold<-exp(10000)
|
|
lambda<-lambda_0
|
|
err<-10
|
|
count<-0
|
|
count2<-0
|
|
while(err>tol&count<maxit){
|
|
#print(Vold)
|
|
tmp2<-LV(Vold,Xl,dtemp,h,q,Y)
|
|
G<-tmp2$grad
|
|
Lold<-tmp2$var
|
|
W<-G%*%t(Vold)-Vold%*%t(G)
|
|
stepsize<-lambda#/(2*sqrt(count+1))
|
|
Vnew<-solve(diag(1,dim)+stepsize*W)%*%(diag(1,dim)-stepsize*W)%*%Vold
|
|
# print(Vnew)
|
|
tmp3<-LV(Vnew,Xl,dtemp,h,q,Y,grad=F)
|
|
Lnew<-tmp3$var
|
|
err<-sqrt(sum((Vold%*%t(Vold)-Vnew%*%t(Vnew))^2))/sqrt(2*k)#sqrt(sum(tmp3$grad^2))/(dim*k)#
|
|
#print(err)
|
|
if(((Lnew-Lold)/Lold) > sclack_para){#/(count+1)^(0.5)
|
|
lambda=lambda/2
|
|
err<-10
|
|
count2<-count2+1
|
|
count<-count-1
|
|
Vnew<-Vold #!!!!!
|
|
|
|
}
|
|
Vold<-Vnew
|
|
count<-count+1
|
|
#print(count)
|
|
|
|
# Write to history
|
|
history[count, u] <- Lnew
|
|
|
|
}
|
|
if(best>Lnew){
|
|
best<-Lnew
|
|
Vend<-Vnew
|
|
sig<-tmp3$sig
|
|
}
|
|
|
|
}
|
|
ret<-list(Vend,best,sig,count,h,count2, history)
|
|
names(ret)<-c('est_base','var','aov_dat','count','h','count2', 'history')
|
|
return(ret)
|
|
}
|
|
##### calculates distance between two subspaces
|
|
#i.e. |b1%*%(b1%*%b1')^{-1}%*%b1' - b2%*%(b2%*%b2')^{-1}%*%b2'|_2
|
|
subspace_dist<-function(b1,b2){
|
|
#b1<-orth(b1)
|
|
#b2<-orth(b2)
|
|
P1<-b1%*%solve(t(b1)%*%b1)%*%t(b1)
|
|
P2<-b2%*%solve(t(b2)%*%b2)%*%t(b2)
|
|
return(sqrt(sum((P1-P2)^2)))
|
|
}
|
|
####performs local linaer regression
|
|
#x...datapoint at where estimated function should be evaluated
|
|
#h...bandwidth
|
|
#dat..(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
|
#beta...projection matrix
|
|
local_linear<-function(x,h,dat,beta){
|
|
Y<-dat[,1]
|
|
X<-dat[,-1]
|
|
N<-length(Y)
|
|
X<-X%*%beta
|
|
x<-x%*%beta#beta%*%x
|
|
D_mat<-cbind(rep(1,N),X)
|
|
if (is.vector(X)){
|
|
dim<-1
|
|
d<-abs(X-rep(x,N))
|
|
}
|
|
else{
|
|
dim<-length(X[1,])
|
|
d<-sqrt(apply(X-t(matrix(rep(x,N),dim,N)),1,norm2))
|
|
}
|
|
K<-diag(dnorm(d/h)/dnorm(0))
|
|
pred<-c(1,x)%*%solve(t(D_mat)%*%K%*%D_mat)%*%t(D_mat)%*%K%*%Y
|
|
return(pred)
|
|
}
|
|
|
|
|
|
|