175 lines
5.0 KiB
R
175 lines
5.0 KiB
R
|
|
||
|
#' Euclidean vector norm (2-norm)
|
||
|
#'
|
||
|
#' @param x Numeric vector
|
||
|
#' @returns Numeric
|
||
|
norm2 <- function(x) { return(sum(x^2)) }
|
||
|
|
||
|
#' Samples uniform from the Stiefel Manifold
|
||
|
#'
|
||
|
#' @param p row dim.
|
||
|
#' @param q col dim.
|
||
|
#' @returns `(p, q)` semi-orthogonal matrix
|
||
|
rStiefl <- function(p, q) {
|
||
|
return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
|
||
|
}
|
||
|
|
||
|
#' Matrix Trace
|
||
|
#'
|
||
|
#' @param M Square matrix
|
||
|
#' @returns Trace \eqn{Tr(M)}
|
||
|
Tr <- function(M) {
|
||
|
return(sum(diag(M)))
|
||
|
}
|
||
|
|
||
|
#' Null space basis of given matrix `B`
|
||
|
#'
|
||
|
#' @param B `(p, q)` matrix
|
||
|
#' @returns Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B`
|
||
|
null <- function(M) {
|
||
|
tmp <- qr(M)
|
||
|
set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank)
|
||
|
return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE])
|
||
|
}
|
||
|
|
||
|
####
|
||
|
#chooses bandwith h according to formula in paper
|
||
|
#dim...dimension of X vector
|
||
|
#k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model
|
||
|
# N...sample size
|
||
|
#nObs... nObs in bandwith formula
|
||
|
#tr...trace of sample covariance matrix of X
|
||
|
estimateBandwidth<-function(X, k, nObs) {
|
||
|
n <- nrow(X)
|
||
|
p <- ncol(X)
|
||
|
|
||
|
X_centered <- scale(X, center = TRUE, scale = FALSE)
|
||
|
Sigma <- (1 / n) * t(X_centered) %*% X_centered
|
||
|
|
||
|
quantil <- qchisq((nObs - 1) / (n - 1), k)
|
||
|
return(2 * quantil * Tr(Sigma) / p)
|
||
|
}
|
||
|
|
||
|
###########
|
||
|
# evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k)
|
||
|
# V... (dim times q) matrix
|
||
|
# Xl... output of Xl_fun
|
||
|
# dtemp...vector with pairwise distances |X_i - X_j|
|
||
|
# q...output of q_ind function
|
||
|
# Y... vector with N Y_i values
|
||
|
# if grad=T, gradient of L(V) also returned
|
||
|
LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) {
|
||
|
N <- length(Y)
|
||
|
if (is.vector(V)) { k <- 1 }
|
||
|
else { k <- length(V[1,]) }
|
||
|
Xlv <- Xl %*% V
|
||
|
d <- dtemp - ((Xlv^2) %*% rep(1, k))
|
||
|
w <- dnorm(d / h) / dnorm(0)
|
||
|
w <- matrix(w, N, q)
|
||
|
w <- apply(w, 2, function(x) { x / sum(x) })
|
||
|
y1 <- t(w) %*% Y
|
||
|
y2 <- t(w) %*% (Y^2)
|
||
|
sig <- y2 - y1^2
|
||
|
|
||
|
result <- list(var = mean(sig), sig = sig)
|
||
|
if (grad == TRUE) {
|
||
|
tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2)
|
||
|
if (k == 1) {
|
||
|
grad_d <- -2 * Xl * as.vector(Xlv)
|
||
|
grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
||
|
} else {
|
||
|
grad <- matrix(0, nrow(V), ncol(V))
|
||
|
for (j in 1:k) {
|
||
|
grad_d <- -2 * Xl * as.vector(Xlv[ ,j])
|
||
|
grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1
|
||
|
}
|
||
|
}
|
||
|
result$grad = grad
|
||
|
}
|
||
|
|
||
|
return(result)
|
||
|
}
|
||
|
#### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V)
|
||
|
#through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold
|
||
|
#dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns
|
||
|
#consists of X data matrix, (i.e. dat=cbind(Y,X))
|
||
|
#h... bandwidth
|
||
|
#k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B
|
||
|
#k0... number of arbitrary starting values
|
||
|
#p...fraction of data points used as shifting point
|
||
|
#maxIter... number of maximal iterations in curvilinear search
|
||
|
#nObs.. nObs parameter for choosing bandwidth if no h is supplied
|
||
|
#lambda_0...initial stepsize
|
||
|
#tol...tolerance for stoping iterations
|
||
|
#sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced
|
||
|
#output:
|
||
|
#est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix
|
||
|
#var...value of L_n(Vhat_k)
|
||
|
#aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N}
|
||
|
#count...number of iterations
|
||
|
#h...bandwidth
|
||
|
cve_legacy <- function(
|
||
|
X, Y, k,
|
||
|
nObs = sqrt(nrow(X)),
|
||
|
tauInitial = 1.0,
|
||
|
tol = 1e-3,
|
||
|
slack = 0,
|
||
|
maxIter = 50L,
|
||
|
attempts = 10L
|
||
|
) {
|
||
|
# get dimensions
|
||
|
n <- nrow(X)
|
||
|
p <- ncol(X)
|
||
|
q <- p - k
|
||
|
|
||
|
Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n))
|
||
|
Xd <- apply(Xl, 1, norm2)
|
||
|
I_p <- diag(1, p)
|
||
|
# estimate bandwidth
|
||
|
h <- estimateBandwidth(X, k, nObs)
|
||
|
|
||
|
Lbest <- Inf
|
||
|
Vend <- mat.or.vec(p, q)
|
||
|
|
||
|
for (. in 1:attempts) {
|
||
|
Vnew <- Vold <- rStiefl(p, q)
|
||
|
Lnew <- Lold <- exp(10000)
|
||
|
tau <- tauInitial
|
||
|
|
||
|
error <- Inf
|
||
|
count <- 0
|
||
|
while (error > tol & count < maxIter) {
|
||
|
|
||
|
tmp <- LV(Vold, Xl, Xd, h, n, Y)
|
||
|
G <- tmp$grad
|
||
|
Lold <- tmp$var
|
||
|
|
||
|
W <- tau * (G %*% t(Vold) - Vold %*% t(G))
|
||
|
Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold
|
||
|
|
||
|
Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var
|
||
|
|
||
|
if ((Lnew - Lold) > slack * Lold) {
|
||
|
tau = tau / 2
|
||
|
error <- Inf
|
||
|
} else {
|
||
|
error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k)
|
||
|
Vold <- Vnew
|
||
|
}
|
||
|
count <- count + 1
|
||
|
}
|
||
|
|
||
|
if (Lbest > Lnew) {
|
||
|
Lbest <- Lnew
|
||
|
Vend <- Vnew
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return(list(
|
||
|
loss = Lbest,
|
||
|
V = Vend,
|
||
|
B = null(Vend),
|
||
|
h = h
|
||
|
))
|
||
|
}
|