#' Euclidean vector norm (2-norm) #' #' @param x Numeric vector #' @returns Numeric norm2 <- function(x) { return(sum(x^2)) } #' Samples uniform from the Stiefel Manifold #' #' @param p row dim. #' @param q col dim. #' @returns `(p, q)` semi-orthogonal matrix rStiefl <- function(p, q) { return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) } #' Matrix Trace #' #' @param M Square matrix #' @returns Trace \eqn{Tr(M)} Tr <- function(M) { return(sum(diag(M))) } #' Null space basis of given matrix `B` #' #' @param B `(p, q)` matrix #' @returns Semi-orthogonal `(p, p - q)` matrix `Q` spaning the null space of `B` null <- function(M) { tmp <- qr(M) set <- if(tmp$rank == 0L) seq_len(ncol(M)) else -seq_len(tmp$rank) return(qr.Q(tmp, complete = TRUE)[, set, drop = FALSE]) } #### #chooses bandwith h according to formula in paper #dim...dimension of X vector #k... row dim of V (dim times q matrix) corresponding to a basis of orthogonal complement of B in model # N...sample size #nObs... nObs in bandwith formula #tr...trace of sample covariance matrix of X estimateBandwidth<-function(X, k, nObs) { n <- nrow(X) p <- ncol(X) X_centered <- scale(X, center = TRUE, scale = FALSE) Sigma <- (1 / n) * t(X_centered) %*% X_centered quantil <- qchisq((nObs - 1) / (n - 1), k) return(2 * quantil * Tr(Sigma) / p) } ########### # evaluates L(V) and returns L_n(V),(L_tilde_n(V,X_i))_{i=1,..,n} and grad_V L_n(V) (p times k) # V... (dim times q) matrix # Xl... output of Xl_fun # dtemp...vector with pairwise distances |X_i - X_j| # q...output of q_ind function # Y... vector with N Y_i values # if grad=T, gradient of L(V) also returned LV <- function(V, Xl, dtemp, h, q, Y, grad = TRUE) { N <- length(Y) if (is.vector(V)) { k <- 1 } else { k <- length(V[1,]) } Xlv <- Xl %*% V d <- dtemp - ((Xlv^2) %*% rep(1, k)) w <- dnorm(d / h) / dnorm(0) w <- matrix(w, N, q) w <- apply(w, 2, function(x) { x / sum(x) }) y1 <- t(w) %*% Y y2 <- t(w) %*% (Y^2) sig <- y2 - y1^2 result <- list(var = mean(sig), sig = sig) if (grad == TRUE) { tmp1 <- (kronecker(sig, rep(1, N)) - (as.vector(kronecker(rep(1, q), Y)) - kronecker(y1, rep(1, N)))^2) if (k == 1) { grad_d <- -2 * Xl * as.vector(Xlv) grad <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 } else { grad <- matrix(0, nrow(V), ncol(V)) for (j in 1:k) { grad_d <- -2 * Xl * as.vector(Xlv[ ,j]) grad[ ,j] <- (1 / h^2) * (1 / q) * t(grad_d * as.vector(d) * as.vector(w)) %*% tmp1 } } result$grad = grad } return(result) } #### performs stiefle optimization of argmin_{V : V'V=I_k} L_n(V) #through curvilinear search with k0 starting values drawn uniformly on stiefel maniquefold #dat...(N times dim+1) matrix with first column corresponding to Y values, the other columns #consists of X data matrix, (i.e. dat=cbind(Y,X)) #h... bandwidth #k...row dimension of V that is calculated, corresponds to dimension of orthogonal complement of B #k0... number of arbitrary starting values #p...fraction of data points used as shifting point #maxIter... number of maximal iterations in curvilinear search #nObs.. nObs parameter for choosing bandwidth if no h is supplied #lambda_0...initial stepsize #tol...tolerance for stoping iterations #sclack_para...if relative improvment is worse than sclack_para the stepsize is reduced #output: #est_base...Vhat_k= argmin_V:V'V=I_k L_n(V) a (dim times k) matrix where dim is row-dimension of X data matrix #var...value of L_n(Vhat_k) #aov_dat... (L_tilde_n(Vhat_k,X_i))_{i=1,..,N} #count...number of iterations #h...bandwidth cve_legacy <- function( X, Y, k, nObs = sqrt(nrow(X)), tauInitial = 1.0, tol = 1e-3, slack = 0, maxIter = 50L, attempts = 10L ) { # get dimensions n <- nrow(X) p <- ncol(X) q <- p - k Xl <- kronecker(rep(1, n), X) - kronecker(X, rep(1, n)) Xd <- apply(Xl, 1, norm2) I_p <- diag(1, p) # estimate bandwidth h <- estimateBandwidth(X, k, nObs) Lbest <- Inf Vend <- mat.or.vec(p, q) for (. in 1:attempts) { Vnew <- Vold <- rStiefl(p, q) Lnew <- Lold <- exp(10000) tau <- tauInitial error <- Inf count <- 0 while (error > tol & count < maxIter) { tmp <- LV(Vold, Xl, Xd, h, n, Y) G <- tmp$grad Lold <- tmp$var W <- tau * (G %*% t(Vold) - Vold %*% t(G)) Vnew <- solve(I_p + W) %*% (I_p - W) %*% Vold Lnew <- LV(Vnew, Xl, Xd, h, n, Y, grad = FALSE)$var if ((Lnew - Lold) > slack * Lold) { tau = tau / 2 error <- Inf } else { error <- norm(Vold %*% t(Vold) - Vnew %*% t(Vnew), "F") / sqrt(2 * k) Vold <- Vnew } count <- count + 1 } if (Lbest > Lnew) { Lbest <- Lnew Vend <- Vnew } } return(list( loss = Lbest, V = Vend, B = null(Vend), h = h )) }