predict_dim_cv <- function(object) { # Get centered training data and dimensions X <- scale(object$X, center = TRUE, scale = FALSE) n <- nrow(object$X) # umber of training data samples Sigma <- (1 / n) * crossprod(X, X) eig <- eigen(Sigma) Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors) X <- X %*% solve(Sigma_root) pred <- matrix(0, n, length(object$res)) colnames(pred) <- names(object$res) for (dr.k in object$res) { # get "name" of current dimension k <- as.character(dr.k$k) # Project dataset with current SDR basis X.proj <- X %*% dr.k$B for (i in 1:n) { model <- mda::mars(X.proj[-i, ], object$Y[-i]) pred[i, k] <- predict(model, X.proj[i, , drop = F]) } } MSE <- colMeans((pred - object$Y)^2) return(list( MSE = MSE, k = as.integer(names(which.min(MSE))) )) } # TODO: write doc predict_dim_elbow <- function(object) { # extract original data from object (cve result) X <- object$X Y <- object$Y # Get dimensions n <- nrow(X) p <- ncol(X) # Compute persistent data. i = rep(1:n, n) j = rep(1:n, each = n) D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n) losses <- vector("double", length(object$res)) names(losses) <- names(object$res) # Compute per sample losses with alternative bandwidth for each dimension. for (dr.k in object$res) { # extract dimension specific estimates and dimensions. k <- dr.k$k V <- dr.k$V q <- ncol(V) # estimate bandwidth according alternative formula (see: TODO: see) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` XV <- X %*% V # Devectorized distance matrix # (inefficient in R but fast in C) D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) D <- D.eucl - D # Apply kernel K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) # compute weighted and square meighted reponses y1 <- (K %*% Y) / colSumsK y2 <- (K %*% Y^2) / colSumsK # total loss losses[[as.character(k)]] <- mean(y2 - y1^2) } return(list( losses = losses, k = as.integer(names(which.min(losses))) )) } predict_dim_wilcoxon <- function(object, p.value = 0.05) { # extract original data from object (cve result) X <- object$X Y <- object$Y # Get dimensions n <- nrow(X) p <- ncol(X) # Compute persistent data. i = rep(1:n, n) j = rep(1:n, each = n) D.eucl = matrix((X[i, ] - X[j, ])^2 %*% rep(1, p), n) L <- matrix(NA, n, length(object$res)) colnames(L) <- names(object$res) # Compute per sample losses with alternative bandwidth for each dimension. for (dr.k in object$res) { # extract dimension specific estimates and dimensions. k <- dr.k$k V <- dr.k$V q <- ncol(V) # estimate bandwidth according alternative formula (see: TODO: see) h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` XV <- X %*% V # Devectorized distance matrix # (inefficient in R but fast in C) D <- matrix((XV[i, , drop = F] - XV[j, , drop = F])^2 %*% rep(1, q), n) D <- D.eucl - D # Apply kernel K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) # compute weighted and square meighted reponses y1 <- (K %*% Y) / colSumsK y2 <- (K %*% Y^2) / colSumsK # element-wise L for dim. k L[, as.character(k)] <- y2 - y1^2 } for (ind in seq_len(length(object$res) - 1L)) { p.test <- wilcox.test(L[, ind], L[, ind + 1L], alternative = "less")$p.value if (p.test < p.value) { return(list( p.value = p.test, k = object$res[[ind]]$k )) } } return(list( p.value = NA, k = object$res[[length(object$res)]]$k )) } #' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. #' TODO: rewrite!!! #' #' @param object instance of class \code{cve} (result of \code{cve}, #' \code{cve.call}). #' @param ... ignored. #' #' @return list with #' \itemize{ #' \item MSE: Mean Square Error, #' \item k: predicted dimensions. #' } #' #' @section cv: #' Cross-validation ... TODO: #' #' @section elbow: #' Cross-validation ... TODO: #' #' @section wilcoxon: #' Cross-validation ... TODO: #' #' @examples #' # create B for simulation #' B <- rep(1, 5) / sqrt(5) #' #' set.seed(21) #' # creat predictor data x ~ N(0, I_p) #' x <- matrix(rnorm(500), 100) #' #' # simulate response variable #' # y = f(B'x) + err #' # with f(x1) = x1 and err ~ N(0, 0.25^2) #' y <- x %*% B + 0.25 * rnorm(100) #' #' # Calculate cve for unknown k between min.dim and max.dim. #' cve.obj.simple <- cve(y ~ x) #' #' predict_dim(cve.obj.simple) #' #' @export predict_dim <- function(object, ..., method = "CV") { # Check if there are dimensions to select. if (length(object$res) == 1L) { return(list( message = "Only one dim. estimated.", k = as.integer(names(object$res)) )) } # Determine method "fuzzy". methods <- c("cv", "elbow", "wilcoxon") names(methods) <- methods method <- methods[[tolower(method), exact = FALSE]] if (is.null(method)) { stop('Unable to determine method.') } if (method == "cv") { return(predict_dim_cv(object)) } else if (method == "elbow") { return(predict_dim_elbow(object)) } else if (method == "wilcoxon") { return(predict_dim_wilcoxon(object)) } else { stop("Unable to determine method.") } }