predict_dim_cv <- function(object) { # Get centered training data and dimensions X <- scale(object$X, center = TRUE, scale = FALSE) n <- nrow(object$X) # umber of training data samples Sigma <- (1 / n) * crossprod(X, X) eig <- eigen(Sigma) Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors) X <- X %*% solve(Sigma_root) pred <- matrix(0, n, length(object$res)) colnames(pred) <- names(object$res) for (dr.k in object$res) { # get "name" of current dimension k <- as.character(dr.k$k) # Project dataset with current SDR basis X.proj <- X %*% dr.k$B for (i in 1:n) { model <- mda::mars(X.proj[-i, ], object$Y[-i]) pred[i, k] <- predict(model, X.proj[i, , drop = F]) } } MSE <- colMeans((pred - object$Y)^2) return(list( MSE = MSE, k = as.integer(names(which.min(MSE))) )) } # TODO: write doc predict_dim_elbow <- function(object) { # extract original data from object (cve result) X <- object$X Y <- object$Y # Get dimensions n <- nrow(X) p <- ncol(X) losses <- vector("double", length(object$res)) names(losses) <- names(object$res) # Compute per sample losses with alternative bandwidth for each dimension. for (dr.k in object$res) { # extract dimension specific estimates and dimensions. k <- dr.k$k V <- dr.k$V # estimate bandwidth according alternative formula. h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V') # Compute distances d2 <- tcrossprod(XQ) # XQ XQ' d1 <- matrix(diag(d2), n, n) D <- d1 - 2 * d2 + t(d1) # Apply kernel # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2)) K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) # compute weighted and square meighted reponses y1 <- (K %*% Y) / colSumsK y2 <- (K %*% Y^2) / colSumsK # total loss losses[[as.character(k)]] <- mean(y2 - y1^2) } return(list( losses = losses, k = as.integer(names(which.min(losses))) )) } predict_dim_wilcoxon <- function(object, p.value = 0.05) { # extract original data from object (cve result) X <- object$X Y <- object$Y # Get dimensions n <- nrow(X) p <- ncol(X) L <- matrix(NA, n, length(object$res)) colnames(L) <- names(object$res) # Compute per sample losses with alternative bandwidth for each dimension. for (dr.k in object$res) { # extract dimension specific estimates and dimensions. k <- dr.k$k V <- dr.k$V # estimate bandwidth according alternative formula. h <- estimate.bandwidth(X, k, sqrt(n), version = 2L) # Projected `X` XQ <- X %*% (diag(1, p) - tcrossprod(V)) # X (I - V V') # Compute distances d2 <- tcrossprod(XQ) # XQ XQ' d1 <- matrix(diag(d2), n, n) D <- d1 - 2 * d2 + t(d1) # Apply kernel # Note: CVE uses for d = ||Q(X_i - X_j)|| the kernel exp(-d^4 / (2 h^2)) K <- exp((-0.5 / h^2) * D^2) # sum columns colSumsK <- colSums(K) # compute weighted and square meighted reponses y1 <- (K %*% Y) / colSumsK y2 <- (K %*% Y^2) / colSumsK # element-wise L for dim. k L[, as.character(k)] <- y2 - y1^2 } for (ind in seq_len(length(object$res) - 1L)) { p.test <- wilcox.test(L[, ind], L[, ind + 1L], alternative = "less")$p.value if (p.test < p.value) { return(list( p.value = p.test, k = object$res[[ind]]$k )) } } return(list( p.value = NA, k = object$res[[length(object$res)]]$k )) } #' \code{"TODO: @Lukas"} #' #' @param object instance of class \code{cve} (result of \code{\link{cve}}, #' \code{\link{cve.call}}). #' @param method one of \code{"CV"}, \code{"elbow"} or \code{"wilcoxon"}. #' @param ... ignored. #' #' @return list with \code{"k"} the predicted dimension and method dependent #' informatoin. #' #' @section Method cv: #' TODO: \code{"TODO: @Lukas"}. #' #' @section Method elbow: #' TODO: \code{"TODO: @Lukas"}. #' #' @section Method wilcoxon: #' TODO: \code{"TODO: @Lukas"}. #' #' @examples #' # create B for simulation #' B <- rep(1, 5) / sqrt(5) #' #' set.seed(21) #' # creat predictor data x ~ N(0, I_p) #' x <- matrix(rnorm(500), 100) #' #' # simulate response variable #' # y = f(B'x) + err #' # with f(x1) = x1 and err ~ N(0, 0.25^2) #' y <- x %*% B + 0.25 * rnorm(100) #' #' # Calculate cve for unknown k between min.dim and max.dim. #' cve.obj.simple <- cve(y ~ x) #' #' predict_dim(cve.obj.simple) #' #' @export predict_dim <- function(object, ..., method = "CV") { # Check if there are dimensions to select. if (length(object$res) == 1L) { return(list( message = "Only one dim. estimated.", k = as.integer(names(object$res)) )) } # Determine method "fuzzy". methods <- c("cv", "elbow", "wilcoxon") names(methods) <- methods method <- methods[[tolower(method), exact = FALSE]] if (is.null(method)) { stop('Unable to determine method.') } if (method == "cv") { return(predict_dim_cv(object)) } else if (method == "elbow") { return(predict_dim_elbow(object)) } else if (method == "wilcoxon") { return(predict_dim_wilcoxon(object)) } else { stop("Unable to determine method.") } }