2
0
Fork 0
CVE/CVarE/R/estimateBandwidth.R

56 lines
2.0 KiB
R

#' Bandwidth estimation for CVE.
#'
#' If no bandwidth or function for calculating it is supplied, the CVE method
#' defaults to using the following formula (version 1)
#' \deqn{%
#' h = \frac{2 tr(\Sigma)}{p} (1.2 n^{\frac{-1}{4 + k}})^2}{%
#' h = (2 * tr(\Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2}
#' Alternative version 2 is used for dimension prediction which is given by
#' \deqn{%
#' h = \frac{2 tr(\Sigma)}{p} \chi_k^{-1}(\frac{nObs - 1}{n - 1})}{%
#' h = (2 * tr(\Sigma) / p) * \chi_k^-1((nObs - 1) / (n - 1))}
#' with \eqn{n} the sample size, \eqn{p} the dimension of \eqn{X} and
#' \eqn{\Sigma} is \eqn{(n - 1) / n} times the sample covariance matrix of
#' \eqn{X}.
#'
#' @param X the \eqn{n\times p}{n x p} matrix of predictor values.
#' @param k the SDR dimension.
#' @param nObs number of points in a slice, only for version 2.
#' @param version either \code{1} or \code{2}.
#'
#' @return Estimated bandwidth \code{h}.
#'
#' @examples
#' # set dimensions for simulation model
#' p <- 5; k <- 1
#' # create B for simulation
#' B <- rep(1, p) / sqrt(p)
#' # sample size
#' n <- 100
#' set.seed(21)
#' # create predictor data x ~ N(0, I_p)
#' x <- matrix(rnorm(n * p), n, p)
#' # simulate response variable
#' # y = f(B'x) + err
#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
#' y <- x %*% B + 0.25 * rnorm(100)
#' # calculate cve with method 'simple' for k = 1
#' set.seed(21)
#' cve.obj.simple <- cve(y ~ x, k = k)
#' print(estimate.bandwidth(x, k = k))
#' @export
estimate.bandwidth <- function (X, k, nObs, version = 1L) {
  # Reject anything that is not version 1 or 2 before doing any work.
  # `isTRUE()` also routes `NA` or non-scalar `version` values to the same
  # "Unknown version." error instead of an unrelated condition-length error.
  if (!isTRUE(version == 1) && !isTRUE(version == 2)) {
    stop("Unknown version.")
  }

  # `scale()` treats a plain vector as a one-column matrix; coerce up front so
  # that `nrow()` / `ncol()` agree with that instead of returning NULL.
  if (!is.matrix(X)) {
    X <- as.matrix(X)
  }
  n <- nrow(X)
  p <- ncol(X)

  # Both versions share the factor 2 * tr(Sigma) / p with
  # Sigma = crossprod(X_centered) / n. The trace of that matrix equals the sum
  # of all squared centered entries divided by n, so the p x p matrix itself
  # never has to be formed.
  X_centered <- scale(X, center = TRUE, scale = FALSE)
  trace_factor <- 2 * (sum(X_centered^2) / n) / p

  if (version == 1) {
    # h = (2 * tr(Sigma) / p) * (1.2 * n^(-1 / (4 + k)))^2
    trace_factor * (1.2 * n^(-1 / (4 + k)))^2
  } else {
    # h = (2 * tr(Sigma) / p) * qchisq((nObs - 1) / (n - 1), df = k)
    if (missing(nObs)) {
      stop("Argument 'nObs' is required for version 2.")
    }
    trace_factor * qchisq((nObs - 1) / (n - 1), k)
  }
}