From 875982a010d42d8f013546266017ef92b5447b83 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 22 Nov 2019 09:32:14 +0100 Subject: [PATCH] fix: R CMD check errors and warnings, change: extracted util functions into seperate files, add: C's sum funciton --- CVE_C/DESCRIPTION | 1 + CVE_C/NAMESPACE | 6 +- CVE_C/R/CVE.R | 235 ++++------------------------------- CVE_C/R/coef.R | 33 +++++ CVE_C/R/datasets.R | 9 +- CVE_C/R/directions.R | 19 +++ CVE_C/R/plot.R | 28 +++++ CVE_C/R/predict.R | 36 ++++++ CVE_C/R/predict_dim.R | 45 +++++++ CVE_C/R/summary.R | 32 +++++ CVE_C/man/basis.cve.Rd | 21 ---- CVE_C/man/coef.cve.Rd | 29 +++++ CVE_C/man/cve.Rd | 39 +++--- CVE_C/man/dataset.Rd | 2 + CVE_C/man/directions.cve.Rd | 2 +- CVE_C/man/plot.cve.Rd | 2 +- CVE_C/man/predict.cve.Rd | 12 +- CVE_C/man/predict.dim.cve.Rd | 20 +++ CVE_C/man/summary.cve.Rd | 4 +- CVE_C/src/Makevars | 4 + CVE_C/src/cve.c | 35 +++--- CVE_C/src/cve.h | 4 +- CVE_C/src/export.c | 4 +- CVE_C/src/init.c | 2 +- CVE_C/src/matrix.c | 17 +++ LICENSE | 1 - README.md | 100 ++++++++------- test.R | 14 +-- 28 files changed, 412 insertions(+), 344 deletions(-) create mode 100644 CVE_C/R/coef.R create mode 100644 CVE_C/R/directions.R create mode 100644 CVE_C/R/plot.R create mode 100644 CVE_C/R/predict.R create mode 100644 CVE_C/R/predict_dim.R create mode 100644 CVE_C/R/summary.R delete mode 100644 CVE_C/man/basis.cve.Rd create mode 100644 CVE_C/man/coef.cve.Rd create mode 100644 CVE_C/man/predict.dim.cve.Rd delete mode 100644 LICENSE diff --git a/CVE_C/DESCRIPTION b/CVE_C/DESCRIPTION index fb51946..04f5c23 100644 --- a/CVE_C/DESCRIPTION +++ b/CVE_C/DESCRIPTION @@ -8,4 +8,5 @@ Maintainer: Daniel Kapla Description: Implementation of the Conditional Variance Estimation (CVE) method. License: GPL-3 Encoding: UTF-8 +Imports: stats,graphics,mda RoxygenNote: 6.1.1 diff --git a/CVE_C/NAMESPACE b/CVE_C/NAMESPACE index ee465c9..5c1209e 100644 --- a/CVE_C/NAMESPACE +++ b/CVE_C/NAMESPACE @@ -1,12 +1,11 @@ # Generated by roxygen2: do not edit by hand -S3method(basis,cve) +S3method(coef,cve) S3method(directions,cve) S3method(plot,cve) S3method(predict,cve) S3method(predict.dim,cve) S3method(summary,cve) -export(basis) export(cve) export(cve.call) export(dataset) @@ -14,16 +13,17 @@ export(directions) export(elem.pairs) export(estimate.bandwidth) export(null) -export(predict.dim) export(projTangentStiefel) export(rStiefel) export(retractStiefel) export(skew) export(sym) import(stats) +importFrom(graphics,boxplot) importFrom(graphics,lines) importFrom(graphics,plot) importFrom(graphics,points) +importFrom(mda,mars) importFrom(stats,model.frame) importFrom(stats,rbinom) importFrom(stats,rnorm) diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R index b8c7bb1..81feb0b 100644 --- a/CVE_C/R/CVE.R +++ b/CVE_C/R/CVE.R @@ -56,26 +56,21 @@ #' \item method: Name of used method, #' \item call: The method call #' } -#' as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR +#' as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR #' projection matrices. #' #' @examples -#' library(CVE) -#' #' # create dataset -#' n <- 200 -#' p <- 12 -#' X <- matrix(rnorm(n * p), n, p) -#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2))) -#' Y <- X %*% B -#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3) +#' x <- matrix(rnorm(400), 100, 4) +#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) #' -#' # Call the CVE method. -#' dr <- cve(Y ~ X) -#' (B <- basis(dr, 2)) +#' # Call CVE using momentum. +#' dr.momentum <- cve(y ~ x, momentum = 0.2) +#' # Call weighted CVE. +#' dr.weighted <- cve(y ~ x, method = "weighted") #' #' @seealso For a detailed description of \code{formula} see -#' [\code{\link{formula}}]. +#' \code{\link{formula}}. #' @export cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { # check for type of `data` if supplied and set default @@ -108,10 +103,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { #' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied). #' @param tau Initial step-size. #' @param tol Tolerance for break condition. -#' @param epochs maximum number of optimization steps. +#' @param max.iter maximum number of optimization steps. #' @param attempts number of arbitrary different starting points. #' @param logger a logger function (only for advanced user, significantly slows #' down the computation). +#' @param h bandwidth or function to estimate bandwidth, defaults to internaly +#' estimated bandwidth. +#' @param momentum number of [0, 1) giving the ration of momentum for eucledian +#' gradient update with a momentum term. +#' @param slack Positive scaling to allow small increases of the loss while +#' optimizing. +#' @param gamma step-size reduction multiple. +#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is +#' set to 1 and \code{k} to match dimension) #' #' @return dr is a list which contains: #' \itemize{ @@ -132,7 +136,7 @@ cve.call <- function(X, Y, method = "simple", momentum = 0.0, tau = 1.0, tol = 1e-3, slack = 0.0, gamma = 0.5, V.init = NULL, - epochs = 50L, attempts = 10L, + max.iter = 50L, attempts = 10L, logger = NULL) { # get method bitmask methods <- list( @@ -230,13 +234,13 @@ cve.call <- function(X, Y, method = "simple", gamma <- as.double(gamma) } - if (!is.numeric(epochs) || length(epochs) > 1L) { - stop("Parameter 'epochs' must be positive integer.") - } else if (!is.integer(epochs)) { - epochs <- as.integer(epochs) + if (!is.numeric(max.iter) || length(max.iter) > 1L) { + stop("Parameter 'max.iter' must be positive integer.") + } else if (!is.integer(max.iter)) { + max.iter <- as.integer(max.iter) } - if (epochs < 1L) { - stop("Parameter 'epochs' must be at least 1L.") + if (max.iter < 1L) { + stop("Parameter 'max.iter' must be at least 1L.") } if (is.null(V.init)) { @@ -273,7 +277,7 @@ cve.call <- function(X, Y, method = "simple", V.init, momentum, tau, tol, slack, gamma, - epochs, attempts, + max.iter, attempts, logger, loggerEnv) dr.k$B <- null(dr.k$V) @@ -292,190 +296,3 @@ cve.call <- function(X, Y, method = "simple", class(dr) <- "cve" return(dr) } - -#' Loss distribution elbow plot. -#' -#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. -#' -#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). -#' @param ... Pass through parameters to [\code{\link{plot}}] and -#' [\code{\link{lines}}] -#' -#' @seealso see \code{\link{par}} for graphical parameters to pass through -#' as well as \code{\link{plot}}, the standard plot utility. -#' @method plot cve -#' @importFrom graphics plot lines points -#' @export -plot.cve <- function(x, ...) { - L <- c() - k <- c() - for (dr.k in x$res) { - if (class(dr.k) == 'cve.k') { - k <- c(k, as.character(dr.k$k)) - L <- c(L, dr.k$L) - } - } - L <- matrix(L, ncol = length(k)) / var(x$Y) - boxplot(L, main = "elbow plot", - xlab = "SDR dimension", - ylab = "Sample loss distribution", - names = k) -} - -#' Prints a summary of a \code{cve} result. -#' @param object Instance of 'cve' as returned by \code{cve}. -#' @method summary cve -#' @export -summary.cve <- function(object, ...) { - cat('Summary of CVE result - Method: "', object$method, '"\n', - '\n', - 'Dataset size: ', nrow(object$X), '\n', - 'Data Dimension: ', ncol(object$X), '\n', - # 'SDR Dimension: ', object$k, '\n', - # 'loss: ', object$loss, '\n', - '\n', - 'Called via:\n', - ' ', - sep='') - print(object$call) - - L <- c() - k <- c() - for (dr.k in object$res) { - if (class(dr.k) == 'cve.k') { - k <- c(k, as.character(dr.k$k)) - L <- c(L, dr.k$L) - } - } - L <- matrix(L, ncol = length(k)) - S <- apply(L, 2, summary) - colnames(S) <- k - cat('\n') - print(S) -} - -#' @export -directions <- function(dr, k) { - UseMethod("directions") -} - -#' Computes projected training data \code{X} for given dimension `k`. -#' -#' @param dr Instance of 'cve' as returned by \code{cve}. -#' @param k SDR dimension to use for projection. -#' -#' @method directions cve -#' @aliases directions directions.cve -#' @export -directions.cve <- function(dr, k) { - if (!(k %in% names(dr$res))) { - stop("SDR directions for requested dimension `k` not computed.") - } - return(dr$X %*% dr$res[[as.character(k)]]$B) -} - -#' @export -basis <- function(dr, k) { - UseMethod("basis") -} - -#' Gets estimated SDR basis. -#' -#' @param dr Instance of 'cve' as returned by \code{cve}. -#' @param k SDR dimension of requested basis, if not given a list of all -#' computed basis is returned. -#' -#' @return List of basis matrices, or the SDR basis for supplied dimension `k`. -#' -#' @method basis cve -#' @aliases basis basis.cve -#' @export -basis.cve <- function(dr, k) { - if (missing(k)) { - Bs <- list() - for (k in names(dr$res)) { - Bs[[k]] <- dr$res[[k]]$B - } - return(Bs) - } else if (k %in% names(dr$res)) { - return(dr$res[[as.character(k)]]$B) - } else { - stop("Requested dimenion `k` not computed.") - } -} - - -#' Predict method for CVE Fits. -#' -#' Predict responces using reduced data with \code{\link{mars}}. -#' -#' @param object instance of class \code{cve} (result of \code{cve}, -#' \code{cve.call}). -#' @param X.new Matrix of the new data to be predicted. -#' @param dim dimension of SDR space to be used for data projecition. -#' @param ... further arguments passed to \code{\link{mars}}. -#' -#' @return prediced response of data \code{X.new}. -#' -#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. -#' -#' @examples -#' TODO: -#' -#' @aliases predict.cve -#' @rdname predict.cve -#' -#' @method predict cve -#' @export -predict.cve <- function(object, X.new, dim = NULL, ...) { - library(mda) - - if (!is.matrix(X.new)) { - X.new <- matrix(X.new, nrow = 1L) - } - - B <- dr$res[[as.character(dim)]]$B - - model <- mars(object$X %*% B, object$Y) - predict(model, X.new %*% B) -} - -#' @export -predict.dim <- function(dr) { - UseMethod("predict.dim") -} - -#' @method predict.dim cve -#' @export -predict.dim.cve <- function(dr) { - library(mda) - - # Get centered training data and dimensions - X <- scale(dr$X, center = TRUE, scale = FALSE) - n <- nrow(dr$X) # umber of training data samples - Sigma <- (1 / n) * crossprod(X, X) - eig <- eigen(Sigma) - Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors) - X <- X %*% solve(Sigma_root) - - pred <- matrix(0, n, length(dr$res)) - colnames(pred) <- names(dr$res) - for (dr.k in dr$res) { - # get "name" of current dimension - k <- as.character(dr.k$k) - # Project dataset with current SDR basis - X.proj <- X %*% dr.k$B - - for (i in 1:n) { - model <- mars(X.proj[-i, ], dr$Y[-i]) - pred[i, k] <- predict(model, X.proj[i, , drop = F]) - } - - } - MSE <- colMeans((pred - dr$Y)^2) - - return(list( - MSE = MSE, - k = as.integer(names(which.min(MSE))) - )) -} diff --git a/CVE_C/R/coef.R b/CVE_C/R/coef.R new file mode 100644 index 0000000..a5b2ad6 --- /dev/null +++ b/CVE_C/R/coef.R @@ -0,0 +1,33 @@ +#' Gets estimated SDR basis. +#' +#' Returns the SDR basis matrix for SDR dimension(s). +#' @param object instance of \code{cve} as output from \code{\link{cve}} or +#' \code{\link{cve.call}} +#' @param k the SDR dimension. +#' @param ... ignored. +#' +#' @return dir the matrix of CS or CMS of given dimension +#' +#' @examples +#' x <- matrix(rnorm(400),100,4) +#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) +#' dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2 +#' B2 <- coef(dr, 2) +#' +#' @method coef cve +#' @aliases coef.cve +#' @rdname coef.cve +#' @export +coef.cve <- function(object, k, ...) { + if (missing(k)) { + Bs <- list() + for (k in names(object$res)) { + Bs[[k]] <- object$res[[k]]$B + } + return(Bs) + } else if (k %in% names(object$res)) { + return(object$res[[as.character(k)]]$B) + } else { + stop("Requested dimension `k` not computed.") + } +} diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R index 0f84299..9eacbc2 100644 --- a/CVE_C/R/datasets.R +++ b/CVE_C/R/datasets.R @@ -7,6 +7,7 @@ #' #' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} #' @param n nr samples +#' @param B SDR basis used for dataset creation if supplied. #' @param p Dim. of random variable \code{X}. #' @param p.mix Only for \code{"M4"}, see: below. #' @param lambda Only for \code{"M4"}, see: below. @@ -64,11 +65,11 @@ dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) } } else { - p <- dim(B)[1] - # validate col. nr to match dataset `k = dim(B)[2]` + p <- nrow(B) + # validate col. nr to match dataset `k = ncol(B)` stopifnot( - name %in% c("M1", "M2") && dim(B)[2] == 2, - name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 + name %in% c("M1", "M2") && ncol(B) == 2, + name %in% c("M3", "M4", "M5") && ncol(B) == 1 ) } diff --git a/CVE_C/R/directions.R b/CVE_C/R/directions.R new file mode 100644 index 0000000..1a9d76d --- /dev/null +++ b/CVE_C/R/directions.R @@ -0,0 +1,19 @@ +#' @export +directions <- function(dr, k) { + UseMethod("directions") +} + +#' Computes projected training data \code{X} for given dimension `k`. +#' +#' @param dr Instance of 'cve' as returned by \code{cve}. +#' @param k SDR dimension to use for projection. +#' +#' @method directions cve +#' @aliases directions directions.cve +#' @export +directions.cve <- function(dr, k) { + if (!(k %in% names(dr$res))) { + stop("SDR directions for requested dimension `k` not computed.") + } + return(dr$X %*% dr$res[[as.character(k)]]$B) +} diff --git a/CVE_C/R/plot.R b/CVE_C/R/plot.R new file mode 100644 index 0000000..96bd479 --- /dev/null +++ b/CVE_C/R/plot.R @@ -0,0 +1,28 @@ +#' Loss distribution elbow plot. +#' +#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. +#' +#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). +#' @param ... Pass through parameters to [\code{\link{plot}}] and +#' [\code{\link{lines}}] +#' +#' @seealso see \code{\link{par}} for graphical parameters to pass through +#' as well as \code{\link{plot}}, the standard plot utility. +#' @method plot cve +#' @importFrom graphics plot lines points boxplot +#' @export +plot.cve <- function(x, ...) { + L <- c() + k <- c() + for (dr.k in x$res) { + if (class(dr.k) == 'cve.k') { + k <- c(k, as.character(dr.k$k)) + L <- c(L, dr.k$L) + } + } + L <- matrix(L, ncol = length(k)) / var(x$Y) + boxplot(L, main = "elbow plot", + xlab = "SDR dimension", + ylab = "Sample loss distribution", + names = k) +} diff --git a/CVE_C/R/predict.R b/CVE_C/R/predict.R new file mode 100644 index 0000000..074db6e --- /dev/null +++ b/CVE_C/R/predict.R @@ -0,0 +1,36 @@ +#' Predict method for CVE Fits. +#' +#' Predict responces using reduced data with \code{\link{mars}}. +#' +#' @param object instance of class \code{cve} (result of \code{cve}, +#' \code{cve.call}). +#' @param newdata Matrix of the new data to be predicted. +#' @param dim dimension of SDR space to be used for data projecition. +#' @param ... further arguments passed to \code{\link{mars}}. +#' +#' @return prediced response of data \code{newdata}. +#' +#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. +#' +#' @rdname predict.cve +#' +#' @importFrom mda mars +#' @method predict cve +#' @export +predict.cve <- function(object, newdata, dim, ...) { + if (missing(newdata)) { + stop("No data supplied.") + } + if (missing(dim)) { + stop("No dimension supplied.") + } + + if (!is.matrix(newdata)) { + newdata <- matrix(newdata, nrow = 1L) + } + + B <- object$res[[as.character(dim)]]$B + + model <- mda::mars(object$X %*% B, object$Y) + predict(model, newdata %*% B) +} diff --git a/CVE_C/R/predict_dim.R b/CVE_C/R/predict_dim.R new file mode 100644 index 0000000..0eaf650 --- /dev/null +++ b/CVE_C/R/predict_dim.R @@ -0,0 +1,45 @@ +#' @rdname predict.dim.cve +#' @method predict.dim cve +#' @alias predict.dim.cve +#' @export +predict.dim <- function(object, ...) { + UseMethod("predict.dim") +} + +#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. +#' +#' @param object instance of class \code{cve} (result of \code{cve}, +#' \code{cve.call}). +#' @param ... ignored. +#' @method predict.dim cve +#' @export +predict.dim.cve <- function(object, ...) { + # Get centered training data and dimensions + X <- scale(object$X, center = TRUE, scale = FALSE) + n <- nrow(object$X) # umber of training data samples + Sigma <- (1 / n) * crossprod(X, X) + eig <- eigen(Sigma) + Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors) + X <- X %*% solve(Sigma_root) + + pred <- matrix(0, n, length(object$res)) + colnames(pred) <- names(object$res) + for (dr.k in object$res) { + # get "name" of current dimension + k <- as.character(dr.k$k) + # Project dataset with current SDR basis + X.proj <- X %*% dr.k$B + + for (i in 1:n) { + model <- mda::mars(X.proj[-i, ], object$Y[-i]) + pred[i, k] <- predict(model, X.proj[i, , drop = F]) + } + + } + MSE <- colMeans((pred - object$Y)^2) + + return(list( + MSE = MSE, + k = as.integer(names(which.min(MSE))) + )) +} diff --git a/CVE_C/R/summary.R b/CVE_C/R/summary.R new file mode 100644 index 0000000..9403f09 --- /dev/null +++ b/CVE_C/R/summary.R @@ -0,0 +1,32 @@ +#' Prints a summary of a \code{cve} result. +#' @param object Instance of 'cve' as returned by \code{cve}. +#' @param ... ignored. +#' @method summary cve +#' @export +summary.cve <- function(object, ...) { + cat('Summary of CVE result - Method: "', object$method, '"\n', + '\n', + 'Dataset size: ', nrow(object$X), '\n', + 'Data Dimension: ', ncol(object$X), '\n', + # 'SDR Dimension: ', object$k, '\n', + # 'loss: ', object$loss, '\n', + '\n', + 'Called via:\n', + ' ', + sep='') + print(object$call) + + L <- c() + k <- c() + for (dr.k in object$res) { + if (class(dr.k) == 'cve.k') { + k <- c(k, as.character(dr.k$k)) + L <- c(L, dr.k$L) + } + } + L <- matrix(L, ncol = length(k)) + S <- apply(L, 2, summary) + colnames(S) <- k + cat('\n') + print(S) +} diff --git a/CVE_C/man/basis.cve.Rd b/CVE_C/man/basis.cve.Rd deleted file mode 100644 index 03d5595..0000000 --- a/CVE_C/man/basis.cve.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R -\name{basis.cve} -\alias{basis.cve} -\alias{basis} -\title{Gets estimated SDR basis.} -\usage{ -\method{basis}{cve}(dr, k) -} -\arguments{ -\item{dr}{Instance of 'cve' as returned by \code{cve}.} - -\item{k}{SDR dimension of requested basis, if not given a list of all -computed basis is returned.} -} -\value{ -List of basis matrices, or the SDR basis for supplied dimension `k`. -} -\description{ -Gets estimated SDR basis. -} diff --git a/CVE_C/man/coef.cve.Rd b/CVE_C/man/coef.cve.Rd new file mode 100644 index 0000000..54a68c3 --- /dev/null +++ b/CVE_C/man/coef.cve.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/coef.R +\name{coef.cve} +\alias{coef.cve} +\title{Gets estimated SDR basis.} +\usage{ +\method{coef}{cve}(object, k, ...) +} +\arguments{ +\item{object}{instance of \code{cve} as output from \code{\link{cve}} or +\code{\link{cve.call}}} + +\item{k}{the SDR dimension.} + +\item{...}{ignored.} +} +\value{ +dir the matrix of CS or CMS of given dimension +} +\description{ +Returns the SDR basis matrix for SDR dimension(s). +} +\examples{ +x <- matrix(rnorm(400),100,4) +y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) +dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2 +B2 <- coef(dr, 2) + +} diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd index 5a3bde4..7acbf00 100644 --- a/CVE_C/man/cve.Rd +++ b/CVE_C/man/cve.Rd @@ -10,7 +10,7 @@ cve(formula, data, method = "simple", max.dim = 10L, ...) cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL, min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1, tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL, - epochs = 50L, attempts = 10L, logger = NULL) + max.iter = 50L, attempts = 10L, logger = NULL) } \arguments{ \item{formula}{an object of class \code{"formula"} which is a symbolic @@ -37,16 +37,30 @@ supplied.} \item{nObs}{parameter for choosing bandwidth \code{h} using \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).} +\item{h}{bandwidth or function to estimate bandwidth, defaults to internaly +estimated bandwidth.} + \item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).} \item{k}{Dimension of lower dimensional projection, if \code{k} is given only the specified dimension \code{B} matrix is estimated.} +\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian +gradient update with a momentum term.} + \item{tau}{Initial step-size.} \item{tol}{Tolerance for break condition.} -\item{epochs}{maximum number of optimization steps.} +\item{slack}{Positive scaling to allow small increases of the loss while +optimizing.} + +\item{gamma}{step-size reduction multiple.} + +\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is +set to 1 and \code{k} to match dimension)} + +\item{max.iter}{maximum number of optimization steps.} \item{attempts}{number of arbitrary different starting points.} @@ -61,7 +75,7 @@ dr is a S3 object of class \code{cve} with named properties: \item method: Name of used method, \item call: The method call } -as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR +as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR projection matrices. dr is a list which contains: @@ -79,22 +93,17 @@ dr is a list which contains: TODO: reuse of package description and details!!!! } \examples{ -library(CVE) - # create dataset -n <- 200 -p <- 12 -X <- matrix(rnorm(n * p), n, p) -B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2))) -Y <- X \%*\% B -Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3) +x <- matrix(rnorm(400), 100, 4) +y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) -# Call the CVE method. -dr <- cve(Y ~ X) -(B <- basis(dr, 2)) +# Call CVE using momentum. +dr.momentum <- cve(y ~ x, momentum = 0.2) +# Call weighted CVE. +dr.weighted <- cve(y ~ x, method = "weighted") } \seealso{ For a detailed description of \code{formula} see - [\code{\link{formula}}]. + \code{\link{formula}}. } diff --git a/CVE_C/man/dataset.Rd b/CVE_C/man/dataset.Rd index 9ce9e6e..3395abf 100644 --- a/CVE_C/man/dataset.Rd +++ b/CVE_C/man/dataset.Rd @@ -11,6 +11,8 @@ dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) \item{n}{nr samples} +\item{B}{SDR basis used for dataset creation if supplied.} + \item{p.mix}{Only for \code{"M4"}, see: below.} \item{lambda}{Only for \code{"M4"}, see: below.} diff --git a/CVE_C/man/directions.cve.Rd b/CVE_C/man/directions.cve.Rd index 351bfb7..3fe5bca 100644 --- a/CVE_C/man/directions.cve.Rd +++ b/CVE_C/man/directions.cve.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R +% Please edit documentation in R/directions.R \name{directions.cve} \alias{directions.cve} \alias{directions} diff --git a/CVE_C/man/plot.cve.Rd b/CVE_C/man/plot.cve.Rd index c66c95b..4f795e1 100644 --- a/CVE_C/man/plot.cve.Rd +++ b/CVE_C/man/plot.cve.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R +% Please edit documentation in R/plot.R \name{plot.cve} \alias{plot.cve} \title{Loss distribution elbow plot.} diff --git a/CVE_C/man/predict.cve.Rd b/CVE_C/man/predict.cve.Rd index a19993c..4660cd4 100644 --- a/CVE_C/man/predict.cve.Rd +++ b/CVE_C/man/predict.cve.Rd @@ -1,30 +1,26 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R +% Please edit documentation in R/predict.R \name{predict.cve} \alias{predict.cve} \title{Predict method for CVE Fits.} \usage{ -\method{predict}{cve}(object, X.new, dim = NULL, ...) +\method{predict}{cve}(object, newdata, dim, ...) } \arguments{ \item{object}{instance of class \code{cve} (result of \code{cve}, \code{cve.call}).} -\item{X.new}{Matrix of the new data to be predicted.} +\item{newdata}{Matrix of the new data to be predicted.} \item{dim}{dimension of SDR space to be used for data projecition.} \item{...}{further arguments passed to \code{\link{mars}}.} } \value{ -prediced response of data \code{X.new}. +prediced response of data \code{newdata}. } \description{ Predict responces using reduced data with \code{\link{mars}}. -} -\examples{ - TODO: - } \seealso{ \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. diff --git a/CVE_C/man/predict.dim.cve.Rd b/CVE_C/man/predict.dim.cve.Rd new file mode 100644 index 0000000..01aee3f --- /dev/null +++ b/CVE_C/man/predict.dim.cve.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict_dim.R +\name{predict.dim} +\alias{predict.dim} +\alias{predict.dim.cve} +\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.} +\usage{ +\method{predict.dim}{cve}(object, ...) + +\method{predict.dim}{cve}(object, ...) +} +\arguments{ +\item{object}{instance of class \code{cve} (result of \code{cve}, +\code{cve.call}).} + +\item{...}{ignored.} +} +\description{ +Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. +} diff --git a/CVE_C/man/summary.cve.Rd b/CVE_C/man/summary.cve.Rd index c90d9d1..028e08a 100644 --- a/CVE_C/man/summary.cve.Rd +++ b/CVE_C/man/summary.cve.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CVE.R +% Please edit documentation in R/summary.R \name{summary.cve} \alias{summary.cve} \title{Prints a summary of a \code{cve} result.} @@ -8,6 +8,8 @@ } \arguments{ \item{object}{Instance of 'cve' as returned by \code{cve}.} + +\item{...}{ignored.} } \description{ Prints a summary of a \code{cve} result. diff --git a/CVE_C/src/Makevars b/CVE_C/src/Makevars index 22e0633..0f038bf 100644 --- a/CVE_C/src/Makevars +++ b/CVE_C/src/Makevars @@ -1,3 +1,7 @@ +# # For OpenMP support. +# # Turned OFF for supporting all platforms +# PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) +# PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CFLAGS) SHLIB_OPENMP_CFLAGS= SHLIB_OPENMP_CXXFLAGS= diff --git a/CVE_C/src/cve.c b/CVE_C/src/cve.c index 39d06ac..d1947bf 100644 --- a/CVE_C/src/cve.c +++ b/CVE_C/src/cve.c @@ -11,16 +11,16 @@ void cve_sub(const int n, const int p, const int q, const double momentum, const double tau_init, const double tol_init, const double slack, const double gamma, - const int epochs, const int attempts, + const int maxIter, const int attempts, double *V, double *L, SEXP logger, SEXP loggerEnv) { - int attempt = 0, epoch, i, nn = (n * (n - 1)) / 2; + int attempt = 0, iter, i, nn = (n * (n - 1)) / 2; double loss, loss_last, loss_best, err, tau; double tol = tol_init * sqrt((double)(2 * q)); double gKscale = -0.5 / h; double agility = -2.0 * (1.0 - momentum) / (h * h); - double c; + double c = agility / (double)n; /* Create further intermediate or internal variables. */ double *Q = (double*)R_alloc(p * p, sizeof(double)); @@ -100,21 +100,20 @@ void cve_sub(const int n, const int p, const int q, if (method == CVE_METHOD_WEIGHTED) { /* Compute summ of all kernel applied distances by summing the * colSums of the kernel matrix. */ - c = -(double)n; // to scale with sum(K) - n - for (i = 0; i < n; ++i) { - c += colSums[i]; - } + // c = -(double)n; // to scale with sum(K) - n + // for (i = 0; i < n; ++i) { + // c += colSums[i]; + // } // TODO: check for division by zero, but should not happen!!! - } else { - c = n; // TODO: move (init) up cause always the same ^^ ... + c = agility / (sum(colSums, n) - (double)n); } - scale(agility / c, G, p * q); // in-place + scale(c, G, p * q); // in-place /* Compute Skew-Symmetric matrix `A` used in Cayley transform. * `A <- tau * (G V^T - V G^T) + 0 * A`*/ skew(p, q, tau, G, V, 0.0, A); - for (epoch = 0; epoch < epochs; ++epoch) { + for (iter = 0; iter < maxIter; ++iter) { /* Move V allong A */ cayleyTransform(p, q, A, V, V_tau, workMem); @@ -156,12 +155,12 @@ void cve_sub(const int n, const int p, const int q, if (logger) { callLogger(logger, loggerEnv, - attempt, epoch + 1, + attempt, iter + 1, L, n, V, p, q, tau); } // Check Break condition. - if (err < tol || epoch + 1 >= epochs) { + if (err < tol || iter + 1 >= maxIter) { break; } @@ -179,15 +178,9 @@ void cve_sub(const int n, const int p, const int q, if (method == CVE_METHOD_WEIGHTED) { /* Compute summ of all kernel applied distances by summing the - * colSums of the kernel matrix. */ - c = -(double)n; // to scale with sum(K) - n - for (i = 0; i < n; ++i) { - c += colSums[i]; - } - c = agility / c; + * colSums of the kernel matrix. */ // TODO: check for division by zero, but should not happen!!! - } else { - c = agility / n; + c = agility / (sum(colSums, n) - (double)n); } F77_NAME(dgemm)("N", "N", &p, &q, &p, &c, workMem, &p, V, &p, diff --git a/CVE_C/src/cve.h b/CVE_C/src/cve.h index b4dc8b0..feeba04 100644 --- a/CVE_C/src/cve.h +++ b/CVE_C/src/cve.h @@ -31,7 +31,7 @@ void cve_sub(const int n, const int p, const int q, const double momentum, const double tau_init, const double tol_init, const double slack, const double gamma, - const int epochs, int attempts, + const int maxIter, int attempts, double *V, double *L, SEXP logger, SEXP loggerEnv); @@ -61,6 +61,8 @@ void rStiefel(const int p, const int q, double *V, double *workMem, int workLen); /* MATRIX */ +double sum(const double *A, const int nelem); + double norm(const double *A, const int nrow, const int ncol, const char *type); diff --git a/CVE_C/src/export.c b/CVE_C/src/export.c index c318dd0..f7b9d16 100644 --- a/CVE_C/src/export.c +++ b/CVE_C/src/export.c @@ -20,7 +20,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, SEXP V, // initial SEXP momentum, SEXP tau, SEXP tol, SEXP slack, SEXP gamma, - SEXP epochs, SEXP attempts, + SEXP maxIter, SEXP attempts, SEXP logger, SEXP loggerEnv) { /* Handle logger parameter, set to NULL pointer if not a function. */ if (!(isFunction(logger) && isEnvironment(loggerEnv))) { @@ -53,7 +53,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, asInteger(method), asReal(momentum), asReal(tau), asReal(tol), asReal(slack), asReal(gamma), - asInteger(epochs), asInteger(attempts), + asInteger(maxIter), asInteger(attempts), REAL(Vout), REAL(Lout), logger, loggerEnv); diff --git a/CVE_C/src/init.c b/CVE_C/src/init.c index 90fdb19..767231b 100644 --- a/CVE_C/src/init.c +++ b/CVE_C/src/init.c @@ -9,7 +9,7 @@ extern SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, SEXP V, // initial SEXP momentum, SEXP tau, SEXP tol, SEXP slack, SEXP gamma, - SEXP epochs, SEXP attempts, + SEXP maxIter, SEXP attempts, SEXP logger, SEXP loggerEnv); static const R_CallMethodDef CallEntries[] = { diff --git a/CVE_C/src/matrix.c b/CVE_C/src/matrix.c index 717f781..2e3d261 100644 --- a/CVE_C/src/matrix.c +++ b/CVE_C/src/matrix.c @@ -10,6 +10,23 @@ // return newMat; // } +double sum(const double *A, const int nelem) { + int i, nelemb = (nelem / 4) * 4; + double sum = 0.0; + + for (i = 0; i < nelemb; i += 4) { + sum += A[i] + + A[i + 1] + + A[i + 2] + + A[i + 3]; + } + for (; i < nelem; ++i) { + sum += A[i]; + } + + return sum; +} + double norm(const double *A, const int nrow, const int ncol, const char *type) { int i, nelem = nrow * ncol; diff --git a/LICENSE b/LICENSE deleted file mode 100644 index d4a44af..0000000 --- a/LICENSE +++ /dev/null @@ -1 +0,0 @@ -GDPv2, GDPv3, MIT ???? diff --git a/README.md b/README.md index 1bb3224..bc36a4c 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Doc: - [x] Ref paper in doc - [ ] Data set descriptions and augmentations. - [x] Demonstration of the `Logger` function usage (Demo file or so, ...) +- [ ] Update Paper (to new version / version consistent with current code!) Methods to be implemented: - [x] simple @@ -28,21 +29,56 @@ Features (functions): - [x] Initial `V.init` parameter (only ONE try, ignore number of `attempts` parameter) - [x] `basis.cve` list of estimated `B`s (with `k` supplied, only `B`) - [x] `directions.cve` Projected `X` given `k` -- [ ] `predict.cve` using `mars` for predicting responses given new data. -- [ ] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation +- [x] `predict.cve` using `mars` for predicting responses given new data. +- [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation - [x] `plot.elbow` - [x] `summary` Changes: -- [-] New `estimate.bandwidth` implementation. +- [x] New `estimate.bandwidth` implementation. (h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2, \Sigma = 1/n * (X-mean)'(X-mean)) +# Development +## Build and install. +To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file. +```R +setwd("./CVE_R") # Set path to the package root. +library(devtools) # Load required `devtools` package. +document() # Create `.Rd` files and write `NAMESPACE`. +``` +Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following. +```bash +R CMD build CVE_C; R CMD INSTALL CVE_0.2.tar.gz +``` +Then we are ready for using the package. +As well as building the `NAMESPACE` and `*.Rd` files using `devtools` (`roxygen2`) the following resembles an entire build pipeline including checks. +```bash +R -q -e 'library(devtools); setwd("CVE_C"); pkgbuild::compile_dll(); document(); pkgbuild::clean_dll()' +R CMD build CVE_C; R CMD check CVE_0.2.tar.gz; +R CMD INSTALL CVE_0.2.tar.gz +``` +## Build and install from within `R`. +An alternative approach is the following. +```R +## Installing CVE (C implementation) +(setwd('~/Projects/CVE/CVE_C')) +# equiv to Rcpp::compileAttributes(). +library(devtools) +pkgbuild::compile_dll() # required for packages with C/C++ code +document() # See bug: https://github.com/stan-dev/rstantools/issues/52 +pkgbuild::clean_dll() +(path <- build(vignettes = FALSE)) +install.packages(path, repos = NULL, type = "source") +library(CVE) +``` +**Note: I only recommend this approach during development.** + # Package Structure ## Demos A demo is an `.R` file that lives in `demo/`. Demos are like examples but tend to -be longer. Instead of focussing on a single function, they show how to weave +be longer. Instead of focusing on a single function, they show how to weave together multiple functions to solve a problem. You list and access demos with `demo()`: @@ -57,7 +93,7 @@ The demo name is the name of the file without the extension, e.g. `demo/runtime_test.R` becomes `runtime_test`. By default the demo ask for human input for each plot: "Hit to see next plot". -This behaviour can be overridden by adding `devAskNewPage(ask = FALSE)` to +This behavior can be overridden by adding `devAskNewPage(ask = FALSE)` to the demo file. You can add pauses by adding: `readline("press any key to continue")`. @@ -71,9 +107,9 @@ Using the Linux `grep` program with the parameters `-rnw` and specifying a inclu ```bash grep --include=*\.{c,h,R} -rnw '.' -e "sweep" ``` -searches in all `C` source and header fils as well as `R` source files for the term _sweep_. +searches in all `C` source and header files as well as `R` source files for the term _sweep_. -## Recursive dir. compair with colored sructure (more or less). +## Recursive directory compare with colored structure (more or less). ```bash diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)" ``` @@ -102,41 +138,9 @@ echo dry_run=$dry_run echo stack_size=$stack_size ``` -# Development -## Build and install. -To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file. -```R -setwd("./CVE_R") # Set path to the package root. -library(devtools) # Load required `devtools` package. -document() # Create `.Rd` files and write `NAMESPACE`. -``` -Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following. -```bash -R CMD build CVE_R -R CMD INSTALL CVE_0.1.tar.gz -``` -Then we are ready for using the package. -```R -library(CVE) -help(package = "CVE") -``` -## Build and install from within `R`. -An alternative approach is the following. -```R -setwd('./CVE_R') -getwd() - -library(devtools) -document() -# No vignettes to build but "inst/doc/" is required! -(path <- build(vignettes = FALSE)) -install.packages(path, repos = NULL, type = "source") -``` -**Note: I only recommend this approach during development.** - -# Analysing +# Analysis ## Logging (a `cve` run). -To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example. +To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the step size one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example. ```R library(CVE) @@ -184,7 +188,7 @@ matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter', ``` ## Reading log files. -The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example. +The run-time tests (upcoming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actually `TSV`) with a header storing the test results. Depending on the test the files may contain different data. As an example we use the run-time test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analyzing the data see the following example. ```R # Load log as `data.frame` log <- read.csv('tmp/test0.log', sep = '\t') @@ -200,7 +204,7 @@ for (ds.name in paste0('M', seq(5))) { ## Environments and variable lookup. In the following a view simple examples of how `R` searches for variables. -In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation. +In addition we manipulate function closures to alter the search path in variable lookup and outer scope variable manipulation. ```R droids <- "These aren't the droids you're looking for." @@ -225,7 +229,7 @@ jedi.seeks() # [1] "R2-D2", "C-3PO" ``` -The next example ilustrates how to write (without local copies) to variables outside the functions local environment. +The next example illustrates how to write (without local copies) to variables outside the functions local environment. ```R counting <- function() { count <<- count + 1 # Note the `<<-` assignment. @@ -255,7 +259,7 @@ counting <- function() { })() ``` -Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`). +Another example for the usage of `do.call` where the evaluation of parameters is illustrated (example taken (and altered) from `?do.call`). ```R ## examples of where objects will be found. A <- "A.Global" @@ -373,7 +377,7 @@ microbenchmark( # sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100 # recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100 ``` -### Scaled `crossprod` with matmul order. +### Scaled `crossprod` with matrix multiplication order. ```R (n <- 200) (p <- 12) @@ -424,7 +428,7 @@ microbenchmark( ``` ## Using `Rprof()` for performance. -The standart method for profiling where an algorithm is spending its time is with `Rprof()`. +The standard method for profiling where an algorithm is spending its time is with `Rprof()`. ```R path <- '../tmp/R.prof' # path to profiling file Rprof(path) @@ -432,4 +436,4 @@ cve.res <- cve.call(X, Y, k = k) Rprof(NULL) (prof <- summaryRprof(path)) # Summarise results ``` -**Note: considure to run `gc()` before measuring**, aka cleaning up by explicitely calling the garbage collector. +**Note: consider to run `gc()` before measuring**, aka cleaning up by explicitly calling the garbage collector. diff --git a/test.R b/test.R index bfb6da6..51c3b6d 100644 --- a/test.R +++ b/test.R @@ -5,12 +5,12 @@ if (length(args) > 0L) { } else { method <- "simple" } -if (length((args) > 1L)) { +if (length(args) > 1L) { momentum <- as.double(args[2]) } else { momentum <- 0.0 } -epochs <- 50L +max.iter <- 50L attempts <- 25L # library(CVEpureR) @@ -55,14 +55,14 @@ for (name in paste0("M", seq(5))) { # Setup histories. V_last <- NULL - loss.history <- matrix(NA, epochs + 1, attempts) - error.history <- matrix(NA, epochs + 1, attempts) - tau.history <- matrix(NA, epochs + 1, attempts) - true.error.history <- matrix(NA, epochs + 1, attempts) + loss.history <- matrix(NA, max.iter + 1, attempts) + error.history <- matrix(NA, max.iter + 1, attempts) + tau.history <- matrix(NA, max.iter + 1, attempts) + true.error.history <- matrix(NA, max.iter + 1, attempts) dr <- cve(Y ~ X, k = k, method = method, momentum = momentum, - epochs = epochs, attempts = attempts, + max.iter = max.iter, attempts = attempts, logger = logger) # Plot history's