2
0
Fork 0

fix: R CMD check errors and warnings,

change: extracted util functions into seperate files,
add: C's sum funciton
This commit is contained in:
Daniel Kapla 2019-11-22 09:32:14 +01:00
parent 063c4d638b
commit 875982a010
28 changed files with 412 additions and 344 deletions

View File

@ -8,4 +8,5 @@ Maintainer: Daniel Kapla <daniel@kapla.at>
Description: Implementation of the Conditional Variance Estimation (CVE) method.
License: GPL-3
Encoding: UTF-8
Imports: stats,graphics,mda
RoxygenNote: 6.1.1

View File

@ -1,12 +1,11 @@
# Generated by roxygen2: do not edit by hand
S3method(basis,cve)
S3method(coef,cve)
S3method(directions,cve)
S3method(plot,cve)
S3method(predict,cve)
S3method(predict.dim,cve)
S3method(summary,cve)
export(basis)
export(cve)
export(cve.call)
export(dataset)
@ -14,16 +13,17 @@ export(directions)
export(elem.pairs)
export(estimate.bandwidth)
export(null)
export(predict.dim)
export(projTangentStiefel)
export(rStiefel)
export(retractStiefel)
export(skew)
export(sym)
import(stats)
importFrom(graphics,boxplot)
importFrom(graphics,lines)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(mda,mars)
importFrom(stats,model.frame)
importFrom(stats,rbinom)
importFrom(stats,rnorm)

View File

@ -56,26 +56,21 @@
#' \item method: Name of used method,
#' \item call: The method call
#' }
#' as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR
#' as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
#' projection matrices.
#'
#' @examples
#' library(CVE)
#'
#' # create dataset
#' n <- 200
#' p <- 12
#' X <- matrix(rnorm(n * p), n, p)
#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
#' Y <- X %*% B
#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
#' x <- matrix(rnorm(400), 100, 4)
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
#'
#' # Call the CVE method.
#' dr <- cve(Y ~ X)
#' (B <- basis(dr, 2))
#' # Call CVE using momentum.
#' dr.momentum <- cve(y ~ x, momentum = 0.2)
#' # Call weighted CVE.
#' dr.weighted <- cve(y ~ x, method = "weighted")
#'
#' @seealso For a detailed description of \code{formula} see
#' [\code{\link{formula}}].
#' \code{\link{formula}}.
#' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
# check for type of `data` if supplied and set default
@ -108,10 +103,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param tau Initial step-size.
#' @param tol Tolerance for break condition.
#' @param epochs maximum number of optimization steps.
#' @param max.iter maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points.
#' @param logger a logger function (only for advanced user, significantly slows
#' down the computation).
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
#' estimated bandwidth.
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian
#' gradient update with a momentum term.
#' @param slack Positive scaling to allow small increases of the loss while
#' optimizing.
#' @param gamma step-size reduction multiple.
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
#' set to 1 and \code{k} to match dimension)
#'
#' @return dr is a list which contains:
#' \itemize{
@ -132,7 +136,7 @@ cve.call <- function(X, Y, method = "simple",
momentum = 0.0, tau = 1.0, tol = 1e-3,
slack = 0.0, gamma = 0.5,
V.init = NULL,
epochs = 50L, attempts = 10L,
max.iter = 50L, attempts = 10L,
logger = NULL) {
# get method bitmask
methods <- list(
@ -230,13 +234,13 @@ cve.call <- function(X, Y, method = "simple",
gamma <- as.double(gamma)
}
if (!is.numeric(epochs) || length(epochs) > 1L) {
stop("Parameter 'epochs' must be positive integer.")
} else if (!is.integer(epochs)) {
epochs <- as.integer(epochs)
if (!is.numeric(max.iter) || length(max.iter) > 1L) {
stop("Parameter 'max.iter' must be positive integer.")
} else if (!is.integer(max.iter)) {
max.iter <- as.integer(max.iter)
}
if (epochs < 1L) {
stop("Parameter 'epochs' must be at least 1L.")
if (max.iter < 1L) {
stop("Parameter 'max.iter' must be at least 1L.")
}
if (is.null(V.init)) {
@ -273,7 +277,7 @@ cve.call <- function(X, Y, method = "simple",
V.init,
momentum, tau, tol,
slack, gamma,
epochs, attempts,
max.iter, attempts,
logger, loggerEnv)
dr.k$B <- null(dr.k$V)
@ -292,190 +296,3 @@ cve.call <- function(X, Y, method = "simple",
class(dr) <- "cve"
return(dr)
}
#' Loss distribution elbow plot.
#'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
#'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and
#' [\code{\link{lines}}]
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
#' @importFrom graphics plot lines points
#' @export
plot.cve <- function(x, ...) {
L <- c()
k <- c()
for (dr.k in x$res) {
if (class(dr.k) == 'cve.k') {
k <- c(k, as.character(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k)) / var(x$Y)
boxplot(L, main = "elbow plot",
xlab = "SDR dimension",
ylab = "Sample loss distribution",
names = k)
}
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#' @method summary cve
#' @export
summary.cve <- function(object, ...) {
cat('Summary of CVE result - Method: "', object$method, '"\n',
'\n',
'Dataset size: ', nrow(object$X), '\n',
'Data Dimension: ', ncol(object$X), '\n',
# 'SDR Dimension: ', object$k, '\n',
# 'loss: ', object$loss, '\n',
'\n',
'Called via:\n',
' ',
sep='')
print(object$call)
L <- c()
k <- c()
for (dr.k in object$res) {
if (class(dr.k) == 'cve.k') {
k <- c(k, as.character(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k))
S <- apply(L, 2, summary)
colnames(S) <- k
cat('\n')
print(S)
}
#' @export
directions <- function(dr, k) {
UseMethod("directions")
}
#' Computes projected training data \code{X} for given dimension `k`.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' @param k SDR dimension to use for projection.
#'
#' @method directions cve
#' @aliases directions directions.cve
#' @export
directions.cve <- function(dr, k) {
if (!(k %in% names(dr$res))) {
stop("SDR directions for requested dimension `k` not computed.")
}
return(dr$X %*% dr$res[[as.character(k)]]$B)
}
#' @export
basis <- function(dr, k) {
UseMethod("basis")
}
#' Gets estimated SDR basis.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' @param k SDR dimension of requested basis, if not given a list of all
#' computed basis is returned.
#'
#' @return List of basis matrices, or the SDR basis for supplied dimension `k`.
#'
#' @method basis cve
#' @aliases basis basis.cve
#' @export
basis.cve <- function(dr, k) {
if (missing(k)) {
Bs <- list()
for (k in names(dr$res)) {
Bs[[k]] <- dr$res[[k]]$B
}
return(Bs)
} else if (k %in% names(dr$res)) {
return(dr$res[[as.character(k)]]$B)
} else {
stop("Requested dimenion `k` not computed.")
}
}
#' Predict method for CVE Fits.
#'
#' Predict responces using reduced data with \code{\link{mars}}.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}).
#' @param X.new Matrix of the new data to be predicted.
#' @param dim dimension of SDR space to be used for data projecition.
#' @param ... further arguments passed to \code{\link{mars}}.
#'
#' @return prediced response of data \code{X.new}.
#'
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
#'
#' @examples
#' TODO:
#'
#' @aliases predict.cve
#' @rdname predict.cve
#'
#' @method predict cve
#' @export
predict.cve <- function(object, X.new, dim = NULL, ...) {
library(mda)
if (!is.matrix(X.new)) {
X.new <- matrix(X.new, nrow = 1L)
}
B <- dr$res[[as.character(dim)]]$B
model <- mars(object$X %*% B, object$Y)
predict(model, X.new %*% B)
}
#' @export
predict.dim <- function(dr) {
UseMethod("predict.dim")
}
#' @method predict.dim cve
#' @export
predict.dim.cve <- function(dr) {
library(mda)
# Get centered training data and dimensions
X <- scale(dr$X, center = TRUE, scale = FALSE)
n <- nrow(dr$X) # umber of training data samples
Sigma <- (1 / n) * crossprod(X, X)
eig <- eigen(Sigma)
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
X <- X %*% solve(Sigma_root)
pred <- matrix(0, n, length(dr$res))
colnames(pred) <- names(dr$res)
for (dr.k in dr$res) {
# get "name" of current dimension
k <- as.character(dr.k$k)
# Project dataset with current SDR basis
X.proj <- X %*% dr.k$B
for (i in 1:n) {
model <- mars(X.proj[-i, ], dr$Y[-i])
pred[i, k] <- predict(model, X.proj[i, , drop = F])
}
}
MSE <- colMeans((pred - dr$Y)^2)
return(list(
MSE = MSE,
k = as.integer(names(which.min(MSE)))
))
}

33
CVE_C/R/coef.R Normal file
View File

@ -0,0 +1,33 @@
#' Gets estimated SDR basis.
#'
#' Returns the SDR basis matrix for SDR dimension(s).
#' @param object instance of \code{cve} as output from \code{\link{cve}} or
#' \code{\link{cve.call}}
#' @param k the SDR dimension.
#' @param ... ignored.
#'
#' @return dir the matrix of CS or CMS of given dimension
#'
#' @examples
#' x <- matrix(rnorm(400),100,4)
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
#' dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2
#' B2 <- coef(dr, 2)
#'
#' @method coef cve
#' @aliases coef.cve
#' @rdname coef.cve
#' @export
coef.cve <- function(object, k, ...) {
if (missing(k)) {
Bs <- list()
for (k in names(object$res)) {
Bs[[k]] <- object$res[[k]]$B
}
return(Bs)
} else if (k %in% names(object$res)) {
return(object$res[[as.character(k)]]$B)
} else {
stop("Requested dimension `k` not computed.")
}
}

View File

@ -7,6 +7,7 @@
#'
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
#' @param n nr samples
#' @param B SDR basis used for dataset creation if supplied.
#' @param p Dim. of random variable \code{X}.
#' @param p.mix Only for \code{"M4"}, see: below.
#' @param lambda Only for \code{"M4"}, see: below.
@ -64,11 +65,11 @@ dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
}
} else {
p <- dim(B)[1]
# validate col. nr to match dataset `k = dim(B)[2]`
p <- nrow(B)
# validate col. nr to match dataset `k = ncol(B)`
stopifnot(
name %in% c("M1", "M2") && dim(B)[2] == 2,
name %in% c("M3", "M4", "M5") && dim(B)[2] == 1
name %in% c("M1", "M2") && ncol(B) == 2,
name %in% c("M3", "M4", "M5") && ncol(B) == 1
)
}

19
CVE_C/R/directions.R Normal file
View File

@ -0,0 +1,19 @@
#' @export
directions <- function(dr, k) {
UseMethod("directions")
}
#' Computes projected training data \code{X} for given dimension `k`.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' @param k SDR dimension to use for projection.
#'
#' @method directions cve
#' @aliases directions directions.cve
#' @export
directions.cve <- function(dr, k) {
if (!(k %in% names(dr$res))) {
stop("SDR directions for requested dimension `k` not computed.")
}
return(dr$X %*% dr$res[[as.character(k)]]$B)
}

28
CVE_C/R/plot.R Normal file
View File

@ -0,0 +1,28 @@
#' Loss distribution elbow plot.
#'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
#'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and
#' [\code{\link{lines}}]
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#' as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
#' @importFrom graphics plot lines points boxplot
#' @export
plot.cve <- function(x, ...) {
L <- c()
k <- c()
for (dr.k in x$res) {
if (class(dr.k) == 'cve.k') {
k <- c(k, as.character(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k)) / var(x$Y)
boxplot(L, main = "elbow plot",
xlab = "SDR dimension",
ylab = "Sample loss distribution",
names = k)
}

36
CVE_C/R/predict.R Normal file
View File

@ -0,0 +1,36 @@
#' Predict method for CVE Fits.
#'
#' Predict responces using reduced data with \code{\link{mars}}.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}).
#' @param newdata Matrix of the new data to be predicted.
#' @param dim dimension of SDR space to be used for data projecition.
#' @param ... further arguments passed to \code{\link{mars}}.
#'
#' @return prediced response of data \code{newdata}.
#'
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
#'
#' @rdname predict.cve
#'
#' @importFrom mda mars
#' @method predict cve
#' @export
predict.cve <- function(object, newdata, dim, ...) {
if (missing(newdata)) {
stop("No data supplied.")
}
if (missing(dim)) {
stop("No dimension supplied.")
}
if (!is.matrix(newdata)) {
newdata <- matrix(newdata, nrow = 1L)
}
B <- object$res[[as.character(dim)]]$B
model <- mda::mars(object$X %*% B, object$Y)
predict(model, newdata %*% B)
}

45
CVE_C/R/predict_dim.R Normal file
View File

@ -0,0 +1,45 @@
#' @rdname predict.dim.cve
#' @method predict.dim cve
#' @alias predict.dim.cve
#' @export
predict.dim <- function(object, ...) {
UseMethod("predict.dim")
}
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#' \code{cve.call}).
#' @param ... ignored.
#' @method predict.dim cve
#' @export
predict.dim.cve <- function(object, ...) {
# Get centered training data and dimensions
X <- scale(object$X, center = TRUE, scale = FALSE)
n <- nrow(object$X) # umber of training data samples
Sigma <- (1 / n) * crossprod(X, X)
eig <- eigen(Sigma)
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
X <- X %*% solve(Sigma_root)
pred <- matrix(0, n, length(object$res))
colnames(pred) <- names(object$res)
for (dr.k in object$res) {
# get "name" of current dimension
k <- as.character(dr.k$k)
# Project dataset with current SDR basis
X.proj <- X %*% dr.k$B
for (i in 1:n) {
model <- mda::mars(X.proj[-i, ], object$Y[-i])
pred[i, k] <- predict(model, X.proj[i, , drop = F])
}
}
MSE <- colMeans((pred - object$Y)^2)
return(list(
MSE = MSE,
k = as.integer(names(which.min(MSE)))
))
}

32
CVE_C/R/summary.R Normal file
View File

@ -0,0 +1,32 @@
#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#' @param ... ignored.
#' @method summary cve
#' @export
summary.cve <- function(object, ...) {
cat('Summary of CVE result - Method: "', object$method, '"\n',
'\n',
'Dataset size: ', nrow(object$X), '\n',
'Data Dimension: ', ncol(object$X), '\n',
# 'SDR Dimension: ', object$k, '\n',
# 'loss: ', object$loss, '\n',
'\n',
'Called via:\n',
' ',
sep='')
print(object$call)
L <- c()
k <- c()
for (dr.k in object$res) {
if (class(dr.k) == 'cve.k') {
k <- c(k, as.character(dr.k$k))
L <- c(L, dr.k$L)
}
}
L <- matrix(L, ncol = length(k))
S <- apply(L, 2, summary)
colnames(S) <- k
cat('\n')
print(S)
}

View File

@ -1,21 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
\name{basis.cve}
\alias{basis.cve}
\alias{basis}
\title{Gets estimated SDR basis.}
\usage{
\method{basis}{cve}(dr, k)
}
\arguments{
\item{dr}{Instance of 'cve' as returned by \code{cve}.}
\item{k}{SDR dimension of requested basis, if not given a list of all
computed basis is returned.}
}
\value{
List of basis matrices, or the SDR basis for supplied dimension `k`.
}
\description{
Gets estimated SDR basis.
}

29
CVE_C/man/coef.cve.Rd Normal file
View File

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/coef.R
\name{coef.cve}
\alias{coef.cve}
\title{Gets estimated SDR basis.}
\usage{
\method{coef}{cve}(object, k, ...)
}
\arguments{
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or
\code{\link{cve.call}}}
\item{k}{the SDR dimension.}
\item{...}{ignored.}
}
\value{
dir the matrix of CS or CMS of given dimension
}
\description{
Returns the SDR basis matrix for SDR dimension(s).
}
\examples{
x <- matrix(rnorm(400),100,4)
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2
B2 <- coef(dr, 2)
}

View File

@ -10,7 +10,7 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
epochs = 50L, attempts = 10L, logger = NULL)
max.iter = 50L, attempts = 10L, logger = NULL)
}
\arguments{
\item{formula}{an object of class \code{"formula"} which is a symbolic
@ -37,16 +37,30 @@ supplied.}
\item{nObs}{parameter for choosing bandwidth \code{h} using
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
\item{h}{bandwidth or function to estimate bandwidth, defaults to internaly
estimated bandwidth.}
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
only the specified dimension \code{B} matrix is estimated.}
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
gradient update with a momentum term.}
\item{tau}{Initial step-size.}
\item{tol}{Tolerance for break condition.}
\item{epochs}{maximum number of optimization steps.}
\item{slack}{Positive scaling to allow small increases of the loss while
optimizing.}
\item{gamma}{step-size reduction multiple.}
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
set to 1 and \code{k} to match dimension)}
\item{max.iter}{maximum number of optimization steps.}
\item{attempts}{number of arbitrary different starting points.}
@ -61,7 +75,7 @@ dr is a S3 object of class \code{cve} with named properties:
\item method: Name of used method,
\item call: The method call
}
as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR
as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
projection matrices.
dr is a list which contains:
@ -79,22 +93,17 @@ dr is a list which contains:
TODO: reuse of package description and details!!!!
}
\examples{
library(CVE)
# create dataset
n <- 200
p <- 12
X <- matrix(rnorm(n * p), n, p)
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
Y <- X \%*\% B
Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
x <- matrix(rnorm(400), 100, 4)
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
# Call the CVE method.
dr <- cve(Y ~ X)
(B <- basis(dr, 2))
# Call CVE using momentum.
dr.momentum <- cve(y ~ x, momentum = 0.2)
# Call weighted CVE.
dr.weighted <- cve(y ~ x, method = "weighted")
}
\seealso{
For a detailed description of \code{formula} see
[\code{\link{formula}}].
\code{\link{formula}}.
}

View File

@ -11,6 +11,8 @@ dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
\item{n}{nr samples}
\item{B}{SDR basis used for dataset creation if supplied.}
\item{p.mix}{Only for \code{"M4"}, see: below.}
\item{lambda}{Only for \code{"M4"}, see: below.}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
% Please edit documentation in R/directions.R
\name{directions.cve}
\alias{directions.cve}
\alias{directions}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
% Please edit documentation in R/plot.R
\name{plot.cve}
\alias{plot.cve}
\title{Loss distribution elbow plot.}

View File

@ -1,30 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
% Please edit documentation in R/predict.R
\name{predict.cve}
\alias{predict.cve}
\title{Predict method for CVE Fits.}
\usage{
\method{predict}{cve}(object, X.new, dim = NULL, ...)
\method{predict}{cve}(object, newdata, dim, ...)
}
\arguments{
\item{object}{instance of class \code{cve} (result of \code{cve},
\code{cve.call}).}
\item{X.new}{Matrix of the new data to be predicted.}
\item{newdata}{Matrix of the new data to be predicted.}
\item{dim}{dimension of SDR space to be used for data projecition.}
\item{...}{further arguments passed to \code{\link{mars}}.}
}
\value{
prediced response of data \code{X.new}.
prediced response of data \code{newdata}.
}
\description{
Predict responces using reduced data with \code{\link{mars}}.
}
\examples{
TODO:
}
\seealso{
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.

View File

@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict_dim.R
\name{predict.dim}
\alias{predict.dim}
\alias{predict.dim.cve}
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
\usage{
\method{predict.dim}{cve}(object, ...)
\method{predict.dim}{cve}(object, ...)
}
\arguments{
\item{object}{instance of class \code{cve} (result of \code{cve},
\code{cve.call}).}
\item{...}{ignored.}
}
\description{
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CVE.R
% Please edit documentation in R/summary.R
\name{summary.cve}
\alias{summary.cve}
\title{Prints a summary of a \code{cve} result.}
@ -8,6 +8,8 @@
}
\arguments{
\item{object}{Instance of 'cve' as returned by \code{cve}.}
\item{...}{ignored.}
}
\description{
Prints a summary of a \code{cve} result.

View File

@ -1,3 +1,7 @@
# # For OpenMP support.
# # Turned OFF for supporting all platforms
# PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
# PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CFLAGS)
SHLIB_OPENMP_CFLAGS=
SHLIB_OPENMP_CXXFLAGS=

View File

@ -11,16 +11,16 @@ void cve_sub(const int n, const int p, const int q,
const double momentum,
const double tau_init, const double tol_init,
const double slack, const double gamma,
const int epochs, const int attempts,
const int maxIter, const int attempts,
double *V, double *L,
SEXP logger, SEXP loggerEnv) {
int attempt = 0, epoch, i, nn = (n * (n - 1)) / 2;
int attempt = 0, iter, i, nn = (n * (n - 1)) / 2;
double loss, loss_last, loss_best, err, tau;
double tol = tol_init * sqrt((double)(2 * q));
double gKscale = -0.5 / h;
double agility = -2.0 * (1.0 - momentum) / (h * h);
double c;
double c = agility / (double)n;
/* Create further intermediate or internal variables. */
double *Q = (double*)R_alloc(p * p, sizeof(double));
@ -100,21 +100,20 @@ void cve_sub(const int n, const int p, const int q,
if (method == CVE_METHOD_WEIGHTED) {
/* Compute summ of all kernel applied distances by summing the
* colSums of the kernel matrix. */
c = -(double)n; // to scale with sum(K) - n
for (i = 0; i < n; ++i) {
c += colSums[i];
}
// c = -(double)n; // to scale with sum(K) - n
// for (i = 0; i < n; ++i) {
// c += colSums[i];
// }
// TODO: check for division by zero, but should not happen!!!
} else {
c = n; // TODO: move (init) up cause always the same ^^ ...
c = agility / (sum(colSums, n) - (double)n);
}
scale(agility / c, G, p * q); // in-place
scale(c, G, p * q); // in-place
/* Compute Skew-Symmetric matrix `A` used in Cayley transform.
* `A <- tau * (G V^T - V G^T) + 0 * A`*/
skew(p, q, tau, G, V, 0.0, A);
for (epoch = 0; epoch < epochs; ++epoch) {
for (iter = 0; iter < maxIter; ++iter) {
/* Move V allong A */
cayleyTransform(p, q, A, V, V_tau, workMem);
@ -156,12 +155,12 @@ void cve_sub(const int n, const int p, const int q,
if (logger) {
callLogger(logger, loggerEnv,
attempt, epoch + 1,
attempt, iter + 1,
L, n, V, p, q, tau);
}
// Check Break condition.
if (err < tol || epoch + 1 >= epochs) {
if (err < tol || iter + 1 >= maxIter) {
break;
}
@ -179,15 +178,9 @@ void cve_sub(const int n, const int p, const int q,
if (method == CVE_METHOD_WEIGHTED) {
/* Compute summ of all kernel applied distances by summing the
* colSums of the kernel matrix. */
c = -(double)n; // to scale with sum(K) - n
for (i = 0; i < n; ++i) {
c += colSums[i];
}
c = agility / c;
* colSums of the kernel matrix. */
// TODO: check for division by zero, but should not happen!!!
} else {
c = agility / n;
c = agility / (sum(colSums, n) - (double)n);
}
F77_NAME(dgemm)("N", "N", &p, &q, &p,
&c, workMem, &p, V, &p,

View File

@ -31,7 +31,7 @@ void cve_sub(const int n, const int p, const int q,
const double momentum,
const double tau_init, const double tol_init,
const double slack, const double gamma,
const int epochs, int attempts,
const int maxIter, int attempts,
double *V, double *L,
SEXP logger, SEXP loggerEnv);
@ -61,6 +61,8 @@ void rStiefel(const int p, const int q, double *V,
double *workMem, int workLen);
/* MATRIX */
double sum(const double *A, const int nelem);
double norm(const double *A, const int nrow, const int ncol,
const char *type);

View File

@ -20,7 +20,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
SEXP V, // initial
SEXP momentum, SEXP tau, SEXP tol,
SEXP slack, SEXP gamma,
SEXP epochs, SEXP attempts,
SEXP maxIter, SEXP attempts,
SEXP logger, SEXP loggerEnv) {
/* Handle logger parameter, set to NULL pointer if not a function. */
if (!(isFunction(logger) && isEnvironment(loggerEnv))) {
@ -53,7 +53,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
asInteger(method),
asReal(momentum), asReal(tau), asReal(tol),
asReal(slack), asReal(gamma),
asInteger(epochs), asInteger(attempts),
asInteger(maxIter), asInteger(attempts),
REAL(Vout), REAL(Lout),
logger, loggerEnv);

View File

@ -9,7 +9,7 @@ extern SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
SEXP V, // initial
SEXP momentum, SEXP tau, SEXP tol,
SEXP slack, SEXP gamma,
SEXP epochs, SEXP attempts,
SEXP maxIter, SEXP attempts,
SEXP logger, SEXP loggerEnv);
static const R_CallMethodDef CallEntries[] = {

View File

@ -10,6 +10,23 @@
// return newMat;
// }
double sum(const double *A, const int nelem) {
int i, nelemb = (nelem / 4) * 4;
double sum = 0.0;
for (i = 0; i < nelemb; i += 4) {
sum += A[i]
+ A[i + 1]
+ A[i + 2]
+ A[i + 3];
}
for (; i < nelem; ++i) {
sum += A[i];
}
return sum;
}
double norm(const double *A, const int nrow, const int ncol,
const char *type) {
int i, nelem = nrow * ncol;

View File

@ -1 +0,0 @@
GDPv2, GDPv3, MIT ????

100
README.md
View File

@ -10,6 +10,7 @@ Doc:
- [x] Ref paper in doc
- [ ] Data set descriptions and augmentations.
- [x] Demonstration of the `Logger` function usage (Demo file or so, ...)
- [ ] Update Paper (to new version / version consistent with current code!)
Methods to be implemented:
- [x] simple
@ -28,21 +29,56 @@ Features (functions):
- [x] Initial `V.init` parameter (only ONE try, ignore number of `attempts` parameter)
- [x] `basis.cve` list of estimated `B`s (with `k` supplied, only `B`)
- [x] `directions.cve` Projected `X` given `k`
- [ ] `predict.cve` using `mars` for predicting responses given new data.
- [ ] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
- [x] `predict.cve` using `mars` for predicting responses given new data.
- [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
- [x] `plot.elbow`
- [x] `summary`
Changes:
- [-] New `estimate.bandwidth` implementation.
- [x] New `estimate.bandwidth` implementation.
(h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2,
\Sigma = 1/n * (X-mean)'(X-mean))
# Development
## Build and install.
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file.
```R
setwd("./CVE_R") # Set path to the package root.
library(devtools) # Load required `devtools` package.
document() # Create `.Rd` files and write `NAMESPACE`.
```
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
```bash
R CMD build CVE_C; R CMD INSTALL CVE_0.2.tar.gz
```
Then we are ready for using the package.
As well as building the `NAMESPACE` and `*.Rd` files using `devtools` (`roxygen2`) the following resembles an entire build pipeline including checks.
```bash
R -q -e 'library(devtools); setwd("CVE_C"); pkgbuild::compile_dll(); document(); pkgbuild::clean_dll()'
R CMD build CVE_C; R CMD check CVE_0.2.tar.gz;
R CMD INSTALL CVE_0.2.tar.gz
```
## Build and install from within `R`.
An alternative approach is the following.
```R
## Installing CVE (C implementation)
(setwd('~/Projects/CVE/CVE_C'))
# equiv to Rcpp::compileAttributes().
library(devtools)
pkgbuild::compile_dll() # required for packages with C/C++ code
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
pkgbuild::clean_dll()
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
library(CVE)
```
**Note: I only recommend this approach during development.**
# Package Structure
## Demos
A demo is an `.R` file that lives in `demo/`. Demos are like examples but tend to
be longer. Instead of focussing on a single function, they show how to weave
be longer. Instead of focusing on a single function, they show how to weave
together multiple functions to solve a problem.
You list and access demos with `demo()`:
@ -57,7 +93,7 @@ The demo name is the name of the file without the extension,
e.g. `demo/runtime_test.R` becomes `runtime_test`.
By default the demo ask for human input for each plot: "Hit to see next plot".
This behaviour can be overridden by adding `devAskNewPage(ask = FALSE)` to
This behavior can be overridden by adding `devAskNewPage(ask = FALSE)` to
the demo file. You can add pauses by adding:
`readline("press any key to continue")`.
@ -71,9 +107,9 @@ Using the Linux `grep` program with the parameters `-rnw` and specifying a inclu
```bash
grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
```
searches in all `C` source and header fils as well as `R` source files for the term _sweep_.
searches in all `C` source and header files as well as `R` source files for the term _sweep_.
## Recursive dir. compair with colored sructure (more or less).
## Recursive directory compare with colored structure (more or less).
```bash
diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)"
```
@ -102,41 +138,9 @@ echo dry_run=$dry_run
echo stack_size=$stack_size
```
# Development
## Build and install.
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
```R
setwd("./CVE_R") # Set path to the package root.
library(devtools) # Load required `devtools` package.
document() # Create `.Rd` files and write `NAMESPACE`.
```
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
```bash
R CMD build CVE_R
R CMD INSTALL CVE_0.1.tar.gz
```
Then we are ready for using the package.
```R
library(CVE)
help(package = "CVE")
```
## Build and install from within `R`.
An alternative approach is the following.
```R
setwd('./CVE_R')
getwd()
library(devtools)
document()
# No vignettes to build but "inst/doc/" is required!
(path <- build(vignettes = FALSE))
install.packages(path, repos = NULL, type = "source")
```
**Note: I only recommend this approach during development.**
# Analysing
# Analysis
## Logging (a `cve` run).
To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the step size one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
```R
library(CVE)
@ -184,7 +188,7 @@ matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
```
## Reading log files.
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
The run-time tests (upcoming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actually `TSV`) with a header storing the test results. Depending on the test the files may contain different data. As an example we use the run-time test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analyzing the data see the following example.
```R
# Load log as `data.frame`
log <- read.csv('tmp/test0.log', sep = '\t')
@ -200,7 +204,7 @@ for (ds.name in paste0('M', seq(5))) {
## Environments and variable lookup.
In the following a view simple examples of how `R` searches for variables.
In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
In addition we manipulate function closures to alter the search path in variable lookup and outer scope variable manipulation.
```R
droids <- "These aren't the droids you're looking for."
@ -225,7 +229,7 @@ jedi.seeks()
# [1] "R2-D2", "C-3PO"
```
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
The next example illustrates how to write (without local copies) to variables outside the functions local environment.
```R
counting <- function() {
count <<- count + 1 # Note the `<<-` assignment.
@ -255,7 +259,7 @@ counting <- function() {
})()
```
Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`).
Another example for the usage of `do.call` where the evaluation of parameters is illustrated (example taken (and altered) from `?do.call`).
```R
## examples of where objects will be found.
A <- "A.Global"
@ -373,7 +377,7 @@ microbenchmark(
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
```
### Scaled `crossprod` with matmul order.
### Scaled `crossprod` with matrix multiplication order.
```R
(n <- 200)
(p <- 12)
@ -424,7 +428,7 @@ microbenchmark(
```
## Using `Rprof()` for performance.
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
The standard method for profiling where an algorithm is spending its time is with `Rprof()`.
```R
path <- '../tmp/R.prof' # path to profiling file
Rprof(path)
@ -432,4 +436,4 @@ cve.res <- cve.call(X, Y, k = k)
Rprof(NULL)
(prof <- summaryRprof(path)) # Summarise results
```
**Note: considure to run `gc()` before measuring**, aka cleaning up by explicitely calling the garbage collector.
**Note: consider to run `gc()` before measuring**, aka cleaning up by explicitly calling the garbage collector.

14
test.R
View File

@ -5,12 +5,12 @@ if (length(args) > 0L) {
} else {
method <- "simple"
}
if (length((args) > 1L)) {
if (length(args) > 1L) {
momentum <- as.double(args[2])
} else {
momentum <- 0.0
}
epochs <- 50L
max.iter <- 50L
attempts <- 25L
# library(CVEpureR)
@ -55,14 +55,14 @@ for (name in paste0("M", seq(5))) {
# Setup histories.
V_last <- NULL
loss.history <- matrix(NA, epochs + 1, attempts)
error.history <- matrix(NA, epochs + 1, attempts)
tau.history <- matrix(NA, epochs + 1, attempts)
true.error.history <- matrix(NA, epochs + 1, attempts)
loss.history <- matrix(NA, max.iter + 1, attempts)
error.history <- matrix(NA, max.iter + 1, attempts)
tau.history <- matrix(NA, max.iter + 1, attempts)
true.error.history <- matrix(NA, max.iter + 1, attempts)
dr <- cve(Y ~ X, k = k, method = method,
momentum = momentum,
epochs = epochs, attempts = attempts,
max.iter = max.iter, attempts = attempts,
logger = logger)
# Plot history's