fix: R CMD check errors and warnings,
change: extracted util functions into seperate files, add: C's sum funciton
This commit is contained in:
parent
063c4d638b
commit
875982a010
|
@ -8,4 +8,5 @@ Maintainer: Daniel Kapla <daniel@kapla.at>
|
||||||
Description: Implementation of the Conditional Variance Estimation (CVE) method.
|
Description: Implementation of the Conditional Variance Estimation (CVE) method.
|
||||||
License: GPL-3
|
License: GPL-3
|
||||||
Encoding: UTF-8
|
Encoding: UTF-8
|
||||||
|
Imports: stats,graphics,mda
|
||||||
RoxygenNote: 6.1.1
|
RoxygenNote: 6.1.1
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
S3method(basis,cve)
|
S3method(coef,cve)
|
||||||
S3method(directions,cve)
|
S3method(directions,cve)
|
||||||
S3method(plot,cve)
|
S3method(plot,cve)
|
||||||
S3method(predict,cve)
|
S3method(predict,cve)
|
||||||
S3method(predict.dim,cve)
|
S3method(predict.dim,cve)
|
||||||
S3method(summary,cve)
|
S3method(summary,cve)
|
||||||
export(basis)
|
|
||||||
export(cve)
|
export(cve)
|
||||||
export(cve.call)
|
export(cve.call)
|
||||||
export(dataset)
|
export(dataset)
|
||||||
|
@ -14,16 +13,17 @@ export(directions)
|
||||||
export(elem.pairs)
|
export(elem.pairs)
|
||||||
export(estimate.bandwidth)
|
export(estimate.bandwidth)
|
||||||
export(null)
|
export(null)
|
||||||
export(predict.dim)
|
|
||||||
export(projTangentStiefel)
|
export(projTangentStiefel)
|
||||||
export(rStiefel)
|
export(rStiefel)
|
||||||
export(retractStiefel)
|
export(retractStiefel)
|
||||||
export(skew)
|
export(skew)
|
||||||
export(sym)
|
export(sym)
|
||||||
import(stats)
|
import(stats)
|
||||||
|
importFrom(graphics,boxplot)
|
||||||
importFrom(graphics,lines)
|
importFrom(graphics,lines)
|
||||||
importFrom(graphics,plot)
|
importFrom(graphics,plot)
|
||||||
importFrom(graphics,points)
|
importFrom(graphics,points)
|
||||||
|
importFrom(mda,mars)
|
||||||
importFrom(stats,model.frame)
|
importFrom(stats,model.frame)
|
||||||
importFrom(stats,rbinom)
|
importFrom(stats,rbinom)
|
||||||
importFrom(stats,rnorm)
|
importFrom(stats,rnorm)
|
||||||
|
|
235
CVE_C/R/CVE.R
235
CVE_C/R/CVE.R
|
@ -56,26 +56,21 @@
|
||||||
#' \item method: Name of used method,
|
#' \item method: Name of used method,
|
||||||
#' \item call: The method call
|
#' \item call: The method call
|
||||||
#' }
|
#' }
|
||||||
#' as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR
|
#' as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
|
||||||
#' projection matrices.
|
#' projection matrices.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' library(CVE)
|
|
||||||
#'
|
|
||||||
#' # create dataset
|
#' # create dataset
|
||||||
#' n <- 200
|
#' x <- matrix(rnorm(400), 100, 4)
|
||||||
#' p <- 12
|
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||||
#' X <- matrix(rnorm(n * p), n, p)
|
|
||||||
#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
|
|
||||||
#' Y <- X %*% B
|
|
||||||
#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
|
|
||||||
#'
|
#'
|
||||||
#' # Call the CVE method.
|
#' # Call CVE using momentum.
|
||||||
#' dr <- cve(Y ~ X)
|
#' dr.momentum <- cve(y ~ x, momentum = 0.2)
|
||||||
#' (B <- basis(dr, 2))
|
#' # Call weighted CVE.
|
||||||
|
#' dr.weighted <- cve(y ~ x, method = "weighted")
|
||||||
#'
|
#'
|
||||||
#' @seealso For a detailed description of \code{formula} see
|
#' @seealso For a detailed description of \code{formula} see
|
||||||
#' [\code{\link{formula}}].
|
#' \code{\link{formula}}.
|
||||||
#' @export
|
#' @export
|
||||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
# check for type of `data` if supplied and set default
|
# check for type of `data` if supplied and set default
|
||||||
|
@ -108,10 +103,19 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
|
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
|
||||||
#' @param tau Initial step-size.
|
#' @param tau Initial step-size.
|
||||||
#' @param tol Tolerance for break condition.
|
#' @param tol Tolerance for break condition.
|
||||||
#' @param epochs maximum number of optimization steps.
|
#' @param max.iter maximum number of optimization steps.
|
||||||
#' @param attempts number of arbitrary different starting points.
|
#' @param attempts number of arbitrary different starting points.
|
||||||
#' @param logger a logger function (only for advanced user, significantly slows
|
#' @param logger a logger function (only for advanced user, significantly slows
|
||||||
#' down the computation).
|
#' down the computation).
|
||||||
|
#' @param h bandwidth or function to estimate bandwidth, defaults to internaly
|
||||||
|
#' estimated bandwidth.
|
||||||
|
#' @param momentum number of [0, 1) giving the ration of momentum for eucledian
|
||||||
|
#' gradient update with a momentum term.
|
||||||
|
#' @param slack Positive scaling to allow small increases of the loss while
|
||||||
|
#' optimizing.
|
||||||
|
#' @param gamma step-size reduction multiple.
|
||||||
|
#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
|
||||||
|
#' set to 1 and \code{k} to match dimension)
|
||||||
#'
|
#'
|
||||||
#' @return dr is a list which contains:
|
#' @return dr is a list which contains:
|
||||||
#' \itemize{
|
#' \itemize{
|
||||||
|
@ -132,7 +136,7 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
||||||
slack = 0.0, gamma = 0.5,
|
slack = 0.0, gamma = 0.5,
|
||||||
V.init = NULL,
|
V.init = NULL,
|
||||||
epochs = 50L, attempts = 10L,
|
max.iter = 50L, attempts = 10L,
|
||||||
logger = NULL) {
|
logger = NULL) {
|
||||||
# get method bitmask
|
# get method bitmask
|
||||||
methods <- list(
|
methods <- list(
|
||||||
|
@ -230,13 +234,13 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
gamma <- as.double(gamma)
|
gamma <- as.double(gamma)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is.numeric(epochs) || length(epochs) > 1L) {
|
if (!is.numeric(max.iter) || length(max.iter) > 1L) {
|
||||||
stop("Parameter 'epochs' must be positive integer.")
|
stop("Parameter 'max.iter' must be positive integer.")
|
||||||
} else if (!is.integer(epochs)) {
|
} else if (!is.integer(max.iter)) {
|
||||||
epochs <- as.integer(epochs)
|
max.iter <- as.integer(max.iter)
|
||||||
}
|
}
|
||||||
if (epochs < 1L) {
|
if (max.iter < 1L) {
|
||||||
stop("Parameter 'epochs' must be at least 1L.")
|
stop("Parameter 'max.iter' must be at least 1L.")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(V.init)) {
|
if (is.null(V.init)) {
|
||||||
|
@ -273,7 +277,7 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
V.init,
|
V.init,
|
||||||
momentum, tau, tol,
|
momentum, tau, tol,
|
||||||
slack, gamma,
|
slack, gamma,
|
||||||
epochs, attempts,
|
max.iter, attempts,
|
||||||
logger, loggerEnv)
|
logger, loggerEnv)
|
||||||
|
|
||||||
dr.k$B <- null(dr.k$V)
|
dr.k$B <- null(dr.k$V)
|
||||||
|
@ -292,190 +296,3 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
class(dr) <- "cve"
|
class(dr) <- "cve"
|
||||||
return(dr)
|
return(dr)
|
||||||
}
|
}
|
||||||
|
|
||||||
#' Loss distribution elbow plot.
|
|
||||||
#'
|
|
||||||
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
|
|
||||||
#'
|
|
||||||
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
|
|
||||||
#' @param ... Pass through parameters to [\code{\link{plot}}] and
|
|
||||||
#' [\code{\link{lines}}]
|
|
||||||
#'
|
|
||||||
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
|
||||||
#' as well as \code{\link{plot}}, the standard plot utility.
|
|
||||||
#' @method plot cve
|
|
||||||
#' @importFrom graphics plot lines points
|
|
||||||
#' @export
|
|
||||||
plot.cve <- function(x, ...) {
|
|
||||||
L <- c()
|
|
||||||
k <- c()
|
|
||||||
for (dr.k in x$res) {
|
|
||||||
if (class(dr.k) == 'cve.k') {
|
|
||||||
k <- c(k, as.character(dr.k$k))
|
|
||||||
L <- c(L, dr.k$L)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
L <- matrix(L, ncol = length(k)) / var(x$Y)
|
|
||||||
boxplot(L, main = "elbow plot",
|
|
||||||
xlab = "SDR dimension",
|
|
||||||
ylab = "Sample loss distribution",
|
|
||||||
names = k)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Prints a summary of a \code{cve} result.
|
|
||||||
#' @param object Instance of 'cve' as returned by \code{cve}.
|
|
||||||
#' @method summary cve
|
|
||||||
#' @export
|
|
||||||
summary.cve <- function(object, ...) {
|
|
||||||
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
|
||||||
'\n',
|
|
||||||
'Dataset size: ', nrow(object$X), '\n',
|
|
||||||
'Data Dimension: ', ncol(object$X), '\n',
|
|
||||||
# 'SDR Dimension: ', object$k, '\n',
|
|
||||||
# 'loss: ', object$loss, '\n',
|
|
||||||
'\n',
|
|
||||||
'Called via:\n',
|
|
||||||
' ',
|
|
||||||
sep='')
|
|
||||||
print(object$call)
|
|
||||||
|
|
||||||
L <- c()
|
|
||||||
k <- c()
|
|
||||||
for (dr.k in object$res) {
|
|
||||||
if (class(dr.k) == 'cve.k') {
|
|
||||||
k <- c(k, as.character(dr.k$k))
|
|
||||||
L <- c(L, dr.k$L)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
L <- matrix(L, ncol = length(k))
|
|
||||||
S <- apply(L, 2, summary)
|
|
||||||
colnames(S) <- k
|
|
||||||
cat('\n')
|
|
||||||
print(S)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @export
|
|
||||||
directions <- function(dr, k) {
|
|
||||||
UseMethod("directions")
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Computes projected training data \code{X} for given dimension `k`.
|
|
||||||
#'
|
|
||||||
#' @param dr Instance of 'cve' as returned by \code{cve}.
|
|
||||||
#' @param k SDR dimension to use for projection.
|
|
||||||
#'
|
|
||||||
#' @method directions cve
|
|
||||||
#' @aliases directions directions.cve
|
|
||||||
#' @export
|
|
||||||
directions.cve <- function(dr, k) {
|
|
||||||
if (!(k %in% names(dr$res))) {
|
|
||||||
stop("SDR directions for requested dimension `k` not computed.")
|
|
||||||
}
|
|
||||||
return(dr$X %*% dr$res[[as.character(k)]]$B)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @export
|
|
||||||
basis <- function(dr, k) {
|
|
||||||
UseMethod("basis")
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Gets estimated SDR basis.
|
|
||||||
#'
|
|
||||||
#' @param dr Instance of 'cve' as returned by \code{cve}.
|
|
||||||
#' @param k SDR dimension of requested basis, if not given a list of all
|
|
||||||
#' computed basis is returned.
|
|
||||||
#'
|
|
||||||
#' @return List of basis matrices, or the SDR basis for supplied dimension `k`.
|
|
||||||
#'
|
|
||||||
#' @method basis cve
|
|
||||||
#' @aliases basis basis.cve
|
|
||||||
#' @export
|
|
||||||
basis.cve <- function(dr, k) {
|
|
||||||
if (missing(k)) {
|
|
||||||
Bs <- list()
|
|
||||||
for (k in names(dr$res)) {
|
|
||||||
Bs[[k]] <- dr$res[[k]]$B
|
|
||||||
}
|
|
||||||
return(Bs)
|
|
||||||
} else if (k %in% names(dr$res)) {
|
|
||||||
return(dr$res[[as.character(k)]]$B)
|
|
||||||
} else {
|
|
||||||
stop("Requested dimenion `k` not computed.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#' Predict method for CVE Fits.
|
|
||||||
#'
|
|
||||||
#' Predict responces using reduced data with \code{\link{mars}}.
|
|
||||||
#'
|
|
||||||
#' @param object instance of class \code{cve} (result of \code{cve},
|
|
||||||
#' \code{cve.call}).
|
|
||||||
#' @param X.new Matrix of the new data to be predicted.
|
|
||||||
#' @param dim dimension of SDR space to be used for data projecition.
|
|
||||||
#' @param ... further arguments passed to \code{\link{mars}}.
|
|
||||||
#'
|
|
||||||
#' @return prediced response of data \code{X.new}.
|
|
||||||
#'
|
|
||||||
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' TODO:
|
|
||||||
#'
|
|
||||||
#' @aliases predict.cve
|
|
||||||
#' @rdname predict.cve
|
|
||||||
#'
|
|
||||||
#' @method predict cve
|
|
||||||
#' @export
|
|
||||||
predict.cve <- function(object, X.new, dim = NULL, ...) {
|
|
||||||
library(mda)
|
|
||||||
|
|
||||||
if (!is.matrix(X.new)) {
|
|
||||||
X.new <- matrix(X.new, nrow = 1L)
|
|
||||||
}
|
|
||||||
|
|
||||||
B <- dr$res[[as.character(dim)]]$B
|
|
||||||
|
|
||||||
model <- mars(object$X %*% B, object$Y)
|
|
||||||
predict(model, X.new %*% B)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @export
|
|
||||||
predict.dim <- function(dr) {
|
|
||||||
UseMethod("predict.dim")
|
|
||||||
}
|
|
||||||
|
|
||||||
#' @method predict.dim cve
|
|
||||||
#' @export
|
|
||||||
predict.dim.cve <- function(dr) {
|
|
||||||
library(mda)
|
|
||||||
|
|
||||||
# Get centered training data and dimensions
|
|
||||||
X <- scale(dr$X, center = TRUE, scale = FALSE)
|
|
||||||
n <- nrow(dr$X) # umber of training data samples
|
|
||||||
Sigma <- (1 / n) * crossprod(X, X)
|
|
||||||
eig <- eigen(Sigma)
|
|
||||||
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
|
|
||||||
X <- X %*% solve(Sigma_root)
|
|
||||||
|
|
||||||
pred <- matrix(0, n, length(dr$res))
|
|
||||||
colnames(pred) <- names(dr$res)
|
|
||||||
for (dr.k in dr$res) {
|
|
||||||
# get "name" of current dimension
|
|
||||||
k <- as.character(dr.k$k)
|
|
||||||
# Project dataset with current SDR basis
|
|
||||||
X.proj <- X %*% dr.k$B
|
|
||||||
|
|
||||||
for (i in 1:n) {
|
|
||||||
model <- mars(X.proj[-i, ], dr$Y[-i])
|
|
||||||
pred[i, k] <- predict(model, X.proj[i, , drop = F])
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
MSE <- colMeans((pred - dr$Y)^2)
|
|
||||||
|
|
||||||
return(list(
|
|
||||||
MSE = MSE,
|
|
||||||
k = as.integer(names(which.min(MSE)))
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
#' Gets estimated SDR basis.
|
||||||
|
#'
|
||||||
|
#' Returns the SDR basis matrix for SDR dimension(s).
|
||||||
|
#' @param object instance of \code{cve} as output from \code{\link{cve}} or
|
||||||
|
#' \code{\link{cve.call}}
|
||||||
|
#' @param k the SDR dimension.
|
||||||
|
#' @param ... ignored.
|
||||||
|
#'
|
||||||
|
#' @return dir the matrix of CS or CMS of given dimension
|
||||||
|
#'
|
||||||
|
#' @examples
|
||||||
|
#' x <- matrix(rnorm(400),100,4)
|
||||||
|
#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||||
|
#' dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2
|
||||||
|
#' B2 <- coef(dr, 2)
|
||||||
|
#'
|
||||||
|
#' @method coef cve
|
||||||
|
#' @aliases coef.cve
|
||||||
|
#' @rdname coef.cve
|
||||||
|
#' @export
|
||||||
|
coef.cve <- function(object, k, ...) {
|
||||||
|
if (missing(k)) {
|
||||||
|
Bs <- list()
|
||||||
|
for (k in names(object$res)) {
|
||||||
|
Bs[[k]] <- object$res[[k]]$B
|
||||||
|
}
|
||||||
|
return(Bs)
|
||||||
|
} else if (k %in% names(object$res)) {
|
||||||
|
return(object$res[[as.character(k)]]$B)
|
||||||
|
} else {
|
||||||
|
stop("Requested dimension `k` not computed.")
|
||||||
|
}
|
||||||
|
}
|
|
@ -7,6 +7,7 @@
|
||||||
#'
|
#'
|
||||||
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}
|
||||||
#' @param n nr samples
|
#' @param n nr samples
|
||||||
|
#' @param B SDR basis used for dataset creation if supplied.
|
||||||
#' @param p Dim. of random variable \code{X}.
|
#' @param p Dim. of random variable \code{X}.
|
||||||
#' @param p.mix Only for \code{"M4"}, see: below.
|
#' @param p.mix Only for \code{"M4"}, see: below.
|
||||||
#' @param lambda Only for \code{"M4"}, see: below.
|
#' @param lambda Only for \code{"M4"}, see: below.
|
||||||
|
@ -64,11 +65,11 @@ dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) {
|
||||||
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
|
B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
p <- dim(B)[1]
|
p <- nrow(B)
|
||||||
# validate col. nr to match dataset `k = dim(B)[2]`
|
# validate col. nr to match dataset `k = ncol(B)`
|
||||||
stopifnot(
|
stopifnot(
|
||||||
name %in% c("M1", "M2") && dim(B)[2] == 2,
|
name %in% c("M1", "M2") && ncol(B) == 2,
|
||||||
name %in% c("M3", "M4", "M5") && dim(B)[2] == 1
|
name %in% c("M3", "M4", "M5") && ncol(B) == 1
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
#' @export
|
||||||
|
directions <- function(dr, k) {
|
||||||
|
UseMethod("directions")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Computes projected training data \code{X} for given dimension `k`.
|
||||||
|
#'
|
||||||
|
#' @param dr Instance of 'cve' as returned by \code{cve}.
|
||||||
|
#' @param k SDR dimension to use for projection.
|
||||||
|
#'
|
||||||
|
#' @method directions cve
|
||||||
|
#' @aliases directions directions.cve
|
||||||
|
#' @export
|
||||||
|
directions.cve <- function(dr, k) {
|
||||||
|
if (!(k %in% names(dr$res))) {
|
||||||
|
stop("SDR directions for requested dimension `k` not computed.")
|
||||||
|
}
|
||||||
|
return(dr$X %*% dr$res[[as.character(k)]]$B)
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
#' Loss distribution elbow plot.
|
||||||
|
#'
|
||||||
|
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
|
||||||
|
#'
|
||||||
|
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
|
||||||
|
#' @param ... Pass through parameters to [\code{\link{plot}}] and
|
||||||
|
#' [\code{\link{lines}}]
|
||||||
|
#'
|
||||||
|
#' @seealso see \code{\link{par}} for graphical parameters to pass through
|
||||||
|
#' as well as \code{\link{plot}}, the standard plot utility.
|
||||||
|
#' @method plot cve
|
||||||
|
#' @importFrom graphics plot lines points boxplot
|
||||||
|
#' @export
|
||||||
|
plot.cve <- function(x, ...) {
|
||||||
|
L <- c()
|
||||||
|
k <- c()
|
||||||
|
for (dr.k in x$res) {
|
||||||
|
if (class(dr.k) == 'cve.k') {
|
||||||
|
k <- c(k, as.character(dr.k$k))
|
||||||
|
L <- c(L, dr.k$L)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L <- matrix(L, ncol = length(k)) / var(x$Y)
|
||||||
|
boxplot(L, main = "elbow plot",
|
||||||
|
xlab = "SDR dimension",
|
||||||
|
ylab = "Sample loss distribution",
|
||||||
|
names = k)
|
||||||
|
}
|
|
@ -0,0 +1,36 @@
|
||||||
|
#' Predict method for CVE Fits.
|
||||||
|
#'
|
||||||
|
#' Predict responces using reduced data with \code{\link{mars}}.
|
||||||
|
#'
|
||||||
|
#' @param object instance of class \code{cve} (result of \code{cve},
|
||||||
|
#' \code{cve.call}).
|
||||||
|
#' @param newdata Matrix of the new data to be predicted.
|
||||||
|
#' @param dim dimension of SDR space to be used for data projecition.
|
||||||
|
#' @param ... further arguments passed to \code{\link{mars}}.
|
||||||
|
#'
|
||||||
|
#' @return prediced response of data \code{newdata}.
|
||||||
|
#'
|
||||||
|
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
|
||||||
|
#'
|
||||||
|
#' @rdname predict.cve
|
||||||
|
#'
|
||||||
|
#' @importFrom mda mars
|
||||||
|
#' @method predict cve
|
||||||
|
#' @export
|
||||||
|
predict.cve <- function(object, newdata, dim, ...) {
|
||||||
|
if (missing(newdata)) {
|
||||||
|
stop("No data supplied.")
|
||||||
|
}
|
||||||
|
if (missing(dim)) {
|
||||||
|
stop("No dimension supplied.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!is.matrix(newdata)) {
|
||||||
|
newdata <- matrix(newdata, nrow = 1L)
|
||||||
|
}
|
||||||
|
|
||||||
|
B <- object$res[[as.character(dim)]]$B
|
||||||
|
|
||||||
|
model <- mda::mars(object$X %*% B, object$Y)
|
||||||
|
predict(model, newdata %*% B)
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
#' @rdname predict.dim.cve
|
||||||
|
#' @method predict.dim cve
|
||||||
|
#' @alias predict.dim.cve
|
||||||
|
#' @export
|
||||||
|
predict.dim <- function(object, ...) {
|
||||||
|
UseMethod("predict.dim")
|
||||||
|
}
|
||||||
|
|
||||||
|
#' Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
|
||||||
|
#'
|
||||||
|
#' @param object instance of class \code{cve} (result of \code{cve},
|
||||||
|
#' \code{cve.call}).
|
||||||
|
#' @param ... ignored.
|
||||||
|
#' @method predict.dim cve
|
||||||
|
#' @export
|
||||||
|
predict.dim.cve <- function(object, ...) {
|
||||||
|
# Get centered training data and dimensions
|
||||||
|
X <- scale(object$X, center = TRUE, scale = FALSE)
|
||||||
|
n <- nrow(object$X) # umber of training data samples
|
||||||
|
Sigma <- (1 / n) * crossprod(X, X)
|
||||||
|
eig <- eigen(Sigma)
|
||||||
|
Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
|
||||||
|
X <- X %*% solve(Sigma_root)
|
||||||
|
|
||||||
|
pred <- matrix(0, n, length(object$res))
|
||||||
|
colnames(pred) <- names(object$res)
|
||||||
|
for (dr.k in object$res) {
|
||||||
|
# get "name" of current dimension
|
||||||
|
k <- as.character(dr.k$k)
|
||||||
|
# Project dataset with current SDR basis
|
||||||
|
X.proj <- X %*% dr.k$B
|
||||||
|
|
||||||
|
for (i in 1:n) {
|
||||||
|
model <- mda::mars(X.proj[-i, ], object$Y[-i])
|
||||||
|
pred[i, k] <- predict(model, X.proj[i, , drop = F])
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
MSE <- colMeans((pred - object$Y)^2)
|
||||||
|
|
||||||
|
return(list(
|
||||||
|
MSE = MSE,
|
||||||
|
k = as.integer(names(which.min(MSE)))
|
||||||
|
))
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
#' Prints a summary of a \code{cve} result.
|
||||||
|
#' @param object Instance of 'cve' as returned by \code{cve}.
|
||||||
|
#' @param ... ignored.
|
||||||
|
#' @method summary cve
|
||||||
|
#' @export
|
||||||
|
summary.cve <- function(object, ...) {
|
||||||
|
cat('Summary of CVE result - Method: "', object$method, '"\n',
|
||||||
|
'\n',
|
||||||
|
'Dataset size: ', nrow(object$X), '\n',
|
||||||
|
'Data Dimension: ', ncol(object$X), '\n',
|
||||||
|
# 'SDR Dimension: ', object$k, '\n',
|
||||||
|
# 'loss: ', object$loss, '\n',
|
||||||
|
'\n',
|
||||||
|
'Called via:\n',
|
||||||
|
' ',
|
||||||
|
sep='')
|
||||||
|
print(object$call)
|
||||||
|
|
||||||
|
L <- c()
|
||||||
|
k <- c()
|
||||||
|
for (dr.k in object$res) {
|
||||||
|
if (class(dr.k) == 'cve.k') {
|
||||||
|
k <- c(k, as.character(dr.k$k))
|
||||||
|
L <- c(L, dr.k$L)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
L <- matrix(L, ncol = length(k))
|
||||||
|
S <- apply(L, 2, summary)
|
||||||
|
colnames(S) <- k
|
||||||
|
cat('\n')
|
||||||
|
print(S)
|
||||||
|
}
|
|
@ -1,21 +0,0 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/CVE.R
|
|
||||||
\name{basis.cve}
|
|
||||||
\alias{basis.cve}
|
|
||||||
\alias{basis}
|
|
||||||
\title{Gets estimated SDR basis.}
|
|
||||||
\usage{
|
|
||||||
\method{basis}{cve}(dr, k)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{dr}{Instance of 'cve' as returned by \code{cve}.}
|
|
||||||
|
|
||||||
\item{k}{SDR dimension of requested basis, if not given a list of all
|
|
||||||
computed basis is returned.}
|
|
||||||
}
|
|
||||||
\value{
|
|
||||||
List of basis matrices, or the SDR basis for supplied dimension `k`.
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
Gets estimated SDR basis.
|
|
||||||
}
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/coef.R
|
||||||
|
\name{coef.cve}
|
||||||
|
\alias{coef.cve}
|
||||||
|
\title{Gets estimated SDR basis.}
|
||||||
|
\usage{
|
||||||
|
\method{coef}{cve}(object, k, ...)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{object}{instance of \code{cve} as output from \code{\link{cve}} or
|
||||||
|
\code{\link{cve.call}}}
|
||||||
|
|
||||||
|
\item{k}{the SDR dimension.}
|
||||||
|
|
||||||
|
\item{...}{ignored.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
dir the matrix of CS or CMS of given dimension
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Returns the SDR basis matrix for SDR dimension(s).
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
x <- matrix(rnorm(400),100,4)
|
||||||
|
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||||
|
dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2
|
||||||
|
B2 <- coef(dr, 2)
|
||||||
|
|
||||||
|
}
|
|
@ -10,7 +10,7 @@ cve(formula, data, method = "simple", max.dim = 10L, ...)
|
||||||
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL,
|
||||||
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
|
min.dim = 1L, max.dim = 10L, k = NULL, momentum = 0, tau = 1,
|
||||||
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
|
tol = 0.001, slack = 0, gamma = 0.5, V.init = NULL,
|
||||||
epochs = 50L, attempts = 10L, logger = NULL)
|
max.iter = 50L, attempts = 10L, logger = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
\item{formula}{an object of class \code{"formula"} which is a symbolic
|
||||||
|
@ -37,16 +37,30 @@ supplied.}
|
||||||
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
\item{nObs}{parameter for choosing bandwidth \code{h} using
|
||||||
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
\code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).}
|
||||||
|
|
||||||
|
\item{h}{bandwidth or function to estimate bandwidth, defaults to internaly
|
||||||
|
estimated bandwidth.}
|
||||||
|
|
||||||
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
|
\item{min.dim}{lower bounds for \code{k}, (ignored if \code{k} is supplied).}
|
||||||
|
|
||||||
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
|
\item{k}{Dimension of lower dimensional projection, if \code{k} is given
|
||||||
only the specified dimension \code{B} matrix is estimated.}
|
only the specified dimension \code{B} matrix is estimated.}
|
||||||
|
|
||||||
|
\item{momentum}{number of [0, 1) giving the ration of momentum for eucledian
|
||||||
|
gradient update with a momentum term.}
|
||||||
|
|
||||||
\item{tau}{Initial step-size.}
|
\item{tau}{Initial step-size.}
|
||||||
|
|
||||||
\item{tol}{Tolerance for break condition.}
|
\item{tol}{Tolerance for break condition.}
|
||||||
|
|
||||||
\item{epochs}{maximum number of optimization steps.}
|
\item{slack}{Positive scaling to allow small increases of the loss while
|
||||||
|
optimizing.}
|
||||||
|
|
||||||
|
\item{gamma}{step-size reduction multiple.}
|
||||||
|
|
||||||
|
\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
|
||||||
|
set to 1 and \code{k} to match dimension)}
|
||||||
|
|
||||||
|
\item{max.iter}{maximum number of optimization steps.}
|
||||||
|
|
||||||
\item{attempts}{number of arbitrary different starting points.}
|
\item{attempts}{number of arbitrary different starting points.}
|
||||||
|
|
||||||
|
@ -61,7 +75,7 @@ dr is a S3 object of class \code{cve} with named properties:
|
||||||
\item method: Name of used method,
|
\item method: Name of used method,
|
||||||
\item call: The method call
|
\item call: The method call
|
||||||
}
|
}
|
||||||
as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR
|
as well as indexed entries \code{dr$res[[k]]} storing the k-dimensional SDR
|
||||||
projection matrices.
|
projection matrices.
|
||||||
|
|
||||||
dr is a list which contains:
|
dr is a list which contains:
|
||||||
|
@ -79,22 +93,17 @@ dr is a list which contains:
|
||||||
TODO: reuse of package description and details!!!!
|
TODO: reuse of package description and details!!!!
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
library(CVE)
|
|
||||||
|
|
||||||
# create dataset
|
# create dataset
|
||||||
n <- 200
|
x <- matrix(rnorm(400), 100, 4)
|
||||||
p <- 12
|
y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
|
||||||
X <- matrix(rnorm(n * p), n, p)
|
|
||||||
B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
|
|
||||||
Y <- X \%*\% B
|
|
||||||
Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
|
|
||||||
|
|
||||||
# Call the CVE method.
|
# Call CVE using momentum.
|
||||||
dr <- cve(Y ~ X)
|
dr.momentum <- cve(y ~ x, momentum = 0.2)
|
||||||
(B <- basis(dr, 2))
|
# Call weighted CVE.
|
||||||
|
dr.weighted <- cve(y ~ x, method = "weighted")
|
||||||
|
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
For a detailed description of \code{formula} see
|
For a detailed description of \code{formula} see
|
||||||
[\code{\link{formula}}].
|
\code{\link{formula}}.
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,8 @@ dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1)
|
||||||
|
|
||||||
\item{n}{nr samples}
|
\item{n}{nr samples}
|
||||||
|
|
||||||
|
\item{B}{SDR basis used for dataset creation if supplied.}
|
||||||
|
|
||||||
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
\item{p.mix}{Only for \code{"M4"}, see: below.}
|
||||||
|
|
||||||
\item{lambda}{Only for \code{"M4"}, see: below.}
|
\item{lambda}{Only for \code{"M4"}, see: below.}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/CVE.R
|
% Please edit documentation in R/directions.R
|
||||||
\name{directions.cve}
|
\name{directions.cve}
|
||||||
\alias{directions.cve}
|
\alias{directions.cve}
|
||||||
\alias{directions}
|
\alias{directions}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/CVE.R
|
% Please edit documentation in R/plot.R
|
||||||
\name{plot.cve}
|
\name{plot.cve}
|
||||||
\alias{plot.cve}
|
\alias{plot.cve}
|
||||||
\title{Loss distribution elbow plot.}
|
\title{Loss distribution elbow plot.}
|
||||||
|
|
|
@ -1,30 +1,26 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/CVE.R
|
% Please edit documentation in R/predict.R
|
||||||
\name{predict.cve}
|
\name{predict.cve}
|
||||||
\alias{predict.cve}
|
\alias{predict.cve}
|
||||||
\title{Predict method for CVE Fits.}
|
\title{Predict method for CVE Fits.}
|
||||||
\usage{
|
\usage{
|
||||||
\method{predict}{cve}(object, X.new, dim = NULL, ...)
|
\method{predict}{cve}(object, newdata, dim, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{instance of class \code{cve} (result of \code{cve},
|
\item{object}{instance of class \code{cve} (result of \code{cve},
|
||||||
\code{cve.call}).}
|
\code{cve.call}).}
|
||||||
|
|
||||||
\item{X.new}{Matrix of the new data to be predicted.}
|
\item{newdata}{Matrix of the new data to be predicted.}
|
||||||
|
|
||||||
\item{dim}{dimension of SDR space to be used for data projecition.}
|
\item{dim}{dimension of SDR space to be used for data projecition.}
|
||||||
|
|
||||||
\item{...}{further arguments passed to \code{\link{mars}}.}
|
\item{...}{further arguments passed to \code{\link{mars}}.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
prediced response of data \code{X.new}.
|
prediced response of data \code{newdata}.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Predict responces using reduced data with \code{\link{mars}}.
|
Predict responces using reduced data with \code{\link{mars}}.
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
TODO:
|
|
||||||
|
|
||||||
}
|
}
|
||||||
\seealso{
|
\seealso{
|
||||||
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
|
\code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/predict_dim.R
|
||||||
|
\name{predict.dim}
|
||||||
|
\alias{predict.dim}
|
||||||
|
\alias{predict.dim.cve}
|
||||||
|
\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.}
|
||||||
|
\usage{
|
||||||
|
\method{predict.dim}{cve}(object, ...)
|
||||||
|
|
||||||
|
\method{predict.dim}{cve}(object, ...)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{object}{instance of class \code{cve} (result of \code{cve},
|
||||||
|
\code{cve.call}).}
|
||||||
|
|
||||||
|
\item{...}{ignored.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
% Generated by roxygen2: do not edit by hand
|
% Generated by roxygen2: do not edit by hand
|
||||||
% Please edit documentation in R/CVE.R
|
% Please edit documentation in R/summary.R
|
||||||
\name{summary.cve}
|
\name{summary.cve}
|
||||||
\alias{summary.cve}
|
\alias{summary.cve}
|
||||||
\title{Prints a summary of a \code{cve} result.}
|
\title{Prints a summary of a \code{cve} result.}
|
||||||
|
@ -8,6 +8,8 @@
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Instance of 'cve' as returned by \code{cve}.}
|
\item{object}{Instance of 'cve' as returned by \code{cve}.}
|
||||||
|
|
||||||
|
\item{...}{ignored.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Prints a summary of a \code{cve} result.
|
Prints a summary of a \code{cve} result.
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
# # For OpenMP support.
|
||||||
|
# # Turned OFF for supporting all platforms
|
||||||
|
# PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
|
||||||
|
# PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CFLAGS)
|
||||||
|
|
||||||
SHLIB_OPENMP_CFLAGS=
|
SHLIB_OPENMP_CFLAGS=
|
||||||
SHLIB_OPENMP_CXXFLAGS=
|
SHLIB_OPENMP_CXXFLAGS=
|
||||||
|
|
|
@ -11,16 +11,16 @@ void cve_sub(const int n, const int p, const int q,
|
||||||
const double momentum,
|
const double momentum,
|
||||||
const double tau_init, const double tol_init,
|
const double tau_init, const double tol_init,
|
||||||
const double slack, const double gamma,
|
const double slack, const double gamma,
|
||||||
const int epochs, const int attempts,
|
const int maxIter, const int attempts,
|
||||||
double *V, double *L,
|
double *V, double *L,
|
||||||
SEXP logger, SEXP loggerEnv) {
|
SEXP logger, SEXP loggerEnv) {
|
||||||
|
|
||||||
int attempt = 0, epoch, i, nn = (n * (n - 1)) / 2;
|
int attempt = 0, iter, i, nn = (n * (n - 1)) / 2;
|
||||||
double loss, loss_last, loss_best, err, tau;
|
double loss, loss_last, loss_best, err, tau;
|
||||||
double tol = tol_init * sqrt((double)(2 * q));
|
double tol = tol_init * sqrt((double)(2 * q));
|
||||||
double gKscale = -0.5 / h;
|
double gKscale = -0.5 / h;
|
||||||
double agility = -2.0 * (1.0 - momentum) / (h * h);
|
double agility = -2.0 * (1.0 - momentum) / (h * h);
|
||||||
double c;
|
double c = agility / (double)n;
|
||||||
|
|
||||||
/* Create further intermediate or internal variables. */
|
/* Create further intermediate or internal variables. */
|
||||||
double *Q = (double*)R_alloc(p * p, sizeof(double));
|
double *Q = (double*)R_alloc(p * p, sizeof(double));
|
||||||
|
@ -100,21 +100,20 @@ void cve_sub(const int n, const int p, const int q,
|
||||||
if (method == CVE_METHOD_WEIGHTED) {
|
if (method == CVE_METHOD_WEIGHTED) {
|
||||||
/* Compute summ of all kernel applied distances by summing the
|
/* Compute summ of all kernel applied distances by summing the
|
||||||
* colSums of the kernel matrix. */
|
* colSums of the kernel matrix. */
|
||||||
c = -(double)n; // to scale with sum(K) - n
|
// c = -(double)n; // to scale with sum(K) - n
|
||||||
for (i = 0; i < n; ++i) {
|
// for (i = 0; i < n; ++i) {
|
||||||
c += colSums[i];
|
// c += colSums[i];
|
||||||
}
|
// }
|
||||||
// TODO: check for division by zero, but should not happen!!!
|
// TODO: check for division by zero, but should not happen!!!
|
||||||
} else {
|
c = agility / (sum(colSums, n) - (double)n);
|
||||||
c = n; // TODO: move (init) up cause always the same ^^ ...
|
|
||||||
}
|
}
|
||||||
scale(agility / c, G, p * q); // in-place
|
scale(c, G, p * q); // in-place
|
||||||
|
|
||||||
/* Compute Skew-Symmetric matrix `A` used in Cayley transform.
|
/* Compute Skew-Symmetric matrix `A` used in Cayley transform.
|
||||||
* `A <- tau * (G V^T - V G^T) + 0 * A`*/
|
* `A <- tau * (G V^T - V G^T) + 0 * A`*/
|
||||||
skew(p, q, tau, G, V, 0.0, A);
|
skew(p, q, tau, G, V, 0.0, A);
|
||||||
|
|
||||||
for (epoch = 0; epoch < epochs; ++epoch) {
|
for (iter = 0; iter < maxIter; ++iter) {
|
||||||
/* Move V allong A */
|
/* Move V allong A */
|
||||||
cayleyTransform(p, q, A, V, V_tau, workMem);
|
cayleyTransform(p, q, A, V, V_tau, workMem);
|
||||||
|
|
||||||
|
@ -156,12 +155,12 @@ void cve_sub(const int n, const int p, const int q,
|
||||||
|
|
||||||
if (logger) {
|
if (logger) {
|
||||||
callLogger(logger, loggerEnv,
|
callLogger(logger, loggerEnv,
|
||||||
attempt, epoch + 1,
|
attempt, iter + 1,
|
||||||
L, n, V, p, q, tau);
|
L, n, V, p, q, tau);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check Break condition.
|
// Check Break condition.
|
||||||
if (err < tol || epoch + 1 >= epochs) {
|
if (err < tol || iter + 1 >= maxIter) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,14 +179,8 @@ void cve_sub(const int n, const int p, const int q,
|
||||||
if (method == CVE_METHOD_WEIGHTED) {
|
if (method == CVE_METHOD_WEIGHTED) {
|
||||||
/* Compute summ of all kernel applied distances by summing the
|
/* Compute summ of all kernel applied distances by summing the
|
||||||
* colSums of the kernel matrix. */
|
* colSums of the kernel matrix. */
|
||||||
c = -(double)n; // to scale with sum(K) - n
|
|
||||||
for (i = 0; i < n; ++i) {
|
|
||||||
c += colSums[i];
|
|
||||||
}
|
|
||||||
c = agility / c;
|
|
||||||
// TODO: check for division by zero, but should not happen!!!
|
// TODO: check for division by zero, but should not happen!!!
|
||||||
} else {
|
c = agility / (sum(colSums, n) - (double)n);
|
||||||
c = agility / n;
|
|
||||||
}
|
}
|
||||||
F77_NAME(dgemm)("N", "N", &p, &q, &p,
|
F77_NAME(dgemm)("N", "N", &p, &q, &p,
|
||||||
&c, workMem, &p, V, &p,
|
&c, workMem, &p, V, &p,
|
||||||
|
|
|
@ -31,7 +31,7 @@ void cve_sub(const int n, const int p, const int q,
|
||||||
const double momentum,
|
const double momentum,
|
||||||
const double tau_init, const double tol_init,
|
const double tau_init, const double tol_init,
|
||||||
const double slack, const double gamma,
|
const double slack, const double gamma,
|
||||||
const int epochs, int attempts,
|
const int maxIter, int attempts,
|
||||||
double *V, double *L,
|
double *V, double *L,
|
||||||
SEXP logger, SEXP loggerEnv);
|
SEXP logger, SEXP loggerEnv);
|
||||||
|
|
||||||
|
@ -61,6 +61,8 @@ void rStiefel(const int p, const int q, double *V,
|
||||||
double *workMem, int workLen);
|
double *workMem, int workLen);
|
||||||
|
|
||||||
/* MATRIX */
|
/* MATRIX */
|
||||||
|
double sum(const double *A, const int nelem);
|
||||||
|
|
||||||
double norm(const double *A, const int nrow, const int ncol,
|
double norm(const double *A, const int nrow, const int ncol,
|
||||||
const char *type);
|
const char *type);
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
|
||||||
SEXP V, // initial
|
SEXP V, // initial
|
||||||
SEXP momentum, SEXP tau, SEXP tol,
|
SEXP momentum, SEXP tau, SEXP tol,
|
||||||
SEXP slack, SEXP gamma,
|
SEXP slack, SEXP gamma,
|
||||||
SEXP epochs, SEXP attempts,
|
SEXP maxIter, SEXP attempts,
|
||||||
SEXP logger, SEXP loggerEnv) {
|
SEXP logger, SEXP loggerEnv) {
|
||||||
/* Handle logger parameter, set to NULL pointer if not a function. */
|
/* Handle logger parameter, set to NULL pointer if not a function. */
|
||||||
if (!(isFunction(logger) && isEnvironment(loggerEnv))) {
|
if (!(isFunction(logger) && isEnvironment(loggerEnv))) {
|
||||||
|
@ -53,7 +53,7 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
|
||||||
asInteger(method),
|
asInteger(method),
|
||||||
asReal(momentum), asReal(tau), asReal(tol),
|
asReal(momentum), asReal(tau), asReal(tol),
|
||||||
asReal(slack), asReal(gamma),
|
asReal(slack), asReal(gamma),
|
||||||
asInteger(epochs), asInteger(attempts),
|
asInteger(maxIter), asInteger(attempts),
|
||||||
REAL(Vout), REAL(Lout),
|
REAL(Vout), REAL(Lout),
|
||||||
logger, loggerEnv);
|
logger, loggerEnv);
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ extern SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h,
|
||||||
SEXP V, // initial
|
SEXP V, // initial
|
||||||
SEXP momentum, SEXP tau, SEXP tol,
|
SEXP momentum, SEXP tau, SEXP tol,
|
||||||
SEXP slack, SEXP gamma,
|
SEXP slack, SEXP gamma,
|
||||||
SEXP epochs, SEXP attempts,
|
SEXP maxIter, SEXP attempts,
|
||||||
SEXP logger, SEXP loggerEnv);
|
SEXP logger, SEXP loggerEnv);
|
||||||
|
|
||||||
static const R_CallMethodDef CallEntries[] = {
|
static const R_CallMethodDef CallEntries[] = {
|
||||||
|
|
|
@ -10,6 +10,23 @@
|
||||||
// return newMat;
|
// return newMat;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
double sum(const double *A, const int nelem) {
|
||||||
|
int i, nelemb = (nelem / 4) * 4;
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (i = 0; i < nelemb; i += 4) {
|
||||||
|
sum += A[i]
|
||||||
|
+ A[i + 1]
|
||||||
|
+ A[i + 2]
|
||||||
|
+ A[i + 3];
|
||||||
|
}
|
||||||
|
for (; i < nelem; ++i) {
|
||||||
|
sum += A[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
double norm(const double *A, const int nrow, const int ncol,
|
double norm(const double *A, const int nrow, const int ncol,
|
||||||
const char *type) {
|
const char *type) {
|
||||||
int i, nelem = nrow * ncol;
|
int i, nelem = nrow * ncol;
|
||||||
|
|
100
README.md
100
README.md
|
@ -10,6 +10,7 @@ Doc:
|
||||||
- [x] Ref paper in doc
|
- [x] Ref paper in doc
|
||||||
- [ ] Data set descriptions and augmentations.
|
- [ ] Data set descriptions and augmentations.
|
||||||
- [x] Demonstration of the `Logger` function usage (Demo file or so, ...)
|
- [x] Demonstration of the `Logger` function usage (Demo file or so, ...)
|
||||||
|
- [ ] Update Paper (to new version / version consistent with current code!)
|
||||||
|
|
||||||
Methods to be implemented:
|
Methods to be implemented:
|
||||||
- [x] simple
|
- [x] simple
|
||||||
|
@ -28,21 +29,56 @@ Features (functions):
|
||||||
- [x] Initial `V.init` parameter (only ONE try, ignore number of `attempts` parameter)
|
- [x] Initial `V.init` parameter (only ONE try, ignore number of `attempts` parameter)
|
||||||
- [x] `basis.cve` list of estimated `B`s (with `k` supplied, only `B`)
|
- [x] `basis.cve` list of estimated `B`s (with `k` supplied, only `B`)
|
||||||
- [x] `directions.cve` Projected `X` given `k`
|
- [x] `directions.cve` Projected `X` given `k`
|
||||||
- [ ] `predict.cve` using `mars` for predicting responses given new data.
|
- [x] `predict.cve` using `mars` for predicting responses given new data.
|
||||||
- [ ] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
|
- [x] `predict.dim.cve` Cross-validation or `aov` (in stats package) or "elbow" estimation
|
||||||
- [x] `plot.elbow`
|
- [x] `plot.elbow`
|
||||||
- [x] `summary`
|
- [x] `summary`
|
||||||
|
|
||||||
Changes:
|
Changes:
|
||||||
- [-] New `estimate.bandwidth` implementation.
|
- [x] New `estimate.bandwidth` implementation.
|
||||||
(h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2,
|
(h = 2 * (tr(\Sigma) / p) * (6/5 * n^(-1 / (4 + k)))^2,
|
||||||
\Sigma = 1/n * (X-mean)'(X-mean))
|
\Sigma = 1/n * (X-mean)'(X-mean))
|
||||||
|
|
||||||
|
# Development
|
||||||
|
## Build and install.
|
||||||
|
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and automatic creation of the `NAMESPACE` file.
|
||||||
|
```R
|
||||||
|
setwd("./CVE_R") # Set path to the package root.
|
||||||
|
library(devtools) # Load required `devtools` package.
|
||||||
|
document() # Create `.Rd` files and write `NAMESPACE`.
|
||||||
|
```
|
||||||
|
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
|
||||||
|
```bash
|
||||||
|
R CMD build CVE_C; R CMD INSTALL CVE_0.2.tar.gz
|
||||||
|
```
|
||||||
|
Then we are ready for using the package.
|
||||||
|
As well as building the `NAMESPACE` and `*.Rd` files using `devtools` (`roxygen2`) the following resembles an entire build pipeline including checks.
|
||||||
|
```bash
|
||||||
|
R -q -e 'library(devtools); setwd("CVE_C"); pkgbuild::compile_dll(); document(); pkgbuild::clean_dll()'
|
||||||
|
R CMD build CVE_C; R CMD check CVE_0.2.tar.gz;
|
||||||
|
R CMD INSTALL CVE_0.2.tar.gz
|
||||||
|
```
|
||||||
|
## Build and install from within `R`.
|
||||||
|
An alternative approach is the following.
|
||||||
|
```R
|
||||||
|
## Installing CVE (C implementation)
|
||||||
|
(setwd('~/Projects/CVE/CVE_C'))
|
||||||
|
# equiv to Rcpp::compileAttributes().
|
||||||
|
library(devtools)
|
||||||
|
pkgbuild::compile_dll() # required for packages with C/C++ code
|
||||||
|
document() # See bug: https://github.com/stan-dev/rstantools/issues/52
|
||||||
|
pkgbuild::clean_dll()
|
||||||
|
(path <- build(vignettes = FALSE))
|
||||||
|
install.packages(path, repos = NULL, type = "source")
|
||||||
|
library(CVE)
|
||||||
|
```
|
||||||
|
**Note: I only recommend this approach during development.**
|
||||||
|
|
||||||
# Package Structure
|
# Package Structure
|
||||||
|
|
||||||
## Demos
|
## Demos
|
||||||
A demo is an `.R` file that lives in `demo/`. Demos are like examples but tend to
|
A demo is an `.R` file that lives in `demo/`. Demos are like examples but tend to
|
||||||
be longer. Instead of focussing on a single function, they show how to weave
|
be longer. Instead of focusing on a single function, they show how to weave
|
||||||
together multiple functions to solve a problem.
|
together multiple functions to solve a problem.
|
||||||
|
|
||||||
You list and access demos with `demo()`:
|
You list and access demos with `demo()`:
|
||||||
|
@ -57,7 +93,7 @@ The demo name is the name of the file without the extension,
|
||||||
e.g. `demo/runtime_test.R` becomes `runtime_test`.
|
e.g. `demo/runtime_test.R` becomes `runtime_test`.
|
||||||
|
|
||||||
By default the demo ask for human input for each plot: "Hit to see next plot".
|
By default the demo ask for human input for each plot: "Hit to see next plot".
|
||||||
This behaviour can be overridden by adding `devAskNewPage(ask = FALSE)` to
|
This behavior can be overridden by adding `devAskNewPage(ask = FALSE)` to
|
||||||
the demo file. You can add pauses by adding:
|
the demo file. You can add pauses by adding:
|
||||||
`readline("press any key to continue")`.
|
`readline("press any key to continue")`.
|
||||||
|
|
||||||
|
@ -71,9 +107,9 @@ Using the Linux `grep` program with the parameters `-rnw` and specifying a inclu
|
||||||
```bash
|
```bash
|
||||||
grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
|
grep --include=*\.{c,h,R} -rnw '.' -e "sweep"
|
||||||
```
|
```
|
||||||
searches in all `C` source and header fils as well as `R` source files for the term _sweep_.
|
searches in all `C` source and header files as well as `R` source files for the term _sweep_.
|
||||||
|
|
||||||
## Recursive dir. compair with colored sructure (more or less).
|
## Recursive directory compare with colored structure (more or less).
|
||||||
```bash
|
```bash
|
||||||
diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)"
|
diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)"
|
||||||
```
|
```
|
||||||
|
@ -102,41 +138,9 @@ echo dry_run=$dry_run
|
||||||
echo stack_size=$stack_size
|
echo stack_size=$stack_size
|
||||||
```
|
```
|
||||||
|
|
||||||
# Development
|
# Analysis
|
||||||
## Build and install.
|
|
||||||
To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
|
|
||||||
```R
|
|
||||||
setwd("./CVE_R") # Set path to the package root.
|
|
||||||
library(devtools) # Load required `devtools` package.
|
|
||||||
document() # Create `.Rd` files and write `NAMESPACE`.
|
|
||||||
```
|
|
||||||
Next the package needs to be build, therefore (if pure `R` package, aka. `C/C++`, `Fortran`, ... code) just do the following.
|
|
||||||
```bash
|
|
||||||
R CMD build CVE_R
|
|
||||||
R CMD INSTALL CVE_0.1.tar.gz
|
|
||||||
```
|
|
||||||
Then we are ready for using the package.
|
|
||||||
```R
|
|
||||||
library(CVE)
|
|
||||||
help(package = "CVE")
|
|
||||||
```
|
|
||||||
## Build and install from within `R`.
|
|
||||||
An alternative approach is the following.
|
|
||||||
```R
|
|
||||||
setwd('./CVE_R')
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
library(devtools)
|
|
||||||
document()
|
|
||||||
# No vignettes to build but "inst/doc/" is required!
|
|
||||||
(path <- build(vignettes = FALSE))
|
|
||||||
install.packages(path, repos = NULL, type = "source")
|
|
||||||
```
|
|
||||||
**Note: I only recommend this approach during development.**
|
|
||||||
|
|
||||||
# Analysing
|
|
||||||
## Logging (a `cve` run).
|
## Logging (a `cve` run).
|
||||||
To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the stepsize one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
|
To log `loss`, `error` (estimated) the true error (error of current estimated `B` against the true `B`) or even the step size one can use the `logger` parameter. A `logger` is a function that gets the current `environment` of the CVE optimization methods (__do not alter this environment, only read from it__). This can be used to create logs like in the following example.
|
||||||
```R
|
```R
|
||||||
library(CVE)
|
library(CVE)
|
||||||
|
|
||||||
|
@ -184,7 +188,7 @@ matplot(true.error.history, type = 'l', log = 'y', xlab = 'iter',
|
||||||
```
|
```
|
||||||
|
|
||||||
## Reading log files.
|
## Reading log files.
|
||||||
The runtime tests (upcomming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actualy `TSV`) with a header storing the test results. Depending on the test the files may contain differnt data. As an example we use the runtime test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analysing the data see the following example.
|
The run-time tests (upcoming further tests) are creating log files saved in `tmp/`. These log files are `CSV` files (actually `TSV`) with a header storing the test results. Depending on the test the files may contain different data. As an example we use the run-time test logs which store in each line the `dataset`, the used `method` as well as the `error` (actual error of estimated `B` against real `B`) and the `time`. For reading and analyzing the data see the following example.
|
||||||
```R
|
```R
|
||||||
# Load log as `data.frame`
|
# Load log as `data.frame`
|
||||||
log <- read.csv('tmp/test0.log', sep = '\t')
|
log <- read.csv('tmp/test0.log', sep = '\t')
|
||||||
|
@ -200,7 +204,7 @@ for (ds.name in paste0('M', seq(5))) {
|
||||||
|
|
||||||
## Environments and variable lookup.
|
## Environments and variable lookup.
|
||||||
In the following a view simple examples of how `R` searches for variables.
|
In the following a view simple examples of how `R` searches for variables.
|
||||||
In addition we manipulate funciton closures to alter the search path in variable lookup and outer scope variable manipulation.
|
In addition we manipulate function closures to alter the search path in variable lookup and outer scope variable manipulation.
|
||||||
```R
|
```R
|
||||||
droids <- "These aren't the droids you're looking for."
|
droids <- "These aren't the droids you're looking for."
|
||||||
|
|
||||||
|
@ -225,7 +229,7 @@ jedi.seeks()
|
||||||
# [1] "R2-D2", "C-3PO"
|
# [1] "R2-D2", "C-3PO"
|
||||||
```
|
```
|
||||||
|
|
||||||
The next example ilustrates how to write (without local copies) to variables outside the functions local environment.
|
The next example illustrates how to write (without local copies) to variables outside the functions local environment.
|
||||||
```R
|
```R
|
||||||
counting <- function() {
|
counting <- function() {
|
||||||
count <<- count + 1 # Note the `<<-` assignment.
|
count <<- count + 1 # Note the `<<-` assignment.
|
||||||
|
@ -255,7 +259,7 @@ counting <- function() {
|
||||||
})()
|
})()
|
||||||
```
|
```
|
||||||
|
|
||||||
Another example for the usage of `do.call` where the evaluation of parameters is illustated (example taken (and altered) from `?do.call`).
|
Another example for the usage of `do.call` where the evaluation of parameters is illustrated (example taken (and altered) from `?do.call`).
|
||||||
```R
|
```R
|
||||||
## examples of where objects will be found.
|
## examples of where objects will be found.
|
||||||
A <- "A.Global"
|
A <- "A.Global"
|
||||||
|
@ -373,7 +377,7 @@ microbenchmark(
|
||||||
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
|
# sweep 1313.177 1522.4010 2355.269 1879.2605 2065.399 18783.24 100
|
||||||
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
|
# recycle 719.001 786.1265 1157.285 881.8825 1163.202 19091.79 100
|
||||||
```
|
```
|
||||||
### Scaled `crossprod` with matmul order.
|
### Scaled `crossprod` with matrix multiplication order.
|
||||||
```R
|
```R
|
||||||
(n <- 200)
|
(n <- 200)
|
||||||
(p <- 12)
|
(p <- 12)
|
||||||
|
@ -424,7 +428,7 @@ microbenchmark(
|
||||||
```
|
```
|
||||||
|
|
||||||
## Using `Rprof()` for performance.
|
## Using `Rprof()` for performance.
|
||||||
The standart method for profiling where an algorithm is spending its time is with `Rprof()`.
|
The standard method for profiling where an algorithm is spending its time is with `Rprof()`.
|
||||||
```R
|
```R
|
||||||
path <- '../tmp/R.prof' # path to profiling file
|
path <- '../tmp/R.prof' # path to profiling file
|
||||||
Rprof(path)
|
Rprof(path)
|
||||||
|
@ -432,4 +436,4 @@ cve.res <- cve.call(X, Y, k = k)
|
||||||
Rprof(NULL)
|
Rprof(NULL)
|
||||||
(prof <- summaryRprof(path)) # Summarise results
|
(prof <- summaryRprof(path)) # Summarise results
|
||||||
```
|
```
|
||||||
**Note: considure to run `gc()` before measuring**, aka cleaning up by explicitely calling the garbage collector.
|
**Note: consider to run `gc()` before measuring**, aka cleaning up by explicitly calling the garbage collector.
|
||||||
|
|
14
test.R
14
test.R
|
@ -5,12 +5,12 @@ if (length(args) > 0L) {
|
||||||
} else {
|
} else {
|
||||||
method <- "simple"
|
method <- "simple"
|
||||||
}
|
}
|
||||||
if (length((args) > 1L)) {
|
if (length(args) > 1L) {
|
||||||
momentum <- as.double(args[2])
|
momentum <- as.double(args[2])
|
||||||
} else {
|
} else {
|
||||||
momentum <- 0.0
|
momentum <- 0.0
|
||||||
}
|
}
|
||||||
epochs <- 50L
|
max.iter <- 50L
|
||||||
attempts <- 25L
|
attempts <- 25L
|
||||||
|
|
||||||
# library(CVEpureR)
|
# library(CVEpureR)
|
||||||
|
@ -55,14 +55,14 @@ for (name in paste0("M", seq(5))) {
|
||||||
|
|
||||||
# Setup histories.
|
# Setup histories.
|
||||||
V_last <- NULL
|
V_last <- NULL
|
||||||
loss.history <- matrix(NA, epochs + 1, attempts)
|
loss.history <- matrix(NA, max.iter + 1, attempts)
|
||||||
error.history <- matrix(NA, epochs + 1, attempts)
|
error.history <- matrix(NA, max.iter + 1, attempts)
|
||||||
tau.history <- matrix(NA, epochs + 1, attempts)
|
tau.history <- matrix(NA, max.iter + 1, attempts)
|
||||||
true.error.history <- matrix(NA, epochs + 1, attempts)
|
true.error.history <- matrix(NA, max.iter + 1, attempts)
|
||||||
|
|
||||||
dr <- cve(Y ~ X, k = k, method = method,
|
dr <- cve(Y ~ X, k = k, method = method,
|
||||||
momentum = momentum,
|
momentum = momentum,
|
||||||
epochs = epochs, attempts = attempts,
|
max.iter = max.iter, attempts = attempts,
|
||||||
logger = logger)
|
logger = logger)
|
||||||
|
|
||||||
# Plot history's
|
# Plot history's
|
||||||
|
|
Loading…
Reference in New Issue