From 4b68c245a686a94a452aeefeab183d9d0668a190 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 5 Dec 2019 17:35:29 +0100 Subject: [PATCH] rewrote (C): to use new Gradient formula, rewrote (C): subroutine interface to use matrix struct --- CVE_C/NAMESPACE | 3 +- CVE_C/R/CVE.R | 109 +++- CVE_C/R/coef.R | 33 +- CVE_C/R/directions.R | 18 + CVE_C/R/estimateBandwidth.R | 19 + CVE_C/R/plot.R | 25 + CVE_C/R/predict.R | 26 + CVE_C/R/predict_dim.R | 27 +- CVE_C/R/summary.R | 19 + CVE_C/man/coef.cve.Rd | 33 +- CVE_C/man/cve.Rd | 63 +- CVE_C/man/cve.call.Rd | 34 +- CVE_C/man/directions.cve.Rd | 19 + CVE_C/man/estimate.bandwidth.Rd | 20 + CVE_C/man/plot.cve.Rd | 27 + CVE_C/man/predict.cve.Rd | 27 + CVE_C/man/predict.dim.Rd | 24 - CVE_C/man/summary.cve.Rd | 19 + CVE_C/src/callLogger.c | 22 +- CVE_C/src/cve.c | 265 ++++---- CVE_C/src/cve.h | 167 +++-- CVE_C/src/cve_subroutines.c | 320 +++++++--- CVE_C/src/export.c | 71 ++- CVE_C/src/init.c | 16 +- CVE_C/src/matrix.c | 1053 +++++++++++++++++++++++++++---- CVE_C/src/rStiefel.c | 89 +-- CVE_C/src/rowColOp.c | 160 ----- CVE_C/src/sweep.c | 51 -- README.md | 4 +- runtime_test.R | 6 +- test.R | 4 +- 31 files changed, 1915 insertions(+), 858 deletions(-) delete mode 100644 CVE_C/man/predict.dim.Rd delete mode 100644 CVE_C/src/rowColOp.c delete mode 100644 CVE_C/src/sweep.c diff --git a/CVE_C/NAMESPACE b/CVE_C/NAMESPACE index d2c0f0f..0f9e2ff 100644 --- a/CVE_C/NAMESPACE +++ b/CVE_C/NAMESPACE @@ -4,7 +4,6 @@ S3method(coef,cve) S3method(directions,cve) S3method(plot,cve) S3method(predict,cve) -S3method(predict.dim,cve) S3method(summary,cve) export(cve) export(cve.call) @@ -13,7 +12,7 @@ export(directions) export(elem.pairs) export(estimate.bandwidth) export(null) -export(predict.dim) +export(predict_dim) export(projTangentStiefel) export(rStiefel) export(retractStiefel) diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R index 0a7a35d..8246733 100644 --- a/CVE_C/R/CVE.R +++ b/CVE_C/R/CVE.R @@ -45,6 +45,7 @@ #' below. #' \item "weighted" variation with addaptive weighting of slices. #' } +#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied). #' @param ... Parameters passed on to \code{cve.call}. #' #' @return an S3 object of class \code{cve} with components: @@ -58,19 +59,54 @@ #' } #' #' @examples -#' # create dataset -#' x <- matrix(rnorm(400), 100, 4) -#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) -#' -#' # Call CVE. -#' dr <- cve(y ~ x) -#' # Call weighted CVE. -#' dr.weighted <- cve(y ~ x, method = "weighted") -#' -#' # Training data responces of reduced data. -#' y.est <- directions(dr, 1) -#' # Extract SDR subspace basis of dimension 1. 
-#' B <- coef(dr.momentum, 1)
+#' # set dimensions for simulation model
+#' p <- 8
+#' k <- 2
+#' # create B for simulation
+#' b1 <- rep(1 / sqrt(p), p)
+#' b2 <- (-1)^seq(1, p) / sqrt(p)
+#' B <- cbind(b1, b2)
+#' # sample size
+#' n <- 200
+#' set.seed(21)
+#' # create predictor data x ~ N(0, I_p)
+#' x <- matrix(rnorm(n * p), n, p)
+#' # simulate response variable
+#' # y = f(B'x) + err
+#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
+#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
+#' # calculate cve with method 'simple' for k unknown in 1, ..., 4
+#' cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple'
+#' # calculate cve with method 'weighted' for k = 2
+#' cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted')
+#' # estimate dimension from cve.obj.s
+#' khat <- predict_dim(cve.obj.s)$k
+#' # get cve-estimate for B with dimensions (p, k = khat)
+#' B2 <- coef(cve.obj.s, k = khat)
+#' # get projected X data (same as cve.obj.s$X %*% B2)
+#' proj.X <- directions(cve.obj.s, k = khat)
+#' # plot y against projected data
+#' plot(proj.X[, 1], y)
+#' plot(proj.X[, 2], y)
+#' # create 10 new x points and y according to model
+#' x.new <- matrix(rnorm(10 * p), 10, p)
+#' y.new <- (x.new %*% b1)^2 + 2 * (x.new %*% b2) + 0.25 * rnorm(10)
+#' # predict y.new
+#' yhat <- predict(cve.obj.s, x.new, khat)
+#' plot(y.new, yhat)
+#' # projection matrix on span(B)
+#' # same as B %*% t(B) since B is semi-orthogonal
+#' PB <- B %*% solve(t(B) %*% B) %*% t(B)
+#' # cve estimates for B with simple and weighted method
+#' B.s <- coef(cve.obj.s, k = 2)
+#' B.w <- coef(cve.obj.w, k = 2)
+#' # same as B.s %*% t(B.s) since B.s is semi-orthogonal (same for B.w)
+#' PB.s <- B.s %*% solve(t(B.s) %*% B.s) %*% t(B.s)
+#' PB.w <- B.w %*% solve(t(B.w) %*% B.w) %*% t(B.w)
+#' # compare estimation accuracy of simple and weighted cve estimate by
+#' # Frobenius norm of difference of projections.
+#' norm(PB - PB.s, type = 'F')
+#' norm(PB - PB.w, type = 'F')
 #'
 #' @seealso For a detailed description of \code{formula} see
 #' \code{\link{formula}}.
@@ -107,6 +143,12 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
 #' \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
 #' @param X data matrix with samples in its rows.
 #' @param Y Responses (1 dimensional).
+#' @param method specifies the CVE method variation as one of
+#' \itemize{
+#'    \item "simple" exact implementation as described in the paper listed
+#'          below.
+#'    \item "weighted" variation with adaptive weighting of slices.
+#' }
 #' @param k Dimension of lower dimensional projection, if \code{k} is given
 #' only the specified dimension \code{B} matrix is estimated.
 #' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
@@ -124,28 +166,35 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
 #' @param slack Positive scaling to allow small increases of the loss while
 #' optimizing.
 #' @param gamma step-size reduction multiple.
-#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is
+#' @param V.init Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)`
+#' as optimization starting value.
(If supplied, \code{attempts} is
 #' set to 1 and \code{k} to match dimension)
 #'
 #' @inherit cve return
 #'
 #' @examples
-#' # Create a dataset (n samples):
-#' n <- 100
-#' X <- matrix(rnorm(4 * n), n)
-#' Y <- matrix(X[, 1] + cos(X[, 2]) + rnorm(n, 0, .1), n)
+#' # create B for simulation (k = 1)
+#' B <- rep(1, 5) / sqrt(5)
 #'
-#' # Create logger function:
-#' logger <- function(attempt, iter, data) {
-#'   if (iter == 0) {
-#'     cat("Starting attempt nr:", attempt, "\n")
-#'   }
-#'   cat(" iter:", iter, "loss:", data$loss, "\n")
-#' }
-#'
-#' Call 'cve' with logger:
-#' cve(Y ~ X, logger = logger)
+#' set.seed(21)
+#' # create predictor data X ~ N(0, I_p)
+#' X <- matrix(rnorm(500), 100, 5)
+#' # simulate response variable
+#' # Y = f(B'X) + err
+#' # with f(x1) = x1 and err ~ N(0, 0.25^2)
+#' Y <- X %*% B + 0.25 * rnorm(100)
+#'
+#' # calculate cve with method 'simple' for k = 1
+#' set.seed(21)
+#' cve.obj.simple1 <- cve(Y ~ X, k = 1)
+#'
+#' # same as
+#' set.seed(21)
+#' cve.obj.simple2 <- cve.call(X, Y, k = 1)
 #'
+#' # extract estimated B's.
+#' coef(cve.obj.simple1, k = 1)
+#' coef(cve.obj.simple2, k = 1)
 #' @export
 cve.call <- function(X, Y, method = "simple",
                      nObs = sqrt(nrow(X)), h = NULL,
@@ -178,7 +227,7 @@ cve.call <- function(X, Y, method = "simple",
     }
     if (!(is.matrix(X) && is.numeric(X))) {
-        stop("Parameter 'X' should be a numeric matrices.")
+        stop("Parameter 'X' should be a numeric matrix.")
     }
     if (!is.numeric(Y)) {
         stop("Parameter 'Y' must be numeric.")
     }
@@ -288,7 +337,7 @@ cve.call <- function(X, Y, method = "simple",
         h <- estimate.bandwidth(X, k, nObs)
     }
 
-    dr.k <- .Call('cve', PACKAGE = 'CVE',
+    dr.k <- .Call('cve_export', PACKAGE = 'CVE',
                   X, Y, k, h,
                   method_nr,
                   V.init,
diff --git a/CVE_C/R/coef.R b/CVE_C/R/coef.R
index a5b2ad6..6d71308 100644
--- a/CVE_C/R/coef.R
+++ b/CVE_C/R/coef.R
@@ -9,10 +9,35 @@
 #' @return dir the matrix of CS or CMS of given dimension
 #'
 #' @examples
-#' x <- matrix(rnorm(400),100,4)
-#' y <- x[, 1] + x[, 2] + as.matrix(rnorm(100))
-#' dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 2
-#' B2 <- coef(dr, 2)
+#' # set dimensions for simulation model
+#' p <- 8 # sample dimension
+#' k <- 2 # real dimension of SDR subspace
+#' n <- 200 # sample size
+#' # create B for simulation
+#' b1 <- rep(1 / sqrt(p), p)
+#' b2 <- (-1)^seq(1, p) / sqrt(p)
+#' B <- cbind(b1, b2)
+#'
+#' set.seed(21)
+#' # create predictor data x ~ N(0, I_p)
+#' x <- matrix(rnorm(n * p), n, p)
+#' # simulate response variable
+#' # y = f(B'x) + err
+#' # with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2)
+#' y <- (x %*% b1)^2 + 2 * (x %*% b2) + 0.25 * rnorm(100)
+#' # calculate cve for k = 1, ..., 5
+#' cve.obj <- cve(y ~ x, max.dim = 5)
+#' # get cve-estimate for B with dimensions (p, k = 2)
+#' B2 <- coef(cve.obj, k = 2)
+#'
+#' # Projection matrix on span(B)
+#' # equivalent to `B %*% t(B)` since B is semi-orthonormal
+#' PB <- B %*% solve(t(B) %*% B) %*% t(B)
+#' # Projection matrix on span(B2)
+#' # equivalent to `B2 %*% t(B2)` since B2 is semi-orthonormal
+#' PB2 <- B2 %*% solve(t(B2) %*% B2) %*% t(B2)
+#' # compare estimation accuracy by Frobenius norm of difference of projections
+#' norm(PB - PB2, type = 'F')
 #'
 #' @method coef cve
 #' @aliases coef.cve
diff --git a/CVE_C/R/directions.R b/CVE_C/R/directions.R
index 1a9d76d..4546539 100644
--- a/CVE_C/R/directions.R
+++ b/CVE_C/R/directions.R
@@ -8,6 +8,24 @@ directions <- function(dr, k) {
 #' @param dr Instance of 'cve' as returned by \code{cve}.
 #' @param k SDR dimension to use for projection.
#' +#' @examples +#' # create B for simulation (k = 1) +#' B <- rep(1, 5) / sqrt(5) +#' set.seed(21) +#' # creat predictor data x ~ N(0, I_p) +#' x <- matrix(rnorm(500), 100, 5) +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1) = x1 and err ~ N(0, 0.25^2) +#' y <- x %*% B + 0.25 * rnorm(100) +#' # calculate cve with method 'simple' for k = 1 +#' set.seed(21) +#' cve.obj.simple <- cve(y ~ x, k = 1, method = 'simple') +#' # get projected data for k = 1 +#' x.proj <- directions(cve.obj.simple, k = 1) +#' # plot y against projected data +#' plot(x.proj, y) +#' #' @method directions cve #' @aliases directions directions.cve #' @export diff --git a/CVE_C/R/estimateBandwidth.R b/CVE_C/R/estimateBandwidth.R index d358f40..15543dc 100644 --- a/CVE_C/R/estimateBandwidth.R +++ b/CVE_C/R/estimateBandwidth.R @@ -14,6 +14,25 @@ #' #' @return Estimated bandwidth \code{h}. #' +#' @examples +#' # set dimensions for simulation model +#' p <- 5; k <- 1 +#' # create B for simulation +#' B <- rep(1, p) / sqrt(p) +#' # samplsize +#' n <- 100 +#' set.seed(21) +#' #creat predictor data x ~ N(0, I_p) +#' x <- matrix(rnorm(n * p), n, p) +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1) = x1 and err ~ N(0, 0.25^2) +#' y <- x %*% B + 0.25 * rnorm(100) +#' # calculate cve with method 'simple' for k = 1 +#' set.seed(21) +#' cve.obj.simple <- cve(y ~ x, k = k) +#' print(cve.obj.simple$res$'1'$h) +#' print(estimate.bandwidth(x, k = k)) #' @export estimate.bandwidth <- function(X, k, nObs) { n <- nrow(X) diff --git a/CVE_C/R/plot.R b/CVE_C/R/plot.R index 96bd479..d981fff 100644 --- a/CVE_C/R/plot.R +++ b/CVE_C/R/plot.R @@ -5,6 +5,31 @@ #' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]). #' @param ... Pass through parameters to [\code{\link{plot}}] and #' [\code{\link{lines}}] +#' @examples +#' # create B for simulation +#' B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6) +#' +#' set.seed(21) +#' # creat predictor data x ~ N(0, I_p) +#' X <- matrix(rnorm(600), 100) +#' +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2) +#' Y <- (X %*% B[, 1])^2 + 2 * X %*% B[, 2] + rnorm(100, 0, .1) +#' +#' # Create bandwidth estimation function +#' estimate.bandwidth <- function(X, k, nObs) { +#' n <- nrow(X) +#' p <- ncol(X) +#' X_c <- scale(X, center = TRUE, scale = FALSE) +#' 2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p) +#' } +#' # calculate cve with method 'simple' for k = min.dim,...,max.dim +#' cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X))) +#' +#' # elbow plot +#' plot(cve.obj.simple) #' #' @seealso see \code{\link{par}} for graphical parameters to pass through #' as well as \code{\link{plot}}, the standard plot utility. diff --git a/CVE_C/R/predict.R b/CVE_C/R/predict.R index 074db6e..f882d27 100644 --- a/CVE_C/R/predict.R +++ b/CVE_C/R/predict.R @@ -10,6 +10,32 @@ #' #' @return prediced response of data \code{newdata}. 
#' +#' @examples +#' # create B for simulation +#' B <- rep(1, 5) / sqrt(5) +#' +#' set.seed(21) +#' # creat predictor data x ~ N(0, I_p) +#' x <- matrix(rnorm(500), 100) +#' +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1) = x1 and err ~ N(0, 0.25^2) +#' y <- x %*% B + 0.25 * rnorm(100) +#' +#' x.train <- x[1:80, ] +#' x.test <- x[81:100, ] +#' y.train <- y[1:80, ] +#' y.test <- y[81:100, ] +#' +#' # calculate cve with method 'simple' for k = 1 +#' cve.obj.simple <- cve(y.train ~ x.train, k = 1) +#' +#' # predict y.test from x.test +#' yhat <- predict(cve.obj.simple, x.test, 1) +#' +#' # plot prediction against y.test +#' plot(yhat, y.test) #' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. #' #' @rdname predict.cve diff --git a/CVE_C/R/predict_dim.R b/CVE_C/R/predict_dim.R index 5b1396c..5784f92 100644 --- a/CVE_C/R/predict_dim.R +++ b/CVE_C/R/predict_dim.R @@ -10,15 +10,26 @@ #' \item k: predicted dimensions. #' } #' +#' @examples +#' # create B for simulation +#' B <- rep(1, 5) / sqrt(5) +#' +#' set.seed(21) +#' # creat predictor data x ~ N(0, I_p) +#' x <- matrix(rnorm(500), 100) +#' +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1) = x1 and err ~ N(0, 0.25^2) +#' y <- x %*% B + 0.25 * rnorm(100) +#' +#' # Calculate cve for unknown k between min.dim and max.dim. +#' cve.obj.simple <- cve(y ~ x) +#' +#' predict_dim(cve.obj.simple) +#' #' @export -predict.dim <- function(object, ...) { - UseMethod("predict.dim") -} - -#' @aliases predict.dim -#' @method predict.dim cve -#' @export -predict.dim.cve <- function(object, ...) { +predict_dim <- function(object, ...) { # Get centered training data and dimensions X <- scale(object$X, center = TRUE, scale = FALSE) n <- nrow(object$X) # umber of training data samples diff --git a/CVE_C/R/summary.R b/CVE_C/R/summary.R index 9403f09..a50d6d6 100644 --- a/CVE_C/R/summary.R +++ b/CVE_C/R/summary.R @@ -1,6 +1,25 @@ #' Prints a summary of a \code{cve} result. #' @param object Instance of 'cve' as returned by \code{cve}. #' @param ... ignored. +#' +#' @examples +#' # create B for simulation +#' B <- rep(1, 5) / sqrt(5) +#' +#' set.seed(21) +#' #creat predictor data x ~ N(0, I_p) +#' x <- matrix(rnorm(500), 100) +#' +#' # simulate response variable +#' # y = f(B'x) + err +#' # with f(x1) = x1 and err ~ N(0, 0.25^2) +#' y <- x %*% B + 0.25 * rnorm(100) +#' +#' # calculate cve for unknown k between min.dim and max.dim. +#' cve.obj.simple <- cve(y ~ x) +#' +#' summary(cve.obj.simple) +#' #' @method summary cve #' @export summary.cve <- function(object, ...) { diff --git a/CVE_C/man/coef.cve.Rd b/CVE_C/man/coef.cve.Rd index 54a68c3..6ca80b0 100644 --- a/CVE_C/man/coef.cve.Rd +++ b/CVE_C/man/coef.cve.Rd @@ -21,9 +21,34 @@ dir the matrix of CS or CMS of given dimension Returns the SDR basis matrix for SDR dimension(s). } \examples{ -x <- matrix(rnorm(400),100,4) -y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) -dr <- cve(y ~ x, k = 2) # Only for sub-space dim. 
2 -B2 <- coef(dr, 2) +# set dimensions for simulation model +p <- 8 # sample dimension +k <- 2 # real dimension of SDR subspace +n <- 200 # samplesize +# create B for simulation +b1 <- rep(1 / sqrt(p), p) +b2 <- (-1)^seq(1, p) / sqrt(p) +B <- cbind(b1, b2) + +set.seed(21) +# creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(n * p), n, p) +# simulate response variable +# y = f(B'x) + err +# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2) +y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100) +# calculate cve for k = 1, ..., 5 +cve.obj <- cve(y ~ x, max.dim = 5) +# get cve-estimate for B with dimensions (p, k = 2) +B2 <- coef(cve.obj, k = 2) + +# Projection matrix on span(B) +# equivalent to `B \%*\% t(B)` since B is semi-orthonormal +PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B) +# Projection matrix on span(B2) +# equivalent to `B2 \%*\% t(B2)` since B2 is semi-orthonormal +PB2 <- B2 \%*\% solve(t(B2) \%*\% B2) \%*\% t(B2) +# compare estimation accuracy by Frobenius norm of difference of projections +norm(PB - PB2, type = 'F') } diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd index 0ba8a05..4e9a159 100644 --- a/CVE_C/man/cve.Rd +++ b/CVE_C/man/cve.Rd @@ -20,6 +20,8 @@ supplied.} \item "weighted" variation with addaptive weighting of slices. }} +\item{max.dim}{upper bounds for \code{k}, (ignored if \code{k} is supplied).} + \item{...}{Parameters passed on to \code{cve.call}.} } \value{ @@ -58,19 +60,54 @@ a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. Without loss of generality \eqn{B} is assumed to be orthonormal. } \examples{ -# create dataset -x <- matrix(rnorm(400), 100, 4) -y <- x[, 1] + x[, 2] + as.matrix(rnorm(100)) - -# Call CVE. -dr <- cve(y ~ x) -# Call weighted CVE. -dr.weighted <- cve(y ~ x, method = "weighted") - -# Training data responces of reduced data. -y.est <- directions(dr, 1) -# Extract SDR subspace basis of dimension 1. 
-B <- coef(dr.momentum, 1) +# set dimensions for simulation model +p <- 8 +k <- 2 +# create B for simulation +b1 <- rep(1 / sqrt(p), p) +b2 <- (-1)^seq(1, p) / sqrt(p) +B <- cbind(b1, b2) +# samplsize +n <- 200 +set.seed(21) +# creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(n * p), n, p) +# simulate response variable +# y = f(B'x) + err +# with f(x1, x2) = x1^2 + 2 * x2 and err ~ N(0, 0.25^2) +y <- (x \%*\% b1)^2 + 2 * (x \%*\% b2) + 0.25 * rnorm(100) +# calculate cve with method 'simple' for k unknown in 1, ..., 4 +cve.obj.s <- cve(y ~ x, max.dim = 4) # default method 'simple' +# calculate cve with method 'weighed' for k = 2 +cve.obj.w <- cve(y ~ x, k = 2, method = 'weighted') +# estimate dimension from cve.obj.s +khat <- predict_dim(cve.obj.s)$k +# get cve-estimate for B with dimensions (p, k = khat) +B2 <- coef(cve.obj.s, k = khat) +# get projected X data (same as cve.obj.s$X \%*\% B2) +proj.X <- directions(cve.obj.s, k = khat) +# plot y against projected data +plot(proj.X[, 1], y) +plot(proj.X[, 2], y) +# creat 10 new x points and y according to model +x.new <- matrix(rnorm(10 * p), 10, p) +y.new <- (x.new \%*\% b1)^2 + 2 * (x.new \%*\% b2) + 0.25 * rnorm(10) +# predict y.new +yhat <- predict(cve.obj.s, x.new, khat) +plot(y.new, yhat) +# projection matrix on span(B) +# same as B \%*\% t(B) since B is semi-orthogonal +PB <- B \%*\% solve(t(B) \%*\% B) \%*\% t(B) +# cve estimates for B with simple and weighted method +B.s <- coef(cve.obj.s, k = 2) +B.w <- coef(cve.obj.w, k = 2) +# same as B.s \%*\% t(B.s) since B.s is semi-orthogonal (same vor B.w) +PB.s <- B.s \%*\% solve(t(B.s) \%*\% B.s) \%*\% t(B.s) +PB.w <- B.w \%*\% solve(t(B.w) \%*\% B.w) \%*\% t(B.w) +# compare estimation accuracy of simple and weighted cve estimate by +# Frobenius norm of difference of projections. +norm(PB - PB.s, type = 'F') +norm(PB - PB.w, type = 'F') } \references{ diff --git a/CVE_C/man/cve.call.Rd b/CVE_C/man/cve.call.Rd index d5e5076..9b6f0d4 100644 --- a/CVE_C/man/cve.call.Rd +++ b/CVE_C/man/cve.call.Rd @@ -14,6 +14,13 @@ cve.call(X, Y, method = "simple", nObs = sqrt(nrow(X)), h = NULL, \item{Y}{Responses (1 dimensional).} +\item{method}{specifies the CVE method variation as one of +\itemize{ + \item "simple" exact implementation as described in the paper listed + below. + \item "weighted" variation with addaptive weighting of slices. +}} + \item{nObs}{parameter for choosing bandwidth \code{h} using \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).} @@ -39,7 +46,8 @@ optimizing.} \item{gamma}{step-size reduction multiple.} -\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k)` #' as optimization starting value. (If supplied, \code{attempts} is +\item{V.init}{Semi-orthogonal matrix of dimensions `(ncol(X), ncol(X) - k) +as optimization starting value. (If supplied, \code{attempts} is set to 1 and \code{k} to match dimension)} \item{max.iter}{maximum number of optimization steps.} @@ -84,3 +92,27 @@ and \eqn{B = (b_1, ..., b_k)} is a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. Without loss of generality \eqn{B} is assumed to be orthonormal. 
} +\examples{ +# create B for simulation (k = 1) +B <- rep(1, 5) / sqrt(5) + +set.seed(21) +# creat predictor data X ~ N(0, I_p) +X <- matrix(rnorm(500), 100, 5) +# simulate response variable +# Y = f(B'X) + err +# with f(x1) = x1 and err ~ N(0, 0.25^2) +Y <- X \%*\% B + 0.25 * rnorm(100) + +# calculate cve with method 'simple' for k = 1 +set.seed(21) +cve.obj.simple1 <- cve(Y ~ X, k = 1) + +# same as +set.seed(21) +cve.obj.simple2 <- cve.call(X, Y, k = 1) + +# extract estimated B's. +coef(cve.obj.simple1, k = 1) +coef(cve.obj.simple2, k = 1) +} diff --git a/CVE_C/man/directions.cve.Rd b/CVE_C/man/directions.cve.Rd index 3fe5bca..3abd2db 100644 --- a/CVE_C/man/directions.cve.Rd +++ b/CVE_C/man/directions.cve.Rd @@ -15,3 +15,22 @@ \description{ Computes projected training data \code{X} for given dimension `k`. } +\examples{ +# create B for simulation (k = 1) +B <- rep(1, 5) / sqrt(5) +set.seed(21) +# creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(500), 100, 5) +# simulate response variable +# y = f(B'x) + err +# with f(x1) = x1 and err ~ N(0, 0.25^2) +y <- x \%*\% B + 0.25 * rnorm(100) +# calculate cve with method 'simple' for k = 1 +set.seed(21) +cve.obj.simple <- cve(y ~ x, k = 1, method = 'simple') +# get projected data for k = 1 +x.proj <- directions(cve.obj.simple, k = 1) +# plot y against projected data +plot(x.proj, y) + +} diff --git a/CVE_C/man/estimate.bandwidth.Rd b/CVE_C/man/estimate.bandwidth.Rd index 3c02a0e..aa0b96e 100644 --- a/CVE_C/man/estimate.bandwidth.Rd +++ b/CVE_C/man/estimate.bandwidth.Rd @@ -25,3 +25,23 @@ with \eqn{n} the sample size, \eqn{p} its dimension (\code{n <- nrow(X); p <- ncol(X)}) and the covariance-matrix \eqn{\Sigma} which is \code{(n-1)/n} times the sample covariance estimate. } +\examples{ +# set dimensions for simulation model +p <- 5; k <- 1 +# create B for simulation +B <- rep(1, p) / sqrt(p) +# samplsize +n <- 100 +set.seed(21) +#creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(n * p), n, p) +# simulate response variable +# y = f(B'x) + err +# with f(x1) = x1 and err ~ N(0, 0.25^2) +y <- x \%*\% B + 0.25 * rnorm(100) +# calculate cve with method 'simple' for k = 1 +set.seed(21) +cve.obj.simple <- cve(y ~ x, k = k) +print(cve.obj.simple$res$'1'$h) +print(estimate.bandwidth(x, k = k)) +} diff --git a/CVE_C/man/plot.cve.Rd b/CVE_C/man/plot.cve.Rd index 4f795e1..8893782 100644 --- a/CVE_C/man/plot.cve.Rd +++ b/CVE_C/man/plot.cve.Rd @@ -14,6 +14,33 @@ } \description{ Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values. 
+} +\examples{ +# create B for simulation +B <- cbind(rep(1, 6), (-1)^seq(6)) / sqrt(6) + +set.seed(21) +# creat predictor data x ~ N(0, I_p) +X <- matrix(rnorm(600), 100) + +# simulate response variable +# y = f(B'x) + err +# with f(x1, x2) = x1^2 + 2 x2 and err ~ N(0, 0.25^2) +Y <- (X \%*\% B[, 1])^2 + 2 * X \%*\% B[, 2] + rnorm(100, 0, .1) + +# Create bandwidth estimation function +estimate.bandwidth <- function(X, k, nObs) { + n <- nrow(X) + p <- ncol(X) + X_c <- scale(X, center = TRUE, scale = FALSE) + 2 * qchisq((nObs - 1) / (n - 1), k) * sum(X_c^2) / (n * p) +} +# calculate cve with method 'simple' for k = min.dim,...,max.dim +cve.obj.simple <- cve(Y ~ X, h = estimate.bandwidth, nObs = sqrt(nrow(X))) + +# elbow plot +plot(cve.obj.simple) + } \seealso{ see \code{\link{par}} for graphical parameters to pass through diff --git a/CVE_C/man/predict.cve.Rd b/CVE_C/man/predict.cve.Rd index 4660cd4..7998f47 100644 --- a/CVE_C/man/predict.cve.Rd +++ b/CVE_C/man/predict.cve.Rd @@ -22,6 +22,33 @@ prediced response of data \code{newdata}. \description{ Predict responces using reduced data with \code{\link{mars}}. } +\examples{ +# create B for simulation +B <- rep(1, 5) / sqrt(5) + +set.seed(21) +# creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(500), 100) + +# simulate response variable +# y = f(B'x) + err +# with f(x1) = x1 and err ~ N(0, 0.25^2) +y <- x \%*\% B + 0.25 * rnorm(100) + +x.train <- x[1:80, ] +x.test <- x[81:100, ] +y.train <- y[1:80, ] +y.test <- y[81:100, ] + +# calculate cve with method 'simple' for k = 1 +cve.obj.simple <- cve(y.train ~ x.train, k = 1) + +# predict y.test from x.test +yhat <- predict(cve.obj.simple, x.test, 1) + +# plot prediction against y.test +plot(yhat, y.test) +} \seealso{ \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}. } diff --git a/CVE_C/man/predict.dim.Rd b/CVE_C/man/predict.dim.Rd deleted file mode 100644 index 08f4490..0000000 --- a/CVE_C/man/predict.dim.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict_dim.R -\name{predict.dim} -\alias{predict.dim} -\title{Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation.} -\usage{ -predict.dim(object, ...) -} -\arguments{ -\item{object}{instance of class \code{cve} (result of \code{cve}, -\code{cve.call}).} - -\item{...}{ignored.} -} -\value{ -list with -\itemize{ - \item MSE: Mean Square Error, - \item k: predicted dimensions. -} -} -\description{ -Predicts SDR dimension using \code{\link[mda]{mars}} via a Cross-Validation. -} diff --git a/CVE_C/man/summary.cve.Rd b/CVE_C/man/summary.cve.Rd index 028e08a..731bb66 100644 --- a/CVE_C/man/summary.cve.Rd +++ b/CVE_C/man/summary.cve.Rd @@ -14,3 +14,22 @@ \description{ Prints a summary of a \code{cve} result. } +\examples{ +# create B for simulation +B <- rep(1, 5) / sqrt(5) + +set.seed(21) +#creat predictor data x ~ N(0, I_p) +x <- matrix(rnorm(500), 100) + +# simulate response variable +# y = f(B'x) + err +# with f(x1) = x1 and err ~ N(0, 0.25^2) +y <- x \%*\% B + 0.25 * rnorm(100) + +# calculate cve for unknown k between min.dim and max.dim. +cve.obj.simple <- cve(y ~ x) + +summary(cve.obj.simple) + +} diff --git a/CVE_C/src/callLogger.c b/CVE_C/src/callLogger.c index f1eb13a..c51cef5 100644 --- a/CVE_C/src/callLogger.c +++ b/CVE_C/src/callLogger.c @@ -13,22 +13,18 @@ * * @param logger Pointer to a SEXP R object representing an R function. * @param env Pointer to a SEXP R object representing an R environment. 
+ * @param attempt counter of attempts. + * @param iter optimization iteration counter. * @param L Pointer to a SEXP R object representing an R environment. * @param V Pointer memory area of size `nrowV * ncolV` storing `V`. - * @param nrowV Nr. of rows of `V`. - * @param ncolV Nr. of columns of `V`. * @param G Pointer memory area of size `nrowG * ncolG` storing `G`. - * @param nrowG Nr. of rows of `G`. - * @param ncolG Nr. of columns of `G`. * @param loss Current loss L(V). * @param err Errof for break condition (0.0 befor first iteration). * @param tau Current step-size. */ void callLogger(SEXP logger, SEXP env, const int attempt, const int iter, - const double* L, const int lenL, - const double* V, const int nrowV, const int ncolV, - const double* G, const int nrowG, const int ncolG, + const mat* L, const mat* V, const mat* G, const double loss, const double err, const double tau) { /* Create R objects to be passed to R logger function. */ // Attempt is converted from 0-indexed to 1-indexed as R index. @@ -36,13 +32,13 @@ void callLogger(SEXP logger, SEXP env, SEXP r_iter = PROTECT(ScalarInteger(iter + 1)); /* Create R representations of L, V and G */ - SEXP r_L = PROTECT(allocVector(REALSXP, lenL)); - SEXP r_V = PROTECT(allocMatrix(REALSXP, nrowV, ncolV)); - SEXP r_G = PROTECT(allocMatrix(REALSXP, nrowG, ncolG)); + SEXP r_L = PROTECT(allocVector(REALSXP, L->nrow)); + SEXP r_V = PROTECT(allocMatrix(REALSXP, V->nrow, V->ncol)); + SEXP r_G = PROTECT(allocMatrix(REALSXP, G->nrow, G->ncol)); /* Copy data to R objects */ - memcpy(REAL(r_L), L, lenL * sizeof(double)); - memcpy(REAL(r_V), V, nrowV * ncolV * sizeof(double)); - memcpy(REAL(r_G), G, nrowG * ncolG * sizeof(double)); + memcpy(REAL(r_L), L->elem, L->nrow * sizeof(double)); + memcpy(REAL(r_V), V->elem, V->nrow * V->ncol * sizeof(double)); + memcpy(REAL(r_G), G->elem, G->nrow * G->ncol * sizeof(double)); /* Build data list passed to logger */ SEXP data = PROTECT(allocVector(VECSXP, 6)); diff --git a/CVE_C/src/cve.c b/CVE_C/src/cve.c index 0b25d00..d13d1a6 100644 --- a/CVE_C/src/cve.c +++ b/CVE_C/src/cve.c @@ -2,121 +2,113 @@ #include "cve.h" -// TODO: clarify -static inline double gaussKernel(const double x, const double scale) { - return exp(scale * x * x); -} - -void cve_sub(const int n, const int p, const int q, - const double *X, const double *Y, const double h, - const unsigned int method, - const double momentum, - const double tau_init, const double tol_init, - const double slack, const double gamma, - const int maxIter, const int attempts, - double *V, double *L, - SEXP logger, SEXP loggerEnv) { - - int attempt = 0, iter, i, nn = (n * (n - 1)) / 2; +void cve(const mat *X, const mat *Y, const double h, + const unsigned int method, + const double momentum, + const double tau_init, const double tol_init, + const double slack, const double gamma, + const int maxIter, const int attempts, + mat *V, mat *L, + SEXP logger, SEXP loggerEnv) { + + // TODO: param and dim. validation. + int n = X->nrow, p = X->ncol, q = V->ncol; + int attempt = 0, iter; double loss, loss_last, loss_best, err, tau; double tol = tol_init * sqrt((double)(2 * q)); - double gKscale = -0.5 / (h * h); double agility = -2.0 * (1.0 - momentum) / (h * h); double c = agility / (double)n; + // TODO: check parameters! dim, ... + /* Create further intermediate or internal variables. 
*/ - double *Q = (double*)R_alloc(p * p, sizeof(double)); - double *V_best = (double*)R_alloc(p * q, sizeof(double)); - double *L_best = (double*)R_alloc(n, sizeof(double)); - double *V_tau = (double*)R_alloc(p * q, sizeof(double)); - double *X_diff = (double*)R_alloc(nn * p, sizeof(double)); - double *X_proj = (double*)R_alloc(nn * p, sizeof(double)); - double *y1 = (double*)R_alloc(n, sizeof(double)); - double *vecD = (double*)R_alloc(nn, sizeof(double)); - double *vecK = (double*)R_alloc(nn, sizeof(double)); - double *vecS = (double*)R_alloc(nn, sizeof(double)); - double *colSums = (double*)R_alloc(n, sizeof(double)); - double *G = (double*)R_alloc(p * q, sizeof(double)); - double *A = (double*)R_alloc(p * p, sizeof(double)); + mat *lvecD_e = (void*)0; + mat *Ysquared = (void*)0; + mat *XV = (void*)0; + mat *lvecD = (void*)0; // TODO: combine. aka in-place lvecToSym + mat *D = (void*)0; // TODO: combine. aka in-place lvecToSym + mat *lvecK = (void*)0; // TODO: combine. aka in-place lvecToSym + mat *K = (void*)0; // TODO: combine. aka in-place lvecToSym + mat *colSumsK = (void*)0; + mat *W = (void*)0; + mat *y1 = (void*)0; + mat *y2 = (void*)0; + mat *S = (void*)0; + mat *tmp1 = (void*)0; + mat *tmp2 = (void*)0; + mat *G = (void*)0; + mat *A = (void*)0; + mat *V_tau = (void*)0; + mat *V_best = (void*)0; + mat *L_best = (void*)0; - double *V_init = (void*)0; - if (attempts < 1) { - V_init = (double*)R_alloc(p * q, sizeof(double)); - memcpy(V_init, V, p * q * sizeof(double)); + /* Allocate appropiate amount of working memory. */ + int workLen = 2 * (p + 1) * p; + if (workLen < n) { + workLen = n; } - - /* Determine size of working memory used by subroutines. */ - const int workLen = getWorkLen(n, p, q); double *workMem = (double*)R_alloc(workLen, sizeof(double)); - /* Compute X_diff, this is static for the entire algorithm. */ - rowDiffs(X, n, p, X_diff); + lvecD_e = rowDiffSquareSums(X, lvecD_e); + Ysquared = hadamard(1.0, Y, Y, 0.0, Ysquared); do { /* (Re)set learning rate. */ tau = tau_init; /* Check if start value for `V` was supplied. */ - if (V_init == (void*)0) { + if (attempts > 0) { /* Sample start value from stiefel manifold. */ - rStiefel(p, q, V, workMem, workLen); - } else { - /* (Re)Set start value of `V` to `V_init`. */ - memcpy(V, V_init, p * q * sizeof(double)); + V = rStiefel(p, q, V, workMem); } - /* Create projection matrix `Q <- I - V V^T` for initial `V`. */ - nullProj(V, p, q, Q); - - /* Compute Distance vector. */ - matrixprod(X, n, p, Q, p, p, X_proj); // here X_proj is only `(n, p)`. - rowDiffSquareSums(X_proj, n, p, vecD); - + /* Embed X_i's in V space */ + XV = matrixprod(1.0, X, V, 0.0, XV); + /* Compute embedded distances */ + lvecD = lincomb(1.0, lvecD_e, -1.0, rowDiffSquareSums(XV, lvecD)); /* Apply kernel to distances. */ - for (i = 0; i < nn; ++i) { - vecK[i] = gaussKernel(vecD[i], gKscale); + lvecK = applyKernel(lvecD, h, gauss, lvecK); + /* Transform lower vectors lvecD, lvecK into sym. matrices. 
*/ + D = lvecToSym(lvecD, 0.0, D); + K = lvecToSym(lvecK, 1.0, K); + /* Compute column sums of kernel matrix K */ + colSumsK = colSums(K, colSumsK); + /* Normalize K columns to obtain weight matrix W */ + W = colApply(K, '/', colSumsK, W); + /* first and second order weighted responces */ + y1 = matrixprod(1.0, W, Y, 0.0, y1); + y2 = matrixprod(1.0, W, Ysquared, 0.0, y2); + /* Compute losses */ + L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L)); + /* Compute initial loss */ + if (method == simple) { + loss_last = mean(L); + /* Calculate the scaling matrix S */ + S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem); + } else if (method == weighted) { + colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK); + loss_last = dot(L, '/', colSumsK); + c = agility / sum(colSumsK); + /* Calculate the scaling matrix S */ + S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem); + } else { + // TODO: error handling! } - - /* Compute col(row) sums of kernal vector (sym. packed lower tri - * matrix.), because `K == K^T` the rowSums are equal to colSums. */ - rowSumsSymVec(vecK, n, 1.0, colSums); - - /* Compute loss given the kernel vector and its column sums. - * Additionally the first momentum `y1` is computed and stored in - * the working memory (only intermidiate result, needed for `vecS`). */ - loss_last = cost(method, n, Y, vecK, colSums, y1, L); - - /* Calc the scaling vector used for final computation of grad. */ - scaling(method, n, Y, y1, L, vecD, vecK, colSums, vecS); - - /* Compute the eucledian gradient `G`. */ - rowSweep(X_diff, nn, p, "*", vecS, X_proj); - crossprod(X_diff, nn, p, X_proj, nn, p, workMem); - matrixprod(workMem, p, p, V, p, q, G); - if (method == CVE_METHOD_WEIGHTED) { - /* Compute summ of all kernel applied distances by summing the - * colSums of the kernel matrix. */ - // c = -(double)n; // to scale with sum(K) - n - // for (i = 0; i < n; ++i) { - // c += colSums[i]; - // } - // TODO: check for division by zero, but should not happen!!! - c = agility / (sum(colSums, n) - (double)n); - } - scale(c, G, p * q); // in-place + /* Gradient */ + tmp1 = matrixprod(1.0, S, X, 0.0, tmp1); + tmp2 = crossprod(1.0, X, tmp1, 0.0, tmp2); + G = matrixprod(c, tmp2, V, 0.0, G); if (logger) { callLogger(logger, loggerEnv, attempt, /* iter <- 0L */ -1, - L, n, - V, p, q, - G, p, q, + L, V, G, loss_last, /* err <- NA */ -1.0, tau); } /* Compute Skew-Symmetric matrix `A` used in Cayley transform. * `A <- tau * (G V^T - V G^T) + 0 * A`*/ - skew(p, q, tau, G, V, 0.0, A); + A = skew(tau, G, V, 0.0, A); for (iter = 0; iter < maxIter; ++iter) { /* Before Starting next iteration check if the Uer has requested an @@ -126,50 +118,57 @@ void cve_sub(const int n, const int p, const int q, R_CheckUserInterrupt(); /* Move `V` along the gradient direction. */ - cayleyTransform(p, q, A, V, V_tau, workMem); + V_tau = cayleyTransform(A, V, V_tau, workMem); - /* Create projection matrix for `V_tau`. */ - nullProj(V_tau, p, q, Q); - - /* Compute Distance vector. */ - matrixprod(X, n, p, Q, p, p, X_proj); // here X_proj only `(n, p)`. - rowDiffSquareSums(X_proj, n, p, vecD); + // Rprintf("Start attempt(%2d), iter (%2d): err: %f, loss: %f, tau: %f\n", + // attempt, iter, dist(V, V_tau), loss_last, tau); + /* Embed X_i's in V space */ + XV = matrixprod(1.0, X, V_tau, 0.0, XV); + /* Compute embedded distances */ + lvecD = lincomb(1.0, lvecD_e, -1.0, rowDiffSquareSums(XV, lvecD)); /* Apply kernel to distances. 
*/ - for (i = 0; i < nn; ++i) { - vecK[i] = gaussKernel(vecD[i], gKscale); + lvecK = applyKernel(lvecD, h, gauss, lvecK); + /* Transform lower vectors lvecD, lvecK into sym. matrices. */ + D = lvecToSym(lvecD, 0.0, D); + K = lvecToSym(lvecK, 1.0, K); + /* Compute column sums of kernel matrix K */ + colSumsK = colSums(K, colSumsK); + /* Normalize K columns to obtain weight matrix W */ + W = colApply(K, '/', colSumsK, W); + /* first and second order weighted responces */ + y1 = matrixprod(1.0, W, Y, 0.0, y1); + y2 = matrixprod(1.0, W, Ysquared, 0.0, y2); + /* Compute losses */ + L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L)); + /* Compute loss */ + if (method == simple) { + loss = mean(L); + } else if (method == weighted) { + colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK); + loss = dot(L, '/', colSumsK); + } else { + // TODO: error handling! } - /* Compute col(row) sums of kernal vector (sym. packed lower tri - * matrix.), because `K == K^T` the rowSums are equal to colSums. */ - rowSumsSymVec(vecK, n, 1.0, colSums); - - /* Compute loss given the kernel vector and its column sums. - * Additionally the first momentum `y1` is computed and stored in - * the working memory (only intermidiate result, needed for vecS).*/ - loss = cost(method, n, Y, vecK, colSums, y1, L); - /* Check if step is appropriate, iff not reduce learning rate. */ if ((loss - loss_last) > loss_last * slack) { tau *= gamma; - scale(gamma, A, p * p); + A = elemApply(A, '*', gamma, A); // scale A by gamma continue; } - /* Compute error, use workMem (keep first `n`, they store `y1`). */ - skew(p, q, 1.0, V, V_tau, 0.0, workMem); - err = norm(workMem, p, p, "F"); + /* Compute error, use workMem. */ + err = dist(V, V_tau); /* Shift next step to current step and store loss to last. */ - memcpy(V, V_tau, p * q * sizeof(double)); + V = copy(V_tau, V); loss_last = loss; if (logger) { callLogger(logger, loggerEnv, attempt, iter, - L, n, - V, p, q, - G, p, q, + L, V, G, loss, err, tau); } @@ -178,41 +177,35 @@ void cve_sub(const int n, const int p, const int q, break; } - /* Continue computing the gradient. */ - /* Calc the scaling vector used for final computation of grad. */ - scaling(method, n, Y, y1, L, vecD, vecK, colSums, vecS); - - /* Compute the eucledian gradient `G`. */ - rowSweep(X_diff, nn, p, "*", vecS, X_proj); - crossprod(X_diff, nn, p, X_proj, nn, p, workMem); - // /* Update without momentum */ - // matrixprod(workMem, p, p, V, p, q, G); - // scale(-2. / (((double)n) * h * h), G, p * q); // in-place - /* G <- momentum * G + agility * workMem V */ - - if (method == CVE_METHOD_WEIGHTED) { - /* Compute summ of all kernel applied distances by summing the - * colSums of the kernel matrix. */ - // TODO: check for division by zero, but should not happen!!! - c = agility / (sum(colSums, n) - (double)n); + if (method == simple) { + /* Calculate the scaling matrix S */ + S = laplace(adjacence(L, Y, y1, D, W, gauss, S), workMem); + } else if (method == weighted) { + /* Calculate the scaling matrix S */ + S = laplace(adjacence(L, Y, y1, D, K, gauss, S), workMem); + c = agility / sum(colSumsK); + } else { + // TODO: error handling! } - F77_NAME(dgemm)("N", "N", &p, &q, &p, - &c, workMem, &p, V, &p, - &momentum, G, &p); + + /* Gradient */ + tmp1 = matrixprod(1.0, S, X, 0.0, tmp1); + tmp2 = crossprod(1.0, X, tmp1, 0.0, tmp2); + G = matrixprod(c, tmp2, V, momentum, G); /* Compute Skew-Symmetric matrix `A` used in Cayley transform. 
* `A <- tau * (G V^T - V G^T) + 0 * A`*/ - skew(p, q, tau, G, V, 0.0, A); + A = skew(tau, G, V, 0.0, A); } /* Check if current attempt improved previous ones */ if (attempt == 0 || loss < loss_best) { loss_best = loss; - memcpy(V_best, V, p * q * sizeof(double)); - memcpy(L_best, L, n * sizeof(double)); + V_best = copy(V, V_best); + L_best = copy(L, L_best); } } while (++attempt < attempts); - memcpy(V, V_best, p * q * sizeof(double)); - memcpy(L, L_best, n * sizeof(double)); + V = copy(V_best, V); + L = copy(L_best, L); } diff --git a/CVE_C/src/cve.h b/CVE_C/src/cve.h index 6216ac4..4c60f2b 100644 --- a/CVE_C/src/cve.h +++ b/CVE_C/src/cve.h @@ -12,107 +12,90 @@ #define CVE_MEM_CHUNK_SIZE 2032 #define CVE_MEM_CHUNK_SMALL 1016 +#define BLOCK_SIZE 8 -/* Bis masks for method types */ -#define CVE_METHOD_WEIGHTED 1 +/** + * @struct Matrix of dimensions `nrow x ncol`. + */ +typedef struct matrix { + int nrow; /**< Number of rows */ + int ncol; /**< Number of columns */ + void *origin; /**< Reference to origin, see `asMat()`. */ + double *elem; /**< Column-major array of matrix elements. */ +} mat; -// typedef struct { -// unsigned int nrow; -// unsigned int ncol; -// unsigned int memsize; -// double *data; -// } mat; +typedef enum { + simple, + weighted +} method; -// mat* Matrix(const unsigned int nrow, const unsigned int ncol); +typedef enum { + gauss +} kernel; -void cve_sub(const int n, const int p, const int q, - const double *X, const double *Y, const double h, - const unsigned int method, - const double momentum, - const double tau_init, const double tol_init, - const double slack, const double gamma, - const int maxIter, int attempts, - double *V, double *L, - SEXP logger, SEXP loggerEnv); + +void cve(const mat *X, const mat *Y, const double h, + const unsigned int method, + const double momentum, + const double tau_init, const double tol_init, + const double slack, const double gamma, + const int maxIter, const int attempts, + mat *V, mat *L, + SEXP logger, SEXP loggerEnv); void callLogger(SEXP logger, SEXP env, - const int attempt, const int epoch, - const double* L, const int lenL, - const double* V, const int nrowV, const int ncolV, - const double* G, const int nrowG, const int ncolG, + const int attempt, const int iter, + const mat* L, const mat* V, const mat* G, const double loss, const double err, const double tau); -/* CVE sub-routines */ -int getWorkLen(const int n, const int p, const int q); -double cost(const unsigned int method, - const int n, - const double *Y, - const double *vecK, - const double *colSums, - double *y1, double *L); -void scaling(const unsigned int method, - const int n, - const double *Y, const double *y1, const double *L, - const double *vecD, const double *vecK, - const double *colSums, - double *vecS); +/******************************************************************************/ +/** rStiefel.c **/ +/******************************************************************************/ +/* Random element from Stiefel manifold. 
*/ +mat* rStiefel(const int p, const int q, mat *V, double *workMem); -/* rStiefel */ -void rStiefel(const int p, const int q, double *V, - double *workMem, int workLen); +/******************************************************************************/ +/** matrix.c **/ +/******************************************************************************/ +/* Create and Copy matrices */ +mat* matrix(const int nrow, const int ncol); +mat* zero(const int nrow, const int ncol); +mat* copy(mat *src, mat *dest); +/* Matrix to scalar */ +double sum(const mat *A); +double mean(const mat *A); +double squareSum(const mat* A); +double dot(const mat *A, const char op, const mat *B); +double dist(const mat *A, const mat *B); +/* Matrix to vector (`ncol == 1` matrices, aka row vectors) */ +mat* rowSums(const mat *A, mat *sums); +mat* colSums(const mat *A, mat *sums); +mat* rowDiffSquareSums(const mat *X, mat *lvecD); +/* Matrix and scalar to Matrix */ +mat* elemApply(mat *A, const char op, const double scalar, mat *B); +/* Matrix and vector to Matrix */ +mat* colApply(mat *A, const char op, mat *B, mat *C); +/* Matrix and Matrix to Matrix */ +mat* lincomb(const double alpha, const mat *A, const double beta, mat *B); +mat* matrixprod(const double alpha, const mat *A, const mat *B, + double beta, mat* C); +mat* crossprod(const double alpha, const mat *A, const mat *B, + double beta, mat* C); +mat* hadamard(const double alpha, const mat* A, const mat *B, + double beta, mat* C); +mat* skew(const double alpha, mat* A, mat *B, double beta, mat* C); +/* Matrix Transformations */ +mat* cayleyTransform(mat *A, mat *B, mat *C, double *workMem); +mat* laplace(mat *A, double *workMem); +mat* lvecToSym(const mat* A, const double diag, mat* B); -/* MATRIX */ -double sum(const double *A, const int nelem); - -double norm(const double *A, const int nrow, const int ncol, - const char *type); - -void matrixprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C); - -void crossprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C); - -void skew(const int nrow, const int ncol, - double alpha, const double *A, const double *B, - double beta, - double *C); - -void nullProj(const double *B, const int nrowB, const int ncolB, - double *Q); - -void scale(const double s, double *A, const int nelem); - -void cayleyTransform(const int p, const int q, - const double *A, const double *B, - double *X, double *workMem); - -/* Row and column opperations. 
*/ -void rowSums(const double *A, const int nrow, const int ncol, - double *sum); - -void colSums(const double *A, const int nrow, const int ncol, - double *sum); - -void rowSquareSums(const double *A, const int nrow, const int ncol, - double *sum); - -void rowSumsSymVec(const double *Avec, const int nrow, - const double diag, - double *sum); - -void rowDiffs(const double* X, const int nrow, const int ncol, - double *diffs); - -void rowDiffSquareSums(const double* X, const int nrow, const int ncol, - double *sum); - -/* SWEEP */ -void rowSweep(const double *A, const int nrow, const int ncol, - const char* op, - const double *v, // vector of length nrow - double *C); +/******************************************************************************/ +/** cve_subroutines.c **/ +/******************************************************************************/ +/* CVE specific sub-routines */ +mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1, + const mat *mat_D, const mat *mat_W, kernel kernel, + mat *mat_S); +mat* applyKernel(const mat* A, const double h, kernel kernel, mat* B); #endif /* CVE_INCLUDE_GUARD_H_ */ diff --git a/CVE_C/src/cve_subroutines.c b/CVE_C/src/cve_subroutines.c index ad3faf7..50ef975 100644 --- a/CVE_C/src/cve_subroutines.c +++ b/CVE_C/src/cve_subroutines.c @@ -1,95 +1,257 @@ #include "cve.h" -int getWorkLen(const int n, const int p, const int q) { - int mpq; /**< Max of p and q */ - int nn = ((n - 1) * n) / 2; +/** + * Applies the requested kernel element-wise. + * + * @param A matrix to apply the kernel to. + * @param h bandwidth parameter. + * @param kernel the kernel to be used. + * @param B (in/out) matrix `A` with element-wise applied kernel. + * + * @returns ether the passed `B`, or a new created matrix if `B` was NULL. + */ +mat* applyKernel(const mat* A, const double h, kernel kernel, mat* B) { + int i, nn = A->nrow * A->ncol; + int nn4 = 4 * (nn / 4); + double scale; + const double * restrict a; + double * restrict b; - if (p > q) { - mpq = p; - } else { - mpq = q; + if (!B) { + B = matrix(A->nrow, A->ncol); + } else if (nn != B->nrow * B->ncol) { + // TODO: error handling! } - if (nn * p < (mpq + 1) * mpq) { - return 2 * (mpq + 1) * mpq; - } else { - return (nn + mpq) * mpq; + + a = A->elem; + b = B->elem; + switch (kernel) { + case gauss: + scale = -0.5 / (h * h); + for (i = 0; i < nn4; i += 4) { + b[i] = exp(scale * a[i] * a[i]); + b[i + 1] = exp(scale * a[i + 1] * a[i + 1]); + b[i + 2] = exp(scale * a[i + 2] * a[i + 2]); + b[i + 3] = exp(scale * a[i + 3] * a[i + 3]); + } + for (; i < nn; ++i) { + b[i] = exp(scale * a[i] * a[i]); + } + break; + default: + // TODO: error handling! + break; } + + return B; } -double cost(const unsigned int method, - const int n, - const double *Y, - const double *vecK, - const double *colSums, - double *y1, double *L) { - int i, j, k; - double tmp, sum; +/** + * Scaling matrix `S` defined as + * s_{i j} = (L_i - (Y_j - y1_i)^2) * d_{i j} * w_{i j} + * + * Mapping of vectors `L`, `y1` and `Y` combinations. + * + * Y[j] + * ------ j -----> + * s[0] s[n] . . . s[jn] . . . s[(j-1)n] + * s[1] s[n+1] . . . s[jn+1] . . . s[(j-1)n+1] + * | . . . . . . + * | . . . . . . + * . . . . . . + * L[i], y1[i] i s[i] s[n+i] . . . s[jn+i] . . . s[(j-1)n+i] + * . . . . . . + * | . . . . . . + * v . . . . . . + * s[n-1] s[2n-1] . . . s[n-1] . . . s[nn-1] + * + * @param L per sample loss vector of (lenght `n`). + * @param Y responces (lenght `n`). + * @param y1 weighted responces (lenght `n`). + * @param D distance matrix (dim. 
`n x n`). + * @param W weight matrix (dim. `n x n`). + * @param kernel the kernel to be used. + * + * @returns passed matrix `S` if not NULL, then a new `n x n` matrix is created. + * + * @example Basically equivalent to the following R function. + * r_LS <- function(L, Y, y1, D, W) { + * # get dimension + * n <- length(L) + * # Indices + * i <- rep(1:n, n) + * j <- rep(1:n, each = n) + * # Compute S + * matrix(L[i] - (Y[j] - y1[i])^2, n) * W * D + * } + * + * @details mapping for indices used in blocked implementation. + * + * n ..... Dimensions of `S`, a `n x n` matrix. + * B ..... BLOCK_SIZE + * rB .... block reminder, aka rB = B % n. + * + * Y[j] + * 0 ----- j -----> n + * 0 +--------------------+ + * | ' | + * ' | k B x n | + * ' | v | + * ' +--------------------+ + * L[i], y1[i] i | ' | + * ' | k B x n | + * ' | v | + * v +--------------------+ + * | k rB x n | + * n +--------------------+ + */ +// TODO: fix: cache misses in Y?! +mat* adjacence(const mat *vec_L, const mat *vec_Y, const mat *vec_y1, + const mat *mat_D, const mat *mat_W, kernel kernel, + mat *mat_S) { + int i, j, k, n = vec_L->nrow; + int block_size, block_batch_size; + int max_size = 64 < n ? 64 : n; // Block Size set to 64 - for (i = 0; i < n; ++i) { - y1[i] = Y[i]; - L[i] = Y[i] * Y[i]; + double Y_j, tmp0, tmp1, tmp2, tmp3; + double *Y = vec_Y->elem; + double *L = vec_L->elem; + double *y1 = vec_y1->elem; + double *D, *W, *S; + + // TODO: Check dims. + + if (!mat_S) { + mat_S = matrix(n, n); } - for (k = j = 0; j < n; ++j) { - for (i = j + 1; i < n; ++i, ++k) { - y1[i] += Y[j] * vecK[k]; - y1[j] += Y[i] * vecK[k]; - L[i] += Y[j] * Y[j] * vecK[k]; - L[j] += Y[i] * Y[i] * vecK[k]; + for (i = 0; i < n; i += block_size) { + /* Set blocks (left upper corner) */ + S = mat_S->elem + i; + D = mat_D->elem + i; + W = mat_W->elem + i; + /* determine block size */ + block_size = n - i; + if (block_size > max_size) { + block_size = max_size; } - } - - for (i = 0; i < n; ++i) { - y1[i] /= colSums[i]; - L[i] /= colSums[i]; - } - - tmp = 0.0; - if (method == CVE_METHOD_WEIGHTED) { - sum = 0.0; - for (i = 0; i < n; ++i) { - tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]); - sum += colSums[i]; - } - return tmp / (sum - (double)n); // TODO: check for division by zero! 
- } else { - for (i = 0; i < n; ++i) { - tmp += (L[i] -= y1[i] * y1[i]); - } - return tmp / (double)n; - } -} - -void scaling(const unsigned int method, - const int n, - const double *Y, const double *y1, const double *L, - const double *vecD, const double *vecK, - const double *colSums, - double *vecS) { - int i, j, k, nn = (n * (n - 1)) / 2; - double tmp; - - if (method == CVE_METHOD_WEIGHTED) { - for (k = j = 0; j < n; ++j) { - for (i = j + 1; i < n; ++i, ++k) { - tmp = Y[j] - y1[i]; - vecS[k] = (L[i] - (tmp * tmp)); - tmp = Y[i] - y1[j]; - vecS[k] += (L[j] - (tmp * tmp)); - } - } - } else { - for (k = j = 0; j < n; ++j) { - for (i = j + 1; i < n; ++i, ++k) { - tmp = Y[j] - y1[i]; - vecS[k] = (L[i] - (tmp * tmp)) / colSums[i]; - tmp = Y[i] - y1[j]; - vecS[k] += (L[j] - (tmp * tmp)) / colSums[j]; + block_batch_size = 4 * (block_size / 4); + /* for each column in the block */ + for (j = 0; j < n; ++j, S += n, D += n, W += n) { + Y_j = Y[j]; + /* iterate over block rows */ + for (k = 0; k < block_batch_size; k += 4) { + tmp0 = Y_j - y1[k]; + tmp1 = Y_j - y1[k + 1]; + tmp2 = Y_j - y1[k + 2]; + tmp3 = Y_j - y1[k + 3]; + S[k] = (L[k] - (tmp0 * tmp0)) * D[k] * W[k]; + S[k + 1] = (L[k + 1] - (tmp1 * tmp1)) * D[k + 1] * W[k + 1]; + S[k + 2] = (L[k + 2] - (tmp2 * tmp2)) * D[k + 2] * W[k + 2]; + S[k + 3] = (L[k + 3] - (tmp3 * tmp3)) * D[k + 3] * W[k + 3]; + } + for (; k < block_size; ++k) { + tmp0 = Y_j - y1[k]; + S[k] = (L[k] - (tmp0 * tmp0)) * D[k] * W[k]; } } + L += block_size; + y1 += block_size; } - for (k = 0; k < nn; ++k) { - vecS[k] *= vecK[k] * vecD[k]; - } + return mat_S; } + +// int getWorkLen(const int n, const int p, const int q) { +// int mpq; /**< Max of p and q */ +// int nn = ((n - 1) * n) / 2; + +// if (p > q) { +// mpq = p; +// } else { +// mpq = q; +// } +// if (nn * p < (mpq + 1) * mpq) { +// return 2 * (mpq + 1) * mpq; +// } else { +// return (nn + mpq) * mpq; +// } +// } + +// double cost(const unsigned int method, +// const int n, +// const double *Y, +// const double *vecK, +// const double *colSums, +// double *y1, double *L) { +// int i, j, k; +// double tmp, sum; + +// for (i = 0; i < n; ++i) { +// y1[i] = Y[i]; +// L[i] = Y[i] * Y[i]; +// } + +// for (k = j = 0; j < n; ++j) { +// for (i = j + 1; i < n; ++i, ++k) { +// y1[i] += Y[j] * vecK[k]; +// y1[j] += Y[i] * vecK[k]; +// L[i] += Y[j] * Y[j] * vecK[k]; +// L[j] += Y[i] * Y[i] * vecK[k]; +// } +// } + +// for (i = 0; i < n; ++i) { +// y1[i] /= colSums[i]; +// L[i] /= colSums[i]; +// } + +// tmp = 0.0; +// if (method == CVE_METHOD_WEIGHTED) { +// sum = 0.0; +// for (i = 0; i < n; ++i) { +// tmp += (colSums[i] - 1.0) * (L[i] -= y1[i] * y1[i]); +// sum += colSums[i]; +// } +// return tmp / (sum - (double)n); // TODO: check for division by zero! 
+// } else { +// for (i = 0; i < n; ++i) { +// tmp += (L[i] -= y1[i] * y1[i]); +// } +// return tmp / (double)n; +// } +// } + +// void scaling(const unsigned int method, +// const int n, +// const double *Y, const double *y1, const double *L, +// const double *vecD, const double *vecK, +// const double *colSums, +// double *vecS) { +// int i, j, k, nn = (n * (n - 1)) / 2; +// double tmp; + +// if (method == CVE_METHOD_WEIGHTED) { +// for (k = j = 0; j < n; ++j) { +// for (i = j + 1; i < n; ++i, ++k) { +// tmp = Y[j] - y1[i]; +// vecS[k] = (L[i] - (tmp * tmp)); +// tmp = Y[i] - y1[j]; +// vecS[k] += (L[j] - (tmp * tmp)); +// } +// } +// } else { +// for (k = j = 0; j < n; ++j) { +// for (i = j + 1; i < n; ++i, ++k) { +// tmp = Y[j] - y1[i]; +// vecS[k] = (L[i] - (tmp * tmp)) / colSums[i]; +// tmp = Y[i] - y1[j]; +// vecS[k] += (L[j] - (tmp * tmp)) / colSums[j]; +// } +// } +// } + +// for (k = 0; k < nn; ++k) { +// vecS[k] *= vecK[k] * vecD[k]; +// } +// } diff --git a/CVE_C/src/export.c b/CVE_C/src/export.c index f7b9d16..73bef89 100644 --- a/CVE_C/src/export.c +++ b/CVE_C/src/export.c @@ -1,27 +1,37 @@ #include "cve.h" -// SEXP rStiefel_c(SEXP pin, SEXP qin) { -// int p = asInteger(pin); -// int q = asInteger(qin); +/** + * Converts a `SEXP` (S EXPression) into a matrix struct `mat`. + * + * @param S source struct to be converted. + * + * @details Reuses the memory area of the SEXP object, therefore manipulation + * of the returned matrix works in place of the SEXP object. In addition, + * a reference to the original SEXP is stored and will be retriefed from + * `asSEXP()` if the matrix was created through this function. + */ +static mat* asMat(SEXP S) { + // TODO: error checking and conversion + mat* M = (mat*)R_alloc(1, sizeof(mat)); + if (isMatrix(S)) { + M->nrow = (int)nrows(S); + M->ncol = (int)ncols(S); + } else { + M->nrow = (int)length(S); + M->ncol = 1; + } + M->origin = S; + M->elem = REAL(S); + return M; +} -// SEXP Vout = PROTECT(allocMatrix(REALSXP, p, q)); - -// int workLen = 2 * (p + 1) * q; -// double *workMem = (double*)R_alloc(workLen, sizeof(double)); - -// rStiefel(p, q, REAL(Vout), workMem, workLen); - -// UNPROTECT(1); -// return Vout; -// } - -SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, - SEXP method, - SEXP V, // initial - SEXP momentum, SEXP tau, SEXP tol, - SEXP slack, SEXP gamma, - SEXP maxIter, SEXP attempts, - SEXP logger, SEXP loggerEnv) { +SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h, + SEXP method, + SEXP V, // initial + SEXP momentum, SEXP tau, SEXP tol, + SEXP slack, SEXP gamma, + SEXP maxIter, SEXP attempts, + SEXP logger, SEXP loggerEnv) { /* Handle logger parameter, set to NULL pointer if not a function. */ if (!(isFunction(logger) && isEnvironment(loggerEnv))) { logger = (void*)0; @@ -42,20 +52,19 @@ SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, /* Check `attempts`, if not positive use passed values of `V` as * optimization start value without further attempts. * Therefor, copy from `V` to `Vout`. */ - if (asInteger(attempts) < 1L) { + if (asInteger(attempts) < 1) { // TODO: Check for memcpy(REAL(Vout), REAL(V), p * q * sizeof(double)); } - /* Call CVE simple subroutine. 
*/ - cve_sub(n, p, q, - REAL(X), REAL(Y), asReal(h), - asInteger(method), - asReal(momentum), asReal(tau), asReal(tol), - asReal(slack), asReal(gamma), - asInteger(maxIter), asInteger(attempts), - REAL(Vout), REAL(Lout), - logger, loggerEnv); + /* Call CVE */ + cve(asMat(X), asMat(Y), asReal(h), + asInteger(method), + asReal(momentum), asReal(tau), asReal(tol), + asReal(slack), asReal(gamma), + asInteger(maxIter), asInteger(attempts), + asMat(Vout), asMat(Lout), + logger, loggerEnv); /* Build output list object with names "V", "L" */ SEXP out = PROTECT(allocVector(VECSXP, 2)); diff --git a/CVE_C/src/init.c b/CVE_C/src/init.c index 767231b..8f480c4 100644 --- a/CVE_C/src/init.c +++ b/CVE_C/src/init.c @@ -4,16 +4,16 @@ #include /* .Call calls */ -extern SEXP cve(SEXP X, SEXP Y, SEXP k, SEXP h, - SEXP method, - SEXP V, // initial - SEXP momentum, SEXP tau, SEXP tol, - SEXP slack, SEXP gamma, - SEXP maxIter, SEXP attempts, - SEXP logger, SEXP loggerEnv); +extern SEXP cve_export(SEXP X, SEXP Y, SEXP k, SEXP h, + SEXP method, + SEXP V, // initial + SEXP momentum, SEXP tau, SEXP tol, + SEXP slack, SEXP gamma, + SEXP maxIter, SEXP attempts, + SEXP logger, SEXP loggerEnv); static const R_CallMethodDef CallEntries[] = { - {"cve", (DL_FUNC) &cve, 15}, + {"cve_export", (DL_FUNC) &cve_export, 15}, {NULL, NULL, 0} }; diff --git a/CVE_C/src/matrix.c b/CVE_C/src/matrix.c index 2e3d261..bc50f0b 100644 --- a/CVE_C/src/matrix.c +++ b/CVE_C/src/matrix.c @@ -1,191 +1,978 @@ #include "cve.h" -// mat* Matrix(const unsigned int nrow, const unsigned int ncol) { -// mat* newMat = (mat*)R_alloc(1, sizeof(mat)); -// newMat->nrow = nrow; -// newMat->ncol = ncol; -// newMat->memsize = nrow * ncol; -// newMat->data = (double*)R_alloc(nrow * ncol, sizeof(double)); - -// return newMat; -// } - -double sum(const double *A, const int nelem) { - int i, nelemb = (nelem / 4) * 4; - double sum = 0.0; - - for (i = 0; i < nelemb; i += 4) { - sum += A[i] - + A[i + 1] - + A[i + 2] - + A[i + 3]; - } - for (; i < nelem; ++i) { - sum += A[i]; - } - - return sum; +/** + * Creates a matrix. + * + * @param nrow number of rows. + * @param ncol number of columns. + * + * @returns an uninitialized `nrow x ncol` matrix. + * + * @details matrix elements are NOT initialized, its content is "random". + */ +mat* matrix(const int nrow, const int ncol) { + mat* M = (mat*)R_alloc(1, sizeof(mat)); + M->nrow = nrow; + M->ncol = ncol; + M->origin = (void*)0; + M->elem = (double*)R_alloc(nrow * ncol, sizeof(double)); + return M; } -double norm(const double *A, const int nrow, const int ncol, - const char *type) { - int i, nelem = nrow * ncol; - int nelemb = (nelem / 4) * 4; - double sum = 0.0; +/** + * Creates a zero matrix. + * + * @param nrow number of rows. + * @param ncol number of columns. + * + * @returns a `nrow x ncol` matrix with all elements equal 0. + */ +mat* zero(const int nrow, const int ncol) { + mat* M = matrix(nrow, ncol); + /* Set all emements zero */ + memset(M->elem, 0, nrow * ncol * sizeof(double)); + return M; +} - if (*type == 'F') { - for (i = 0; i < nelemb; i += 4) { - sum += A[i] * A[i] - + A[i + 1] * A[i + 1] - + A[i + 2] * A[i + 2] - + A[i + 3] * A[i + 3]; +/** + * Copies elements form `src` to `dest` matrix. + * + * @param dest (in/out) destination matrix. + * @param src source matrix. + * + * @returns passed dest matrix. + */ +mat* copy(mat *src, mat *dest) { + if (!dest) { + dest = matrix(src->nrow, src->ncol); + } else if (src->nrow != dest->nrow || src->ncol != dest->ncol) { + // TODO: error handling! 
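+        // A minimal sketch of the intended report, following the
+        // `error()` call already used in `cayleyTransform` below
+        // (message text is illustrative only):
+        //     error("[ copy ] dimension mismatch: (%d, %d) vs. (%d, %d)",
+        //           src->nrow, src->ncol, dest->nrow, dest->ncol);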
+ } + memcpy(dest->elem, src->elem, dest->nrow * dest->ncol * sizeof(double)); + + return dest; +} + +/** + * Sum of all elements. + * + * @param A matrix. + * + * @returns the sum of elements of `A`. + */ +double sum(const mat* A) { + int i, nn = A->nrow * A->ncol; + int nn4 = 4 * (nn / 4); + double *a = A->elem; + double s = 0.0; + + for (i = 0; i < nn4; i += 4) { + s += a[i] + a[i + 1] + a[i + 2] + a[i + 3]; + } + for (; i < nn; ++i) { + s += a[i]; + } + + return s; +} + +/** + * Mean of all elements. + * + * @param A matrix. + * + * @returns the mean of elements of `A`. + */ +double mean(const mat* A) { + int i, nn = A->nrow * A->ncol; + int nn4 = 4 * (nn / 4); + double *a = A->elem; + double s = 0.0; + + for (i = 0; i < nn4; i += 4) { + s += a[i] + a[i + 1] + a[i + 2] + a[i + 3]; + } + for (; i < nn; ++i) { + s += a[i]; + } + + return s / (double)nn; +} + +#define CVE_DOT_ALG(op) \ + for (i = 0; i < nn4; i += 4) { \ + s += ((a[i]) op (b[i])) \ + + ((a[i + 1]) op (b[i + 1])) \ + + ((a[i + 2]) op (b[i + 2])) \ + + ((a[i + 3]) op (b[i + 3])); \ + } \ + for (; i < nn; ++i) { \ + s += (a[i]) op (b[i]); \ + } + +/** + * Element-wise sum of `A` and `op(B)`. + * + * sum(A + B), for op = '+', + * sum(A * B), for op = '*', + * sum(A - B), for op = '-', + * sum(A / B), for op = '/'. + * + * @param A matrix. + * @param op one of '+', '-', '*' or '/'. + * @param B matrix of the same size as `A`. + * + * @returns sum of element-wise products. + */ +double dot(const mat *A, const char op, const mat *B) { + int i, nn = A->nrow * A->ncol; + int nn4 = 4 * (nn / 4); + double *a = A->elem; + double *b = B->elem; + double s = 0.0; + + if (B->nrow * B->ncol != nn) { + // TODO: error handling! + } + + switch (op) { + case '+': + CVE_DOT_ALG(+) + break; + case '-': + CVE_DOT_ALG(-) + break; + case '*': + CVE_DOT_ALG(*) + break; + case '/': + CVE_DOT_ALG(/) + break; + default: + // TODO: error handling! + break; + } + + return s; +} + +/** + * Computes the sub-space distances according + * + * dist <- norm(A %*% t(A) - B %*% t(B), 'F') + * + * @param A semi-orthogonal matrix of dim. `n x m` such that `n < m`. + * @param B semi-orthogonal matrix of same dimensions as `A`. + * + * @details The semi-orthogonality will NOT be validated! + * + * @returns dist as describet above. + */ +// TODO: optimize! +double dist(const mat *A, const mat *B) { + int i, j, k, n = A->nrow, m = A->ncol; + double tmp1, tmp2, sum; + double *a = A->elem, *b = B->elem; + + if (A->nrow != B->nrow || A->ncol != B->ncol) { + // TODO: error handling! + } + + sum = 0.0; + for (i = 0; i < n; ++i) { + for (j = 0; j < n; ++j) { + tmp1 = tmp2 = 0.0; + for (k = 0; k < m; ++k) { + tmp1 += a[k * n + i] * a[k * n + j]; + tmp2 += b[k * n + i] * b[k * n + j]; + } + sum += (tmp1 - tmp2) * (tmp1 - tmp2); } - for (; i < nelem; ++i) { - sum += A[i] * A[i]; - } - return sqrt(sum); + } + + return sqrt(sum); +} + + +/** + * Sum of squared elements. + * + * @param A matrix. + * + * @returns the sum of squared elements of `A`. + */ +double squareSum(const mat* A) { + int i, nn = A->nrow * A->ncol; + int nn4 = 4 * (nn / 4); + double *a = A->elem; + double s = 0.0; + + for (i = 0; i < nn4; i += 4) { + s += (a[i] * a[i]) + + (a[i + 1] * a[i + 1]) + + (a[i + 2] * a[i + 2]) + + (a[i + 3] * a[i + 3]); + } + for (; i < nn; ++i) { + s += a[i] * a[i]; + } + + return s; +} + + +/** + * Row sums of a matrix `A`. + * + * @param A matrix of size `n x m`. + * @param sums vector of length `n`. + * + * @returns sums if not NULL, else a new creates vector of length `n`. 
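+ *
+ * @example (illustration) For the `2 x 3` matrix
+ *
+ *              ( 1  3  5 )
+ *          A = ( 2  4  6 )
+ *
+ *          the result is the column vector `(9, 12)^T`, i.e. the same as
+ *          the R expression `rowSums(A)`.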
+ */ +mat* rowSums(const mat *A, mat *sums) { + int i, j, block_size, block_size_i, + nrow = A->nrow, ncol = A->ncol; + const double *a; + const double *A_block = A->elem; + const double *A_end = A->elem + nrow * ncol; + double *sum; + + if (!sums) { + sums = zero(nrow, 1); + } + sum = sums->elem; + + if (nrow > CVE_MEM_CHUNK_SIZE) { + block_size = CVE_MEM_CHUNK_SIZE; } else { - error("Unknown norm type."); - } -} - -void matrixprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C) { - const double one = 1.0; - const double zero = 0.0; - - // DGEMM with parameterization: - // C <- A %*% B - F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA, - &one, A, &nrowA, B, &nrowB, - &zero, C, &nrowA); -} - -void crossprod(const double *A, const int nrowA, const int ncolA, - const double *B, const int nrowB, const int ncolB, - double *C) { - const double one = 1.0; - const double zero = 0.0; - - // DGEMM with parameterization: - // C <- t(A) %*% B - F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA, - &one, A, &nrowA, B, &nrowB, - &zero, C, &ncolA); -} - -void nullProj(const double *B, const int nrowB, const int ncolB, - double *Q) { - const double minusOne = -1.0; - const double one = 1.0; - int i, nelem = nrowB * nrowB; - - // Initialize as identity matrix. - memset(Q, 0, sizeof(double) * nrowB * nrowB); - for (i = 0; i < nelem; i += nrowB + 1) { - Q[i] = 1.0; + block_size = nrow; } - // DGEMM with parameterization: - // C <- (-1.0 * B %*% t(B)) + C - F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB, - &minusOne, B, &nrowB, B, &nrowB, - &one, Q, &nrowB); + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Reset `A` to new block beginning. + a = A_block; + // Take block size of eveything left and reduce to max size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Compute first blocks column, + for (j = 0; j < block_size_i; ++j) { + sum[j] = a[j]; + } + // and sum the following columns to the first one. + for (a += nrow; a < A_end; a += nrow) { + for (j = 0; j < block_size_i; ++j) { + sum[j] += a[j]; + } + } + // Step one block forth. + A_block += block_size_i; + sum += block_size_i; + } + + return sums; } -// In place scaling of elements of A. -void scale(const double s, double *A, const int nelem) { - int i, nelemb = (nelem / 4) * 4; +/** + * Column sums of a matrix `A`. + * + * @param A matrix of size `n x m`. + * @param sums (out) vector of length `m`. + * + * @returns sums if not NULL, else a new creates vector of length `m`. + * + * @details As a vector this the result `sums` must have matrix dim. `m x 1`. + * Meaning that the result is a row vector. + */ +mat* colSums(const mat *A, mat *sums) { + int i, j, nrow = A->nrow, ncol = A->ncol; + int nrowb = 4 * (nrow / 4); // 4 * floor(nrow / 4) + double *a = A->elem; + double sum; - if (s == 1.) { - return; + if (!sums) { + sums = matrix(ncol, 1); } - for (i = 0; i < nelemb; i += 4) { - A[i] *= s; - A[i + 1] *= s; - A[i + 2] *= s; - A[i + 3] *= s; - } - for (; i < nelem; ++i) { - A[i] *= s; + for (j = 0; j < ncol; ++j) { + sum = 0.0; + for (i = 0; i < nrowb; i += 4) { + sum += a[i] + + a[i + 1] + + a[i + 2] + + a[i + 3]; + } + for (; i < nrow; ++i) { + sum += a[i]; + } + sums->elem[j] = sum; + a += nrow; } + + return sums; } -// A dence skew-symmetric rank 2 update. 
-// Perform the update -// C := alpha (A * B^T - B * A^T) + beta C -void skew(const int nrow, const int ncol, - double alpha, const double *A, const double *B, - double beta, - double *C) { +/** + * Sum of squared row differences. + * + * d_k = \sum_{l} (X_{i l} - X_{j l})^2 + * + * For a input matrix `X` of dimension `n x p` the result is a vector of length + * `n * (n - 1) / 2` (aka number of all pairs of `n` elements). The relation + * between `i, j` and `k` is: + * + * k = j n + i - j (j + 1) / 2, for 0 <= j < i < n + * + * @param X input matrix of dim. `n x p`. + * @param lvecD (out) output vector of length `n * (n - 1) / 2`. + * + * @returns lvecD if not NULL, otherwise a new created vector. + * + * @details this computes the packed vectorized lower triangular part of the + * distance matrix of `X`, aka lvec(D). + */ +mat* rowDiffSquareSums(const mat *X, mat *lvecD) { + int i, j, k, l, nrow = X->nrow, ncol = X->ncol; + const double *x; + double *d, diff; + + if (!lvecD) { + lvecD = zero(nrow * (nrow - 1) / 2, 1); + } else if (nrow * (nrow - 1) == 2 * lvecD->nrow && lvecD->ncol == 1) { + memset(lvecD->elem, 0, (nrow * (nrow - 1) / 2) * sizeof(double)); + } else { + // TODO: error handling! + } + + d = lvecD->elem; + for (l = 0; l < ncol; ++l) { + x = X->elem + l * nrow; + for (j = k = 0; j < nrow; ++j) { + for (i = j + 1; i < nrow; ++i, ++k) { + diff = x[i] - x[j]; + d[k] += diff * diff; + } + } + } + + return lvecD; +} + + +#define COLAPPLY_ALG(op) \ + for (i = 0; i < nrow; i += block_size) { \ + /* Set a, c to block's left upper corner and b to block height */ \ + a = A->elem + i; \ + b = B->elem + i; \ + c = C->elem + i; \ + /* Get block size as everyting left, then restrict size. */ \ + block_size = nrow - i; \ + if (block_size > max_size) { \ + block_size = max_size; \ + } \ + /* remove reminders by 4 from block_size */ \ + block_batch_size = 4 * (block_size / 4); \ + /* Stay on i-index "height" and move "right" in j-direction */ \ + for (j = 0; j < ncol; ++j, a += nrow, c += nrow) { \ + /* Iterate over the block column */ \ + for (k = 0; k < block_batch_size; k += 4) { \ + c[k] = a[k] op b[k]; \ + c[k + 1] = a[k + 1] op b[k + 1]; \ + c[k + 2] = a[k + 2] op b[k + 2]; \ + c[k + 3] = a[k + 3] op b[k + 3]; \ + } \ + for (; k < block_size; ++k) { \ + c[k] = a[k] op b[k]; \ + } \ + } \ + } + +/* C[, j] = A[, j] op v for each j = 1 to ncol with op as one of +, -, *, / */ + +/** + * Elment-wise operation for each row to vector. + * C <- A op B + * using recycling (column-major order) with `op` ether '+', '-', '*' or '/'. + * + * @param A matrix of size `n x m`. + * @param op type of operation, ether '+', '-', '*' or '/'. + * @param B vector or length `m` to be sweeped over rows. + * @param C (in/out) result after row sweep. + * + * @returns Passed (overwritten) `C` or new created if `C` is NULL. + */ +mat* colApply(mat *A, const char op, mat *B, mat *C) { + int i, j, k, nrow = A->nrow, ncol = A->ncol; + int max_size = CVE_MEM_CHUNK_SMALL < nrow ? CVE_MEM_CHUNK_SMALL : nrow; + int block_size, block_batch_size; + const double * restrict a, * restrict b; + double * restrict c; + + /* Check if B length is the same as count A's rows. */ + if (nrow != B->nrow) { + // TODO: error handling! + } + /* Check for existance or shape of C */ + if (!C) { + C = matrix(nrow, ncol); + } else if (C->nrow != nrow || C->ncol != ncol) { + // TODO: error handling! 
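+        // e.g. (sketch, analogous to the dimension check in `copy` above):
+        //     error("[ colApply ] result is (%d, %d), expected (%d, %d)",
+        //           C->nrow, C->ncol, nrow, ncol);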
+ } + + switch (op) { + case '+': + COLAPPLY_ALG(+) + break; + case '-': + COLAPPLY_ALG(-) + break; + case '*': + COLAPPLY_ALG(*) + break; + case '/': + // TODO: check division by zero + COLAPPLY_ALG(/) + break; + default: + // TODO: error handling: + break; + } + + return C; +} +#undef COLAPPLY_ALG + +/** + * Element-wise applies `op` with scalar. + * + * B <- A + scalar, for op = '+', + * B <- A - scalar, for op = '-', + * B <- A * scalar, for op = '*', + * B <- A / scalar, for op = '/'. + * + * @param A matrix. + * @param op one of '+', '-', '*' or '/'. + * @param scalar scalar as right hand side of operation. + * @param B matrix of the same size as `A`. May even `A`. + */ +mat* elemApply(mat *A, const char op, const double scalar, mat *B) { + int i, n = A->nrow * A->ncol; + double *a, *b; + + if (!B) { + B = matrix(A->nrow, A->ncol); + } + + if (n != B->nrow * B->ncol || (op == '/' && scalar == 0.0)) { + // TODO: error handling! + } + + a = A->elem; + b = B->elem; + switch (op) { + case '+': + for (i = 0; i < n; ++i) { + b[i] = a[i] + scalar; + } + break; + case '-': + for (i = 0; i < n; ++i) { + b[i] = a[i] - scalar; + } + break; + case '*': + for (i = 0; i < n; ++i) { + b[i] = a[i] * scalar; + } + break; + case '/': + for (i = 0; i < n; ++i) { + b[i] = a[i] / scalar; + } + break; + default: + break; + } + + return B; +} + +/** + * Linear combination of A with B, + * B <- alpha A + beta B + * + * @param alpha scaling factor for `A`. + * @param A (in) first matrix. + * @param beta scaling factor for `B`. + * @param B (in/out) second matrix of the same length as `A`. + * + * @returns passed vector `B`. + * + * @details It is NOT allowed to pass NULL. + * Also the number of elements must match, but it is legal to pass matrices of + * different shape, the result may be unwanted but leagel. + */ +mat* lincomb(const double alpha, const mat *A, const double beta, mat *B) { + int i, nn = A->nrow * A->ncol; + int nnb = 4 * (nn / 4); + double *restrict a = A->elem, *restrict b = B->elem; + + if (nn != B->nrow * B->ncol) { + // TODO: error handling! + } + + for (i = 0; i < nnb; i += 4) { + b[i] = alpha * a[i] + beta * b[i]; + b[i + 1] = alpha * a[i + 1] + beta * b[i + 1]; + b[i + 2] = alpha * a[i + 2] + beta * b[i + 2]; + b[i + 3] = alpha * a[i + 3] + beta * b[i + 3]; + } + for (; i < nn; ++i) { + b[i] = alpha * a[i] + beta * b[i]; + } + + return B; +} + +/** + * Matrix matrix product. + * C <- alpha * A %*% B + beta * C + * + * @param alpha scaling factor for product of `A` with `B`. + * @param A (in) matrix of dimension `n x k`. + * @param B (in) matrix of dimension `k x m`. + * @param beta scaling factor for original values in target matrix. + * @param C (in/out) matrix of dimension `n x m` or NULL. + * + * @details if `C` is NULL, a new matrix of dimension `n x m` is created `beta` + * is set to 0. + * + * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. + */ +mat* matrixprod(const double alpha, const mat *A, const mat *B, + double beta, mat* C) { + /* Check dimensions. */ + if (A->ncol != B->nrow) { + // TODO: error handling! + } + /* Check if `C` is given. */ + if (!C) { + /* with `beta` zero, DGEMM ensures to overwrite `C`. */ + beta = 0.0; + /* Create uninitialized matrix `C`. */ + C = matrix(A->nrow, B->ncol); + } else { + /* Check target dimensions. */ + if (C->nrow != A->nrow || C->ncol != B->ncol) { + // TODO: error handling! + } + } + + /* DGEMM ... 
Dence GEneralized Matrix Matrix level 3 BLAS routine */ + F77_NAME(dgemm)("N", "N", &(A->nrow), &(B->ncol), &(A->ncol), + &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), + &beta, C->elem, &(C->nrow)); + + return C; +} + + + +/** + * Cross-product. + * C <- alpha * t(A) %*% B + beta * C + * C <- alpha * crossprod(A, B) + beta * C + * + * @param alpha scaling factor for product of `A` with `B`. + * @param A (in) matrix of dimension `k x n`. + * @param B (in) matrix of dimension `k x m`. + * @param beta scaling factor for original values in target matrix. + * @param C (in/out) matrix of dimension `n x m` or NULL. + * + * @details if `C` is NULL, a new matrix of dimension `n x m` is created `beta` + * is set to 0. + * + * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. + */ +mat* crossprod(const double alpha, const mat *A, const mat *B, + double beta, mat* C) { + /* Check dimensions. */ + if (A->nrow != B->nrow) { + // TODO: error handling! + } + /* Check if `C` is given. */ + if (!C) { + /* with `beta` zero, DGEMM ensures to overwrite `C`. */ + beta = 0.0; + /* Create uninitialized matrix `C`. */ + C = matrix(A->ncol, B->ncol); + } else { + /* Check target dimensions. */ + if (C->nrow != A->ncol || C->ncol != B->ncol) { + // TODO: error handling! + } + } + + /* DGEMM ... Dence GEneralized Matrix Matrix level 3 BLAS routine */ + F77_NAME(dgemm)("T", "N", &(A->ncol), &(B->ncol), &(A->nrow), + &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), + &beta, C->elem, &(C->nrow)); + + return C; +} + +/** + * Hadamard product (Schur product or element-wise product). + * + * C <- alpha * A * B + beta * C + * + * @param alpha scaling factor for `A o B`. + * @param A matrix. + * @param B matrix of same dimensions as `A`. + * @param beta scaling factor for `C`. + * @param C (in/out) matrix of same dimensions as `A` or NULL. + * + * @returns Passed C, or if C is NULL a new created matrix. + */ +mat* hadamard(const double alpha, const mat* A, const mat* B, + double beta, mat* C) { + int i, nn = A->nrow * A->ncol; + double *a, *b, *c; + + if (A->nrow != B->nrow || A->ncol != B->ncol) { + // TODO: error handling! + } + + if (!C) { + beta = 0.0; + C = zero(A->nrow, A->ncol); + } else if (A->nrow != C->nrow || A->ncol != C->ncol) { + // TOD0: error handling! + } + + a = A->elem; + b = B->elem; + c = C->elem; + if (alpha == 1.0 && beta == 0.0) { + for (i = 0; i < nn; ++i) { + c[i] = a[i] * b[i]; + } + } else { + for (i = 0; i < nn; ++i) { + c[i] = alpha * a[i] * b[i] + beta * c[i]; + } + } + + return C; +} + +/** + * Skew-Symmetric rank-2 matrixprod: + * + * C <- alpha * (A %*% t(B) - B %*% t(A)) + beta * C + * + * @param alpha first scaling factor. + * @param A (in) matrix of dimension `n x k`. + * @param B (in) matrix of dimension `n x k`. + * @param beta scaling factor for original values in target matrix. + * @param C (in/out) matrix of dimension `n x n` or NULL. + * + * @details if `C` is NULL, a new matrix of dimension `n x n` is created, `beta` + * is set to 0. + * + * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. + */ +mat* skew(double alpha, mat *A, mat *B, double beta, mat *C) { + + if (A->nrow != B->nrow || A->ncol != B->ncol) { + // TODO: error handling! + } + + if (!C) { + beta = 0.0; + C = matrix(A->nrow, A->nrow); + } else if (C->nrow != A->nrow || C->ncol != A->nrow) { + // TODO: error handling! 
+ } + F77_NAME(dgemm)("N", "T", - &nrow, &nrow, &ncol, - &alpha, A, &nrow, B, &nrow, - &beta, C, &nrow); + &(C->nrow), &(C->ncol), &(A->ncol), + &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), + &beta, C->elem, &(C->nrow)); alpha *= -1.0; beta = 1.0; F77_NAME(dgemm)("N", "T", - &nrow, &nrow, &ncol, - &alpha, B, &nrow, A, &nrow, - &beta, C, &nrow); + &(C->nrow), &(C->ncol), &(B->ncol), + &alpha, B->elem, &(B->nrow), A->elem, &(A->nrow), + &beta, C->elem, &(C->nrow)); + + return C; } +/** + * Integer square root. + * isqrt(n) = floor(sqrt(n)) + * which is the biggest positive integer such that its square is smaller or + * equal to `n`. + * + * @param n positive integer. + * + * @returns integer square root of `n`. + */ +static int isqrt(const int n) { + int x = 1, y; -/** Cayley transformation of matrix `B` using the Skew-Symmetri matrix `A`. - * X = (I + A)^-1 (I - A) B + if (n < 0) { + // TODO: error handling! + } else if (n < 4) { + return 1; + } else if (n < 9) { // Only required for n <= 4. + return 2; + } + + while (1) { + y = (x + n / x) / 2; + if (x == y || x + 1 == y) { + return x; + } + x = y; + } +} + +/** + * Vector to symmetric matrix transform. + * + * Given a vector `A` of length `N = n * (n - 1) / 2` a symmetric matrix `B` + * of dimension `n x n` is created such that (0-indexed) + * + * b_{i j} = b_{j i} = a_k, for k = j * n + i - j * (j + 1) / 2 + * + * with 0 <= j < i < n. The main diagonal elements are set to `diag`. + * + * @example The vector `A = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)^T` with + * `diag = -1` results in + * + * ( -1 0 1 2 3 ) + * ( 0 -1 4 5 6 ) + * B = ( 1 4 -1 7 8 ) + * ( 2 5 7 -1 9 ) + * ( 3 6 8 9 -1 ) + * + * @param A (in) source vector of length `N = n * (n - 1) / 2`. + * @param B (in/out) destination matrix of dim. `n x n`. + * @param diag value of all diagonal elements. + * + * @returns passed matrix `B` or new created if `B` is NULL. + */ +mat* lvecToSym(const mat* A, const double diag, mat* B) { + int i, j, k, l, n, N = A->nrow; + int max_size, block_size; + double *a, *b, *L, *U; + + if (!B) { + n = (1 + isqrt(1 + 8 * N)) / 2; + if (n * (n - 1) != 2 * N) { + // TODO: error handling! + } + B = matrix(n, n); + } else { + n = B->nrow; + if (n * (n - 1) != 2 * N || n != B->ncol) { + // TODO: error handling! + } + } + + /* Copy consecutive values of `A` to lower triangular part of `B`. */ + a = A->elem; + for (j = k = 0; j < n; ++j) { + b = B->elem + j * n; + b[j] = diag; + for (i = j + 1; i < n; ++i, ++k) { + b[i] = a[k]; + } + } + + /* Mirror along the main diagonal, aka transposed copy of lower to upper. */ + b = B->elem; + max_size = BLOCK_SIZE * (n / BLOCK_SIZE); + /* Iterate over blocked columns */ + for (j = 0; j < n; j += BLOCK_SIZE) { + /* determine height of the block (for reminding partial blocks) */ + block_size = j < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); + /* Set diagonal block (left upper corner on the main diagonal) */ + L = b + (j * n + j); // diagonal block. + /* Transpose block on main diagonal */ + for (l = 0; l < block_size; ++l) { + for (k = l + 1; k < block_size; ++k) { + L[k * n + l] = L[l * n + k]; + } + } + /* Iterate complete blocks below previous halve block */ + for (i = j + BLOCK_SIZE; i < n; i += BLOCK_SIZE) { + /* determine height of the block (for reminding partial blocks) */ + block_size = i < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); + /* Set lower (L, below diag) and upper (U, above diag) blocks */ + L = b + j * n + i; + U = b + i * n + j; + /* Transpose lower (L) to upper (U) complete block. 
*/ + for (l = 0; l < BLOCK_SIZE; ++l) { + for (k = 0; k < block_size; ++k) { + U[k * n + l] = L[l * n + k]; + } + } + } + } + + return B; +} + +/** + * In place transformation into symmetric matrix as + * A <- diag(colSums(A + t(A))) - (A + t(A)) + * + * @param A (in/out) square matrix. + * @param workMem (out) double[n] vector as working memory. Will be storing the + * diagonal elements of the result matrix. Must have at least length `n`. + * + * @returns laplace transformed input `A`. + * + * @details The workMem parameter is required as working memory. + */ +mat* laplace(mat *A, double *workMem) { + int i, j, k, l, n = A->nrow; + int max_size = BLOCK_SIZE * (n / BLOCK_SIZE); + int block_size; + double *a = A->elem; + double *L, *U; + + /* Check if A is square */ + if (n != A->ncol) { + // TODO: error handling! + } + + /* Ckeck if working memory is supplied. */ + if (!workMem) { + workMem = (double*)R_alloc(n, sizeof(double)); + } + + /* init working memory */ + memset(workMem, 0, n * sizeof(double)); + + /* Iterate over column slices */ + for (j = 0; j < n; j += BLOCK_SIZE) { + /* determine height of the block (for reminding partial blocks) */ + block_size = j < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); + /* Set diagonal block (left upper corner on the main diagonal) */ + L = a + (j * n + j); // D block in description. + /* Transpose block on main diagonal */ + for (l = 0; l < block_size; ++l) { + for (k = l + 1; k < block_size; ++k) { + workMem[j + l] += L[l * n + k] + = L[k * n + l] + = -(L[l * n + k] + L[k * n + l]); + } + } + /* Iterate complete blocks below previous halve block */ + for (i = j + BLOCK_SIZE; i < n; i += BLOCK_SIZE) { + /* determine height of the block (for reminding partial blocks) */ + block_size = i < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); + /* Set lower (L, below diag) and upper (U, above diag) blocks */ + L = a + j * n + i; + U = a + i * n + j; + /* Transpose lower (L) to upper (U) complete block. */ + for (l = 0; l < BLOCK_SIZE; ++l) { + for (k = 0; k < block_size; ++k) { + workMem[j + l] += L[l * n + k] + = U[k * n + l] + = -(L[l * n + k] + U[k * n + l]); + } + } + } + } + /* Sum remaining upper diagonal column sum parts and set diagonal */ + for (j = 0; j < n; ++j) { + for (i = 0; i < j; ++i) { + workMem[j] += a[i]; + } + a[j] = -workMem[j]; + a += n; + } + + return A; +} + +/** Cayley transformation of matrix `B` using a Skew-Symmetric matrix `A`. + * + * C = (I + A)^-1 (I - A) B + * * by solving the following linear equation: - * (I + A) X = (I - A) B ==> X = (I + A)^-1 (I - A) B + * + * (I + A) C = (I - A) B ==> C = (I + A)^-1 (I - A) B * \_____/ \_____/ - * IpA X = ImA B + * IpA C = ImA B * \_______/ - * IpA X = Y ==> X = IpA^-1 Y + * IpA C = Y ==> C = IpA^-1 Y * * @param A Skew-Symmetric matrix of dimension `(n, n)`. * @param B Matrix of dimensions `(n, m)` with `m <= n`. - * @return Transformed matrix `X`. + * @param C Matrix of dimensions `(n, m)` with `m <= n`. + * @param workMem working memory array of length at least `2 p^2 + p`. + * @return Transformed matrix `C`. 
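+ * @details Layout of `workMem` (hence the `2 p^2 + p` requirement with
+ *     `p = nrow(A)`): `p` pivot indices for `dgesv`, followed by `p x p`
+ *     doubles holding `I + A` and another `p x p` doubles holding `I - A`.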
* @note This opperation is equivalent to the R expression: * solve(diag(1, n) + A) %*% (diag(1, n) - A) %*% B * or * solve(diag(1, n) + A, (diag(1, n) - A) %*% B) */ -void cayleyTransform(const int p, const int q, - const double *A, const double *B, - double *X, double *workMem) { - int i, info, pp = p * p; - double zero = 0., one = 1.; +mat* cayleyTransform(mat *A, mat *B, mat *C, double *workMem) { + int i, info, pp = A->nrow * A->nrow; + double zero = 0.0, one = 1.0; + + // TODO: validate dimensions!!! + // TODO: calrify a bit ^^ + + if (!C) { + C = matrix(B->nrow, B->ncol); + } else if (B->nrow != C->nrow || B->ncol != C->ncol) { + // TODO: error handling! + } /* Allocate row permutation array used by `dgesv` */ int *ipiv = (int*)workMem; /* Create Matrix IpA = I + A (I plus A) */ - double *IpA = (double*)(ipiv + p); - memcpy(IpA, A, pp * sizeof(double)); - for (i = 0; i < pp; i += p + 1) { + double *IpA = (double*)(ipiv + A->nrow); + memcpy(IpA, A->elem, A->nrow * A->ncol * sizeof(double)); + for (i = 0; i < pp; i += A->nrow + 1) { IpA[i] += 1.; // +1 to diagonal elements. } /* Create Matrix ImA = I - A (I minus A) */ double *ImA = IpA + pp; + double *a = A->elem; for (i = 0; i < pp; ++i) { - ImA[i] = -A[i]; + ImA[i] = -a[i]; } - for (i = 0; i < pp; i += p + 1) { - ImA[i] += 1.; // +1 to diagonal elements. + for (i = 0; i < pp; i += A->nrow + 1) { + ImA[i] += 1.0; // +1 to diagonal elements. } /* Y as matrix-matrix product of ImA and B: * Y = 1 * ImA * B + 0 * Y */ - F77_CALL(dgemm)("N", "N", &p, &q, &p, - &one, ImA, &p, B, &p, &zero, X, &p); + F77_CALL(dgemm)("N", "N", &(A->nrow), &(B->ncol), &(A->nrow), + &one, ImA, &(A->nrow), B->elem, &(B->nrow), + &zero, C->elem, &(C->nrow)); - /* Solve system IpA Y = X for Y (and store result in X). - * aka. X = IpA^-1 X */ - F77_CALL(dgesv)(&p, &q, IpA, &p, ipiv, X, &p, &info); + /* Solve system IpA Y = C for Y (and store result in C). + * aka. C = IpA^-1 X */ + F77_CALL(dgesv)(&(A->nrow), &(B->ncol), IpA, &(A->nrow), + ipiv, C->elem, &(A->nrow), &info); if (info) { error("[ cayleyTransform ] error in dgesv - info %d", info); } + + return C; } diff --git a/CVE_C/src/rStiefel.c b/CVE_C/src/rStiefel.c index 5d84380..6ba7f2f 100644 --- a/CVE_C/src/rStiefel.c +++ b/CVE_C/src/rStiefel.c @@ -1,66 +1,31 @@ #include "cve.h" -// /** -// * Performas a QR factorization and computes the Q factor. -// * -// * @param A matrix. -// * @returns The Q factor of the QR factorization `A = QR`. -// */ -// SEXP qrQ(SEXP Ain) { -// int i, j, info; - -// if (!isMatrix(Ain)) { -// error("Argument must be a matrix."); -// } -// int nrow = nrows(Ain); -// int ncol = ncols(Ain); - -// double *A = (double*)R_alloc(nrow * ncol, sizeof(double)); -// memcpy(A, REAL(Ain), nrow * ncol * sizeof(double)); - -// // double *A = REAL(Ain); -// // Scalar factors of elementary reflectors. -// double *tau = (double*)R_alloc(ncol, sizeof(double)); - -// // Create Working memory area. 
-// int lenWork = 3 * nrow; -// double *memWork = (double*)R_alloc(lenWork, sizeof(double)); - -// F77_NAME(dgeqrf)(&nrow, &ncol, A, &nrow, tau, -// memWork, &lenWork, &info); - -// SEXP Qout = PROTECT(allocMatrix(REALSXP, nrow, ncol)); -// double *Q = REAL(Qout); - -// for (j = 0; j < ncol; ++j) { -// for (i = 0; i < nrow; ++i) { -// if (i == j) { -// Q[i + nrow * j] = 1.; -// } else { -// Q[i + nrow * j] = 0.; -// } -// } -// } - -// F77_NAME(dormqr)("L", "N", &nrow, &ncol, &ncol, A, &nrow, tau, Q, &nrow, -// memWork, &lenWork, &info); - -// UNPROTECT(1); -// return Qout; -// } - /** - * Draws a sample from invariant measure on the Stiefel manifold \eqn{S(p, q)}. + * Draws a sample from invariant measure on the Stiefel manifold \eqn{S(p, q)}. * - * @param p row dimension - * @param q col dimension - * @return \code{p} times \code{q} semi-orthogonal matrix. - * `V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))` + * @param p row dimension + * @param q column dimension + * @param V (in/out) matrix of dimensions `p x q` or NULL. + * @param workMem work space array of length greater-equal than `2pq + q`. + * + * @return Passed matrix `V` or new created if `V` is NULL. + * + * @example Performs the same operation as the following `R` code: + * V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))) + * + * @details ATTENTION: The length of workMem must be at least `2pq + q`. */ -void rStiefel(const int p, const int q, double *V, - double *workMem, int workLen) { - int i, j, info; +mat* rStiefel(const int p, const int q, mat *V, double *workMem) { + int i, j, info, workLen = 2 * p * q + q; int pq = p * q; + double *v; + + if (!V) { + V = matrix(p, q); + } else if (V->nrow != p || V->ncol != q) { + // TODO: error handling! + } + v = V->elem; GetRNGstate(); for (i = 0; i < pq; ++i) { @@ -76,14 +41,12 @@ void rStiefel(const int p, const int q, double *V, for (j = 0; j < q; ++j) { for (i = 0; i < p; ++i) { - if (i == j) { - V[i + p * j] = 1.; - } else { - V[i + p * j] = 0.; - } + v[p * j + i] = i == j ? 1.0 : 0.0; } } - F77_NAME(dormqr)("L", "N", &p, &q, &q, workMem, &p, tau, V, &p, + F77_NAME(dormqr)("L", "N", &p, &q, &q, workMem, &p, tau, V->elem, &p, workMem + pq + q, &workLen, &info); + + return V; } diff --git a/CVE_C/src/rowColOp.c b/CVE_C/src/rowColOp.c deleted file mode 100644 index 2d9e1c2..0000000 --- a/CVE_C/src/rowColOp.c +++ /dev/null @@ -1,160 +0,0 @@ -#include "cve.h" - -/** - * Computes the row sums of a matrix `A`. - * @param A Pointer to col-major matrix elements, size is `nrow * ncol`. - * @param nrow Number of rows of `A`. - * @param ncol Number of columns of `A`. - * @param sum Pointer to output row sums of size `nrow`. - */ -void rowSums(const double *A, const int nrow, const int ncol, - double *sum) { - int i, j, block_size, block_size_i; - const double *A_block = A; - const double *A_end = A + nrow * ncol; - - if (nrow > CVE_MEM_CHUNK_SIZE) { - block_size = CVE_MEM_CHUNK_SIZE; - } else { - block_size = nrow; - } - - // Iterate `(block_size_i, ncol)` submatrix blocks. - for (i = 0; i < nrow; i += block_size_i) { - // Reset `A` to new block beginning. - A = A_block; - // Take block size of eveything left and reduce to max size. - block_size_i = nrow - i; - if (block_size_i > block_size) { - block_size_i = block_size; - } - // Compute first blocks column, - for (j = 0; j < block_size_i; ++j) { - sum[j] = A[j]; - } - // and sum the following columns to the first one. 
- for (A += nrow; A < A_end; A += nrow) { - for (j = 0; j < block_size_i; ++j) { - sum[j] += A[j]; - } - } - // Step one block forth. - A_block += block_size_i; - sum += block_size_i; - } -} - -void colSums(const double *A, const int nrow, const int ncol, - double *colSums) { - int i, j; - int nrowb = 4 * (nrow / 4); // 4 * floor(nrow / 4) - double colSum; - - for (j = 0; j < ncol; ++j) { - colSum = 0.0; - for (i = 0; i < nrowb; i += 4) { - colSum += A[i] - + A[i + 1] - + A[i + 2] - + A[i + 3]; - } - for (; i < nrow; ++i) { - colSum += A[i]; - } - *(colSums++) = colSum; - A += nrow; - } -} - -void rowSquareSums(const double *A, - const int nrow, const int ncol, - double *sum) { - int i, j, block_size, block_size_i; - const double *A_block = A; - const double *A_end = A + nrow * ncol; - - if (nrow > CVE_MEM_CHUNK_SIZE) { - block_size = CVE_MEM_CHUNK_SIZE; - } else { - block_size = nrow; - } - - // Iterate `(block_size_i, ncol)` submatrix blocks. - for (i = 0; i < nrow; i += block_size_i) { - // Reset `A` to new block beginning. - A = A_block; - // Take block size of eveything left and reduce to max size. - block_size_i = nrow - i; - if (block_size_i > block_size) { - block_size_i = block_size; - } - // Compute first blocks column, - for (j = 0; j < block_size_i; ++j) { - sum[j] = A[j] * A[j]; - } - // and sum the following columns to the first one. - for (A += nrow; A < A_end; A += nrow) { - for (j = 0; j < block_size_i; ++j) { - sum[j] += A[j] * A[j]; - } - } - // Step one block forth. - A_block += block_size_i; - sum += block_size_i; - } -} - -void rowSumsSymVec(const double *Avec, const int nrow, - const double diag, - double *sum) { - int i, j; - - if (diag == 0.0) { - memset(sum, 0, nrow * sizeof(double)); - } else { - for (i = 0; i < nrow; ++i) { - sum[i] = diag; - } - } - - for (j = 0; j < nrow; ++j) { - for (i = j + 1; i < nrow; ++i, ++Avec) { - sum[j] += *Avec; - sum[i] += *Avec; - } - } -} - -void rowDiffs(const double* X, const int nrow, const int ncol, - double *diffs) { - int i, j, k, l; - const double *Xcol; - - for (k = l = 0; l < ncol; ++l) { - Xcol = X + l * nrow; - for (i = 0; i < nrow; ++i) { - for (j = i + 1; j < nrow; ++j) { - diffs[k++] = Xcol[i] - Xcol[j]; - } - } - } -} - -void rowDiffSquareSums(const double* X, const int nrow, const int ncol, - double *sum) { - int i, j, k, l; - const double *Xcol; - double tmp; - - memset(sum, 0, ((nrow * (nrow - 1)) / 2) * sizeof(double)); - - for (l = 0; l < ncol; ++l) { - Xcol = X + l * nrow; - for (k = i = 0; i < nrow; ++i) { - for (j = i + 1; j < nrow; ++j, ++k) { - tmp = Xcol[i] - Xcol[j]; - sum[k] += tmp * tmp; - } - } - } -} diff --git a/CVE_C/src/sweep.c b/CVE_C/src/sweep.c deleted file mode 100644 index e1a3e23..0000000 --- a/CVE_C/src/sweep.c +++ /dev/null @@ -1,51 +0,0 @@ -#include "cve.h" - -#define ROW_SWEEP_ALG(op) \ - /* Iterate `(block_size_i, ncol)` submatrix blocks. */ \ - for (i = 0; i < nrow; i += block_size_i) { \ - /* Set `A` and `C` to block beginning. */ \ - A = A_block; \ - C = C_block; \ - /* Get current block's row size. */ \ - block_size_i = nrow - i; \ - if (block_size_i > block_size) { \ - block_size_i = block_size; \ - } \ - /* Perform element wise operation for block. */ \ - for (; A < A_end; A += nrow, C += nrow) { \ - for (j = 0; j < block_size_i; ++j) { \ - C[j] = (A[j]) op (v[j]); \ - } \ - } \ - /* Step one block forth. 
*/ \ - A_block += block_size_i; \ - C_block += block_size_i; \ - v += block_size_i; \ - } - -/* C[, j] = A[, j] op v for each j = 1 to ncol with op as one of +, -, *, / */ -void rowSweep(const double *A, const int nrow, const int ncol, - const char* op, - const double *v, // vector of length nrow - double *C) { - int i, j, block_size, block_size_i; - const double *A_block = A; - double *C_block = C; - const double *A_end = A + nrow * ncol; - - if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache - block_size = CVE_MEM_CHUNK_SMALL; - } else { - block_size = nrow; - } - - if (*op == '+') { - ROW_SWEEP_ALG(+) - } else if (*op == '-') { - ROW_SWEEP_ALG(-) - } else if (*op == '*') { - ROW_SWEEP_ALG(*) - } else if (*op == '/') { - ROW_SWEEP_ALG(/) - } -} diff --git a/README.md b/README.md index 7469d4b..3a7f202 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,8 @@ Performance: - [NOT Feasible] Stochastic Version - [NOT Feasible] Gradient Approximations (using Algebraic Software for alternative Loss function formulations and gradient optimizations) - [NOT Sufficient] Alternative Kernels for reducing samples -- [ ] (To Be further investigated) "Kronecker" optimization +- [x] (To Be further investigated) "Kronecker" optimization +- [ ] Implement "Kronecker" optimization Features (functions): - [x] Initial `V.init` parameter (only ONE try, ignore number of `attempts` parameter) @@ -48,6 +49,7 @@ Changes: Errors: - [x] `CVE_C` compare to `CVE_legacy`. - [x] fix: `predict.dim` not found. +- [ ] fix/check: error computation for break condition (in `cve.c`) # Development ## Build and install. diff --git a/runtime_test.R b/runtime_test.R index aab3e9c..9589537 100644 --- a/runtime_test.R +++ b/runtime_test.R @@ -23,11 +23,11 @@ subspace.dist <- function(B1, B2){ set.seed(437) # Number of simulations -SIM.NR <- 50 +SIM.NR <- 50L # maximal number of iterations in curvilinear search algorithm -MAXIT <- 50 +MAXIT <- 50L # number of arbitrary starting values for curvilinear optimization -ATTEMPTS <- 10 +ATTEMPTS <- 10L # set names of datasets dataset.names <- c("M1", "M2", "M3", "M4", "M5") # Set used CVE method diff --git a/test.R b/test.R index d510e4a..bd714f6 100644 --- a/test.R +++ b/test.R @@ -17,10 +17,10 @@ library(CVE) path <- paste0('~/Projects/CVE/tmp/logger_', method, '_', momentum, '.C.pdf') # Define logger for `cve()` method. -logger <- function(iter, attempt, data) { +logger <- function(attempt, iter, data) { # Note the `<<-` assignement! loss.history[iter + 1, attempt] <<- data$loss - error.history[iter + 1, attempt] <<- if (data$err > 0) data$err else NA + error.history[iter + 1, attempt] <<- data$err tau.history[iter + 1, attempt] <<- data$tau # Compute true error by comparing to the true `B` B.est <- null(data$V) # Function provided by CVE