diff --git a/CVE_C/DESCRIPTION b/CVE_C/DESCRIPTION new file mode 100644 index 0000000..988b3d8 --- /dev/null +++ b/CVE_C/DESCRIPTION @@ -0,0 +1,11 @@ +Package: CVE +Type: Package +Title: Conditional Variance Estimator for Sufficient Dimension Reduction +Version: 0.1 +Date: 2019-08-29 +Author: Loki +Maintainer: Loki +Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is writen in pure R. +License: GPL-3 +Encoding: UTF-8 +RoxygenNote: 6.1.1 diff --git a/CVE_C/NAMESPACE b/CVE_C/NAMESPACE new file mode 100644 index 0000000..65deac3 --- /dev/null +++ b/CVE_C/NAMESPACE @@ -0,0 +1,24 @@ +# Generated by roxygen2: do not edit by hand + +S3method(plot,cve) +S3method(summary,cve) +export(cve) +export(cve.call) +export(cve.grid.search) +export(cve_linesearch) +export(cve_sgd) +export(cve_simple) +export(dataset) +export(elem.pairs) +export(estimate.bandwidth) +export(grad) +export(null) +export(rStiefl) +import(stats) +importFrom(graphics,lines) +importFrom(graphics,plot) +importFrom(graphics,points) +importFrom(stats,model.frame) +importFrom(stats,rbinom) +importFrom(stats,rnorm) +useDynLib(CVE, .registration = TRUE) diff --git a/CVE_C/R/CVE.R b/CVE_C/R/CVE.R new file mode 100644 index 0000000..f32f9a6 --- /dev/null +++ b/CVE_C/R/CVE.R @@ -0,0 +1,223 @@ +#' Conditional Variance Estimator (CVE) +#' +#' Conditional Variance Estimator for Sufficient Dimension +#' Reduction +#' +#' TODO: And some details +#' +#' +#' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' +#' @docType package +#' @author Loki +#' @useDynLib CVE, .registration = TRUE +"_PACKAGE" + +#' Implementation of the CVE method. +#' +#' Conditional Variance Estimator (CVE) is a novel sufficient dimension +#' reduction (SDR) method assuming a model +#' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +#' where B'X is a lower dimensional projection of the predictors. +#' +#' @param formula Formel for the regression model defining `X`, `Y`. +#' See: \code{\link{formula}}. +#' @param data data.frame holding data for formula. +#' @param method The different only differe in the used optimization. +#' All of them are Gradient based optimization on a Stiefel manifold. +#' \itemize{ +#' \item "simple" Simple reduction of stepsize. +#' \item "sgd" stocastic gradient decent. +#' \item TODO: further +#' } +#' @param ... Further parameters depending on the used method. +#' @examples +#' library(CVE) +#' +#' # sample dataset +#' ds <- dataset("M5") +#' +#' # call ´cve´ with default method (aka "simple") +#' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) +#' # plot optimization history (loss via iteration) +#' plot(dr.simple, main = "CVE M5 simple") +#' +#' # call ´cve´ with method "linesearch" using ´data.frame´ as data. +#' data <- data.frame(Y = ds$Y, X = ds$X) +#' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´. +#' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) +#' plot(dr.linesearch, main = "CVE M5 linesearch") +#' +#' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +#' +#' @seealso \code{\link{formula}}. For a complete parameters list (dependent on +#' the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} +#' @import stats +#' @importFrom stats model.frame +#' @export +cve <- function(formula, data, method = "simple", max.dim = 10, ...) 
{ + # check for type of `data` if supplied and set default + if (missing(data)) { + data <- environment(formula) + } else if (!is.data.frame(data)) { + stop('Parameter `data` must be a `data.frame` or missing.') + } + + # extract `X`, `Y` from `formula` with `data` + model <- stats::model.frame(formula, data) + X <- as.matrix(model[,-1, drop = FALSE]) + Y <- as.matrix(model[, 1, drop = FALSE]) + + # pass extracted data on to [cve.call()] + dr <- cve.call(X, Y, method = method, ...) + + # overwrite `call` property from [cve.call()] + dr$call <- match.call() + return(dr) +} + +#' @param nObs as describet in the Paper. +#' @param X Data +#' @param Y Responces +#' @param nObs Like in the paper. +#' @param k guess for SDR dimension. +#' @param ... Method specific parameters. +#' @rdname cve +#' @export +cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, + min.dim = 1, max.dim = 10, k, ...) { + + # parameter checking + if (!is.matrix(X)) { + stop('X should be a matrices.') + } + if (is.matrix(Y)) { + Y <- as.vector(Y) + } + if (nrow(X) != length(Y)) { + stop('Rows of X and number of Y elements are not compatible.') + } + if (ncol(X) < 2) { + stop('X is one dimensional, no need for dimension reduction.') + } + + if (!missing(k)) { + min.dim <- as.integer(k) + max.dim <- as.integer(k) + } else { + min.dim <- as.integer(min.dim) + max.dim <- as.integer(min(max.dim, ncol(X) - 1L)) + } + if (min.dim > max.dim) { + stop('`min.dim` bigger `max.dim`.') + } + if (max.dim >= ncol(X)) { + stop('`max.dim` must be smaller than `ncol(X)`.') + } + + # Call specified method. + method <- tolower(method) + call <- match.call() + dr <- list() + for (k in min.dim:max.dim) { + if (method == 'simple') { + dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) + } else if (method == 'linesearch') { + dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) + } else if (method == 'sgd') { + dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) + } else { + stop('Got unknown method.') + } + dr.k$k <- k + class(dr.k) <- "cve.k" + dr[[k]] <- dr.k + } + + # augment result information + dr$method <- method + dr$call <- call + class(dr) <- "cve" + return(dr) +} + +# TODO: write summary +# summary.cve <- function() { +# # code # +# } + +#' Ploting helper for objects of class \code{cve}. +#' +#' @param x Object of class \code{cve} (result of [cve()]). +#' @param content Specifies what to plot: +#' \itemize{ +#' \item "history" Plots the loss history from stiefel optimization +#' (default). +#' \item ... TODO: add (if there are any) +#' } +#' @param ... Pass through parameters to [plot()] and [lines()] +#' +#' @usage ## S3 method for class 'cve' +#' plot(x, content = "history", ...) +#' @seealso see \code{\link{par}} for graphical parameters to pass through +#' as well as \code{\link{plot}} for standard plot utility. +#' @importFrom graphics plot lines points +#' @method plot cve +#' @export +plot.cve <- function(x, ...) 
{ + + + # H <- x$history + # H_1 <- H[!is.na(H[, 1]), 1] + + # defaults <- list( + # main = "History", + # xlab = "Iterations i", + # ylab = expression(loss == L[n](V^{(i)})), + # xlim = c(1, nrow(H)), + # ylim = c(0, max(H)), + # type = "l" + # ) + + # call.plot <- match.call() + # keys <- names(defaults) + # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0] + + # for (key in keys) { + # call.plot[[key]] <- defaults[[key]] + # } + + # call.plot[[1L]] <- quote(plot) + # call.plot$x <- quote(1:length(H_1)) + # call.plot$y <- quote(H_1) + + # eval(call.plot) + + # if (ncol(H) > 1) { + # for (i in 2:ncol(H)) { + # H_i <- H[H[, i] > 0, i] + # lines(1:length(H_i), H_i) + # } + # } + # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) }) + # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) }) + # points(x.ends, y.ends) +} + +#' Prints a summary of a \code{cve} result. +#' @param object Instance of 'cve' as return of \code{cve}. +#' @method summary cve +#' @export +summary.cve <- function(object, ...) { + cat('Summary of CVE result - Method: "', object$method, '"\n', + '\n', + 'Dataset size: ', nrow(object$X), '\n', + 'Data Dimension: ', ncol(object$X), '\n', + 'SDR Dimension: ', object$k, '\n', + 'loss: ', object$loss, '\n', + '\n', + 'Called via:\n', + ' ', + sep='') + print(object$call) +} diff --git a/CVE_C/R/cve_linesearch.R b/CVE_C/R/cve_linesearch.R new file mode 100644 index 0000000..d2eb115 --- /dev/null +++ b/CVE_C/R/cve_linesearch.R @@ -0,0 +1,169 @@ +#' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +#' conditions. +#' +#' @keywords internal +#' @export +cve_linesearch <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-3, + rho1 = 0.1, + rho2 = 0.9, + slack = 0, + epochs = 50L, + attempts = 10L, + max.linesearch.iter = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) + p <- ncol(X) + q <- p - k + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) | !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Matrix of vectorized indices. (vec(index) -> seq) + index <- matrix(seq(n * n), n, n) + lower <- index[lower.tri(index)] + upper <- t(index)[lower] + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Set last loss (aka, loss after applying the step). + loss.last <- loss + + # Call logger with initial values before starting optimization. 
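        # Reviewer sketch (not part of the original sources): `logger`, when
        # supplied, is called with the optimizer's current frame, so a caller
        # can trace the optimization without touching the internals, e.g.
        #
        #   log.hist <- function(env) cat(sprintf("epoch %3d: loss = %g\n",
        #                                         env$epoch, env$loss))
        #   cve_linesearch(X, Y, k, logger = log.hist)
        #
        # (`log.hist`, `X`, `Y` and `k` are placeholder names; any function
        # taking a single environment argument works as a logger.)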
+ if (is.function(logger)) { + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + ## Start optimization loop. + for (epoch in 1:epochs) { + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + + # Directional derivative of the loss at current position, given + # as `Tr(G^T \cdot A \cdot V)`. + loss.prime <- -0.5 * norm(A, type = 'F')^2 + + # Linesearch + tau.upper <- Inf + tau.lower <- 0 + tau <- tau.init + for (iter in 1:max.linesearch.iter) { + # Apply learning rate `tau`. + A.tau <- (tau / 2) * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + inv <- solve(I_p + A.tau) + V.tau <- inv %*% ((I_p - A.tau) %*% V) + + # Loss at position after a step. + loss <- Inf # aka loss.tau + G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) + + # Armijo condition. + if (loss > loss.last + (rho1 * tau * loss.prime)) { + tau.upper <- tau + tau <- (tau.lower + tau.upper) / 2 + next() + } + + V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) + loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) + + # Wolfe condition. + if (loss.prime.tau < rho2 * loss.prime) { + tau.lower <- tau + if (tau.upper == Inf) { + tau <- 2 * tau.lower + } else { + tau <- (tau.lower + tau.upper) / 2 + } + } else { + break() + } + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Check break condition (epoch check to skip ignored gradient calc). + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol | epoch >= epochs) { + # take last step and stop optimization. + V <- V.tau + # Final call to the logger before stopping optimization + if (is.function(logger)) { + G <- G.tau + logger(environment()) + } + break() + } + + # Perform the step and remember previous loss. + V <- V.tau + loss.last <- loss + G <- G.tau + + # Log after taking current step. + if (is.function(logger)) { + logger(environment()) + } + + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/cve_sgd.R b/CVE_C/R/cve_sgd.R new file mode 100644 index 0000000..3fee47f --- /dev/null +++ b/CVE_C/R/cve_sgd.R @@ -0,0 +1,129 @@ +#' Simple implementation of the CVE method. 'Simple' means that this method is +#' a classic GD method unsing no further tricks. +#' +#' @keywords internal +#' @export +cve_sgd <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 0.01, + tol = 1e-3, + epochs = 50L, + batch.size = 16L, + attempts = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. + if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. 
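    # Reviewer sketch (assuming the package is loaded): the arithmetic
    # `lower`/`upper` indices built below match the `lower.tri()` based
    # construction used in cve_linesearch.R, e.g. for n = 5:
    #
    #   n <- 5
    #   pairs <- elem.pairs(seq(n))
    #   i <- pairs[1, ]; j <- pairs[2, ]        # i < j for every pair
    #   lower <- ((i - 1) * n) + j              # position of (j, i), strict lower
    #   upper <- ((j - 1) * n) + i              # position of (i, j), strict upper
    #   index <- matrix(seq(n * n), n, n)
    #   stopifnot(all(lower == index[lower.tri(index)]),
    #             all(upper == t(index)[index[lower.tri(index)]]))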
+ pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + # Init a list of data indices (shuffled for batching). + indices <- seq(n) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Reset learning rate `tau`. + tau <- tau.init + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + # Keep track of last `V` for computing error after an epoch. + V.last <- V + + if (is.function(logger)) { + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + error <- NA + epoch <- 0 + logger(environment()) + } + + # Repeat `epochs` times + for (epoch in 1:epochs) { + # Shuffle batches + batch.shuffle <- sample(indices) + + # Make a step for each batch. + for (batch.start in seq(1, n, batch.size)) { + # Select batch data indices. + batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle)) + batch <- batch.shuffle[batch.start:batch.end] + + # Compute batch gradient. + loss <- NULL + G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) + + # Cayley transform matrix. + A <- (G %*% t(V)) - (V %*% t(G)) + + # Apply learning rate `tau`. + A.tau <- tau * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + } + # And the error for the history. + error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") + V.last <- V + + if (is.function(logger)) { + # Compute loss at end of epoch for logging. + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + logger(environment()) + } + + # Check break condition. + if (error < tol) { + break() + } + } + # Compute actual loss after finishing for comparing multiple attempts. + loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) + + # After each attempt, check if last attempt reached a better result. + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/cve_simple.R b/CVE_C/R/cve_simple.R new file mode 100644 index 0000000..138751e --- /dev/null +++ b/CVE_C/R/cve_simple.R @@ -0,0 +1,141 @@ +#' Simple implementation of the CVE method. 'Simple' means that this method is +#' a classic GD method unsing no further tricks. +#' +#' @keywords internal +#' @export +cve_simple <- function(X, Y, k, + nObs = sqrt(nrow(X)), + h = NULL, + tau = 1.0, + tol = 1e-3, + slack = 0, + epochs = 50L, + attempts = 10L, + logger = NULL +) { + # Set `grad` functions environment to enable if to find this environments + # local variabels, needed to enable the manipulation of this local variables + # from within `grad`. + environment(grad) <- environment() + + # Get dimensions. + n <- nrow(X) # Number of samples. + p <- ncol(X) # Data dimensions + q <- p - k # Complement dimension of the SDR space. + + # Save initial learning rate `tau`. + tau.init <- tau + # Addapt tolearance for break condition. + tol <- sqrt(2 * q) * tol + + # Estaimate bandwidth if not given. 
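    # Reviewer sketch (assuming the package is loaded): the update used further
    # down, V.tau = solve(I + tau * A) %*% ((I - tau * A) %*% V) with the
    # skew-symmetric A = G V' - V G', is a Cayley transform, so V.tau stays on
    # the Stiefel manifold; the returned basis estimate is its orthogonal
    # complement null(V). A small check:
    #
    #   p <- 6; q <- 2; tau <- 0.1
    #   V <- rStiefl(p, q)
    #   G <- matrix(rnorm(p * q), p, q)          # stand-in for a gradient
    #   A <- (G %*% t(V)) - (V %*% t(G))
    #   V.tau <- solve(diag(1, p) + tau * A) %*% ((diag(1, p) - tau * A) %*% V)
    #   stopifnot(all.equal(t(V.tau) %*% V.tau, diag(1, q)),
    #             all.equal(crossprod(null(V.tau), V.tau), matrix(0, p - q, q)))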
+ if (missing(h) || !is.numeric(h)) { + h <- estimate.bandwidth(X, k, nObs) + } + + # Compute persistent data. + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + # Identity matrix. + I_p <- diag(1, p) + + # Init tracking of current best (according multiple attempts). + V.best <- NULL + loss.best <- Inf + + # Start loop for multiple attempts. + for (attempt in 1:attempts) { + # Reset learning rate `tau`. + tau <- tau.init + + # Sample a `(p, q)` dimensional matrix from the stiefel manifold as + # optimization start value. + V <- rStiefl(p, q) + + # Initial loss and gradient. + loss <- Inf + G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) + # Set last loss (aka, loss after applying the step). + loss.last <- loss + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + + # Call logger with initial values before starting optimization. + if (is.function(logger)) { + epoch <- 0 # Set epoch count to 0 (only relevant for logging). + error <- NA + logger(environment()) + } + + ## Start optimization loop. + for (epoch in 1:epochs) { + # Apply learning rate `tau`. + A.tau <- tau * A + # Parallet transport (on Stiefl manifold) into direction of `G`. + V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) + + # Loss at position after a step. + loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) + + # Check if step is appropriate, iff not reduce learning rate. + if ((loss - loss.last) > slack * loss.last) { + tau <- tau / 2 + next() # Keep position and try with smaller `tau`. + } + + # Compute error. + error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") + + # Check break condition (epoch check to skip ignored gradient calc). + # Note: the devision by `sqrt(2 * k)` is included in `tol`. + if (error < tol || epoch >= epochs) { + # take last step and stop optimization. + V <- V.tau + # Call logger last time befor stoping. + if (is.function(logger)) { + logger(environment()) + } + break() + } + + # Perform the step and remember previous loss. + V <- V.tau + loss.last <- loss + + # Call logger after taking a step. + if (is.function(logger)) { + logger(environment()) + } + + # Compute gradient at new position. + G <- grad(X, Y, V, h, persistent = TRUE) + + # Cayley transform matrix `A` + A <- (G %*% t(V)) - (V %*% t(G)) + } + + # Check if current attempt improved previous ones + if (loss < loss.best) { + loss.best <- loss + V.best <- V + } + + } + + return(list( + loss = loss.best, + V = V.best, + B = null(V.best), + h = h + )) +} diff --git a/CVE_C/R/datasets.R b/CVE_C/R/datasets.R new file mode 100644 index 0000000..68af0f4 --- /dev/null +++ b/CVE_C/R/datasets.R @@ -0,0 +1,109 @@ +#' Generates test datasets. +#' +#' Provides sample datasets. There are 5 different datasets named +#' M1, M2, M3, M4 and M5 describet in the paper references below. +#' The general model is given by: +#' \deqn{Y ~ g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} +#' @param n nr samples +#' @param p Dim. of random variable \code{X}. +#' @param p.mix Only for \code{"M4"}, see: below. 
+#' @param lambda Only for \code{"M4"}, see: below. +#' +#' @return List with elements +#' \itemize{ +#' \item{X}{data} +#' \item{Y}{response} +#' \item{B}{Used dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } +#' +#' @section M1: +#' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} +#' @section M2: +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' The link function \eqn{g} is given as +#' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} +#' @section M3: +#' TODO: +#' @section M4: +#' TODO: +#' @section M5: +#' TODO: +#' +#' @import stats +#' @importFrom stats rnorm rbinom +#' @export +dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { + # validate parameters + stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) + + # set default values if not supplied + if (missing(n)) { + n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 + } + if (missing(B)) { + p <- 12 + if (name == "M1") { + B <- cbind( + c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), + c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) + ) / sqrt(6) + } else if (name == "M2") { + B <- cbind( + c(c(1, 0), rep(0, 10)), + c(c(0, 1), rep(0, 10)) + ) + } else { + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) + } + } else { + p <- dim(B)[1] + # validate col. nr to match dataset `k = dim(B)[2]` + stopifnot( + name %in% c("M1", "M2") && dim(B)[2] == 2, + name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 + ) + } + + # set link function `g` for model `Y ~ g(B'X) + epsilon` + if (name == "M1") { + g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } + } else if (name == "M2") { + g <- function(BX) { BX[1] * BX[2]^2 } + } else if (name %in% c("M3", "M4")) { + g <- function(BX) { cos(BX[1]) } + } else { # name == "M5" + g <- function(BX) { 2 * log(abs(BX[1]) + 1) } + } + + # compute X + if (name != "M4") { + # compute root of the covariance matrix according the dataset + if (name %in% c("M1", "M3")) { + # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with + # `\Sigma_{i, j} = 0.5^{|i - j|}`. + Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p) + # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X` + Sigma.root <- chol(Sigma) + } else { # name %in% c("M2", "M5") + Sigma.root <- diag(rep(1, p)) # d-dim identity + } + # data `X` as multivariate random normal variable with + # variance matrix `Sigma`. + X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root + } else { # name == "M4" + X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1))) + } + + # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)` + Y <- apply(X, 1, function(X_i) { + g(t(B) %*% X_i) + rnorm(1, 0, 0.5) + }) + + return(list(X = X, Y = Y, B = B, name = name)) +} diff --git a/CVE_C/R/estimateBandwidth.R b/CVE_C/R/estimateBandwidth.R new file mode 100644 index 0000000..8e7edc7 --- /dev/null +++ b/CVE_C/R/estimateBandwidth.R @@ -0,0 +1,27 @@ +#' Estimated bandwidth for CVE. 
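# Reviewer sketch (assuming the package is loaded): a quick shape check of the
# generator defined in datasets.R above; "M1" defaults to n = 200 observations,
# p = 12 predictors and a rank-2 reduction matrix B.
#
#   ds <- dataset("M1")
#   stopifnot(nrow(ds$X) == 200, ncol(ds$X) == 12,
#             ncol(ds$B) == 2, length(ds$Y) == nrow(ds$X))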
+#' +#' Estimates a propper bandwidth \code{h} according +#' \deqn{% +#' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% +#' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +#' +#' @param X data matrix of dimension (n x p) with n data points X_i of dimension +#' q. Therefor each row represents a datapoint of dimension p. +#' @param k Guess for rank(B). +#' @param nObs Ether numeric of a function. If specified as numeric value +#' its used in the computation of the bandwidth directly. If its a function +#' `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +#' supplied at all is to use \code{nObs <- nrow(x)^0.5}. +#' +#' @seealso [\code{\link{qchisq}}] +#' @export +estimate.bandwidth <- function(X, k, nObs) { + n <- nrow(X) + p <- ncol(X) + + X_centered <- scale(X, center=TRUE, scale=FALSE) + Sigma <- (1 / n) * t(X_centered) %*% X_centered + + quantil <- qchisq((nObs - 1) / (n - 1), k) + return(2 * quantil * sum(diag(Sigma)) / p) +} diff --git a/CVE_C/R/gradient.R b/CVE_C/R/gradient.R new file mode 100644 index 0000000..8baaa88 --- /dev/null +++ b/CVE_C/R/gradient.R @@ -0,0 +1,48 @@ +#' Compute get gradient of `L(V)` given a dataset `X`. +#' +#' @param X Data matrix. +#' @param Y Responce. +#' @param V Position to compute the gradient at, aka point on Stiefl manifold. +#' @param h Bandwidth +#' @param loss.out Iff \code{TRUE} loss will be written to parent environment. +#' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single +#' value loss is returned and \code{envir} is ignored. +#' @param persistent Determines if data indices and dependent calculations shall +#' be reused from the parent environment. ATTENTION: Do NOT set this flag, only +#' intended for internal usage by carefully aligned functions! +#' @keywords internal +#' @export +grad <- function(X, Y, V, h, + loss.out = FALSE, + loss.only = FALSE, + persistent = FALSE) { + # Get number of samples and dimension. + n <- nrow(X) + p <- ncol(X) + + if (!persistent) { + # Compute lookup indexes for symmetrie, lower/upper + # triangular parts and vectorization. + pair.index <- elem.pairs(seq(n)) + i <- pair.index[1, ] # `i` indices of `(i, j)` pairs + j <- pair.index[2, ] # `j` indices of `(i, j)` pairs + # Index of vectorized matrix, for lower and upper triangular part. + lower <- ((i - 1) * n) + j + upper <- ((j - 1) * n) + i + + # Create all pairewise differences of rows of `X`. + X_diff <- X[i, , drop = F] - X[j, , drop = F] + } + + out <- .Call("grad_c", PACKAGE = "CVE", + X, X_diff, as.double(Y), V, as.double(h)); + + if (loss.only) { + return(out$loss) + } + if (loss.out) { + loss <<- out$loss + } + + return(out$G) +} diff --git a/CVE_C/R/gridSearch.R b/CVE_C/R/gridSearch.R new file mode 100644 index 0000000..2b2d5e7 --- /dev/null +++ b/CVE_C/R/gridSearch.R @@ -0,0 +1,43 @@ + +#' Performs a grid search for parameters over a parameter grid. 
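# Reviewer sketch (hedged; assuming the package is loaded): a pure R reference
# of the quantity the C kernel behind grad() evaluates. With the projection
# Q = I - V V', Gaussian weights on the projected pairwise distances and
# weighted first/second moments of Y, the loss is a mean conditional variance;
# it should agree with grad(..., loss.only = TRUE) up to floating point.
# The name `loss.reference` is made up for this sketch.
#
#   loss.reference <- function(X, Y, V, h) {
#       Q  <- diag(1, ncol(X)) - tcrossprod(V)   # projection onto span(V)^perp
#       D2 <- as.matrix(dist(X %*% Q))^2         # squared projected distances
#       W  <- exp(-0.5 * D2 / h)                 # Gaussian weights, diag(W) = 1
#       y1 <- as.vector(W %*% Y)   / rowSums(W)  # weighted first moments
#       y2 <- as.vector(W %*% Y^2) / rowSums(W)  # weighted second moments
#       mean(y2 - y1^2)
#   }
#   ds <- dataset("M2")
#   V <- rStiefl(ncol(ds$X), ncol(ds$X) - 2)
#   h <- estimate.bandwidth(ds$X, 2, sqrt(nrow(ds$X)))
#   all.equal(loss.reference(ds$X, ds$Y, V, h),
#             grad(ds$X, ds$Y, V, h, loss.only = TRUE))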
+#' @examples +#' args <- list( +#' h = c(0.05, 0.1, 0.2), +#' method = c("simple", "sgd"), +#' tau = c(0.5, 0.1, 0.01) +#' ) +#' cve.grid.search(args) +#' @export +cve.grid.search <- function(X, Y, k, args) { + + args$stringsAsFactors = FALSE + args$KEEP.OUT.ATTRS = FALSE + grid <- do.call(expand.grid, args) + grid.length <- length(grid[[1]]) + + print(grid) + + for (i in 1:grid.length) { + arguments <- as.list(grid[i, ]) + # Set required arguments + arguments$X <- X + arguments$Y <- Y + arguments$k <- k + # print(arguments) + dr <- do.call(cve.call, arguments) + print(dr$loss) + } +} + +# ds <- dataset() +# X <- ds$X +# Y <- ds$Y +# (k <- ncol(ds$B)) +# args <- list( +# h = c(0.05, 0.1, 0.2), +# method = c("simple", "sgd"), +# tau = c(0.5, 0.1, 0.01), +# attempts = c(1L) +# ) + +# cve.grid.search(X, Y, k, args) \ No newline at end of file diff --git a/CVE_C/R/util.R b/CVE_C/R/util.R new file mode 100644 index 0000000..ca7606a --- /dev/null +++ b/CVE_C/R/util.R @@ -0,0 +1,40 @@ +#' Samples uniform from the Stiefel Manifold +#' +#' @param p row dim. +#' @param q col dim. +#' @return `(p, q)` semi-orthogonal matrix +#' @examples +#' V <- rStiefel(6, 4) +#' @export +rStiefl <- function(p, q) { + return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) +} + +#' Null space basis of given matrix `V` +#' +#' @param V `(p, q)` matrix +#' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. +#' @keywords internal +#' @export +null <- function(V) { + tmp <- qr(V) + set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) + return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE]) +} + +#' Creates a (numeric) matrix where each column contains +#' an element to element matching. +#' @param elements numeric vector of elements to match +#' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. +#' @keywords internal +#' @examples +#' elem.pairs(seq.int(2, 5)) +#' @export +elem.pairs <- function(elements) { + # Number of elements to match. + n <- length(elements) + # Create all combinations. + pairs <- rbind(rep(elements, each=n), rep(elements, n)) + # Select unique combinations without self interaction. + return(pairs[, pairs[1, ] < pairs[2, ]]) +} diff --git a/CVE_C/inst/doc/CVE_paper.pdf b/CVE_C/inst/doc/CVE_paper.pdf new file mode 100755 index 0000000..e870293 Binary files /dev/null and b/CVE_C/inst/doc/CVE_paper.pdf differ diff --git a/CVE_C/man/CVE-package.Rd b/CVE_C/man/CVE-package.Rd new file mode 100644 index 0000000..8bb0f5d --- /dev/null +++ b/CVE_C/man/CVE-package.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\docType{package} +\name{CVE-package} +\alias{CVE} +\alias{CVE-package} +\title{Conditional Variance Estimator (CVE)} +\description{ +Conditional Variance Estimator for Sufficient Dimension + Reduction +} +\details{ +TODO: And some details +} +\references{ +Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +} +\author{ +Loki +} diff --git a/CVE_C/man/cve.Rd b/CVE_C/man/cve.Rd new file mode 100644 index 0000000..d3e0d05 --- /dev/null +++ b/CVE_C/man/cve.Rd @@ -0,0 +1,71 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{cve} +\alias{cve} +\alias{cve.call} +\title{Implementation of the CVE method.} +\usage{ +cve(formula, data, method = "simple", max.dim = 10, ...) + +cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1, + max.dim = 10, k, ...) 
+} +\arguments{ +\item{formula}{Formel for the regression model defining `X`, `Y`. +See: \code{\link{formula}}.} + +\item{data}{data.frame holding data for formula.} + +\item{method}{The different only differe in the used optimization. + All of them are Gradient based optimization on a Stiefel manifold. +\itemize{ + \item "simple" Simple reduction of stepsize. + \item "sgd" stocastic gradient decent. + \item TODO: further +}} + +\item{...}{Further parameters depending on the used method.} + +\item{X}{Data} + +\item{Y}{Responces} + +\item{nObs}{as describet in the Paper.} + +\item{k}{guess for SDR dimension.} + +\item{nObs}{Like in the paper.} + +\item{...}{Method specific parameters.} +} +\description{ +Conditional Variance Estimator (CVE) is a novel sufficient dimension +reduction (SDR) method assuming a model +\deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} +where B'X is a lower dimensional projection of the predictors. +} +\examples{ +library(CVE) + +# sample dataset +ds <- dataset("M5") + +# call ´cve´ with default method (aka "simple") +dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) +# plot optimization history (loss via iteration) +plot(dr.simple, main = "CVE M5 simple") + +# call ´cve´ with method "linesearch" using ´data.frame´ as data. +data <- data.frame(Y = ds$Y, X = ds$X) +# Note: ´Y, X´ are NOT defined, they are extracted from ´data´. +dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) +plot(dr.linesearch, main = "CVE M5 linesearch") + +} +\references{ +Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 +} +\seealso{ +\code{\link{formula}}. For a complete parameters list (dependent on + the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} +} diff --git a/CVE_C/man/cve.grid.search.Rd b/CVE_C/man/cve.grid.search.Rd new file mode 100644 index 0000000..2e7ad79 --- /dev/null +++ b/CVE_C/man/cve.grid.search.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gridSearch.R +\name{cve.grid.search} +\alias{cve.grid.search} +\title{Performs a grid search for parameters over a parameter grid.} +\usage{ +cve.grid.search(X, Y, k, args) +} +\description{ +Performs a grid search for parameters over a parameter grid. +} +\examples{ +args <- list( + h = c(0.05, 0.1, 0.2), + method = c("simple", "sgd"), + tau = c(0.5, 0.1, 0.01) +) +cve.grid.search(args) +} diff --git a/CVE_C/man/cve_linesearch.Rd b/CVE_C/man/cve_linesearch.Rd new file mode 100644 index 0000000..0413346 --- /dev/null +++ b/CVE_C/man/cve_linesearch.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_linesearch.R +\name{cve_linesearch} +\alias{cve_linesearch} +\title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +conditions.} +\usage{ +cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, + tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, + attempts = 10L, max.linesearch.iter = 10L, logger = NULL) +} +\description{ +Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe +conditions. +} +\keyword{internal} diff --git a/CVE_C/man/cve_sgd.Rd b/CVE_C/man/cve_sgd.Rd new file mode 100644 index 0000000..fc00f7d --- /dev/null +++ b/CVE_C/man/cve_sgd.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_sgd.R +\name{cve_sgd} +\alias{cve_sgd} +\title{Simple implementation of the CVE method. 
'Simple' means that this method is +a classic GD method unsing no further tricks.} +\usage{ +cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01, + tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L, + logger = NULL) +} +\description{ +Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks. +} +\keyword{internal} diff --git a/CVE_C/man/cve_simple.Rd b/CVE_C/man/cve_simple.Rd new file mode 100644 index 0000000..67d185f --- /dev/null +++ b/CVE_C/man/cve_simple.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cve_simple.R +\name{cve_simple} +\alias{cve_simple} +\title{Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks.} +\usage{ +cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, + tol = 0.001, slack = 0, epochs = 50L, attempts = 10L, + logger = NULL) +} +\description{ +Simple implementation of the CVE method. 'Simple' means that this method is +a classic GD method unsing no further tricks. +} +\keyword{internal} diff --git a/CVE_C/man/dataset.Rd b/CVE_C/man/dataset.Rd new file mode 100644 index 0000000..da7be5b --- /dev/null +++ b/CVE_C/man/dataset.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/datasets.R +\name{dataset} +\alias{dataset} +\title{Generates test datasets.} +\usage{ +dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) +} +\arguments{ +\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} + +\item{n}{nr samples} + +\item{p.mix}{Only for \code{"M4"}, see: below.} + +\item{lambda}{Only for \code{"M4"}, see: below.} + +\item{p}{Dim. of random variable \code{X}.} +} +\value{ +List with elements +\itemize{ + \item{X}{data} + \item{Y}{response} + \item{B}{Used dim-reduction matrix} + \item{name}{Name of the dataset (name parameter)} +} +} +\description{ +Provides sample datasets. There are 5 different datasets named +M1, M2, M3, M4 and M5 describet in the paper references below. +The general model is given by: +\deqn{Y ~ g(B'X) + \epsilon} +} +\section{M1}{ + +The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace +dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +The link function \eqn{g} is given as +\deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} +} + +\section{M2}{ + +\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. +The link function \eqn{g} is given as +\deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} +} + +\section{M3}{ + +TODO: +} + +\section{M4}{ + +TODO: +} + +\section{M5}{ + +TODO: +} + diff --git a/CVE_C/man/elem.pairs.Rd b/CVE_C/man/elem.pairs.Rd new file mode 100644 index 0000000..2d36c5d --- /dev/null +++ b/CVE_C/man/elem.pairs.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{elem.pairs} +\alias{elem.pairs} +\title{Creates a (numeric) matrix where each column contains +an element to element matching.} +\usage{ +elem.pairs(elements) +} +\arguments{ +\item{elements}{numeric vector of elements to match} +} +\value{ +matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. +} +\description{ +Creates a (numeric) matrix where each column contains +an element to element matching. 
+} +\examples{ + elem.pairs(seq.int(2, 5)) +} +\keyword{internal} diff --git a/CVE_C/man/estimate.bandwidth.Rd b/CVE_C/man/estimate.bandwidth.Rd new file mode 100644 index 0000000..4b2ae94 --- /dev/null +++ b/CVE_C/man/estimate.bandwidth.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/estimateBandwidth.R +\name{estimate.bandwidth} +\alias{estimate.bandwidth} +\title{Estimated bandwidth for CVE.} +\usage{ +estimate.bandwidth(X, k, nObs) +} +\arguments{ +\item{X}{data matrix of dimension (n x p) with n data points X_i of dimension +q. Therefor each row represents a datapoint of dimension p.} + +\item{k}{Guess for rank(B).} + +\item{nObs}{Ether numeric of a function. If specified as numeric value +its used in the computation of the bandwidth directly. If its a function +`nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not +supplied at all is to use \code{nObs <- nrow(x)^0.5}.} +} +\description{ +Estimates a propper bandwidth \code{h} according +\deqn{% +h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% +h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} +} +\seealso{ +[\code{\link{qchisq}}] +} diff --git a/CVE_C/man/grad.Rd b/CVE_C/man/grad.Rd new file mode 100644 index 0000000..d890e93 --- /dev/null +++ b/CVE_C/man/grad.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gradient.R +\name{grad} +\alias{grad} +\title{Compute get gradient of `L(V)` given a dataset `X`.} +\usage{ +grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE, + persistent = FALSE) +} +\arguments{ +\item{X}{Data matrix.} + +\item{Y}{Responce.} + +\item{V}{Position to compute the gradient at, aka point on Stiefl manifold.} + +\item{h}{Bandwidth} + +\item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.} + +\item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single +value loss is returned and \code{envir} is ignored.} + +\item{persistent}{Determines if data indices and dependent calculations shall +be reused from the parent environment. ATTENTION: Do NOT set this flag, only +intended for internal usage by carefully aligned functions!} +} +\description{ +Compute get gradient of `L(V)` given a dataset `X`. +} +\keyword{internal} diff --git a/CVE_C/man/null.Rd b/CVE_C/man/null.Rd new file mode 100644 index 0000000..498b159 --- /dev/null +++ b/CVE_C/man/null.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{null} +\alias{null} +\title{Null space basis of given matrix `V`} +\usage{ +null(V) +} +\arguments{ +\item{V}{`(p, q)` matrix} +} +\value{ +Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. +} +\description{ +Null space basis of given matrix `V` +} +\keyword{internal} diff --git a/CVE_C/man/plot.cve.Rd b/CVE_C/man/plot.cve.Rd new file mode 100644 index 0000000..35f6921 --- /dev/null +++ b/CVE_C/man/plot.cve.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{plot.cve} +\alias{plot.cve} +\title{Ploting helper for objects of class \code{cve}.} +\usage{ +## S3 method for class 'cve' +plot(x, content = "history", ...) +} +\arguments{ +\item{x}{Object of class \code{cve} (result of [cve()]).} + +\item{...}{Pass through parameters to [plot()] and [lines()]} + +\item{content}{Specifies what to plot: +\itemize{ + \item "history" Plots the loss history from stiefel optimization + (default). + \item ... 
TODO: add (if there are any) +}} +} +\description{ +Ploting helper for objects of class \code{cve}. +} +\seealso{ +see \code{\link{par}} for graphical parameters to pass through + as well as \code{\link{plot}} for standard plot utility. +} diff --git a/CVE_C/man/rStiefl.Rd b/CVE_C/man/rStiefl.Rd new file mode 100644 index 0000000..31c40bb --- /dev/null +++ b/CVE_C/man/rStiefl.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/util.R +\name{rStiefl} +\alias{rStiefl} +\title{Samples uniform from the Stiefel Manifold} +\usage{ +rStiefl(p, q) +} +\arguments{ +\item{p}{row dim.} + +\item{q}{col dim.} +} +\value{ +`(p, q)` semi-orthogonal matrix +} +\description{ +Samples uniform from the Stiefel Manifold +} +\examples{ + V <- rStiefel(6, 4) +} diff --git a/CVE_C/man/summary.cve.Rd b/CVE_C/man/summary.cve.Rd new file mode 100644 index 0000000..1151f98 --- /dev/null +++ b/CVE_C/man/summary.cve.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CVE.R +\name{summary.cve} +\alias{summary.cve} +\title{Prints a summary of a \code{cve} result.} +\usage{ +\method{summary}{cve}(object, ...) +} +\arguments{ +\item{object}{Instance of 'cve' as return of \code{cve}.} +} +\description{ +Prints a summary of a \code{cve} result. +} diff --git a/CVE_C/src/Makevars b/CVE_C/src/Makevars new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE_C/src/Makevars @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. +## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE_C/src/Makevars.win b/CVE_C/src/Makevars.win new file mode 100644 index 0000000..d3e3f41 --- /dev/null +++ b/CVE_C/src/Makevars.win @@ -0,0 +1,14 @@ + +## With R 3.1.0 or later, you can uncomment the following line to tell R to +## enable compilation with C++11 (where available) +## +## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider +## availability of the package we do not yet enforce this here. It is however +## recommended for client packages to set it. 
+## +## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP +## support within Armadillo prefers / requires it +CXX_STD = CXX11 + +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) diff --git a/CVE_C/src/config.h b/CVE_C/src/config.h new file mode 100644 index 0000000..33dc4c9 --- /dev/null +++ b/CVE_C/src/config.h @@ -0,0 +1,7 @@ +#ifndef CVE_INCLUDE_GUARD_CONFIG_ +#define CVE_INCLUDE_GUARD_CONFIG_ + +#define CVE_MEM_CHUNK_SIZE 2032 +#define CVE_MEM_CHUNK_SMALL 1016 + +#endif /* CVE_INCLUDE_GUARD_CONFIG_ */ \ No newline at end of file diff --git a/CVE_C/src/export.c b/CVE_C/src/export.c new file mode 100644 index 0000000..445abd0 --- /dev/null +++ b/CVE_C/src/export.c @@ -0,0 +1,29 @@ +#include + +void grad(const int n, const int p, const int q, + const double *X, + const double *X_diff, + const double *Y, + const double *V, + const double h, + double *G, double *loss); + +SEXP grad_c(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { + SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V))); + SEXP loss = PROTECT(ScalarReal(0.0)); + + grad(nrows(X), ncols(X), ncols(V), + REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h), + REAL(G), REAL(loss)); + + SEXP out = PROTECT(allocVector(VECSXP, 2)); + SET_VECTOR_ELT(out, 0, G); + SET_VECTOR_ELT(out, 1, loss); + SEXP names = PROTECT(allocVector(STRSXP, 2)); + SET_STRING_ELT(names, 0, mkChar("G")); + SET_STRING_ELT(names, 1, mkChar("loss")); + setAttrib(out, install("names"), names); + + UNPROTECT(4); + return out; +} diff --git a/CVE_C/src/grad.c b/CVE_C/src/grad.c new file mode 100644 index 0000000..b90f297 --- /dev/null +++ b/CVE_C/src/grad.c @@ -0,0 +1,123 @@ +#include +#include + +#include "sums.h" +#include "sweep.h" +#include "matrix.h" +#include "indexing.h" + +// TODO: clarify +static inline double gaussKernel(const double x, const double scale) { + return exp(scale * x * x); +} + +// TODO: mutch potential for optimization!!! +static void weightedYandLoss(const int n, + const double *Y, + const double *vecD, + const double *vecW, + const double *colSums, + double *y1, double *L, double *vecS, + double *loss) { + int i, j, k, N = n * (n - 1) / 2; + double l; + + for (i = 0; i < n; ++i) { + y1[i] = Y[i] / colSums[i]; + L[i] = Y[i] * Y[i] / colSums[i]; + } + + for (k = j = 0; j < n; ++j) { + for (i = j + 1; i < n; ++i, ++k) { + y1[i] += Y[j] * vecW[k] / colSums[i]; + y1[j] += Y[i] * vecW[k] / colSums[j]; + L[i] += Y[j] * Y[j] * vecW[k] / colSums[i]; + L[j] += Y[i] * Y[i] * vecW[k] / colSums[j]; + } + } + + l = 0.0; + for (i = 0; i < n; ++i) { + l += (L[i] -= y1[i] * y1[i]); + } + *loss = l / (double)n; + + for (k = j = 0; j < n; ++j) { + for (i = j + 1; i < n; ++i, ++k) { + l = Y[j] - y1[i]; + vecS[k] = (L[i] - (l * l)) / colSums[i]; + l = Y[i] - y1[j]; + vecS[k] += (L[j] - (l * l)) / colSums[j]; + } + } + + for (k = 0; k < N; ++k) { + vecS[k] *= vecW[k] * vecD[k]; + } +} + +void grad(const int n, const int p, const int q, + const double *X, + const double *X_diff, + const double *Y, + const double *V, + const double h, + double *G, double *loss) { + // Number of X_i to X_j not trivial pairs. + int i, N = (n * (n - 1)) / 2; + double scale = -0.5 / h; + + if (X_diff == (void*)0) { + // TODO: ... + } + + // Allocate and compute projection matrix `Q = I_p - V * V^T` + double *Q = (double*)malloc(p * p * sizeof(double)); + nullProj(V, p, q, Q); + + // allocate and compute vectorized distance matrix with a temporary + // projection of `X_diff`. 
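    // Reviewer note (memory-layout sketch, not part of the original sources):
    // `X_proj` is deliberately over-allocated. After the projected distances
    // are formed its storage is reused for `vecW` (N doubles) and for
    // `colSums`, `y1` and `L` (n doubles each), and later again for the scaled
    // `X_diff` feeding the final cross product. The `N * 5` fallback for
    // p < 5 keeps the buffer large enough, since N + 3 * n <= 5 * N for all
    // n >= 3.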
+ double *vecD = (double*)malloc(N * sizeof(double)); + double *X_proj; + if (p < 5) { // TODO: refine that! + X_proj = (double*)malloc(N * 5 * sizeof(double)); + } else { + X_proj = (double*)malloc(N * p * sizeof(double)); + } + matrixprod(X_diff, N, p, Q, p, p, X_proj); + rowSquareSums(X_proj, N, p, vecD); + + // Apply kernel to distence vector for weights computation. + double *vecW = X_proj; // reuse memory area, no longer needed. + for (i = 0; i < N; ++i) { + vecW[i] = gaussKernel(vecD[i], scale); + } + double *colSums = X_proj + N; // still allocated! + rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym + + // compute weighted responces of first end second momontum, aka y1, y2. + double *y1 = X_proj + N + n; + double *L = X_proj + N + (2 * n); + // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because + // used symultanious to X_proj in final gradient computation. + double *vecS = (double*)malloc(N * sizeof(double)); + weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss); + + // compute the gradient using X_proj for intermidiate scaled X_diff. + rowSweep(X_diff, N, p, "*", vecS, X_proj); + // reuse Q which has the required dim (p, p). + crossprod(X_diff, N, p, X_proj, N, p, Q); + // Product with V + matrixprod(Q, p, p, V, p, q, G); + // And final scaling (TODO: move into matrixprod!) + scale = -2.0 / (((double)n) * h * h); + N = p * q; + for (i = 0; i < N; ++i) { + G[i] *= scale; + } + + free(vecS); + free(X_proj); + free(vecD); + free(Q); +} diff --git a/CVE_C/src/indexing.c b/CVE_C/src/indexing.c new file mode 100644 index 0000000..36a91e7 --- /dev/null +++ b/CVE_C/src/indexing.c @@ -0,0 +1,12 @@ +#include "indexing.h" + +void rangePairs(const int from, const int to, int *pairs) { + int i, j, k; + + for (k = 0, i = from; i < to; ++i) { + for (j = i + 1; j < to; ++j, k += 2) { + pairs[k] = i; + pairs[k + 1] = j; + } + } +} diff --git a/CVE_C/src/indexing.h b/CVE_C/src/indexing.h new file mode 100644 index 0000000..db49581 --- /dev/null +++ b/CVE_C/src/indexing.h @@ -0,0 +1,8 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_INDEXING_ +#define CVE_INCLUDE_GUARD_INDEXING_ + +void rangePairs(const int from, const int to, int *pairs); + +#endif /* CVE_INCLUDE_GUARD_INDEXING_ */ diff --git a/CVE_C/src/init.c b/CVE_C/src/init.c new file mode 100644 index 0000000..e3a859f --- /dev/null +++ b/CVE_C/src/init.c @@ -0,0 +1,23 @@ +#include +#include +#include // for NULL +#include + +/* FIXME: + Check these declarations against the C/Fortran source code. +*/ + +/* .Call calls */ +extern SEXP grad_c(SEXP, SEXP, SEXP, SEXP, SEXP); + +static const R_CallMethodDef CallEntries[] = { + {"grad_c", (DL_FUNC) &grad_c, 5}, + {NULL, NULL, 0} +}; + +/* Restrict C entrypoints to registered routines. */ +void R_initCVE(DllInfo *dll) +{ + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/CVE_C/src/matrix.c b/CVE_C/src/matrix.c new file mode 100644 index 0000000..50a07ad --- /dev/null +++ b/CVE_C/src/matrix.c @@ -0,0 +1,71 @@ +#include // for `mem*` functions. 
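// Reviewer note (reference only, not part of the original sources): the R
// equivalents of the BLAS wrappers in this file are
//     matrixprod(A, B)         ~  A %*% B
//     crossprod(A, B)          ~  crossprod(A, B), i.e. t(A) %*% B
//     nullProj(B)              ~  diag(nrow(B)) - tcrossprod(B)
//     skewSymRank2k(A, B, ..)  ~  alpha * (A %*% t(B) - B %*% t(A)) + beta * C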
+ +#include "config.h" +#include "matrix.h" + +#include + +void matrixprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C) { + const double one = 1.0; + const double zero = 0.0; + + // DGEMM with parameterization: + // C <- A %*% B + F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA, + &one, A, &nrowA, B, &nrowB, + &zero, C, &nrowA); +} + +void crossprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C) { + const double one = 1.0; + const double zero = 0.0; + + // DGEMM with parameterization: + // C <- t(A) %*% B + F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA, + &one, A, &nrowA, B, &nrowB, + &zero, C, &ncolA); +} + +void nullProj(const double *B, const int nrowB, const int ncolB, + double *Q) { + const double minusOne = -1.0; + const double one = 1.0; + + // Initialize as identity matrix. + memset(Q, 0, sizeof(double) * nrowB * nrowB); + double *Q_diag, *Q_end = Q + nrowB * nrowB; + for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) { + *Q_diag = 1.0; + } + + // DGEMM with parameterization: + // C <- (-1.0 * B %*% t(B)) + C + F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB, + &minusOne, B, &nrowB, B, &nrowB, + &one, Q, &nrowB); +} + +// A dence skwe-symmetric rank 2 update. +// Perform the update +// C := alpha (A * B^T - B * A^T) + beta C +void skewSymRank2k(const int nrow, const int ncol, + double alpha, const double *A, const double *B, + double beta, + double *C) { + F77_NAME(dgemm)("N", "T", + &nrow, &nrow, &ncol, + &alpha, A, &nrow, B, &nrow, + &beta, C, &nrow); + + alpha *= -1.0; + beta = 1.0; + F77_NAME(dgemm)("N", "T", + &nrow, &nrow, &ncol, + &alpha, B, &nrow, A, &nrow, + &beta, C, &nrow); +} diff --git a/CVE_C/src/matrix.h b/CVE_C/src/matrix.h new file mode 100644 index 0000000..e9ce39b --- /dev/null +++ b/CVE_C/src/matrix.h @@ -0,0 +1,25 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_MATRIX_ +#define CVE_INCLUDE_GUARD_MATRIX_ + +void matrixprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C); + +void crossprod(const double *A, const int nrowA, const int ncolA, + const double *B, const int nrowB, const int ncolB, + double *C); + +void nullProj(const double *B, const int nrowB, const int ncolB, + double *Q); + +// A dence skwe-symmetric rank 2 update. +// Perform the update +// C := alpha (A * B^T - B * A^T) + beta C +void skewSymRank2k(const int nrow, const int ncol, + double alpha, const double *A, const double *B, + double beta, + double *C); + +#endif /* CVE_INCLUDE_GUARD_MATRIX_ */ diff --git a/CVE_C/src/sums.c b/CVE_C/src/sums.c new file mode 100644 index 0000000..068aa55 --- /dev/null +++ b/CVE_C/src/sums.c @@ -0,0 +1,113 @@ +#include // for `mem*` functions. + +#include "config.h" +#include "sums.h" + +void rowSums(const double *A, const int nrow, const int ncol, + double *sum) { + int i, j, block_size, block_size_i; + const double *A_block = A; + const double *A_end = A + nrow * ncol; + + if (nrow > CVE_MEM_CHUNK_SIZE) { + block_size = CVE_MEM_CHUNK_SIZE; + } else { + block_size = nrow; + } + + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Reset `A` to new block beginning. + A = A_block; + // Take block size of eveything left and reduce to max size. 
+ block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Compute first blocks column, + for (j = 0; j < block_size_i; ++j) { + sum[j] = A[j]; + } + // and sum the following columns to the first one. + for (A += nrow; A < A_end; A += nrow) { + for (j = 0; j < block_size_i; ++j) { + sum[j] += A[j]; + } + } + // Step one block forth. + A_block += block_size_i; + sum += block_size_i; + } +} + +void colSums(const double *A, const int nrow, const int ncol, + double *sum) { + int j; + double *sum_end = sum + ncol; + + memset(sum, 0, sizeof(double) * ncol); + for (; sum < sum_end; ++sum) { + for (j = 0; j < nrow; ++j) { + *sum += A[j]; + } + A += nrow; + } +} + +void rowSquareSums(const double *A, const int nrow, const int ncol, + double *sum) { + int i, j, block_size, block_size_i; + const double *A_block = A; + const double *A_end = A + nrow * ncol; + + if (nrow < CVE_MEM_CHUNK_SIZE) { + block_size = nrow; + } else { + block_size = CVE_MEM_CHUNK_SIZE; + } + + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Reset `A` to new block beginning. + A = A_block; + // Take block size of eveything left and reduce to max size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Compute first blocks column, + for (j = 0; j < block_size_i; ++j) { + sum[j] = A[j] * A[j]; + } + // and sum the following columns to the first one. + for (A += nrow; A < A_end; A += nrow) { + for (j = 0; j < block_size_i; ++j) { + sum[j] += A[j] * A[j]; + } + } + // Step one block forth. + A_block += block_size_i; + sum += block_size_i; + } +} + +void rowSumsSymVec(const double *Avec, const int nrow, + const double diag, + double *sum) { + int i, j; + + if (diag == 0.0) { + memset(sum, 0, nrow * sizeof(double)); + } else { + for (i = 0; i < nrow; ++i) { + sum[i] = diag; + } + } + + for (j = 0; j < nrow; ++j) { + for (i = j + 1; i < nrow; ++i, ++Avec) { + sum[j] += *Avec; + sum[i] += *Avec; + } + } +} diff --git a/CVE_C/src/sums.h b/CVE_C/src/sums.h new file mode 100644 index 0000000..f409487 --- /dev/null +++ b/CVE_C/src/sums.h @@ -0,0 +1,19 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_SUMS_ +#define CVE_INCLUDE_GUARD_SUMS_ + +void rowSums(const double *A, const int nrow, const int ncol, + double *sum); + +void colSums(const double *A, const int nrow, const int ncol, + double *sum); + +void rowSquareSums(const double *A, const int nrow, const int ncol, + double *sum); + +void rowSumsSymVec(const double *Avec, const int nrow, + const double diag, + double *sum); + +#endif /* CVE_INCLUDE_GUARD_SUMS_ */ diff --git a/CVE_C/src/sweep.c b/CVE_C/src/sweep.c new file mode 100644 index 0000000..8678e92 --- /dev/null +++ b/CVE_C/src/sweep.c @@ -0,0 +1,113 @@ +#include // for `error`. + +#include "config.h" +#include "sweep.h" + +/* C[, j] = A[, j] * v for each j = 1 to ncol */ +void rowSweep(const double *A, const int nrow, const int ncol, + const char* op, + const double *v, // vector of length nrow + double *C) { + int i, j, block_size, block_size_i; + const double *A_block = A; + double *C_block = C; + const double *A_end = A + nrow * ncol; + + if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache + block_size = CVE_MEM_CHUNK_SMALL; + } else { + block_size = nrow; + } + + if (*op == '+') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. 
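            // Reviewer note (R reference for the helpers in sums.c and this
            // file, not part of the original sources):
            //   rowSums(A), colSums(A), rowSquareSums(A)
            //       ~  rowSums(A), colSums(A), rowSums(A^2)
            //   rowSumsSymVec(Avec, n, d)
            //       ~  row sums of the symmetric n x n matrix whose strict
            //          lower triangle (column-major) is `Avec` and whose
            //          diagonal entries all equal `d`
            //   rowSweep(A, nrow, ncol, op, v)
            //       ~  sweep(A, 1, v, op), i.e. `v` has length nrow and is
            //          applied down each column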
+ A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] + v[j]; // FUN = '+' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '-') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] - v[j]; // FUN = '-' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '*') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] * v[j]; // FUN = '*' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else if (*op == '/') { + // Iterate `(block_size_i, ncol)` submatrix blocks. + for (i = 0; i < nrow; i += block_size_i) { + // Set `A` and `C` to block beginning. + A = A_block; + C = C_block; + // Get current block's row size. + block_size_i = nrow - i; + if (block_size_i > block_size) { + block_size_i = block_size; + } + // Perform element wise operation for block. + for (; A < A_end; A += nrow, C += nrow) { + for (j = 0; j < block_size_i; ++j) { + C[j] = A[j] / v[j]; // FUN = '/' + } + } + // Step one block forth. + A_block += block_size_i; + C_block += block_size_i; + v += block_size_i; + } + } else { + error("Got unknown 'op' (opperation) argument."); + } +} diff --git a/CVE_C/src/sweep.h b/CVE_C/src/sweep.h new file mode 100644 index 0000000..90be784 --- /dev/null +++ b/CVE_C/src/sweep.h @@ -0,0 +1,11 @@ + +/* Include Guard */ +#ifndef CVE_INCLUDE_GUARD_SWEEP_ +#define CVE_INCLUDE_GUARD_SWEEP_ + +void rowSweep(const double *A, const int nrow, const int ncol, + const char* op, + const double *v, // vector of length nrow + double *C); + +#endif /* CVE_INCLUDE_GUARD_SWEEP_ */ diff --git a/notes.md b/notes.md index 75602ba..ccb04a0 100644 --- a/notes.md +++ b/notes.md @@ -6,6 +6,35 @@ grep --include=*\.{c,h,R} -rnw '.' -e "sweep" ``` searches in all `C` source and header fils as well as `R` source files for the term _sweep_. +## Recursive dir. compair with colored sructure (more or less). +```bash +diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)" +``` + +## Parsing `bash` script parameters. 
+```bash
+usage="$0 [-v|--verbose] [-n|--dry-run] [(-s|--stack-size) <size>] [-h|--help] [-- [p1, [p2, ...]]]"
+verbose=false
+help=false
+dry_run=false
+stack_size=0
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+        -v | --verbose )    verbose=true;    shift ;;
+        -n | --dry-run )    dry_run=true;    shift ;;
+        -s | --stack-size ) stack_size="$2"; shift; shift ;;
+        -h | --help )       echo "$usage";   exit ;;    # On help print usage and exit.
+        -- ) shift; break ;;                            # Break param "parsing".
+        * )  echo "$usage" >&2; exit 1 ;;               # Print usage and exit with failure.
+    esac
+done
+
+echo verbose=$verbose
+echo dry_run=$dry_run
+echo stack_size=$stack_size
+```
+
 # Development
 ## Build and install.
 To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file.
diff --git a/wip.R b/wip.R
index 87fb9e8..25eb66d 100644
--- a/wip.R
+++ b/wip.R
@@ -118,6 +118,17 @@ microbenchmark(
 )
 
 ## Matrix-Matrix opperation .call ---------------------------------------------
+transpose.c <- function(A) {
+    stopifnot(
+        is.matrix(A), is.numeric(A)
+    )
+    if (!is.double(A)) {
+        A <- matrix(as.double(A), nrow(A), ncol(A))
+    }
+
+    .Call('R_transpose', PACKAGE = 'wip', A)
+}
+
 matrixprod.c <- function(A, B) {
     stopifnot(
         is.matrix(A), is.numeric(A),
@@ -174,6 +185,14 @@ m <- 300
 A <- matrix(runif(n * k), n, k)
 B <- matrix(runif(k * m), k, m)
 
+stopifnot(
+    all.equal(t(A), transpose.c(A))
+)
+microbenchmark(
+    t(A),
+    transpose.c(A)
+)
+
 stopifnot(
     all.equal(A %*% B, matrixprod.c(A, B))
 )
@@ -326,4 +345,4 @@ stopifnot(all.equal(
 microbenchmark(
     grad = grad(X, Y, V, h),
     gradient.c = gradient.c(X, X_diff, Y, V, h)
-)
\ No newline at end of file
+)
diff --git a/wip.c b/wip.c
index 0c764eb..75c77d6 100644
--- a/wip.c
+++ b/wip.c
@@ -1,4 +1,5 @@
 #include
+#include <string.h> // for `mem*` functions.
 #include
 #include
@@ -99,15 +100,15 @@ static inline void rowSquareSums(const double *A,
 }
 
 static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum) {
     int i, j;
 
-    if (*diag == 0.0) {
+    if (diag == 0.0) {
         memset(sum, 0, nrow * sizeof(double));
     } else {
         for (i = 0; i < nrow; ++i) {
-            sum[i] = *diag;
+            sum[i] = diag;
         }
     }
@@ -228,6 +229,18 @@ static void rowSweep(const double *A, const int nrow, const int ncol,
 }
 
+void transpose(const double *A, const int nrow, const int ncol, double* T) {
+    int i, j, len = nrow * ncol;
+
+    // Filling column-wise and accessing row-wise.
+    for (i = 0, j = 0; i < len; ++i, j += nrow) {
+        if (j >= len) {
+            j -= len - 1;
+        }
+        T[i] = A[j];
+    }
+}
+
 static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                               const double *B, const int nrowB, const int ncolB,
                               double *C) {
@@ -363,7 +376,6 @@ static void gradient(const int n, const int p, const int q,
     // Number of X_i to X_j not trivial pairs.
     int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;
-    const double one = 1.0;
 
     if (X_diff == (void*)0) {
         // TODO: ...
@@ -391,7 +403,7 @@
         vecW[i] = gaussKernel(vecD[i], scale);
     }
     double *colSums = X_proj + N; // still allocated!
-    rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym
+    rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
 
     // compute weighted responces of first end second momontum, aka y1, y2.
     double *y1 = X_proj + N + n;
diff --git a/wip.h b/wip.h
index a2d03f8..1e1f7dc 100644
--- a/wip.h
+++ b/wip.h
@@ -1,5 +1,5 @@
-#ifndef _CVE_INCLUDE_GUARD_
-#define _CVE_INCLUDE_GUARD_
+#ifndef CVE_INCLUDE_GUARD_
+#define CVE_INCLUDE_GUARD_
 
 #include
 
@@ -41,12 +41,12 @@
 static inline void rowSumsSymVec(const double *Avec, const int nrow,
-                                 const double *diag,
+                                 const double diag,
                                  double *sum);
 SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) {
     SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow)));
 
-    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum));
+    rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum));
 
     UNPROTECT(1);
     return sum;
@@ -67,6 +67,16 @@ SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) {
     return C;
 }
 
+void transpose(const double *A, const int nrow, const int ncol, double* T);
+SEXP R_transpose(SEXP A) {
+    SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A)));
+
+    transpose(REAL(A), nrows(A), ncols(A), REAL(T));
+
+    UNPROTECT(1); /* T */
+    return T;
+}
+
 static inline void matrixprod(const double *A, const int nrowA, const int ncolA,
                               const double *B, const int nrowB, const int ncolB,
                               double *C);
@@ -156,4 +166,4 @@ SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) {
     return G;
 }
 
-#endif /* _CVE_INCLUDE_GUARD_ */
+#endif /* CVE_INCLUDE_GUARD_ */
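
The new `rowSweep` kernel computes `C[i, j] = A[i, j] op v[i]` for `op` in `+`, `-`, `*`, `/`, which is base R's `sweep(A, 1, v, FUN)` over rows. A minimal sanity check in the style of the helpers already in `wip.R` is sketched below. It assumes the `wip` shared object is loaded (as in `wip.R`) and that the `R_rowSweep` wrapper declared in `wip.h` takes the operator as a length-1 character string; the wrapper body is not shown in the diff, so that call convention is an assumption.

```R
# Sketch only: the `R_rowSweep` call convention (operator passed as a
# length-1 character string) is assumed, not confirmed by the diff.
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.numeric(v), length(v) == nrow(A)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow(A), ncol(A))
    }
    .Call('R_rowSweep', PACKAGE = 'wip', A, as.double(v), op)
}

n <- 200
p <- 12
A <- matrix(runif(n * p), n, p)
v <- runif(n)

# rowSweep computes C[i, j] = A[i, j] op v[i], i.e. sweep() over rows.
stopifnot(
    all.equal(sweep(A, 1, v, '-'), rowSweep.c(A, v, '-')),
    all.equal(sweep(A, 1, v, '/'), rowSweep.c(A, v, '/'))
)
```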
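
`rowSumsSymVec` consumes only the strict lower triangle of a symmetric matrix, stored column-major in `Avec`, plus a constant diagonal, and returns the row sums of the full matrix; `gradient` uses it to get the kernel weight column sums (row sums equal column sums by symmetry). A hedged check of that storage convention against a plain R reconstruction, again assuming the `wip` shared object is loaded:

```R
# Reference: rebuild the full symmetric matrix from the packed strict
# lower triangle (column-major) and a constant diagonal, then take rowSums.
rowSumsSymVec.ref <- function(Avec, n, diag) {
    S <- matrix(diag, n, n)                # constant value; off-diagonals overwritten below
    S[lower.tri(S)] <- Avec                # fill strict lower triangle (column-major)
    S[upper.tri(S)] <- t(S)[upper.tri(S)]  # mirror onto the upper triangle
    rowSums(S)
}

n <- 7
Avec <- runif(n * (n - 1) / 2)

stopifnot(all.equal(
    rowSumsSymVec.ref(Avec, n, 1.0),
    .Call('R_rowSumsSymVec', PACKAGE = 'wip', Avec, as.integer(n), 1.0)
))
```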