add: C implementation,
wip: smaller stuff
This commit is contained in:
		
							parent
							
								
									4d2651fe8a
								
							
						
					
					
						commit
						9e46a2d3d7
					
				
							
								
								
									
										11
									
								
								CVE_C/DESCRIPTION
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								CVE_C/DESCRIPTION
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| Package: CVE | ||||
| Type: Package | ||||
| Title: Conditional Variance Estimator for Sufficient Dimension Reduction | ||||
| Version: 0.1 | ||||
| Date: 2019-08-29 | ||||
| Author: Loki | ||||
| Maintainer: Loki <loki@no.mail> | ||||
| Description: Implementation of the Conditional Variance Estimation (CVE) method. This package version is writen in pure R. | ||||
| License: GPL-3 | ||||
| Encoding: UTF-8 | ||||
| RoxygenNote: 6.1.1 | ||||
							
								
								
									
										24
									
								
								CVE_C/NAMESPACE
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								CVE_C/NAMESPACE
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,24 @@ | ||||
| # Generated by roxygen2: do not edit by hand | ||||
| 
 | ||||
| S3method(plot,cve) | ||||
| S3method(summary,cve) | ||||
| export(cve) | ||||
| export(cve.call) | ||||
| export(cve.grid.search) | ||||
| export(cve_linesearch) | ||||
| export(cve_sgd) | ||||
| export(cve_simple) | ||||
| export(dataset) | ||||
| export(elem.pairs) | ||||
| export(estimate.bandwidth) | ||||
| export(grad) | ||||
| export(null) | ||||
| export(rStiefl) | ||||
| import(stats) | ||||
| importFrom(graphics,lines) | ||||
| importFrom(graphics,plot) | ||||
| importFrom(graphics,points) | ||||
| importFrom(stats,model.frame) | ||||
| importFrom(stats,rbinom) | ||||
| importFrom(stats,rnorm) | ||||
| useDynLib(CVE, .registration = TRUE) | ||||
							
								
								
									
										223
									
								
								CVE_C/R/CVE.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										223
									
								
								CVE_C/R/CVE.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,223 @@ | ||||
| #' Conditional Variance Estimator (CVE) | ||||
| #' | ||||
| #' Conditional Variance Estimator for Sufficient Dimension | ||||
| #'  Reduction | ||||
| #' | ||||
| #' TODO: And some details | ||||
| #' | ||||
| #' | ||||
| #' @references Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||
| #' | ||||
| #' @docType package | ||||
| #' @author Loki | ||||
| #' @useDynLib CVE, .registration = TRUE | ||||
| "_PACKAGE" | ||||
| 
 | ||||
| #' Implementation of the CVE method. | ||||
| #' | ||||
| #' Conditional Variance Estimator (CVE) is a novel sufficient dimension | ||||
| #' reduction (SDR) method assuming a model | ||||
| #' \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} | ||||
| #' where B'X is a lower dimensional projection of the predictors. | ||||
| #' | ||||
| #' @param formula Formel for the regression model defining `X`, `Y`. | ||||
| #'  See: \code{\link{formula}}. | ||||
| #' @param data data.frame holding data for formula. | ||||
| #' @param method The different only differe in the used optimization. | ||||
| #'  All of them are Gradient based optimization on a Stiefel manifold. | ||||
| #' \itemize{ | ||||
| #'      \item "simple" Simple reduction of stepsize. | ||||
| #'      \item "sgd" stocastic gradient decent. | ||||
| #'      \item TODO: further | ||||
| #' } | ||||
| #' @param ... Further parameters depending on the used method. | ||||
| #' @examples | ||||
| #' library(CVE) | ||||
| #' | ||||
| #' # sample dataset | ||||
| #' ds <- dataset("M5") | ||||
| #' | ||||
| #' # call ´cve´ with default method (aka "simple") | ||||
| #' dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) | ||||
| #' # plot optimization history (loss via iteration) | ||||
| #' plot(dr.simple, main = "CVE M5 simple") | ||||
| #' | ||||
| #' # call ´cve´ with method "linesearch" using ´data.frame´ as data. | ||||
| #' data <- data.frame(Y = ds$Y, X = ds$X) | ||||
| #' # Note: ´Y, X´ are NOT defined, they are extracted from ´data´. | ||||
| #' dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) | ||||
| #' plot(dr.linesearch, main = "CVE M5 linesearch") | ||||
| #' | ||||
| #' @references Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||
| #' | ||||
| #' @seealso \code{\link{formula}}. For a complete parameters list (dependent on | ||||
| #'  the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} | ||||
| #' @import stats | ||||
| #' @importFrom stats model.frame | ||||
| #' @export | ||||
| cve <- function(formula, data, method = "simple", max.dim = 10, ...) { | ||||
|     # check for type of `data` if supplied and set default | ||||
|     if (missing(data)) { | ||||
|         data <- environment(formula) | ||||
|     } else if (!is.data.frame(data)) { | ||||
|         stop('Parameter `data` must be a `data.frame` or missing.') | ||||
|     } | ||||
| 
 | ||||
|     # extract `X`, `Y` from `formula` with `data` | ||||
|     model <- stats::model.frame(formula, data) | ||||
|     X <- as.matrix(model[,-1, drop = FALSE]) | ||||
|     Y <- as.matrix(model[, 1, drop = FALSE]) | ||||
| 
 | ||||
|     # pass extracted data on to [cve.call()] | ||||
|     dr <- cve.call(X, Y, method = method, ...) | ||||
| 
 | ||||
|     # overwrite `call` property from [cve.call()] | ||||
|     dr$call <- match.call() | ||||
|     return(dr) | ||||
| } | ||||
| 
 | ||||
| #' @param nObs as describet in the Paper. | ||||
| #' @param X Data | ||||
| #' @param Y Responces | ||||
| #' @param nObs Like in the paper. | ||||
| #' @param k guess for SDR dimension. | ||||
| #' @param ... Method specific parameters. | ||||
| #' @rdname cve | ||||
| #' @export | ||||
| cve.call <- function(X, Y, method = "simple", nObs = nrow(X)^.5, | ||||
|                      min.dim = 1, max.dim = 10, k, ...) { | ||||
| 
 | ||||
|     # parameter checking | ||||
|     if (!is.matrix(X)) { | ||||
|         stop('X should be a matrices.') | ||||
|     } | ||||
|     if (is.matrix(Y)) { | ||||
|         Y <- as.vector(Y) | ||||
|     } | ||||
|     if (nrow(X) != length(Y)) { | ||||
|         stop('Rows of X and number of Y elements are not compatible.') | ||||
|     } | ||||
|     if (ncol(X) < 2) { | ||||
|         stop('X is one dimensional, no need for dimension reduction.') | ||||
|     } | ||||
| 
 | ||||
|     if (!missing(k)) { | ||||
|         min.dim <- as.integer(k) | ||||
|         max.dim <- as.integer(k) | ||||
|     } else { | ||||
|         min.dim <- as.integer(min.dim) | ||||
|         max.dim <- as.integer(min(max.dim, ncol(X) - 1L)) | ||||
|     } | ||||
|     if (min.dim > max.dim) { | ||||
|         stop('`min.dim` bigger `max.dim`.') | ||||
|     } | ||||
|     if (max.dim >= ncol(X)) { | ||||
|         stop('`max.dim` must be smaller than `ncol(X)`.') | ||||
|     } | ||||
| 
 | ||||
|     # Call specified method. | ||||
|     method <- tolower(method) | ||||
|     call <- match.call() | ||||
|     dr <- list() | ||||
|     for (k in min.dim:max.dim) { | ||||
|         if (method == 'simple') { | ||||
|             dr.k <- cve_simple(X, Y, k, nObs = nObs, ...) | ||||
|         } else if (method == 'linesearch') { | ||||
|             dr.k <- cve_linesearch(X, Y, k, nObs = nObs, ...) | ||||
|         } else if (method == 'sgd') { | ||||
|             dr.k <- cve_sgd(X, Y, k, nObs = nObs, ...) | ||||
|         } else { | ||||
|             stop('Got unknown method.') | ||||
|         } | ||||
|         dr.k$k <- k | ||||
|         class(dr.k) <- "cve.k" | ||||
|         dr[[k]] <- dr.k | ||||
|     } | ||||
| 
 | ||||
|     # augment result information | ||||
|     dr$method <- method | ||||
|     dr$call <- call | ||||
|     class(dr) <- "cve" | ||||
|     return(dr) | ||||
| } | ||||
| 
 | ||||
| # TODO: write summary | ||||
| # summary.cve <- function() { | ||||
| #     # code # | ||||
| # } | ||||
| 
 | ||||
| #' Ploting helper for objects of class \code{cve}. | ||||
| #' | ||||
| #' @param x Object of class \code{cve} (result of [cve()]). | ||||
| #' @param content Specifies what to plot: | ||||
| #' \itemize{ | ||||
| #'      \item "history" Plots the loss history from stiefel optimization | ||||
| #'          (default). | ||||
| #'      \item ... TODO: add (if there are any) | ||||
| #' } | ||||
| #' @param ... Pass through parameters to [plot()] and [lines()] | ||||
| #' | ||||
| #' @usage ## S3 method for class 'cve' | ||||
| #' plot(x, content = "history", ...) | ||||
| #' @seealso see \code{\link{par}} for graphical parameters to pass through | ||||
| #'      as well as \code{\link{plot}} for standard plot utility. | ||||
| #' @importFrom graphics plot lines points | ||||
| #' @method plot cve | ||||
| #' @export | ||||
| plot.cve <- function(x, ...) { | ||||
| 
 | ||||
| 
 | ||||
|     # H <- x$history | ||||
|     # H_1 <- H[!is.na(H[, 1]), 1] | ||||
| 
 | ||||
|     # defaults <- list( | ||||
|     #     main = "History", | ||||
|     #     xlab = "Iterations i", | ||||
|     #     ylab = expression(loss == L[n](V^{(i)})), | ||||
|     #     xlim = c(1, nrow(H)), | ||||
|     #     ylim = c(0, max(H)), | ||||
|     #     type = "l" | ||||
|     # ) | ||||
| 
 | ||||
|     # call.plot <- match.call() | ||||
|     # keys <- names(defaults) | ||||
|     # keys <- keys[match(keys, names(call.plot)[-1], nomatch = 0) == 0] | ||||
| 
 | ||||
|     # for (key in keys) { | ||||
|     #     call.plot[[key]] <- defaults[[key]] | ||||
|     # } | ||||
| 
 | ||||
|     # call.plot[[1L]] <- quote(plot) | ||||
|     # call.plot$x <- quote(1:length(H_1)) | ||||
|     # call.plot$y <- quote(H_1) | ||||
| 
 | ||||
|     # eval(call.plot) | ||||
| 
 | ||||
|     # if (ncol(H) > 1) { | ||||
|     #     for (i in 2:ncol(H)) { | ||||
|     #         H_i <- H[H[, i] > 0, i] | ||||
|     #         lines(1:length(H_i), H_i) | ||||
|     #     } | ||||
|     # } | ||||
|     # x.ends <- apply(H, 2, function(h) { length(h[!is.na(h)]) }) | ||||
|     # y.ends <- apply(H, 2, function(h) { tail(h[!is.na(h)], n=1) }) | ||||
|     # points(x.ends, y.ends) | ||||
| } | ||||
| 
 | ||||
| #' Prints a summary of a \code{cve} result. | ||||
| #' @param object Instance of 'cve' as return of \code{cve}. | ||||
| #' @method summary cve | ||||
| #' @export | ||||
| summary.cve <- function(object, ...) { | ||||
|     cat('Summary of CVE result - Method: "', object$method, '"\n', | ||||
|         '\n', | ||||
|         'Dataset size:   ', nrow(object$X), '\n', | ||||
|         'Data Dimension: ', ncol(object$X), '\n', | ||||
|         'SDR Dimension:  ', object$k, '\n', | ||||
|         'loss:           ', object$loss, '\n', | ||||
|         '\n', | ||||
|         'Called via:\n', | ||||
|         '    ', | ||||
|         sep='') | ||||
|     print(object$call) | ||||
| } | ||||
							
								
								
									
										169
									
								
								CVE_C/R/cve_linesearch.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										169
									
								
								CVE_C/R/cve_linesearch.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,169 @@ | ||||
| #' Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe | ||||
| #' conditions. | ||||
| #' | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| cve_linesearch <- function(X, Y, k, | ||||
|                            nObs = sqrt(nrow(X)), | ||||
|                            h = NULL, | ||||
|                            tau = 1.0, | ||||
|                            tol = 1e-3, | ||||
|                            rho1 = 0.1, | ||||
|                            rho2 = 0.9, | ||||
|                            slack = 0, | ||||
|                            epochs = 50L, | ||||
|                            attempts = 10L, | ||||
|                            max.linesearch.iter = 10L, | ||||
|                            logger = NULL | ||||
| ) { | ||||
|     # Set `grad` functions environment to enable if to find this environments | ||||
|     # local variabels, needed to enable the manipulation of this local variables | ||||
|     # from within `grad`. | ||||
|     environment(grad) <- environment() | ||||
| 
 | ||||
|     # Get dimensions. | ||||
|     n <- nrow(X) | ||||
|     p <- ncol(X) | ||||
|     q <- p - k | ||||
| 
 | ||||
|     # Save initial learning rate `tau`. | ||||
|     tau.init <- tau | ||||
|     # Addapt tolearance for break condition. | ||||
|     tol <- sqrt(2 * q) * tol | ||||
| 
 | ||||
|     # Estaimate bandwidth if not given. | ||||
|     if (missing(h) | !is.numeric(h)) { | ||||
|         h <- estimate.bandwidth(X, k, nObs) | ||||
|     } | ||||
| 
 | ||||
|     # Compute persistent data. | ||||
|     # Compute lookup indexes for symmetrie, lower/upper | ||||
|     # triangular parts and vectorization. | ||||
|     pair.index <- elem.pairs(seq(n)) | ||||
|     i <- pair.index[1, ] # `i` indices of `(i, j)` pairs | ||||
|     j <- pair.index[2, ] # `j` indices of `(i, j)` pairs | ||||
|     # Matrix of vectorized indices. (vec(index) -> seq) | ||||
|     index <- matrix(seq(n * n), n, n) | ||||
|     lower <- index[lower.tri(index)] | ||||
|     upper <- t(index)[lower] | ||||
| 
 | ||||
|     # Create all pairewise differences of rows of `X`. | ||||
|     X_diff <- X[i, , drop = F] - X[j, , drop = F] | ||||
|     # Identity matrix. | ||||
|     I_p <- diag(1, p) | ||||
| 
 | ||||
|     # Init tracking of current best (according multiple attempts). | ||||
|     V.best <- NULL | ||||
|     loss.best <- Inf | ||||
| 
 | ||||
|     # Start loop for multiple attempts. | ||||
|     for (attempt in 1:attempts) { | ||||
| 
 | ||||
|         # Sample a `(p, q)` dimensional matrix from the stiefel manifold as | ||||
|         # optimization start value. | ||||
|         V <- rStiefl(p, q) | ||||
| 
 | ||||
|         # Initial loss and gradient. | ||||
|         loss <- Inf | ||||
|         G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) | ||||
|         # Set last loss (aka, loss after applying the step). | ||||
|         loss.last <- loss | ||||
| 
 | ||||
|         # Call logger with initial values before starting optimization. | ||||
|         if (is.function(logger)) { | ||||
|             epoch <- 0 # Set epoch count to 0 (only relevant for logging). | ||||
|             error <- NA | ||||
|             logger(environment()) | ||||
|         } | ||||
| 
 | ||||
|         ## Start optimization loop. | ||||
|         for (epoch in 1:epochs) { | ||||
| 
 | ||||
|             # Cayley transform matrix `A` | ||||
|             A <- (G %*% t(V)) - (V %*% t(G)) | ||||
| 
 | ||||
|             # Directional derivative of the loss at current position, given | ||||
|             # as `Tr(G^T \cdot A \cdot V)`. | ||||
|             loss.prime <- -0.5 * norm(A, type = 'F')^2 | ||||
| 
 | ||||
|             # Linesearch | ||||
|             tau.upper <- Inf | ||||
|             tau.lower <- 0 | ||||
|             tau <- tau.init | ||||
|             for (iter in 1:max.linesearch.iter) { | ||||
|                 # Apply learning rate `tau`. | ||||
|                 A.tau <- (tau / 2) * A | ||||
|                 # Parallet transport (on Stiefl manifold) into direction of `G`. | ||||
|                 inv <- solve(I_p + A.tau) | ||||
|                 V.tau <- inv %*% ((I_p - A.tau) %*% V) | ||||
| 
 | ||||
|                 # Loss at position after a step. | ||||
|                 loss <- Inf # aka loss.tau | ||||
|                 G.tau <- grad(X, Y, V.tau, h, loss.out = TRUE, persistent = TRUE) | ||||
| 
 | ||||
|                 # Armijo condition. | ||||
|                 if (loss > loss.last + (rho1 * tau * loss.prime)) { | ||||
|                     tau.upper <- tau | ||||
|                     tau <- (tau.lower + tau.upper) / 2 | ||||
|                     next() | ||||
|                 } | ||||
| 
 | ||||
|                 V.prime.tau <- -0.5 * inv %*% A %*% (V + V.tau) | ||||
|                 loss.prime.tau <- sum(G * V.prime.tau) # Tr(grad(tau)^T \cdot Y^'(tau)) | ||||
| 
 | ||||
|                 # Wolfe condition. | ||||
|                 if (loss.prime.tau < rho2 * loss.prime) { | ||||
|                     tau.lower <- tau | ||||
|                     if (tau.upper == Inf) { | ||||
|                         tau <- 2 * tau.lower | ||||
|                     } else { | ||||
|                         tau <- (tau.lower + tau.upper) / 2 | ||||
|                     } | ||||
|                 } else { | ||||
|                     break() | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             # Compute error. | ||||
|             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") | ||||
| 
 | ||||
|             # Check break condition (epoch check to skip ignored gradient calc). | ||||
|             # Note: the devision by `sqrt(2 * k)` is included in `tol`. | ||||
|             if (error < tol | epoch >= epochs) { | ||||
|                 # take last step and stop optimization. | ||||
|                 V <- V.tau | ||||
|                 # Final call to the logger before stopping optimization | ||||
|                 if (is.function(logger)) { | ||||
|                     G <- G.tau | ||||
|                     logger(environment()) | ||||
|                 } | ||||
|                 break() | ||||
|             } | ||||
| 
 | ||||
|             # Perform the step and remember previous loss. | ||||
|             V <- V.tau | ||||
|             loss.last <- loss | ||||
|             G <- G.tau | ||||
| 
 | ||||
|             # Log after taking current step. | ||||
|             if (is.function(logger)) { | ||||
|                 logger(environment()) | ||||
|             } | ||||
| 
 | ||||
|         } | ||||
| 
 | ||||
|         # Check if current attempt improved previous ones | ||||
|         if (loss < loss.best) { | ||||
|             loss.best <- loss | ||||
|             V.best <- V | ||||
|         } | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     return(list( | ||||
|         loss = loss.best, | ||||
|         V = V.best, | ||||
|         B = null(V.best), | ||||
|         h = h | ||||
|     )) | ||||
| } | ||||
							
								
								
									
										129
									
								
								CVE_C/R/cve_sgd.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										129
									
								
								CVE_C/R/cve_sgd.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,129 @@ | ||||
| #' Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| #' a classic GD method unsing no further tricks. | ||||
| #' | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| cve_sgd <- function(X, Y, k, | ||||
|                     nObs = sqrt(nrow(X)), | ||||
|                     h = NULL, | ||||
|                     tau = 0.01, | ||||
|                     tol = 1e-3, | ||||
|                     epochs = 50L, | ||||
|                     batch.size = 16L, | ||||
|                     attempts = 10L, | ||||
|                     logger = NULL | ||||
| ) { | ||||
|     # Set `grad` functions environment to enable if to find this environments | ||||
|     # local variabels, needed to enable the manipulation of this local variables | ||||
|     # from within `grad`. | ||||
|     environment(grad) <- environment() | ||||
| 
 | ||||
|     # Get dimensions. | ||||
|     n <- nrow(X) # Number of samples. | ||||
|     p <- ncol(X) # Data dimensions | ||||
|     q <- p - k   # Complement dimension of the SDR space. | ||||
| 
 | ||||
|     # Save initial learning rate `tau`. | ||||
|     tau.init <- tau | ||||
|     # Addapt tolearance for break condition. | ||||
|     tol <- sqrt(2 * q) * tol | ||||
| 
 | ||||
|     # Estaimate bandwidth if not given. | ||||
|     if (missing(h) || !is.numeric(h)) { | ||||
|         h <- estimate.bandwidth(X, k, nObs) | ||||
|     } | ||||
| 
 | ||||
|     # Compute persistent data. | ||||
|     # Compute lookup indexes for symmetrie, lower/upper | ||||
|     # triangular parts and vectorization. | ||||
|     pair.index <- elem.pairs(seq(n)) | ||||
|     i <- pair.index[1, ] # `i` indices of `(i, j)` pairs | ||||
|     j <- pair.index[2, ] # `j` indices of `(i, j)` pairs | ||||
|     # Index of vectorized matrix, for lower and upper triangular part. | ||||
|     lower <- ((i - 1) * n) + j | ||||
|     upper <- ((j - 1) * n) + i | ||||
| 
 | ||||
|     # Create all pairewise differences of rows of `X`. | ||||
|     X_diff <- X[i, , drop = F] - X[j, , drop = F] | ||||
|     # Identity matrix. | ||||
|     I_p <- diag(1, p) | ||||
|     # Init a list of data indices (shuffled for batching). | ||||
|     indices <- seq(n) | ||||
| 
 | ||||
|     # Init tracking of current best (according multiple attempts). | ||||
|     V.best <- NULL | ||||
|     loss.best <- Inf | ||||
| 
 | ||||
|     # Start loop for multiple attempts. | ||||
|     for (attempt in 1:attempts) { | ||||
|         # Reset learning rate `tau`. | ||||
|         tau <- tau.init | ||||
| 
 | ||||
|         # Sample a `(p, q)` dimensional matrix from the stiefel manifold as | ||||
|         # optimization start value. | ||||
|         V <- rStiefl(p, q) | ||||
|         # Keep track of last `V` for computing error after an epoch. | ||||
|         V.last <- V | ||||
| 
 | ||||
|         if (is.function(logger)) { | ||||
|             loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) | ||||
|             error <- NA | ||||
|             epoch <- 0 | ||||
|             logger(environment()) | ||||
|         } | ||||
| 
 | ||||
|         # Repeat `epochs` times | ||||
|         for (epoch in 1:epochs) { | ||||
|             # Shuffle batches | ||||
|             batch.shuffle <- sample(indices) | ||||
| 
 | ||||
|             # Make a step for each batch. | ||||
|             for (batch.start in seq(1, n, batch.size)) { | ||||
|                 # Select batch data indices. | ||||
|                 batch.end <- min(batch.start + batch.size - 1, length(batch.shuffle)) | ||||
|                 batch <- batch.shuffle[batch.start:batch.end] | ||||
| 
 | ||||
|                 # Compute batch gradient. | ||||
|                 loss <- NULL | ||||
|                 G <- grad(X[batch, ], Y[batch], V, h, loss.out = TRUE) | ||||
| 
 | ||||
|                 # Cayley transform matrix. | ||||
|                 A <- (G %*% t(V)) - (V %*% t(G)) | ||||
| 
 | ||||
|                 # Apply learning rate `tau`. | ||||
|                 A.tau <- tau * A | ||||
|                 # Parallet transport (on Stiefl manifold) into direction of `G`. | ||||
|                 V <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) | ||||
|             } | ||||
|             # And the error for the history. | ||||
|             error <- norm(V.last %*% t(V.last) - V %*% t(V), type = "F") | ||||
|             V.last <- V | ||||
| 
 | ||||
|             if (is.function(logger)) { | ||||
|                 # Compute loss at end of epoch for logging. | ||||
|                 loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) | ||||
|                 logger(environment()) | ||||
|             } | ||||
| 
 | ||||
|             # Check break condition. | ||||
|             if (error < tol) { | ||||
|                 break() | ||||
|             } | ||||
|         } | ||||
|         # Compute actual loss after finishing for comparing multiple attempts. | ||||
|         loss <- grad(X, Y, V, h, loss.only = TRUE, persistent = TRUE) | ||||
| 
 | ||||
|         # After each attempt, check if last attempt reached a better result. | ||||
|         if (loss < loss.best) { | ||||
|             loss.best <- loss | ||||
|             V.best <- V | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return(list( | ||||
|         loss = loss.best, | ||||
|         V = V.best, | ||||
|         B = null(V.best), | ||||
|         h = h | ||||
|     )) | ||||
| } | ||||
							
								
								
									
										141
									
								
								CVE_C/R/cve_simple.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								CVE_C/R/cve_simple.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,141 @@ | ||||
| #' Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| #' a classic GD method unsing no further tricks. | ||||
| #' | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| cve_simple <- function(X, Y, k, | ||||
|                        nObs = sqrt(nrow(X)), | ||||
|                        h = NULL, | ||||
|                        tau = 1.0, | ||||
|                        tol = 1e-3, | ||||
|                        slack = 0, | ||||
|                        epochs = 50L, | ||||
|                        attempts = 10L, | ||||
|                        logger = NULL | ||||
| ) { | ||||
|     # Set `grad` functions environment to enable if to find this environments | ||||
|     # local variabels, needed to enable the manipulation of this local variables | ||||
|     # from within `grad`. | ||||
|     environment(grad) <- environment() | ||||
| 
 | ||||
|     # Get dimensions. | ||||
|     n <- nrow(X) # Number of samples. | ||||
|     p <- ncol(X) # Data dimensions | ||||
|     q <- p - k   # Complement dimension of the SDR space. | ||||
| 
 | ||||
|     # Save initial learning rate `tau`. | ||||
|     tau.init <- tau | ||||
|     # Addapt tolearance for break condition. | ||||
|     tol <- sqrt(2 * q) * tol | ||||
| 
 | ||||
|     # Estaimate bandwidth if not given. | ||||
|     if (missing(h) || !is.numeric(h)) { | ||||
|         h <- estimate.bandwidth(X, k, nObs) | ||||
|     } | ||||
| 
 | ||||
|     # Compute persistent data. | ||||
|     # Compute lookup indexes for symmetrie, lower/upper | ||||
|     # triangular parts and vectorization. | ||||
|     pair.index <- elem.pairs(seq(n)) | ||||
|     i <- pair.index[1, ] # `i` indices of `(i, j)` pairs | ||||
|     j <- pair.index[2, ] # `j` indices of `(i, j)` pairs | ||||
|     # Index of vectorized matrix, for lower and upper triangular part. | ||||
|     lower <- ((i - 1) * n) + j | ||||
|     upper <- ((j - 1) * n) + i | ||||
| 
 | ||||
|     # Create all pairewise differences of rows of `X`. | ||||
|     X_diff <- X[i, , drop = F] - X[j, , drop = F] | ||||
|     # Identity matrix. | ||||
|     I_p <- diag(1, p) | ||||
| 
 | ||||
|     # Init tracking of current best (according multiple attempts). | ||||
|     V.best <- NULL | ||||
|     loss.best <- Inf | ||||
| 
 | ||||
|     # Start loop for multiple attempts. | ||||
|     for (attempt in 1:attempts) { | ||||
|         # Reset learning rate `tau`. | ||||
|         tau <- tau.init | ||||
| 
 | ||||
|         # Sample a `(p, q)` dimensional matrix from the stiefel manifold as | ||||
|         # optimization start value. | ||||
|         V <- rStiefl(p, q) | ||||
| 
 | ||||
|         # Initial loss and gradient. | ||||
|         loss <- Inf | ||||
|         G <- grad(X, Y, V, h, loss.out = TRUE, persistent = TRUE) | ||||
|         # Set last loss (aka, loss after applying the step). | ||||
|         loss.last <- loss | ||||
| 
 | ||||
|         # Cayley transform matrix `A` | ||||
|         A <- (G %*% t(V)) - (V %*% t(G)) | ||||
| 
 | ||||
|         # Call logger with initial values before starting optimization. | ||||
|         if (is.function(logger)) { | ||||
|             epoch <- 0 # Set epoch count to 0 (only relevant for logging). | ||||
|             error <- NA | ||||
|             logger(environment()) | ||||
|         } | ||||
| 
 | ||||
|         ## Start optimization loop. | ||||
|         for (epoch in 1:epochs) { | ||||
|             # Apply learning rate `tau`. | ||||
|             A.tau <- tau * A | ||||
|             # Parallet transport (on Stiefl manifold) into direction of `G`. | ||||
|             V.tau <- solve(I_p + A.tau) %*% ((I_p - A.tau) %*% V) | ||||
| 
 | ||||
|             # Loss at position after a step. | ||||
|             loss <- grad(X, Y, V.tau, h, loss.only = TRUE, persistent = TRUE) | ||||
| 
 | ||||
|             # Check if step is appropriate, iff not reduce learning rate. | ||||
|             if ((loss - loss.last) > slack * loss.last) { | ||||
|                 tau <- tau / 2 | ||||
|                 next() # Keep position and try with smaller `tau`. | ||||
|             } | ||||
| 
 | ||||
|             # Compute error. | ||||
|             error <- norm(V %*% t(V) - V.tau %*% t(V.tau), type = "F") | ||||
| 
 | ||||
|             # Check break condition (epoch check to skip ignored gradient calc). | ||||
|             # Note: the devision by `sqrt(2 * k)` is included in `tol`. | ||||
|             if (error < tol || epoch >= epochs) { | ||||
|                 # take last step and stop optimization. | ||||
|                 V <- V.tau | ||||
|                 # Call logger last time befor stoping. | ||||
|                 if (is.function(logger)) { | ||||
|                     logger(environment()) | ||||
|                 } | ||||
|                 break() | ||||
|             } | ||||
| 
 | ||||
|             # Perform the step and remember previous loss. | ||||
|             V <- V.tau | ||||
|             loss.last <- loss | ||||
| 
 | ||||
|             # Call logger after taking a step. | ||||
|             if (is.function(logger)) { | ||||
|                 logger(environment()) | ||||
|             } | ||||
| 
 | ||||
|             # Compute gradient at new position. | ||||
|             G <- grad(X, Y, V, h, persistent = TRUE) | ||||
| 
 | ||||
|             # Cayley transform matrix `A` | ||||
|             A <- (G %*% t(V)) - (V %*% t(G)) | ||||
|         } | ||||
| 
 | ||||
|         # Check if current attempt improved previous ones | ||||
|         if (loss < loss.best) { | ||||
|             loss.best <- loss | ||||
|             V.best <- V | ||||
|         } | ||||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     return(list( | ||||
|         loss = loss.best, | ||||
|         V = V.best, | ||||
|         B = null(V.best), | ||||
|         h = h | ||||
|     )) | ||||
| } | ||||
							
								
								
									
										109
									
								
								CVE_C/R/datasets.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										109
									
								
								CVE_C/R/datasets.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,109 @@ | ||||
| #' Generates test datasets. | ||||
| #' | ||||
| #' Provides sample datasets. There are 5 different datasets named | ||||
| #' M1, M2, M3, M4 and M5 describet in the paper references below. | ||||
| #' The general model is given by: | ||||
| #' \deqn{Y ~ g(B'X) + \epsilon} | ||||
| #' | ||||
| #' @param name One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"} | ||||
| #' @param n nr samples | ||||
| #' @param p Dim. of random variable \code{X}. | ||||
| #' @param p.mix Only for \code{"M4"}, see: below. | ||||
| #' @param lambda Only for \code{"M4"}, see: below. | ||||
| #' | ||||
| #' @return List with elements | ||||
| #' \itemize{ | ||||
| #'      \item{X}{data} | ||||
| #'      \item{Y}{response} | ||||
| #'      \item{B}{Used dim-reduction matrix} | ||||
| #'      \item{name}{Name of the dataset (name parameter)} | ||||
| #' } | ||||
| #' | ||||
| #' @section M1: | ||||
| #' The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace | ||||
| #' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||
| #' The link function \eqn{g} is given as | ||||
| #' \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} | ||||
| #' @section M2: | ||||
| #' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||
| #' The link function \eqn{g} is given as | ||||
| #' \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} | ||||
| #' @section M3: | ||||
| #' TODO: | ||||
| #' @section M4: | ||||
| #' TODO: | ||||
| #' @section M5: | ||||
| #' TODO: | ||||
| #' | ||||
| #' @import stats | ||||
| #' @importFrom stats rnorm rbinom | ||||
| #' @export | ||||
| dataset <- function(name = "M1", n, B, p.mix = 0.3, lambda = 1.0) { | ||||
|     # validate parameters | ||||
|     stopifnot(name %in% c("M1", "M2", "M3", "M4", "M5")) | ||||
| 
 | ||||
|     # set default values if not supplied | ||||
|     if (missing(n)) { | ||||
|         n <- if (name %in% c("M1", "M2")) 200 else if (name != "M5") 100 else 42 | ||||
|     } | ||||
|     if (missing(B)) { | ||||
|         p <- 12 | ||||
|         if (name == "M1") { | ||||
|             B <- cbind( | ||||
|                 c( 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), | ||||
|                 c( 1,-1, 1,-1, 1,-1, 0, 0, 0, 0, 0, 0) | ||||
|             ) / sqrt(6) | ||||
|         } else if (name == "M2") { | ||||
|             B <- cbind( | ||||
|                 c(c(1, 0), rep(0, 10)), | ||||
|                 c(c(0, 1), rep(0, 10)) | ||||
|             ) | ||||
|         } else { | ||||
|             B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, 6)), 12, 1) | ||||
|         } | ||||
|     } else { | ||||
|         p <- dim(B)[1] | ||||
|         # validate col. nr to match dataset `k = dim(B)[2]` | ||||
|         stopifnot( | ||||
|             name %in% c("M1", "M2") && dim(B)[2] == 2, | ||||
|             name %in% c("M3", "M4", "M5") && dim(B)[2] == 1 | ||||
|         ) | ||||
|     } | ||||
| 
 | ||||
|     # set link function `g` for model `Y ~ g(B'X) + epsilon` | ||||
|     if (name == "M1") { | ||||
|         g <- function(BX) { BX[1] / (0.5 + (BX[2] + 1.5)^2) } | ||||
|     } else if (name == "M2") { | ||||
|         g <- function(BX) { BX[1] * BX[2]^2 } | ||||
|     } else if (name %in% c("M3", "M4")) { | ||||
|         g <- function(BX) { cos(BX[1]) } | ||||
|     } else { # name == "M5" | ||||
|         g <- function(BX) { 2 * log(abs(BX[1]) + 1) } | ||||
|     } | ||||
| 
 | ||||
|     # compute X | ||||
|     if (name != "M4") { | ||||
|         # compute root of the covariance matrix according the dataset | ||||
|         if (name %in% c("M1", "M3")) { | ||||
|             # Variance-Covariance structure for `X ~ N_p(0, \Sigma)` with | ||||
|             # `\Sigma_{i, j} = 0.5^{|i - j|}`. | ||||
|             Sigma <- matrix(0.5^abs(kronecker(1:p, 1:p, '-')), p, p) | ||||
|             # decompose Sigma to Sigma.root^T Sigma.root = Sigma for usage in creation of `X` | ||||
|             Sigma.root <- chol(Sigma) | ||||
|         } else { # name %in% c("M2", "M5") | ||||
|             Sigma.root <- diag(rep(1, p)) # d-dim identity | ||||
|         } | ||||
|         # data `X` as multivariate random normal variable with | ||||
|         # variance matrix `Sigma`. | ||||
|         X <- replicate(p, rnorm(n, 0, 1)) %*% Sigma.root | ||||
|     } else { # name == "M4" | ||||
|         X <- t(replicate(100, rep((1 - 2 * rbinom(1, 1, p.mix)) * lambda, p) + rnorm(p, 0, 1))) | ||||
|     } | ||||
| 
 | ||||
|     # responce `y ~ g(B'X) + epsilon` with `epsilon ~ N(0, 1 / 2)` | ||||
|     Y <- apply(X, 1, function(X_i) { | ||||
|         g(t(B) %*% X_i) + rnorm(1, 0, 0.5) | ||||
|     }) | ||||
| 
 | ||||
|     return(list(X = X, Y = Y, B = B, name = name)) | ||||
| } | ||||
							
								
								
									
										27
									
								
								CVE_C/R/estimateBandwidth.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								CVE_C/R/estimateBandwidth.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,27 @@ | ||||
| #' Estimated bandwidth for CVE. | ||||
| #' | ||||
| #' Estimates a propper bandwidth \code{h} according | ||||
| #' \deqn{% | ||||
| #' h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% | ||||
| #' h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} | ||||
| #' | ||||
| #' @param X data matrix of dimension (n x p) with n data points X_i of dimension | ||||
| #'  q. Therefor each row represents a datapoint of dimension p. | ||||
| #' @param k Guess for rank(B). | ||||
| #' @param nObs Ether numeric of a function. If specified as numeric value | ||||
| #'  its used in the computation of the bandwidth directly. If its a function | ||||
| #'  `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not | ||||
| #'  supplied at all is to use \code{nObs <- nrow(x)^0.5}. | ||||
| #' | ||||
| #' @seealso [\code{\link{qchisq}}] | ||||
| #' @export | ||||
| estimate.bandwidth <- function(X, k, nObs) { | ||||
|     n <- nrow(X) | ||||
|     p <- ncol(X) | ||||
| 
 | ||||
|     X_centered <- scale(X, center=TRUE, scale=FALSE) | ||||
|     Sigma <- (1 / n) * t(X_centered) %*% X_centered | ||||
| 
 | ||||
|     quantil <- qchisq((nObs - 1) / (n - 1), k) | ||||
|     return(2 * quantil * sum(diag(Sigma)) / p) | ||||
| } | ||||
							
								
								
									
										48
									
								
								CVE_C/R/gradient.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								CVE_C/R/gradient.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,48 @@ | ||||
| #' Compute get gradient of `L(V)` given a dataset `X`. | ||||
| #' | ||||
| #' @param X Data matrix. | ||||
| #' @param Y Responce. | ||||
| #' @param V Position to compute the gradient at, aka point on Stiefl manifold. | ||||
| #' @param h Bandwidth | ||||
| #' @param loss.out Iff \code{TRUE} loss will be written to parent environment. | ||||
| #' @param loss.only Boolean to only compute the loss, of \code{TRUE} a single | ||||
| #'  value loss is returned and \code{envir} is ignored. | ||||
| #' @param persistent Determines if data indices and dependent calculations shall | ||||
| #'  be reused from the parent environment. ATTENTION: Do NOT set this flag, only | ||||
| #'  intended for internal usage by carefully aligned functions! | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| grad <- function(X, Y, V, h, | ||||
|                  loss.out = FALSE, | ||||
|                  loss.only = FALSE, | ||||
|                  persistent = FALSE) { | ||||
|     # Get number of samples and dimension. | ||||
|     n <- nrow(X) | ||||
|     p <- ncol(X) | ||||
| 
 | ||||
|     if (!persistent) { | ||||
|         # Compute lookup indexes for symmetrie, lower/upper | ||||
|         # triangular parts and vectorization. | ||||
|         pair.index <- elem.pairs(seq(n)) | ||||
|         i <- pair.index[1, ] # `i` indices of `(i, j)` pairs | ||||
|         j <- pair.index[2, ] # `j` indices of `(i, j)` pairs | ||||
|         # Index of vectorized matrix, for lower and upper triangular part. | ||||
|         lower <- ((i - 1) * n) + j | ||||
|         upper <- ((j - 1) * n) + i | ||||
| 
 | ||||
|         # Create all pairewise differences of rows of `X`. | ||||
|         X_diff <- X[i, , drop = F] - X[j, , drop = F] | ||||
|     } | ||||
| 
 | ||||
|     out <- .Call("grad_c", PACKAGE = "CVE", | ||||
|                  X, X_diff, as.double(Y), V, as.double(h)); | ||||
| 
 | ||||
|     if (loss.only) { | ||||
|         return(out$loss) | ||||
|     } | ||||
|     if (loss.out) { | ||||
|         loss <<- out$loss | ||||
|     } | ||||
| 
 | ||||
|     return(out$G) | ||||
| } | ||||
							
								
								
									
										43
									
								
								CVE_C/R/gridSearch.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								CVE_C/R/gridSearch.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,43 @@ | ||||
| 
 | ||||
| #' Performs a grid search for parameters over a parameter grid. | ||||
| #' @examples | ||||
| #' args <- list( | ||||
| #'     h = c(0.05, 0.1, 0.2), | ||||
| #'     method = c("simple", "sgd"), | ||||
| #'     tau = c(0.5, 0.1, 0.01) | ||||
| #' ) | ||||
| #' cve.grid.search(args) | ||||
| #' @export | ||||
| cve.grid.search <- function(X, Y, k, args) { | ||||
| 
 | ||||
|     args$stringsAsFactors = FALSE | ||||
|     args$KEEP.OUT.ATTRS = FALSE | ||||
|     grid <- do.call(expand.grid, args) | ||||
|     grid.length <- length(grid[[1]]) | ||||
| 
 | ||||
|     print(grid) | ||||
| 
 | ||||
|     for (i in 1:grid.length) { | ||||
|         arguments <- as.list(grid[i, ]) | ||||
|         # Set required arguments | ||||
|         arguments$X <- X | ||||
|         arguments$Y <- Y | ||||
|         arguments$k <- k | ||||
|         # print(arguments) | ||||
|         dr <- do.call(cve.call, arguments) | ||||
|         print(dr$loss) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| # ds <- dataset() | ||||
| # X <- ds$X | ||||
| # Y <- ds$Y | ||||
| # (k <- ncol(ds$B)) | ||||
| # args <- list( | ||||
| #     h = c(0.05, 0.1, 0.2), | ||||
| #     method = c("simple", "sgd"), | ||||
| #     tau = c(0.5, 0.1, 0.01), | ||||
| #     attempts = c(1L) | ||||
| # ) | ||||
| 
 | ||||
| # cve.grid.search(X, Y, k, args) | ||||
							
								
								
									
										40
									
								
								CVE_C/R/util.R
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								CVE_C/R/util.R
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,40 @@ | ||||
| #' Samples uniform from the Stiefel Manifold | ||||
| #' | ||||
| #' @param p row dim. | ||||
| #' @param q col dim. | ||||
| #' @return `(p, q)` semi-orthogonal matrix | ||||
| #' @examples | ||||
| #'  V <- rStiefel(6, 4) | ||||
| #' @export | ||||
| rStiefl <- function(p, q) { | ||||
|     return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))) | ||||
| } | ||||
| 
 | ||||
| #' Null space basis of given matrix `V` | ||||
| #' | ||||
| #' @param V `(p, q)` matrix | ||||
| #' @return Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. | ||||
| #' @keywords internal | ||||
| #' @export | ||||
| null <- function(V) { | ||||
|     tmp <- qr(V) | ||||
|     set <- if(tmp$rank == 0L) seq_len(ncol(V)) else -seq_len(tmp$rank) | ||||
|     return(qr.Q(tmp, complete=TRUE)[, set, drop=FALSE]) | ||||
| } | ||||
| 
 | ||||
| #' Creates a (numeric) matrix where each column contains | ||||
| #' an element to element matching. | ||||
| #' @param elements numeric vector of elements to match | ||||
| #' @return matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. | ||||
| #' @keywords internal | ||||
| #' @examples | ||||
| #'  elem.pairs(seq.int(2, 5)) | ||||
| #' @export | ||||
| elem.pairs <- function(elements) { | ||||
|     # Number of elements to match. | ||||
|     n <- length(elements) | ||||
|     # Create all combinations. | ||||
|     pairs <- rbind(rep(elements, each=n), rep(elements, n)) | ||||
|     # Select unique combinations without self interaction. | ||||
|     return(pairs[, pairs[1, ] < pairs[2, ]]) | ||||
| } | ||||
							
								
								
									
										
											BIN
										
									
								
								CVE_C/inst/doc/CVE_paper.pdf
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								CVE_C/inst/doc/CVE_paper.pdf
									
									
									
									
									
										Executable file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										20
									
								
								CVE_C/man/CVE-package.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								CVE_C/man/CVE-package.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/CVE.R | ||||
| \docType{package} | ||||
| \name{CVE-package} | ||||
| \alias{CVE} | ||||
| \alias{CVE-package} | ||||
| \title{Conditional Variance Estimator (CVE)} | ||||
| \description{ | ||||
| Conditional Variance Estimator for Sufficient Dimension | ||||
|  Reduction | ||||
| } | ||||
| \details{ | ||||
| TODO: And some details | ||||
| } | ||||
| \references{ | ||||
| Fertl Likas, Bura Efstathia. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||
| } | ||||
| \author{ | ||||
| Loki | ||||
| } | ||||
							
								
								
									
										71
									
								
								CVE_C/man/cve.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								CVE_C/man/cve.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,71 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/CVE.R | ||||
| \name{cve} | ||||
| \alias{cve} | ||||
| \alias{cve.call} | ||||
| \title{Implementation of the CVE method.} | ||||
| \usage{ | ||||
| cve(formula, data, method = "simple", max.dim = 10, ...) | ||||
| 
 | ||||
| cve.call(X, Y, method = "simple", nObs = nrow(X)^0.5, min.dim = 1, | ||||
|   max.dim = 10, k, ...) | ||||
| } | ||||
| \arguments{ | ||||
| \item{formula}{Formel for the regression model defining `X`, `Y`. | ||||
| See: \code{\link{formula}}.} | ||||
| 
 | ||||
| \item{data}{data.frame holding data for formula.} | ||||
| 
 | ||||
| \item{method}{The different only differe in the used optimization. | ||||
|  All of them are Gradient based optimization on a Stiefel manifold. | ||||
| \itemize{ | ||||
|      \item "simple" Simple reduction of stepsize. | ||||
|      \item "sgd" stocastic gradient decent. | ||||
|      \item TODO: further | ||||
| }} | ||||
| 
 | ||||
| \item{...}{Further parameters depending on the used method.} | ||||
| 
 | ||||
| \item{X}{Data} | ||||
| 
 | ||||
| \item{Y}{Responces} | ||||
| 
 | ||||
| \item{nObs}{as describet in the Paper.} | ||||
| 
 | ||||
| \item{k}{guess for SDR dimension.} | ||||
| 
 | ||||
| \item{nObs}{Like in the paper.} | ||||
| 
 | ||||
| \item{...}{Method specific parameters.} | ||||
| } | ||||
| \description{ | ||||
| Conditional Variance Estimator (CVE) is a novel sufficient dimension | ||||
| reduction (SDR) method assuming a model | ||||
| \deqn{Y \sim g(B'X) + \epsilon}{Y ~ g(B'X) + epsilon} | ||||
| where B'X is a lower dimensional projection of the predictors. | ||||
| } | ||||
| \examples{ | ||||
| library(CVE) | ||||
| 
 | ||||
| # sample dataset | ||||
| ds <- dataset("M5") | ||||
| 
 | ||||
| # call ´cve´ with default method (aka "simple") | ||||
| dr.simple <- cve(ds$Y ~ ds$X, k = ncol(ds$B)) | ||||
| # plot optimization history (loss via iteration) | ||||
| plot(dr.simple, main = "CVE M5 simple") | ||||
| 
 | ||||
| # call ´cve´ with method "linesearch" using ´data.frame´ as data. | ||||
| data <- data.frame(Y = ds$Y, X = ds$X) | ||||
| # Note: ´Y, X´ are NOT defined, they are extracted from ´data´. | ||||
| dr.linesearch <- cve(Y ~ ., data, method = "linesearch", k = ncol(ds$B)) | ||||
| plot(dr.linesearch, main = "CVE M5 linesearch") | ||||
| 
 | ||||
| } | ||||
| \references{ | ||||
| Fertl L., Bura E. Conditional Variance Estimation for Sufficient Dimension Reduction, 2019 | ||||
| } | ||||
| \seealso{ | ||||
| \code{\link{formula}}. For a complete parameters list (dependent on | ||||
|  the method) see \code{\link{cve_simple}}, \code{\link{cve_sgd}} | ||||
| } | ||||
							
								
								
									
										19
									
								
								CVE_C/man/cve.grid.search.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								CVE_C/man/cve.grid.search.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/gridSearch.R | ||||
| \name{cve.grid.search} | ||||
| \alias{cve.grid.search} | ||||
| \title{Performs a grid search for parameters over a parameter grid.} | ||||
| \usage{ | ||||
| cve.grid.search(X, Y, k, args) | ||||
| } | ||||
| \description{ | ||||
| Performs a grid search for parameters over a parameter grid. | ||||
| } | ||||
| \examples{ | ||||
| args <- list( | ||||
|     h = c(0.05, 0.1, 0.2), | ||||
|     method = c("simple", "sgd"), | ||||
|     tau = c(0.5, 0.1, 0.01) | ||||
| ) | ||||
| cve.grid.search(args) | ||||
| } | ||||
							
								
								
									
										16
									
								
								CVE_C/man/cve_linesearch.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								CVE_C/man/cve_linesearch.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/cve_linesearch.R | ||||
| \name{cve_linesearch} | ||||
| \alias{cve_linesearch} | ||||
| \title{Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe | ||||
| conditions.} | ||||
| \usage{ | ||||
| cve_linesearch(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, | ||||
|   tol = 0.001, rho1 = 0.1, rho2 = 0.9, slack = 0, epochs = 50L, | ||||
|   attempts = 10L, max.linesearch.iter = 10L, logger = NULL) | ||||
| } | ||||
| \description{ | ||||
| Implementation of the CVE method using curvilinear linesearch with Armijo-Wolfe | ||||
| conditions. | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										16
									
								
								CVE_C/man/cve_sgd.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								CVE_C/man/cve_sgd.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/cve_sgd.R | ||||
| \name{cve_sgd} | ||||
| \alias{cve_sgd} | ||||
| \title{Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| a classic GD method unsing no further tricks.} | ||||
| \usage{ | ||||
| cve_sgd(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 0.01, | ||||
|   tol = 0.001, epochs = 50L, batch.size = 16L, attempts = 10L, | ||||
|   logger = NULL) | ||||
| } | ||||
| \description{ | ||||
| Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| a classic GD method unsing no further tricks. | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										16
									
								
								CVE_C/man/cve_simple.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								CVE_C/man/cve_simple.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/cve_simple.R | ||||
| \name{cve_simple} | ||||
| \alias{cve_simple} | ||||
| \title{Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| a classic GD method unsing no further tricks.} | ||||
| \usage{ | ||||
| cve_simple(X, Y, k, nObs = sqrt(nrow(X)), h = NULL, tau = 1, | ||||
|   tol = 0.001, slack = 0, epochs = 50L, attempts = 10L, | ||||
|   logger = NULL) | ||||
| } | ||||
| \description{ | ||||
| Simple implementation of the CVE method. 'Simple' means that this method is | ||||
| a classic GD method unsing no further tricks. | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										64
									
								
								CVE_C/man/dataset.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								CVE_C/man/dataset.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,64 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/datasets.R | ||||
| \name{dataset} | ||||
| \alias{dataset} | ||||
| \title{Generates test datasets.} | ||||
| \usage{ | ||||
| dataset(name = "M1", n, B, p.mix = 0.3, lambda = 1) | ||||
| } | ||||
| \arguments{ | ||||
| \item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"} or \code{"M5"}} | ||||
| 
 | ||||
| \item{n}{nr samples} | ||||
| 
 | ||||
| \item{p.mix}{Only for \code{"M4"}, see: below.} | ||||
| 
 | ||||
| \item{lambda}{Only for \code{"M4"}, see: below.} | ||||
| 
 | ||||
| \item{p}{Dim. of random variable \code{X}.} | ||||
| } | ||||
| \value{ | ||||
| List with elements | ||||
| \itemize{ | ||||
|      \item{X}{data} | ||||
|      \item{Y}{response} | ||||
|      \item{B}{Used dim-reduction matrix} | ||||
|      \item{name}{Name of the dataset (name parameter)} | ||||
| } | ||||
| } | ||||
| \description{ | ||||
| Provides sample datasets. There are 5 different datasets named | ||||
| M1, M2, M3, M4 and M5 describet in the paper references below. | ||||
| The general model is given by: | ||||
| \deqn{Y ~ g(B'X) + \epsilon} | ||||
| } | ||||
| \section{M1}{ | ||||
| 
 | ||||
| The data follows \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} for a subspace | ||||
| dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||
| The link function \eqn{g} is given as | ||||
| \deqn{g(x) = \frac{x_1}{0.5 + (x_2 + 1.5)^2} + 0.5\epsilon}{g(x) = x_1 / (0.5 + (x_2 + 1.5)^2) + 0.5 epsilon} | ||||
| } | ||||
| 
 | ||||
| \section{M2}{ | ||||
| 
 | ||||
| \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, Sigma)} with \eqn{k = 2} with a default of \eqn{n = 200} data points. | ||||
| The link function \eqn{g} is given as | ||||
| \deqn{g(x) = x_1 x_2^2 + 0.5\epsilon}{g(x) = x_1 x_2^2 + 0.5 epsilon} | ||||
| } | ||||
| 
 | ||||
| \section{M3}{ | ||||
| 
 | ||||
| TODO: | ||||
| } | ||||
| 
 | ||||
| \section{M4}{ | ||||
| 
 | ||||
| TODO: | ||||
| } | ||||
| 
 | ||||
| \section{M5}{ | ||||
| 
 | ||||
| TODO: | ||||
| } | ||||
| 
 | ||||
							
								
								
									
										23
									
								
								CVE_C/man/elem.pairs.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								CVE_C/man/elem.pairs.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/util.R | ||||
| \name{elem.pairs} | ||||
| \alias{elem.pairs} | ||||
| \title{Creates a (numeric) matrix where each column contains | ||||
| an element to element matching.} | ||||
| \usage{ | ||||
| elem.pairs(elements) | ||||
| } | ||||
| \arguments{ | ||||
| \item{elements}{numeric vector of elements to match} | ||||
| } | ||||
| \value{ | ||||
| matrix of size `(2, n * (n - 1) / 2)` for a argument of lenght `n`. | ||||
| } | ||||
| \description{ | ||||
| Creates a (numeric) matrix where each column contains | ||||
| an element to element matching. | ||||
| } | ||||
| \examples{ | ||||
|  elem.pairs(seq.int(2, 5)) | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										28
									
								
								CVE_C/man/estimate.bandwidth.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								CVE_C/man/estimate.bandwidth.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/estimateBandwidth.R | ||||
| \name{estimate.bandwidth} | ||||
| \alias{estimate.bandwidth} | ||||
| \title{Estimated bandwidth for CVE.} | ||||
| \usage{ | ||||
| estimate.bandwidth(X, k, nObs) | ||||
| } | ||||
| \arguments{ | ||||
| \item{X}{data matrix of dimension (n x p) with n data points X_i of dimension | ||||
| q. Therefor each row represents a datapoint of dimension p.} | ||||
| 
 | ||||
| \item{k}{Guess for rank(B).} | ||||
| 
 | ||||
| \item{nObs}{Ether numeric of a function. If specified as numeric value | ||||
| its used in the computation of the bandwidth directly. If its a function | ||||
| `nObs` is evaluated as \code{nObs(nrow(x))}. The default behaviou if not | ||||
| supplied at all is to use \code{nObs <- nrow(x)^0.5}.} | ||||
| } | ||||
| \description{ | ||||
| Estimates a propper bandwidth \code{h} according | ||||
| \deqn{% | ||||
| h = \chi_{p-q}^{-1}\left(\frac{nObs - 1}{n-1}\right)\frac{2 tr(\Sigma)}{p}}{% | ||||
| h = qchisq( (nObs - 1)/(n - 1), p - q ) 2 tr(Sigma) / p} | ||||
| } | ||||
| \seealso{ | ||||
| [\code{\link{qchisq}}] | ||||
| } | ||||
							
								
								
									
										31
									
								
								CVE_C/man/grad.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								CVE_C/man/grad.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,31 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/gradient.R | ||||
| \name{grad} | ||||
| \alias{grad} | ||||
| \title{Compute get gradient of `L(V)` given a dataset `X`.} | ||||
| \usage{ | ||||
| grad(X, Y, V, h, loss.out = FALSE, loss.only = FALSE, | ||||
|   persistent = FALSE) | ||||
| } | ||||
| \arguments{ | ||||
| \item{X}{Data matrix.} | ||||
| 
 | ||||
| \item{Y}{Responce.} | ||||
| 
 | ||||
| \item{V}{Position to compute the gradient at, aka point on Stiefl manifold.} | ||||
| 
 | ||||
| \item{h}{Bandwidth} | ||||
| 
 | ||||
| \item{loss.out}{Iff \code{TRUE} loss will be written to parent environment.} | ||||
| 
 | ||||
| \item{loss.only}{Boolean to only compute the loss, of \code{TRUE} a single | ||||
| value loss is returned and \code{envir} is ignored.} | ||||
| 
 | ||||
| \item{persistent}{Determines if data indices and dependent calculations shall | ||||
| be reused from the parent environment. ATTENTION: Do NOT set this flag, only | ||||
| intended for internal usage by carefully aligned functions!} | ||||
| } | ||||
| \description{ | ||||
| Compute get gradient of `L(V)` given a dataset `X`. | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										18
									
								
								CVE_C/man/null.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								CVE_C/man/null.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/util.R | ||||
| \name{null} | ||||
| \alias{null} | ||||
| \title{Null space basis of given matrix `V`} | ||||
| \usage{ | ||||
| null(V) | ||||
| } | ||||
| \arguments{ | ||||
| \item{V}{`(p, q)` matrix} | ||||
| } | ||||
| \value{ | ||||
| Semi-orthogonal `(p, p - q)` matrix spaning the null space of `V`. | ||||
| } | ||||
| \description{ | ||||
| Null space basis of given matrix `V` | ||||
| } | ||||
| \keyword{internal} | ||||
							
								
								
									
										28
									
								
								CVE_C/man/plot.cve.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								CVE_C/man/plot.cve.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/CVE.R | ||||
| \name{plot.cve} | ||||
| \alias{plot.cve} | ||||
| \title{Ploting helper for objects of class \code{cve}.} | ||||
| \usage{ | ||||
| ## S3 method for class 'cve' | ||||
| plot(x, content = "history", ...) | ||||
| } | ||||
| \arguments{ | ||||
| \item{x}{Object of class \code{cve} (result of [cve()]).} | ||||
| 
 | ||||
| \item{...}{Pass through parameters to [plot()] and [lines()]} | ||||
| 
 | ||||
| \item{content}{Specifies what to plot: | ||||
| \itemize{ | ||||
|      \item "history" Plots the loss history from stiefel optimization | ||||
|          (default). | ||||
|      \item ... TODO: add (if there are any) | ||||
| }} | ||||
| } | ||||
| \description{ | ||||
| Ploting helper for objects of class \code{cve}. | ||||
| } | ||||
| \seealso{ | ||||
| see \code{\link{par}} for graphical parameters to pass through | ||||
|      as well as \code{\link{plot}} for standard plot utility. | ||||
| } | ||||
							
								
								
									
										22
									
								
								CVE_C/man/rStiefl.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								CVE_C/man/rStiefl.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/util.R | ||||
| \name{rStiefl} | ||||
| \alias{rStiefl} | ||||
| \title{Samples uniform from the Stiefel Manifold} | ||||
| \usage{ | ||||
| rStiefl(p, q) | ||||
| } | ||||
| \arguments{ | ||||
| \item{p}{row dim.} | ||||
| 
 | ||||
| \item{q}{col dim.} | ||||
| } | ||||
| \value{ | ||||
| `(p, q)` semi-orthogonal matrix | ||||
| } | ||||
| \description{ | ||||
| Samples uniform from the Stiefel Manifold | ||||
| } | ||||
| \examples{ | ||||
|  V <- rStiefel(6, 4) | ||||
| } | ||||
							
								
								
									
										14
									
								
								CVE_C/man/summary.cve.Rd
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								CVE_C/man/summary.cve.Rd
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| % Generated by roxygen2: do not edit by hand | ||||
| % Please edit documentation in R/CVE.R | ||||
| \name{summary.cve} | ||||
| \alias{summary.cve} | ||||
| \title{Prints a summary of a \code{cve} result.} | ||||
| \usage{ | ||||
| \method{summary}{cve}(object, ...) | ||||
| } | ||||
| \arguments{ | ||||
| \item{object}{Instance of 'cve' as return of \code{cve}.} | ||||
| } | ||||
| \description{ | ||||
| Prints a summary of a \code{cve} result. | ||||
| } | ||||
							
								
								
									
										14
									
								
								CVE_C/src/Makevars
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								CVE_C/src/Makevars
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| 
 | ||||
| ## With R 3.1.0 or later, you can uncomment the following line to tell R to  | ||||
| ## enable compilation with C++11 (where available) | ||||
| ## | ||||
| ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider | ||||
| ## availability of the package we do not yet enforce this here.  It is however | ||||
| ## recommended for client packages to set it. | ||||
| ## | ||||
| ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP | ||||
| ## support within Armadillo prefers / requires it | ||||
| CXX_STD = CXX11 | ||||
| 
 | ||||
| PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)  | ||||
| PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) | ||||
							
								
								
									
										14
									
								
								CVE_C/src/Makevars.win
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								CVE_C/src/Makevars.win
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| 
 | ||||
| ## With R 3.1.0 or later, you can uncomment the following line to tell R to  | ||||
| ## enable compilation with C++11 (where available) | ||||
| ## | ||||
| ## Also, OpenMP support in Armadillo prefers C++11 support. However, for wider | ||||
| ## availability of the package we do not yet enforce this here.  It is however | ||||
| ## recommended for client packages to set it. | ||||
| ## | ||||
| ## And with R 3.4.0, and RcppArmadillo 0.7.960.*, we turn C++11 on as OpenMP | ||||
| ## support within Armadillo prefers / requires it | ||||
| CXX_STD = CXX11 | ||||
| 
 | ||||
| PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)  | ||||
| PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) | ||||
							
								
								
									
										7
									
								
								CVE_C/src/config.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								CVE_C/src/config.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | ||||
| #ifndef CVE_INCLUDE_GUARD_CONFIG_ | ||||
| #define CVE_INCLUDE_GUARD_CONFIG_ | ||||
| 
 | ||||
| #define CVE_MEM_CHUNK_SIZE 2032 | ||||
| #define CVE_MEM_CHUNK_SMALL 1016 | ||||
| 
 | ||||
| #endif /* CVE_INCLUDE_GUARD_CONFIG_ */ | ||||
							
								
								
									
										29
									
								
								CVE_C/src/export.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								CVE_C/src/export.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,29 @@ | ||||
| #include <Rinternals.h> | ||||
| 
 | ||||
| void grad(const int n, const int p, const int q, | ||||
|           const double *X, | ||||
|           const double *X_diff, | ||||
|           const double *Y, | ||||
|           const double *V, | ||||
|           const double h, | ||||
|           double *G, double *loss); | ||||
| 
 | ||||
| SEXP grad_c(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { | ||||
|     SEXP G = PROTECT(allocMatrix(REALSXP, nrows(V), ncols(V))); | ||||
|     SEXP loss = PROTECT(ScalarReal(0.0)); | ||||
| 
 | ||||
|     grad(nrows(X), ncols(X), ncols(V), | ||||
|          REAL(X), REAL(X_diff), REAL(Y), REAL(V), *REAL(h), | ||||
|          REAL(G), REAL(loss)); | ||||
| 
 | ||||
|     SEXP out = PROTECT(allocVector(VECSXP, 2)); | ||||
|     SET_VECTOR_ELT(out, 0, G); | ||||
|     SET_VECTOR_ELT(out, 1, loss); | ||||
|     SEXP names = PROTECT(allocVector(STRSXP, 2)); | ||||
|     SET_STRING_ELT(names, 0, mkChar("G")); | ||||
|     SET_STRING_ELT(names, 1, mkChar("loss")); | ||||
|     setAttrib(out, install("names"), names); | ||||
| 
 | ||||
|     UNPROTECT(4); | ||||
|     return out; | ||||
| } | ||||
							
								
								
									
										123
									
								
								CVE_C/src/grad.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								CVE_C/src/grad.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,123 @@ | ||||
| #include <stdlib.h> | ||||
| #include <math.h> | ||||
| 
 | ||||
| #include "sums.h" | ||||
| #include "sweep.h" | ||||
| #include "matrix.h" | ||||
| #include "indexing.h" | ||||
| 
 | ||||
| // TODO: clarify
 | ||||
| static inline double gaussKernel(const double x, const double scale) { | ||||
|     return exp(scale * x * x); | ||||
| } | ||||
| 
 | ||||
| // TODO: mutch potential for optimization!!!
 | ||||
| static void weightedYandLoss(const int n, | ||||
|                              const double *Y, | ||||
|                              const double *vecD, | ||||
|                              const double *vecW, | ||||
|                              const double *colSums, | ||||
|                              double *y1, double *L, double *vecS, | ||||
|                              double *loss) { | ||||
|     int i, j, k, N = n * (n - 1) / 2; | ||||
|     double l; | ||||
| 
 | ||||
|     for (i = 0; i < n; ++i) { | ||||
|         y1[i] = Y[i] / colSums[i]; | ||||
|         L[i] = Y[i] * Y[i] / colSums[i]; | ||||
|     } | ||||
| 
 | ||||
|     for (k = j = 0; j < n; ++j) { | ||||
|         for (i = j + 1; i < n; ++i, ++k) { | ||||
|             y1[i] += Y[j] * vecW[k] / colSums[i]; | ||||
|             y1[j] += Y[i] * vecW[k] / colSums[j]; | ||||
|             L[i] += Y[j] * Y[j] * vecW[k] / colSums[i]; | ||||
|             L[j] += Y[i] * Y[i] * vecW[k] / colSums[j]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     l = 0.0; | ||||
|     for (i = 0; i < n; ++i) { | ||||
|         l += (L[i] -= y1[i] * y1[i]); | ||||
|     } | ||||
|     *loss = l / (double)n; | ||||
| 
 | ||||
|     for (k = j = 0; j < n; ++j) { | ||||
|         for (i = j + 1; i < n; ++i, ++k) { | ||||
|             l = Y[j] - y1[i]; | ||||
|             vecS[k]  = (L[i] - (l * l)) / colSums[i]; | ||||
|             l = Y[i] - y1[j]; | ||||
|             vecS[k] += (L[j] - (l * l)) / colSums[j]; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     for (k = 0; k < N; ++k) { | ||||
|         vecS[k] *= vecW[k] * vecD[k]; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void grad(const int n, const int p, const int q, | ||||
|           const double *X, | ||||
|           const double *X_diff, | ||||
|           const double *Y, | ||||
|           const double *V, | ||||
|           const double h, | ||||
|           double *G, double *loss) { | ||||
|     // Number of X_i to X_j not trivial pairs.
 | ||||
|     int i, N = (n * (n - 1)) / 2; | ||||
|     double scale = -0.5 / h; | ||||
| 
 | ||||
|     if (X_diff == (void*)0) { | ||||
|         // TODO: ...
 | ||||
|     } | ||||
| 
 | ||||
|     // Allocate and compute projection matrix `Q = I_p - V * V^T`
 | ||||
|     double *Q = (double*)malloc(p * p * sizeof(double)); | ||||
|     nullProj(V, p, q, Q); | ||||
| 
 | ||||
|     // allocate and compute vectorized distance matrix with a temporary
 | ||||
|     // projection of `X_diff`.
 | ||||
|     double *vecD = (double*)malloc(N * sizeof(double)); | ||||
|     double *X_proj; | ||||
|     if (p < 5) { // TODO: refine that!
 | ||||
|         X_proj = (double*)malloc(N * 5 * sizeof(double)); | ||||
|     } else { | ||||
|         X_proj = (double*)malloc(N * p * sizeof(double)); | ||||
|     } | ||||
|     matrixprod(X_diff, N, p, Q, p, p, X_proj); | ||||
|     rowSquareSums(X_proj, N, p, vecD); | ||||
| 
 | ||||
|     // Apply kernel to distence vector for weights computation.
 | ||||
|     double *vecW = X_proj; // reuse memory area, no longer needed.
 | ||||
|     for (i = 0; i < N; ++i) { | ||||
|         vecW[i] = gaussKernel(vecD[i], scale); | ||||
|     } | ||||
|     double *colSums = X_proj + N; // still allocated!
 | ||||
|     rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
 | ||||
| 
 | ||||
|     // compute weighted responces of first end second momontum, aka y1, y2.
 | ||||
|     double *y1 = X_proj + N + n; | ||||
|     double *L = X_proj + N + (2 * n); | ||||
|     // Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
 | ||||
|     // used symultanious to X_proj in final gradient computation.
 | ||||
|     double *vecS = (double*)malloc(N * sizeof(double)); | ||||
|     weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss); | ||||
| 
 | ||||
|     // compute the gradient using X_proj for intermidiate scaled X_diff.
 | ||||
|     rowSweep(X_diff, N, p, "*", vecS, X_proj); | ||||
|     // reuse Q which has the required dim (p, p).
 | ||||
|     crossprod(X_diff, N, p, X_proj, N, p, Q); | ||||
|     // Product with V
 | ||||
|     matrixprod(Q, p, p, V, p, q, G); | ||||
|     // And final scaling (TODO: move into matrixprod!)
 | ||||
|     scale = -2.0 / (((double)n) * h * h); | ||||
|     N = p * q; | ||||
|     for (i = 0; i < N; ++i) { | ||||
|         G[i] *= scale; | ||||
|     } | ||||
| 
 | ||||
|     free(vecS); | ||||
|     free(X_proj); | ||||
|     free(vecD); | ||||
|     free(Q); | ||||
| } | ||||
							
								
								
									
										12
									
								
								CVE_C/src/indexing.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								CVE_C/src/indexing.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,12 @@ | ||||
| #include "indexing.h" | ||||
| 
 | ||||
| void rangePairs(const int from, const int to, int *pairs) { | ||||
|     int i, j, k; | ||||
| 
 | ||||
|     for (k = 0, i = from; i < to; ++i) { | ||||
|        for (j = i + 1; j < to; ++j, k += 2) { | ||||
|             pairs[k] = i; | ||||
|             pairs[k + 1] = j; | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										8
									
								
								CVE_C/src/indexing.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								CVE_C/src/indexing.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| 
 | ||||
| /* Include Guard */ | ||||
| #ifndef CVE_INCLUDE_GUARD_INDEXING_ | ||||
| #define CVE_INCLUDE_GUARD_INDEXING_ | ||||
| 
 | ||||
| void rangePairs(const int from, const int to, int *pairs); | ||||
| 
 | ||||
| #endif /* CVE_INCLUDE_GUARD_INDEXING_ */ | ||||
							
								
								
									
										23
									
								
								CVE_C/src/init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								CVE_C/src/init.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,23 @@ | ||||
| #include <R.h> | ||||
| #include <Rinternals.h> | ||||
| #include <stdlib.h> // for NULL | ||||
| #include <R_ext/Rdynload.h> | ||||
| 
 | ||||
| /* FIXME: 
 | ||||
|    Check these declarations against the C/Fortran source code. | ||||
| */ | ||||
| 
 | ||||
| /* .Call calls */ | ||||
| extern SEXP grad_c(SEXP, SEXP, SEXP, SEXP, SEXP); | ||||
| 
 | ||||
| static const R_CallMethodDef CallEntries[] = { | ||||
|     {"grad_c",          (DL_FUNC) &grad_c,          5}, | ||||
|     {NULL, NULL, 0} | ||||
| }; | ||||
| 
 | ||||
| /* Restrict C entrypoints to registered routines. */ | ||||
| void R_initCVE(DllInfo *dll) | ||||
| { | ||||
|     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); | ||||
|     R_useDynamicSymbols(dll, FALSE); | ||||
| } | ||||
							
								
								
									
										71
									
								
								CVE_C/src/matrix.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										71
									
								
								CVE_C/src/matrix.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,71 @@ | ||||
| #include <string.h> // for `mem*` functions. | ||||
| 
 | ||||
| #include "config.h" | ||||
| #include "matrix.h" | ||||
| 
 | ||||
| #include <R_ext/BLAS.h> | ||||
| 
 | ||||
| void matrixprod(const double *A, const int nrowA, const int ncolA, | ||||
|                 const double *B, const int nrowB, const int ncolB, | ||||
|                 double *C) { | ||||
|     const double one = 1.0; | ||||
|     const double zero = 0.0; | ||||
| 
 | ||||
|     // DGEMM with parameterization:
 | ||||
|     //     C <- A %*% B
 | ||||
|     F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA, | ||||
|                     &one, A, &nrowA, B, &nrowB, | ||||
|                     &zero, C, &nrowA); | ||||
| } | ||||
| 
 | ||||
| void crossprod(const double *A, const int nrowA, const int ncolA, | ||||
|                const double *B, const int nrowB, const int ncolB, | ||||
|                double *C) { | ||||
|     const double one = 1.0; | ||||
|     const double zero = 0.0; | ||||
| 
 | ||||
|     // DGEMM with parameterization:
 | ||||
|     //     C <- t(A) %*% B
 | ||||
|     F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA, | ||||
|                     &one, A, &nrowA, B, &nrowB, | ||||
|                     &zero, C, &ncolA); | ||||
| } | ||||
| 
 | ||||
| void nullProj(const double *B, const int nrowB, const int ncolB, | ||||
|               double *Q) { | ||||
|     const double minusOne = -1.0; | ||||
|     const double one = 1.0; | ||||
| 
 | ||||
|     // Initialize as identity matrix.
 | ||||
|     memset(Q, 0, sizeof(double) * nrowB * nrowB); | ||||
|     double *Q_diag, *Q_end = Q + nrowB * nrowB; | ||||
|     for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) { | ||||
|         *Q_diag = 1.0; | ||||
|     } | ||||
| 
 | ||||
|     // DGEMM with parameterization:
 | ||||
|     //     C <- (-1.0 * B %*% t(B)) + C
 | ||||
|     F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB, | ||||
|                     &minusOne, B, &nrowB, B, &nrowB, | ||||
|                     &one, Q, &nrowB); | ||||
| } | ||||
| 
 | ||||
| // A dence skwe-symmetric rank 2 update.
 | ||||
| // Perform the update
 | ||||
| //      C := alpha (A * B^T - B * A^T) + beta C
 | ||||
| void skewSymRank2k(const int nrow, const int ncol, | ||||
|                    double alpha, const double *A, const double *B, | ||||
|                    double beta, | ||||
|                    double *C) { | ||||
|     F77_NAME(dgemm)("N", "T", | ||||
|              &nrow, &nrow, &ncol, | ||||
|              &alpha, A, &nrow, B, &nrow, | ||||
|              &beta, C, &nrow); | ||||
| 
 | ||||
|     alpha *= -1.0; | ||||
|     beta = 1.0; | ||||
|     F77_NAME(dgemm)("N", "T", | ||||
|              &nrow, &nrow, &ncol, | ||||
|              &alpha, B, &nrow, A, &nrow, | ||||
|              &beta, C, &nrow); | ||||
| } | ||||
							
								
								
									
										25
									
								
								CVE_C/src/matrix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								CVE_C/src/matrix.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,25 @@ | ||||
| 
 | ||||
| /* Include Guard */ | ||||
| #ifndef CVE_INCLUDE_GUARD_MATRIX_ | ||||
| #define CVE_INCLUDE_GUARD_MATRIX_ | ||||
| 
 | ||||
| void matrixprod(const double *A, const int nrowA, const int ncolA, | ||||
|                 const double *B, const int nrowB, const int ncolB, | ||||
|                        double *C); | ||||
| 
 | ||||
| void crossprod(const double *A, const int nrowA, const int ncolA, | ||||
|                const double *B, const int nrowB, const int ncolB, | ||||
|                double *C); | ||||
| 
 | ||||
| void nullProj(const double *B, const int nrowB, const int ncolB, | ||||
|               double *Q); | ||||
| 
 | ||||
| // A dence skwe-symmetric rank 2 update.
 | ||||
| // Perform the update
 | ||||
| //      C := alpha (A * B^T - B * A^T) + beta C
 | ||||
| void skewSymRank2k(const int nrow, const int ncol, | ||||
|                    double alpha, const double *A, const double *B, | ||||
|                    double beta, | ||||
|                    double *C); | ||||
| 
 | ||||
| #endif /* CVE_INCLUDE_GUARD_MATRIX_ */ | ||||
							
								
								
									
										113
									
								
								CVE_C/src/sums.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								CVE_C/src/sums.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,113 @@ | ||||
| #include <string.h> // for `mem*` functions. | ||||
| 
 | ||||
| #include "config.h" | ||||
| #include "sums.h" | ||||
| 
 | ||||
| void rowSums(const double *A, const int nrow, const int ncol, | ||||
|              double *sum) { | ||||
|     int i, j, block_size, block_size_i; | ||||
|     const double *A_block = A; | ||||
|     const double *A_end = A + nrow * ncol; | ||||
| 
 | ||||
|     if (nrow > CVE_MEM_CHUNK_SIZE) { | ||||
|         block_size = CVE_MEM_CHUNK_SIZE; | ||||
|     } else { | ||||
|         block_size = nrow; | ||||
|     } | ||||
| 
 | ||||
|     // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|     for (i = 0; i < nrow; i += block_size_i) { | ||||
|         // Reset `A` to new block beginning.
 | ||||
|         A = A_block; | ||||
|         // Take block size of eveything left and reduce to max size.
 | ||||
|         block_size_i = nrow - i; | ||||
|         if (block_size_i > block_size) { | ||||
|             block_size_i = block_size; | ||||
|         } | ||||
|         // Compute first blocks column,
 | ||||
|         for (j = 0; j < block_size_i; ++j) { | ||||
|             sum[j] = A[j]; | ||||
|         } | ||||
|         // and sum the following columns to the first one.
 | ||||
|         for (A += nrow; A < A_end; A += nrow) { | ||||
|             for (j = 0; j < block_size_i; ++j) { | ||||
|                 sum[j] += A[j]; | ||||
|             } | ||||
|         } | ||||
|         // Step one block forth.
 | ||||
|         A_block += block_size_i; | ||||
|         sum += block_size_i; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void colSums(const double *A, const int nrow, const int ncol, | ||||
|              double *sum) { | ||||
|     int j; | ||||
|     double *sum_end = sum + ncol; | ||||
| 
 | ||||
|     memset(sum, 0, sizeof(double) * ncol); | ||||
|     for (; sum < sum_end; ++sum) { | ||||
|         for (j = 0; j < nrow; ++j) { | ||||
|             *sum += A[j]; | ||||
|         } | ||||
|         A += nrow; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void rowSquareSums(const double *A, const int nrow, const int ncol, | ||||
|                    double *sum) { | ||||
|     int i, j, block_size, block_size_i; | ||||
|     const double *A_block = A; | ||||
|     const double *A_end = A + nrow * ncol; | ||||
| 
 | ||||
|     if (nrow < CVE_MEM_CHUNK_SIZE) { | ||||
|         block_size = nrow; | ||||
|     } else { | ||||
|         block_size = CVE_MEM_CHUNK_SIZE; | ||||
|     } | ||||
| 
 | ||||
|     // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|     for (i = 0; i < nrow; i += block_size_i) { | ||||
|         // Reset `A` to new block beginning.
 | ||||
|         A = A_block; | ||||
|         // Take block size of eveything left and reduce to max size.
 | ||||
|         block_size_i = nrow - i; | ||||
|         if (block_size_i > block_size) { | ||||
|             block_size_i = block_size; | ||||
|         } | ||||
|         // Compute first blocks column,
 | ||||
|         for (j = 0; j < block_size_i; ++j) { | ||||
|             sum[j] = A[j] * A[j]; | ||||
|         } | ||||
|         // and sum the following columns to the first one.
 | ||||
|         for (A += nrow; A < A_end; A += nrow) { | ||||
|             for (j = 0; j < block_size_i; ++j) { | ||||
|                 sum[j] += A[j] * A[j]; | ||||
|             } | ||||
|         } | ||||
|         // Step one block forth.
 | ||||
|         A_block += block_size_i; | ||||
|         sum += block_size_i; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void rowSumsSymVec(const double *Avec, const int nrow, | ||||
|                    const double diag, | ||||
|                    double *sum) { | ||||
|     int i, j; | ||||
| 
 | ||||
|     if (diag == 0.0) { | ||||
|         memset(sum, 0, nrow * sizeof(double)); | ||||
|     } else { | ||||
|         for (i = 0; i < nrow; ++i) { | ||||
|             sum[i] = diag; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     for (j = 0; j < nrow; ++j) { | ||||
|         for (i = j + 1; i < nrow; ++i, ++Avec) { | ||||
|             sum[j] += *Avec; | ||||
|             sum[i] += *Avec; | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										19
									
								
								CVE_C/src/sums.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								CVE_C/src/sums.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | ||||
| 
 | ||||
| /* Include Guard */ | ||||
| #ifndef CVE_INCLUDE_GUARD_SUMS_ | ||||
| #define CVE_INCLUDE_GUARD_SUMS_ | ||||
| 
 | ||||
| void rowSums(const double *A, const int nrow, const int ncol, | ||||
|              double *sum); | ||||
| 
 | ||||
| void colSums(const double *A, const int nrow, const int ncol, | ||||
|              double *sum); | ||||
| 
 | ||||
| void rowSquareSums(const double *A, const int nrow, const int ncol, | ||||
|                    double *sum); | ||||
| 
 | ||||
| void rowSumsSymVec(const double *Avec, const int nrow, | ||||
|                    const double diag, | ||||
|                    double *sum); | ||||
| 
 | ||||
| #endif /* CVE_INCLUDE_GUARD_SUMS_ */ | ||||
							
								
								
									
										113
									
								
								CVE_C/src/sweep.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								CVE_C/src/sweep.c
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,113 @@ | ||||
| #include <R_ext/Error.h> // for `error`. | ||||
| 
 | ||||
| #include "config.h" | ||||
| #include "sweep.h" | ||||
| 
 | ||||
| /* C[, j] = A[, j] * v for each j = 1 to ncol */ | ||||
| void rowSweep(const double *A, const int nrow, const int ncol, | ||||
|               const char* op, | ||||
|               const double *v, // vector of length nrow
 | ||||
|               double *C) { | ||||
|     int i, j, block_size, block_size_i; | ||||
|     const double *A_block = A; | ||||
|     double *C_block = C; | ||||
|     const double *A_end = A + nrow * ncol; | ||||
| 
 | ||||
|     if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
 | ||||
|         block_size = CVE_MEM_CHUNK_SMALL; | ||||
|     } else { | ||||
|         block_size = nrow; | ||||
|     } | ||||
| 
 | ||||
|     if (*op == '+') { | ||||
|         // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|         for (i = 0; i < nrow; i += block_size_i) { | ||||
|             // Set `A` and `C` to block beginning.
 | ||||
|             A = A_block; | ||||
|             C = C_block; | ||||
|             // Get current block's row size.
 | ||||
|             block_size_i = nrow - i; | ||||
|             if (block_size_i > block_size) { | ||||
|                 block_size_i = block_size; | ||||
|             } | ||||
|             // Perform element wise operation for block.
 | ||||
|             for (; A < A_end; A += nrow, C += nrow) { | ||||
|                 for (j = 0; j < block_size_i; ++j) { | ||||
|                     C[j] = A[j] + v[j]; // FUN = '+'
 | ||||
|                 } | ||||
|             } | ||||
|             // Step one block forth.
 | ||||
|             A_block += block_size_i; | ||||
|             C_block += block_size_i; | ||||
|             v += block_size_i; | ||||
|         } | ||||
|     } else if (*op == '-') { | ||||
|         // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|         for (i = 0; i < nrow; i += block_size_i) { | ||||
|             // Set `A` and `C` to block beginning.
 | ||||
|             A = A_block; | ||||
|             C = C_block; | ||||
|             // Get current block's row size.
 | ||||
|             block_size_i = nrow - i; | ||||
|             if (block_size_i > block_size) { | ||||
|                 block_size_i = block_size; | ||||
|             } | ||||
|             // Perform element wise operation for block.
 | ||||
|             for (; A < A_end; A += nrow, C += nrow) { | ||||
|                 for (j = 0; j < block_size_i; ++j) { | ||||
|                     C[j] = A[j] - v[j]; // FUN = '-'
 | ||||
|                 } | ||||
|             } | ||||
|             // Step one block forth.
 | ||||
|             A_block += block_size_i; | ||||
|             C_block += block_size_i; | ||||
|             v += block_size_i; | ||||
|         } | ||||
|     } else if (*op == '*') { | ||||
|         // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|         for (i = 0; i < nrow; i += block_size_i) { | ||||
|             // Set `A` and `C` to block beginning.
 | ||||
|             A = A_block; | ||||
|             C = C_block; | ||||
|             // Get current block's row size.
 | ||||
|             block_size_i = nrow - i; | ||||
|             if (block_size_i > block_size) { | ||||
|                 block_size_i = block_size; | ||||
|             } | ||||
|             // Perform element wise operation for block.
 | ||||
|             for (; A < A_end; A += nrow, C += nrow) { | ||||
|                 for (j = 0; j < block_size_i; ++j) { | ||||
|                     C[j] = A[j] * v[j]; // FUN = '*'
 | ||||
|                 } | ||||
|             } | ||||
|             // Step one block forth.
 | ||||
|             A_block += block_size_i; | ||||
|             C_block += block_size_i; | ||||
|             v += block_size_i; | ||||
|         } | ||||
|     } else if (*op == '/') { | ||||
|         // Iterate `(block_size_i, ncol)` submatrix blocks.
 | ||||
|         for (i = 0; i < nrow; i += block_size_i) { | ||||
|             // Set `A` and `C` to block beginning.
 | ||||
|             A = A_block; | ||||
|             C = C_block; | ||||
|             // Get current block's row size.
 | ||||
|             block_size_i = nrow - i; | ||||
|             if (block_size_i > block_size) { | ||||
|                 block_size_i = block_size; | ||||
|             } | ||||
|             // Perform element wise operation for block.
 | ||||
|             for (; A < A_end; A += nrow, C += nrow) { | ||||
|                 for (j = 0; j < block_size_i; ++j) { | ||||
|                     C[j] = A[j] / v[j]; // FUN = '/'
 | ||||
|                 } | ||||
|             } | ||||
|             // Step one block forth.
 | ||||
|             A_block += block_size_i; | ||||
|             C_block += block_size_i; | ||||
|             v += block_size_i; | ||||
|         } | ||||
|     } else { | ||||
|         error("Got unknown 'op' (opperation) argument."); | ||||
|     } | ||||
| } | ||||
							
								
								
									
										11
									
								
								CVE_C/src/sweep.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								CVE_C/src/sweep.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| 
 | ||||
| /* Include Guard */ | ||||
| #ifndef CVE_INCLUDE_GUARD_SWEEP_ | ||||
| #define CVE_INCLUDE_GUARD_SWEEP_ | ||||
| 
 | ||||
| void rowSweep(const double *A, const int nrow, const int ncol, | ||||
|               const char* op, | ||||
|               const double *v, // vector of length nrow
 | ||||
|               double *C); | ||||
| 
 | ||||
| #endif /* CVE_INCLUDE_GUARD_SWEEP_ */ | ||||
							
								
								
									
										29
									
								
								notes.md
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								notes.md
									
									
									
									
									
								
							| @ -6,6 +6,35 @@ grep --include=*\.{c,h,R} -rnw '.' -e "sweep" | ||||
| ``` | ||||
| searches in all `C` source and header fils as well as `R` source files for the term _sweep_. | ||||
| 
 | ||||
| ## Recursive dir. compair with colored sructure (more or less). | ||||
| ```bash | ||||
| diff -r CVE_R/ CVE_C/ | grep -E "^([<>]|[^<>].*)" | ||||
| ``` | ||||
| 
 | ||||
| ## Parsing `bash` script parameters. | ||||
| ```bash | ||||
| usage="$0 [-v|--verbose] [-n|--dry-run] [(-s|--stack-size) <size>] [-h|--help] [-- [p1, [p2, ...]]]" | ||||
| verbose=false | ||||
| help=false | ||||
| dry_run=false | ||||
| stack_size=0 | ||||
| 
 | ||||
| while [ $# -gt 0 ]; do | ||||
|     case "$1" in | ||||
|         -v | --verbose )    verbose=true;      shift ;; | ||||
|         -n | --dry-run )    dry_run=true;      shift ;; | ||||
|         -s | --stack-size ) stack_size="$2";   shift; shift ;; | ||||
|         -h | --help )       echo $usage;       exit ;; # On help print usage and exit. | ||||
|         -- ) shift; break ;;            # Break param "parsing". | ||||
|          * ) echo $usage >&2; exit 1 ;; # Print usage and exit with failure. | ||||
|     esac | ||||
| done | ||||
| 
 | ||||
| echo verbose=$verbose | ||||
| echo dry_run=$dry_run | ||||
| echo stack_size=$stack_size | ||||
| ``` | ||||
| 
 | ||||
| # Development | ||||
| ## Build and install. | ||||
| To build the package the `devtools` package is used. This also provides `roxygen2` which is used for documentation and authomatic creaton of the `NAMESPACE` file. | ||||
|  | ||||
							
								
								
									
										19
									
								
								wip.R
									
									
									
									
									
								
							
							
						
						
									
										19
									
								
								wip.R
									
									
									
									
									
								
							| @ -118,6 +118,17 @@ microbenchmark( | ||||
| ) | ||||
| 
 | ||||
| ## Matrix-Matrix opperation .call --------------------------------------------- | ||||
| transpose.c <- function(A) { | ||||
|     stopifnot( | ||||
|         is.matrix(A), is.numeric(A) | ||||
|     ) | ||||
|     if (!is.double(A)) { | ||||
|         A <- matrix(as.double(A), nrow(A), ncol(A)) | ||||
|     } | ||||
| 
 | ||||
|     .Call('R_transpose', PACKAGE = 'wip', A) | ||||
| } | ||||
| 
 | ||||
| matrixprod.c <- function(A, B) { | ||||
|     stopifnot( | ||||
|         is.matrix(A), is.numeric(A), | ||||
| @ -174,6 +185,14 @@ m <- 300 | ||||
| 
 | ||||
| A <- matrix(runif(n * k), n, k) | ||||
| B <- matrix(runif(k * m), k, m) | ||||
| stopifnot( | ||||
|     all.equal(t(A), transpose.c(A)) | ||||
| ) | ||||
| microbenchmark( | ||||
|     t(A), | ||||
|     transpose.c(A) | ||||
| ) | ||||
| 
 | ||||
| stopifnot( | ||||
|     all.equal(A %*% B, matrixprod.c(A, B)) | ||||
| ) | ||||
|  | ||||
							
								
								
									
										22
									
								
								wip.c
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								wip.c
									
									
									
									
									
								
							| @ -1,4 +1,5 @@ | ||||
| #include <stdlib.h> | ||||
| #include <string.h> // for `mem*` functions. | ||||
| 
 | ||||
| #include <R_ext/BLAS.h> | ||||
| #include <R_ext/Lapack.h> | ||||
| @ -99,15 +100,15 @@ static inline void rowSquareSums(const double *A, | ||||
| } | ||||
| 
 | ||||
| static inline void rowSumsSymVec(const double *Avec, const int nrow, | ||||
|                                  const double *diag, | ||||
|                                  const double diag, | ||||
|                                  double *sum) { | ||||
|     int i, j; | ||||
| 
 | ||||
|     if (*diag == 0.0) { | ||||
|     if (diag == 0.0) { | ||||
|         memset(sum, 0, nrow * sizeof(double)); | ||||
|     } else { | ||||
|         for (i = 0; i < nrow; ++i) { | ||||
|             sum[i] = *diag; | ||||
|             sum[i] = diag; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @ -228,6 +229,18 @@ static void rowSweep(const double *A, const int nrow, const int ncol, | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void transpose(const double *A, const int nrow, const int ncol, double* T) { | ||||
|     int i, j, len = nrow * ncol; | ||||
| 
 | ||||
|     // Filling column-wise and accessing row-wise.
 | ||||
|     for (i = 0, j = 0; i < len; ++i, j += nrow) { | ||||
|         if (j >= len) { | ||||
|             j -= len - 1; | ||||
|         } | ||||
|         T[i] = A[j]; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| static inline void matrixprod(const double *A, const int nrowA, const int ncolA, | ||||
|                               const double *B, const int nrowB, const int ncolB, | ||||
|                               double *C) { | ||||
| @ -363,7 +376,6 @@ static void gradient(const int n, const int p, const int q, | ||||
|     // Number of X_i to X_j not trivial pairs.
 | ||||
|     int i, N = (n * (n - 1)) / 2; | ||||
|     double scale = -0.5 / h; | ||||
|     const double one = 1.0; | ||||
| 
 | ||||
|     if (X_diff == (void*)0) { | ||||
|         // TODO: ...
 | ||||
| @ -391,7 +403,7 @@ static void gradient(const int n, const int p, const int q, | ||||
|         vecW[i] = gaussKernel(vecD[i], scale); | ||||
|     } | ||||
|     double *colSums = X_proj + N; // still allocated!
 | ||||
|     rowSumsSymVec(vecW, n, &one, colSums); // rowSums = colSums cause Sym
 | ||||
|     rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums = colSums cause Sym
 | ||||
| 
 | ||||
|     // compute weighted responces of first end second momontum, aka y1, y2.
 | ||||
|     double *y1 = X_proj + N + n; | ||||
|  | ||||
							
								
								
									
										20
									
								
								wip.h
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								wip.h
									
									
									
									
									
								
							| @ -1,5 +1,5 @@ | ||||
| #ifndef _CVE_INCLUDE_GUARD_ | ||||
| #define _CVE_INCLUDE_GUARD_ | ||||
| #ifndef CVE_INCLUDE_GUARD_ | ||||
| #define CVE_INCLUDE_GUARD_ | ||||
| 
 | ||||
| #include <Rinternals.h> | ||||
| 
 | ||||
| @ -41,12 +41,12 @@ SEXP R_rowSquareSums(SEXP A) { | ||||
| } | ||||
| 
 | ||||
| static inline void rowSumsSymVec(const double *Avec, const int nrow, | ||||
|                                  const double *diag, | ||||
|                                  const double diag, | ||||
|                                  double *sum); | ||||
| SEXP R_rowSumsSymVec(SEXP Avec, SEXP nrow, SEXP diag) { | ||||
|     SEXP sum = PROTECT(allocVector(REALSXP, *INTEGER(nrow))); | ||||
| 
 | ||||
|     rowSumsSymVec(REAL(Avec), *INTEGER(nrow), REAL(diag), REAL(sum)); | ||||
|     rowSumsSymVec(REAL(Avec), *INTEGER(nrow), *REAL(diag), REAL(sum)); | ||||
| 
 | ||||
|     UNPROTECT(1); | ||||
|     return sum; | ||||
| @ -67,6 +67,16 @@ SEXP R_rowSweep(SEXP A, SEXP v, SEXP op) { | ||||
|     return C; | ||||
| } | ||||
| 
 | ||||
| void transpose(const double *A, const int nrow, const int ncol, double* T); | ||||
| SEXP R_transpose(SEXP A) { | ||||
|     SEXP T = PROTECT(allocMatrix(REALSXP, ncols(A), nrows(A))); | ||||
| 
 | ||||
|     transpose(REAL(A), nrows(A), ncols(A), REAL(T)); | ||||
| 
 | ||||
|     UNPROTECT(1); /* T */ | ||||
|     return T; | ||||
| } | ||||
| 
 | ||||
| static inline void matrixprod(const double *A, const int nrowA, const int ncolA, | ||||
|                               const double *B, const int nrowB, const int ncolB, | ||||
|                               double *C); | ||||
| @ -156,4 +166,4 @@ SEXP R_gradient(SEXP X, SEXP X_diff, SEXP Y, SEXP V, SEXP h) { | ||||
|     return G; | ||||
| } | ||||
| 
 | ||||
| #endif /* _CVE_INCLUDE_GUARD_ */ | ||||
| #endif /* CVE_INCLUDE_GUARD_ */ | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user