From 05c2aea44aa8782838bc9f2ab57e3026e0b5b1bb Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 17 Sep 2020 18:27:31 +0200 Subject: [PATCH] add: multivariate Y (aka projective resampling) --- CVE/R/CVE.R | 83 ++++++++++++++++++++++++++++++------------- cve_tensorflow.R | 92 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 121 insertions(+), 54 deletions(-) diff --git a/CVE/R/CVE.R b/CVE/R/CVE.R index 1a60e5f..6ec0e27 100644 --- a/CVE/R/CVE.R +++ b/CVE/R/CVE.R @@ -163,7 +163,7 @@ #' #' @importFrom stats model.frame #' @export -cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { +cve <- function(formula, data, method = "mean", max.dim = 10L, ...) { # check for type of `data` if supplied and set default if (missing(data)) { data <- environment(formula) @@ -173,8 +173,11 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { # extract `X`, `Y` from `formula` with `data` model <- stats::model.frame(formula, data) - X <- as.matrix(model[ ,-1L, drop = FALSE]) - Y <- as.double(model[ , 1L]) + Y <- stats::model.response(model, "double") + X <- stats::model.matrix(model) + if ("(Intercept)" %in% colnames(X)) { + X <- X[, "(Intercept)" != colnames(X), drop = FALSE] + } # pass extracted data on to [cve.call()] dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...) @@ -252,25 +255,39 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) { #' coef(cve.obj.simple1, k = 1) #' coef(cve.obj.simple2, k = 1) #' @export -cve.call <- function(X, Y, method = "simple", - func_list = list(function (x) x), - nObs = sqrt(nrow(X)), h = NULL, - min.dim = 1L, max.dim = 10L, k = NULL, - momentum = 0.0, tau = 1.0, tol = 1e-3, - slack = 0.0, gamma = 0.5, - V.init = NULL, - max.iter = 50L, attempts = 10L, - logger = NULL) { +cve.call <- function(X, Y, + method = c("mean", "weighted.mean", "central", "weighted.central"), + func_list = NULL, nObs = sqrt(nrow(X)), h = NULL, + min.dim = 1L, max.dim = 10L, k = NULL, + momentum = 0.0, tau = 1.0, tol = 1e-3, + slack = 0.0, gamma = 0.5, + V.init = NULL, + max.iter = 50L, attempts = 10L, nr.proj = 500L, + logger = NULL +) { # Determine method with partial matching (shortcuts: "Weight" -> "weighted") - methods <- list( - "simple" = 0L, - "weighted" = 1L - ) - method_nr <- methods[[tolower(method), exact = FALSE]] - if (!is.integer(method_nr)) { - stop('Unable to determine method.') + method <- match.arg(method) + method_nr <- if(startsWith(method, "weighted")) 1L else 0L + + # Set default functions for ensamble methods (of indentity else) + if (is.null(func_list)) { + if (endsWith(method, "central")) { + func_list <- list( + function(Y) { q <- quantile(Y, 0.1); as.double(Y <= q) }, + function(Y) { q <- quantile(Y, c(0.1, 0.2)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.2, 0.3)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.3, 0.4)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.4, 0.5)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.5, 0.6)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.6, 0.7)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.7, 0.8)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, c(0.8, 0.9)); as.double(q[1] < Y & Y <= q[2]) }, + function(Y) { q <- quantile(Y, 0.9); as.double(q < Y) } + ) + } else { + func_list <- list(function(Y) Y) + } } - method <- names(which(method_nr == methods)) # parameter checking if (!is.numeric(momentum) || length(momentum) > 1L) { @@ -289,10 +306,13 @@ cve.call <- function(X, Y, method = "simple", if (!is.numeric(Y)) { stop("Parameter 'Y' must be numeric.") } - if (is.matrix(Y) || !is.double(Y)) { - Y <- as.double(Y) + if (!is.double(Y)) { + storage.mode(Y) <- "double" } - if (nrow(X) != length(Y)) { + if (!is.matrix(Y)) { + Y <- as.matrix(Y) + } + if (nrow(X) != nrow(Y)) { stop("Rows of 'X' and 'Y' elements are not compatible.") } if (ncol(X) < 2) { @@ -367,6 +387,13 @@ cve.call <- function(X, Y, method = "simple", stop("Parameter 'max.iter' must be at least 1L.") } + if (!is.integer(nr.proj)) { + nr.proj <- as.integer(nr.proj) + } + if (length(nr.proj) > 1 || nr.proj < 1) { + stop("Parameter 'nr.proj' must be a single positive integer.") + } + if (is.null(V.init)) { if (!is.numeric(attempts) || length(attempts) > 1L) { stop("Parameter 'attempts' must be positive integer.") @@ -378,8 +405,16 @@ cve.call <- function(X, Y, method = "simple", } } - # Evaluate each function given `Y` and build a `n x |func_list|` matrix. + # Projective resampling of the multivariate `Y` as a `n x nr.proj` matrix. + if (ncol(Y) > 1) { + projections <- matrix(rnorm(ncol(Y) * nr.proj), nr.proj) + projections <- projections / sqrt(rowSums(projections^2)) + Y <- Y %*% t(projections) + } + # Evaluate each function given (possible projected) `Y` and build a + # `n x (|func_list| * nr.proj)` matrix. Fy <- vapply(func_list, do.call, Y, list(Y)) + dim(Fy) <- c(nrow(Fy), prod(dim(Fy)[-1])) # Convert numerical values to "double". storage.mode(X) <- storage.mode(Fy) <- "double" diff --git a/cve_tensorflow.R b/cve_tensorflow.R index 8f3bfa2..7e5d563 100644 --- a/cve_tensorflow.R +++ b/cve_tensorflow.R @@ -145,59 +145,91 @@ plot.sim <- function(sim) { pos = if(diff(stat) > 0) c("2", "4") else c("4", "2")) } -multivariate.dataset <- function(n = 100, p = 6, q = 4) { +multivariate.dataset <- function(dataset = 1, n = 100, p = 6, q = 4) { CVE <- getNamespace('CVE') - X <- matrix(rnorm(n * p), n, p) - Delta <- diag(1, q, q) - Delta[1, 2] <- Delta[2, 1] <- -0.5 - epsilon <- CVE$rmvnorm(n, sigma = Delta) - B <- matrix(0, p, q) - B[1, 1] <- 1 - B[2, 2] <- 2 / sqrt(5) - B[3, 2] <- 1 / sqrt(5) - Y <- X %*% B + epsilon + if (dataset == 1) { + Delta <- diag(1, q, q) + Delta[1, 2] <- Delta[2, 1] <- -0.5 + epsilon <- CVE$rmvnorm(n, sigma = Delta) + B <- matrix(0, p, q) + B[1, 1] <- 1 + B[2, 2] <- 2 / sqrt(5) + B[3, 2] <- 1 / sqrt(5) - list(X = X, Y = Y, B = B[, 1:2]) + Y <- X %*% B + epsilon + B <- B[, 1:2] + } + if (dataset == 2) { + B <- matrix(c(0.8, 0.6, 0, 0, 0, 0)) + eps <- matrix(0, n, q) + Delta <- diag(1, q, q) + for(i in 1:n) { + Delta[1, 2] <- Delta[2, 1] <- sin(X[i, ] %*% B) + eps[i, ] <- CVE$rmvnorm(1, sigma = Delta) + } + Y<-cbind(exp(eps[, 1]), eps[, 2:4]) + } + list(X = X, Y = Y, B = B) } set.seed(42) -reps <- 10L -sim.cve <- vector("list", reps) +reps <- 5L +sim.cve.m <- vector("list", reps) +sim.cve.c <- vector("list", reps) +sim.cve.wm <- vector("list", reps) +sim.cve.wc <- vector("list", reps) sim.tf1 <- vector("list", reps) sim.tf2 <- vector("list", reps) start <- Sys.time() for (i in 1:reps) { # ds <- dataset(1) - ds <- multivariate.dataset() + ds <- multivariate.dataset(2, n = 400) - # sim.cve[[i]] <- list( - # B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B)), ncol(ds$B)), + sim.cve.m[[i]] <- list( + B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "mean"), ncol(ds$B)), + B.true = ds$B + ) + sim.cve.c[[i]] <- list( + B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "central"), ncol(ds$B)), + B.true = ds$B + ) + sim.cve.wm[[i]] <- list( + B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.mean"), ncol(ds$B)), + B.true = ds$B + ) + sim.cve.wc[[i]] <- list( + B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.central"), ncol(ds$B)), + B.true = ds$B + ) + + # sim.tf1[[i]] <- list( + # B.est = cve.tf(ds$X, ds$Y, ncol(ds$B), + # optimizer_initialier = tf$optimizers$Adam)$B, + # B.true = ds$B + # ) + # sim.tf2[[i]] <- list( + # B.est = cve.tf(ds$X, ds$Y, ncol(ds$B), + # optimizer_initialier = tf$optimizers$Adam, + # method = "weighted")$B, # B.true = ds$B # ) - - sim.tf1[[i]] <- list( - B.est = cve.tf(ds$X, ds$Y, ncol(ds$B), - optimizer_initialier = tf$optimizers$Adam)$B, - B.true = ds$B - ) - - sim.tf2[[i]] <- list( - B.est = cve.tf(ds$X, ds$Y, ncol(ds$B), - optimizer_initialier = tf$optimizers$Adam, - method = "weighted")$B, - B.true = ds$B - ) cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n') } # pdf('subspace_comp.pdf') -par(mfrow = c(3, 1)) plot.sim(sim.cve) plot.sim(sim.tf1) plot.sim(sim.tf2) + +par(mfrow = c(2, 2)) +plot.sim(sim.cve.m) +plot.sim(sim.cve.c) +plot.sim(sim.cve.wm) +plot.sim(sim.cve.wc) + # dev.off()