add: multivariate Y (aka projective resampling)
This commit is contained in:
parent
025b9eb2af
commit
05c2aea44a
83
CVE/R/CVE.R
83
CVE/R/CVE.R
|
@ -163,7 +163,7 @@
|
||||||
#'
|
#'
|
||||||
#' @importFrom stats model.frame
|
#' @importFrom stats model.frame
|
||||||
#' @export
|
#' @export
|
||||||
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
cve <- function(formula, data, method = "mean", max.dim = 10L, ...) {
|
||||||
# check for type of `data` if supplied and set default
|
# check for type of `data` if supplied and set default
|
||||||
if (missing(data)) {
|
if (missing(data)) {
|
||||||
data <- environment(formula)
|
data <- environment(formula)
|
||||||
|
@ -173,8 +173,11 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
|
|
||||||
# extract `X`, `Y` from `formula` with `data`
|
# extract `X`, `Y` from `formula` with `data`
|
||||||
model <- stats::model.frame(formula, data)
|
model <- stats::model.frame(formula, data)
|
||||||
X <- as.matrix(model[ ,-1L, drop = FALSE])
|
Y <- stats::model.response(model, "double")
|
||||||
Y <- as.double(model[ , 1L])
|
X <- stats::model.matrix(model)
|
||||||
|
if ("(Intercept)" %in% colnames(X)) {
|
||||||
|
X <- X[, "(Intercept)" != colnames(X), drop = FALSE]
|
||||||
|
}
|
||||||
|
|
||||||
# pass extracted data on to [cve.call()]
|
# pass extracted data on to [cve.call()]
|
||||||
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)
|
||||||
|
@ -252,25 +255,39 @@ cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
|
||||||
#' coef(cve.obj.simple1, k = 1)
|
#' coef(cve.obj.simple1, k = 1)
|
||||||
#' coef(cve.obj.simple2, k = 1)
|
#' coef(cve.obj.simple2, k = 1)
|
||||||
#' @export
|
#' @export
|
||||||
cve.call <- function(X, Y, method = "simple",
|
cve.call <- function(X, Y,
|
||||||
func_list = list(function (x) x),
|
method = c("mean", "weighted.mean", "central", "weighted.central"),
|
||||||
nObs = sqrt(nrow(X)), h = NULL,
|
func_list = NULL, nObs = sqrt(nrow(X)), h = NULL,
|
||||||
min.dim = 1L, max.dim = 10L, k = NULL,
|
min.dim = 1L, max.dim = 10L, k = NULL,
|
||||||
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
momentum = 0.0, tau = 1.0, tol = 1e-3,
|
||||||
slack = 0.0, gamma = 0.5,
|
slack = 0.0, gamma = 0.5,
|
||||||
V.init = NULL,
|
V.init = NULL,
|
||||||
max.iter = 50L, attempts = 10L,
|
max.iter = 50L, attempts = 10L, nr.proj = 500L,
|
||||||
logger = NULL) {
|
logger = NULL
|
||||||
|
) {
|
||||||
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")
|
# Determine method with partial matching (shortcuts: "Weight" -> "weighted")
|
||||||
methods <- list(
|
method <- match.arg(method)
|
||||||
"simple" = 0L,
|
method_nr <- if(startsWith(method, "weighted")) 1L else 0L
|
||||||
"weighted" = 1L
|
|
||||||
)
|
# Set default functions for ensamble methods (of indentity else)
|
||||||
method_nr <- methods[[tolower(method), exact = FALSE]]
|
if (is.null(func_list)) {
|
||||||
if (!is.integer(method_nr)) {
|
if (endsWith(method, "central")) {
|
||||||
stop('Unable to determine method.')
|
func_list <- list(
|
||||||
|
function(Y) { q <- quantile(Y, 0.1); as.double(Y <= q) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.1, 0.2)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.2, 0.3)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.3, 0.4)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.4, 0.5)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.5, 0.6)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.6, 0.7)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.7, 0.8)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, c(0.8, 0.9)); as.double(q[1] < Y & Y <= q[2]) },
|
||||||
|
function(Y) { q <- quantile(Y, 0.9); as.double(q < Y) }
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
func_list <- list(function(Y) Y)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
method <- names(which(method_nr == methods))
|
|
||||||
|
|
||||||
# parameter checking
|
# parameter checking
|
||||||
if (!is.numeric(momentum) || length(momentum) > 1L) {
|
if (!is.numeric(momentum) || length(momentum) > 1L) {
|
||||||
|
@ -289,10 +306,13 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
if (!is.numeric(Y)) {
|
if (!is.numeric(Y)) {
|
||||||
stop("Parameter 'Y' must be numeric.")
|
stop("Parameter 'Y' must be numeric.")
|
||||||
}
|
}
|
||||||
if (is.matrix(Y) || !is.double(Y)) {
|
if (!is.double(Y)) {
|
||||||
Y <- as.double(Y)
|
storage.mode(Y) <- "double"
|
||||||
}
|
}
|
||||||
if (nrow(X) != length(Y)) {
|
if (!is.matrix(Y)) {
|
||||||
|
Y <- as.matrix(Y)
|
||||||
|
}
|
||||||
|
if (nrow(X) != nrow(Y)) {
|
||||||
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
stop("Rows of 'X' and 'Y' elements are not compatible.")
|
||||||
}
|
}
|
||||||
if (ncol(X) < 2) {
|
if (ncol(X) < 2) {
|
||||||
|
@ -367,6 +387,13 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
stop("Parameter 'max.iter' must be at least 1L.")
|
stop("Parameter 'max.iter' must be at least 1L.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is.integer(nr.proj)) {
|
||||||
|
nr.proj <- as.integer(nr.proj)
|
||||||
|
}
|
||||||
|
if (length(nr.proj) > 1 || nr.proj < 1) {
|
||||||
|
stop("Parameter 'nr.proj' must be a single positive integer.")
|
||||||
|
}
|
||||||
|
|
||||||
if (is.null(V.init)) {
|
if (is.null(V.init)) {
|
||||||
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
if (!is.numeric(attempts) || length(attempts) > 1L) {
|
||||||
stop("Parameter 'attempts' must be positive integer.")
|
stop("Parameter 'attempts' must be positive integer.")
|
||||||
|
@ -378,8 +405,16 @@ cve.call <- function(X, Y, method = "simple",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Evaluate each function given `Y` and build a `n x |func_list|` matrix.
|
# Projective resampling of the multivariate `Y` as a `n x nr.proj` matrix.
|
||||||
|
if (ncol(Y) > 1) {
|
||||||
|
projections <- matrix(rnorm(ncol(Y) * nr.proj), nr.proj)
|
||||||
|
projections <- projections / sqrt(rowSums(projections^2))
|
||||||
|
Y <- Y %*% t(projections)
|
||||||
|
}
|
||||||
|
# Evaluate each function given (possible projected) `Y` and build a
|
||||||
|
# `n x (|func_list| * nr.proj)` matrix.
|
||||||
Fy <- vapply(func_list, do.call, Y, list(Y))
|
Fy <- vapply(func_list, do.call, Y, list(Y))
|
||||||
|
dim(Fy) <- c(nrow(Fy), prod(dim(Fy)[-1]))
|
||||||
|
|
||||||
# Convert numerical values to "double".
|
# Convert numerical values to "double".
|
||||||
storage.mode(X) <- storage.mode(Fy) <- "double"
|
storage.mode(X) <- storage.mode(Fy) <- "double"
|
||||||
|
|
|
@ -145,59 +145,91 @@ plot.sim <- function(sim) {
|
||||||
pos = if(diff(stat) > 0) c("2", "4") else c("4", "2"))
|
pos = if(diff(stat) > 0) c("2", "4") else c("4", "2"))
|
||||||
}
|
}
|
||||||
|
|
||||||
multivariate.dataset <- function(n = 100, p = 6, q = 4) {
|
multivariate.dataset <- function(dataset = 1, n = 100, p = 6, q = 4) {
|
||||||
CVE <- getNamespace('CVE')
|
CVE <- getNamespace('CVE')
|
||||||
|
|
||||||
X <- matrix(rnorm(n * p), n, p)
|
X <- matrix(rnorm(n * p), n, p)
|
||||||
Delta <- diag(1, q, q)
|
|
||||||
Delta[1, 2] <- Delta[2, 1] <- -0.5
|
|
||||||
epsilon <- CVE$rmvnorm(n, sigma = Delta)
|
|
||||||
B <- matrix(0, p, q)
|
|
||||||
B[1, 1] <- 1
|
|
||||||
B[2, 2] <- 2 / sqrt(5)
|
|
||||||
B[3, 2] <- 1 / sqrt(5)
|
|
||||||
|
|
||||||
Y <- X %*% B + epsilon
|
if (dataset == 1) {
|
||||||
|
Delta <- diag(1, q, q)
|
||||||
|
Delta[1, 2] <- Delta[2, 1] <- -0.5
|
||||||
|
epsilon <- CVE$rmvnorm(n, sigma = Delta)
|
||||||
|
B <- matrix(0, p, q)
|
||||||
|
B[1, 1] <- 1
|
||||||
|
B[2, 2] <- 2 / sqrt(5)
|
||||||
|
B[3, 2] <- 1 / sqrt(5)
|
||||||
|
|
||||||
list(X = X, Y = Y, B = B[, 1:2])
|
Y <- X %*% B + epsilon
|
||||||
|
B <- B[, 1:2]
|
||||||
|
}
|
||||||
|
if (dataset == 2) {
|
||||||
|
B <- matrix(c(0.8, 0.6, 0, 0, 0, 0))
|
||||||
|
eps <- matrix(0, n, q)
|
||||||
|
Delta <- diag(1, q, q)
|
||||||
|
for(i in 1:n) {
|
||||||
|
Delta[1, 2] <- Delta[2, 1] <- sin(X[i, ] %*% B)
|
||||||
|
eps[i, ] <- CVE$rmvnorm(1, sigma = Delta)
|
||||||
|
}
|
||||||
|
Y<-cbind(exp(eps[, 1]), eps[, 2:4])
|
||||||
|
}
|
||||||
|
list(X = X, Y = Y, B = B)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
set.seed(42)
|
set.seed(42)
|
||||||
reps <- 10L
|
reps <- 5L
|
||||||
sim.cve <- vector("list", reps)
|
sim.cve.m <- vector("list", reps)
|
||||||
|
sim.cve.c <- vector("list", reps)
|
||||||
|
sim.cve.wm <- vector("list", reps)
|
||||||
|
sim.cve.wc <- vector("list", reps)
|
||||||
sim.tf1 <- vector("list", reps)
|
sim.tf1 <- vector("list", reps)
|
||||||
sim.tf2 <- vector("list", reps)
|
sim.tf2 <- vector("list", reps)
|
||||||
|
|
||||||
start <- Sys.time()
|
start <- Sys.time()
|
||||||
for (i in 1:reps) {
|
for (i in 1:reps) {
|
||||||
# ds <- dataset(1)
|
# ds <- dataset(1)
|
||||||
ds <- multivariate.dataset()
|
ds <- multivariate.dataset(2, n = 400)
|
||||||
|
|
||||||
# sim.cve[[i]] <- list(
|
sim.cve.m[[i]] <- list(
|
||||||
# B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B)), ncol(ds$B)),
|
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "mean"), ncol(ds$B)),
|
||||||
|
B.true = ds$B
|
||||||
|
)
|
||||||
|
sim.cve.c[[i]] <- list(
|
||||||
|
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "central"), ncol(ds$B)),
|
||||||
|
B.true = ds$B
|
||||||
|
)
|
||||||
|
sim.cve.wm[[i]] <- list(
|
||||||
|
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.mean"), ncol(ds$B)),
|
||||||
|
B.true = ds$B
|
||||||
|
)
|
||||||
|
sim.cve.wc[[i]] <- list(
|
||||||
|
B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B), method = "weighted.central"), ncol(ds$B)),
|
||||||
|
B.true = ds$B
|
||||||
|
)
|
||||||
|
|
||||||
|
# sim.tf1[[i]] <- list(
|
||||||
|
# B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
|
||||||
|
# optimizer_initialier = tf$optimizers$Adam)$B,
|
||||||
|
# B.true = ds$B
|
||||||
|
# )
|
||||||
|
# sim.tf2[[i]] <- list(
|
||||||
|
# B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
|
||||||
|
# optimizer_initialier = tf$optimizers$Adam,
|
||||||
|
# method = "weighted")$B,
|
||||||
# B.true = ds$B
|
# B.true = ds$B
|
||||||
# )
|
# )
|
||||||
|
|
||||||
sim.tf1[[i]] <- list(
|
|
||||||
B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
|
|
||||||
optimizer_initialier = tf$optimizers$Adam)$B,
|
|
||||||
B.true = ds$B
|
|
||||||
)
|
|
||||||
|
|
||||||
sim.tf2[[i]] <- list(
|
|
||||||
B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
|
|
||||||
optimizer_initialier = tf$optimizers$Adam,
|
|
||||||
method = "weighted")$B,
|
|
||||||
B.true = ds$B
|
|
||||||
)
|
|
||||||
|
|
||||||
cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n')
|
cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n')
|
||||||
}
|
}
|
||||||
|
|
||||||
# pdf('subspace_comp.pdf')
|
# pdf('subspace_comp.pdf')
|
||||||
par(mfrow = c(3, 1))
|
|
||||||
plot.sim(sim.cve)
|
plot.sim(sim.cve)
|
||||||
plot.sim(sim.tf1)
|
plot.sim(sim.tf1)
|
||||||
plot.sim(sim.tf2)
|
plot.sim(sim.tf2)
|
||||||
|
|
||||||
|
par(mfrow = c(2, 2))
|
||||||
|
plot.sim(sim.cve.m)
|
||||||
|
plot.sim(sim.cve.c)
|
||||||
|
plot.sim(sim.cve.wm)
|
||||||
|
plot.sim(sim.cve.wc)
|
||||||
|
|
||||||
# dev.off()
|
# dev.off()
|
||||||
|
|
Loading…
Reference in New Issue