add: multivariate response to tensorflow wip.

2020-09-11 19:30:35 +02:00 · 2020-09-11 19:30:35 +02:00 · 025b9eb2af
commit 025b9eb2af
parent 4a950d6df2
1 changed files with 50 additions and 16 deletions
--- a/cve_tensorflow.R
+++ b/cve_tensorflow.R
@ -43,6 +43,7 @@ tf_constant <- function(obj, dtype = "float32", ...) {
 cve.tf <- function(X, Y, k, h = estimate.bandwidth(X, k, sqrt(nrow(X))),
    V.init = NULL, optimizer_initialier = tf$optimizers$RMSprop, attempts = 10L,
    nr.projections = nrow(X)^(3 / 2),
    sd_noise = 0, method = c("simple", "weighted")
 ) {
    method <- match.arg(method)
@ -55,15 +56,25 @@ cve.tf <- function(X, Y, k, h = estimate.bandwidth(X, k, sqrt(nrow(X))),
    k <- as.integer(k)
    q <- p - k
    if (!is.matrix(Y))
        Y <- as.matrix(Y)
    # Projective resampling.
    if (ncol(Y) > 1L) {
        R <- matrix(rnorm(ncol(Y) * nr.projections), ncol(Y))
        R <- t(t(R) / sqrt(colSums(R^2)))
        Y <- Y %*% R
    }
    X <- tf_constant(scale(X))
-    Y <- tf_constant(scale(as.matrix(Y)))
+    Y <- tf_constant(scale(Y))
    I <- tf_constant(diag(1, p))
    h <- tf_Variable(h)
    loss <- tf_function(function(V) {
        Q <- I - tf$matmul(V, V, transpose_b = TRUE)
        if (sd_noise > 0)
-            XQ <- tf$matmul(X + tf$random$normal(list(n, p), stddev = 0.05), Q)
+            XQ <- tf$matmul(X + tf$random$normal(list(n, p), stddev = sd_noise), Q)
        else
            XQ <- tf$matmul(X, Q)
        S <- tf$matmul(XQ, XQ, transpose_b = TRUE)
@ -76,9 +87,10 @@ cve.tf <- function(X, Y, k, h = estimate.bandwidth(X, k, sqrt(nrow(X))),
        if (method == "simple") {
            l <- tf$reduce_mean(y2 - tf$pow(y1, 2L))
        } else {# weighted
-            w <- tf$reduce_sum(K, 1L, keepdims = TRUE) - `1` # TODO: check/fix
+            w <- w - `1`
            w <- w / tf$reduce_sum(w)
            l <- tf$reduce_sum(w * (y2 - tf$pow(y1, 2L)))
            l <- l / tf$cast(tf$shape(Y)[2], "float32")
        }
        l
    })
@ -129,36 +141,58 @@ plot.sim <- function(sim) {
    lines(density(ssd, from = 0, to = 1))
    stat <- c(Median = median(ssd), Mean = mean(ssd))
    abline(v = stat, lty = 2)
-    text(stat, 1.02 * max(h$density), names(stat),
+    text(stat, max(h$density), names(stat),
         pos = if(diff(stat) > 0) c("2", "4") else c("4", "2"))
 }
 multivariate.dataset <- function(n = 100, p = 6, q = 4) {
    CVE <- getNamespace('CVE')
    X <- matrix(rnorm(n * p), n, p)
    Delta <- diag(1, q, q)
    Delta[1, 2] <- Delta[2, 1] <- -0.5
    epsilon <- CVE$rmvnorm(n, sigma = Delta)
    B <- matrix(0, p, q)
    B[1, 1] <- 1
    B[2, 2] <- 2 / sqrt(5)
    B[3, 2] <- 1 / sqrt(5)
    Y <- X %*% B + epsilon
    list(X = X, Y = Y, B = B[, 1:2])
 }
 set.seed(42)
-sim.cve <- vector("list", 100)
+reps <- 10L
-sim.tf1  <- vector("list", 100)
+sim.cve <- vector("list", reps)
-sim.tf2  <- vector("list", 100)
+sim.tf1  <- vector("list", reps)
 sim.tf2  <- vector("list", reps)
 start <- Sys.time()
-for (i in 1:100) {
+for (i in 1:reps) {
-    ds <- dataset(1)
+    # ds <- dataset(1)
    ds <- multivariate.dataset()
-    sim.cve[[i]] <- list(
+    # sim.cve[[i]] <- list(
-        B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B)), ncol(ds$B)),
+    #     B.est = coef(CVE::cve.call(ds$X, ds$Y, k = ncol(ds$B)), ncol(ds$B)),
-        B.true = ds$B
+    #     B.true = ds$B
-    )
+    # )
    sim.tf1[[i]] <- list(
-        B.est = cve.tf(ds$X, ds$Y, ncol(ds$B))$B,
+        B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
            optimizer_initialier = tf$optimizers$Adam)$B,
        B.true = ds$B
    )
    sim.tf2[[i]] <- list(
-        B.est = cve.tf(ds$X, ds$Y, ncol(ds$B), sd_noise = 0.05)$B,
+        B.est = cve.tf(ds$X, ds$Y, ncol(ds$B),
            optimizer_initialier = tf$optimizers$Adam,
            method = "weighted")$B,
        B.true = ds$B
    )
-    cat(sprintf("\r%4d/100 -", i), format(Sys.time() - start), '\n')
+    cat(sprintf("\r%4d/%d -", i, reps), format(Sys.time() - start), '\n')
 }
 # pdf('subspace_comp.pdf')