test_that("Tensor-Normal Score", { # setup dimensions n <- 11L p <- c(3L, 5L, 4L) q <- c(2L, 7L, 3L) # create "true" GLM parameters eta1 <- array(rnorm(prod(p)), dim = p) alphas <- Map(matrix, Map(rnorm, p * q), p) Omegas <- Map(function(pj) { solve(0.5^abs(outer(seq_len(pj), seq_len(pj), `-`))) }, p) params <- list(eta1, alphas, Omegas) # compute tensor normal parameters from GLM parameters Deltas <- Map(solve, Omegas) mu <- mlm(eta1, Deltas) # sample some test data sample.axis <- length(p) + 1L Fy <- array(rnorm(n * prod(q)), dim = c(q, n)) X <- mlm(Fy, Map(`%*%`, Deltas, alphas)) + rtensornorm(n, mu, Deltas, sample.axis) # Create a GLM family object family <- make.gmlm.family("normal") # first unpack the family object log.likelihood <- family$log.likelihood grad <- family$grad # compare numeric gradient against the analytic gradient provided by the # family at the initial parameters grad.num <- do.call(function(eta1, alphas, Omegas) { list( "Dl(eta1)" = num.deriv.function(function(eta1) { log.likelihood(X, Fy, eta1, alphas, Omegas) }, eta1), "Dl(alphas)" = Map(function(j) num.deriv.function(function(alpha_j) { alphas[[j]] <- alpha_j log.likelihood(X, Fy, eta1, alphas, Omegas) }, alphas[[j]]), seq_along(alphas)), "Dl(Omegas)" = Map(function(j) num.deriv.function(function(Omega_j) { Omegas[[j]] <- Omega_j log.likelihood(X, Fy, eta1, alphas, Omegas) }, Omegas[[j]], sym = TRUE), seq_along(Omegas)) ) }, params) grad.ana <- do.call(grad, c(list(X, Fy), params)) expect_equal(RMap(c, grad.ana), grad.num, tolerance = 1e-6) # test for correct dimensions ana.dims <- list( "Dl(eta1)" = p, "Dl(alphas)" = Map(c, p, q), "Dl(Omegas)" = Map(c, p, p) ) expect_identical(RMap(dim, grad.ana), ana.dims) }) test_that("Tensor-Normal moments of the sufficient statistic t(X)", { # config # # (estimated) sample size for sample estimates # n <- 250000 # sample estimates are too unreliable for testing purposes p <- c(2L, 3L) r <- length(p) # setup tensor normal parameters (NO dependence of Fy, set to zero) mu <- array(runif(prod(p), min = -0.5, max = 0.5), dim = p) # = mu_y Deltas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # compute GMLM parameters (NO alphas as Fy is zero) Omegas <- Map(solve, Deltas) eta1 <- mlm(mu, Omegas) # natural parameters of the tensor normal eta_y1 <- eta1 # + mlm(Fy, alphas) = + 0 eta_y2 <- -0.5 * Reduce(`%x%`, rev(Omegas)) # # draw a sample # X <- rtensornorm(n, mu, Deltas, r + 1L) # tensor normal log-partition function given eta_y log.partition <- function(eta_y1, eta_y2) { # eta_y1 as "model" matrix of dimensions `n by p` where `n` might be `1` if (length(eta_y1)^2 == length(eta_y2)) { pp <- length(eta_y1) dim(eta_y1) <- c(1L, pp) } else { eta_y1 <- mat(eta_y1, r + 1L) pp <- ncol(eta_y1) } # treat eta_y2 as square matrix if (!is.matrix(eta_y2)) { dim(eta_y2) <- c(pp, pp) } # evaluate log-partiton function in terms of natural parameters -0.25 * pp * mean((eta_y1 %*% solve(eta_y2)) * eta_y1) - 0.5 * log(det(-2 * eta_y2)) } # (analytic) first and second (un-centered) moment of the tensor normal # Eti = E[ti(X) | Y = y] Et1.ana <- mu Et2.ana <- Reduce(`%x%`, rev(Deltas)) + outer(c(mu), c(mu)) # (numeric) derivative of the log-partition function with respect to the natural # parameters of the exponential family form of the tensor normal Et1.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y1) Et2.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y2) # # (estimated) estimate from sample # Et1.est <- rowMeans(X, dims = r) # Et2.est <- colMeans(rowKronecker(mat(X, r + 1L), mat(X, r + 1L))) # (analytic) convariance blocks of the sufficient statistic # Ctij = Cov(ti(X), tj(X) | Y = y) for i, j = 1, 2 Ct11.ana <- Reduce(`%x%`, rev(Deltas)) Ct12.ana <- local({ # Analytic solution / `vec(mu %x% Delta) + vec(mu' %x% Delta)` ct12 <- c(mu) %x% Reduce(`%x%`, rev(Deltas)) # and symmetrize! dim(ct12) <- rep(prod(p), 3) ct12 + aperm(ct12, c(3, 1, 2)) }) Ct22.ana <- local({ Delta <- Reduce(`%x%`, rev(Deltas)) # Ct11.ana ct22 <- (2 * Delta + 4 * outer(c(mu), c(mu))) %x% Delta # TODO: What does this symmetrization do exactly? And why?! dim(ct22) <- rep(prod(p), 4) (1 / 4) * ( aperm(ct22, c(2, 3, 1, 4)) + aperm(ct22, c(2, 3, 4, 1)) + aperm(ct22, c(3, 2, 1, 4)) + aperm(ct22, c(3, 2, 4, 1)) ) }) # (numeric) second derivative of the log-partition function Ct11.num <- num.deriv2(function(eta_y1) log.partition(eta_y1, eta_y2), eta_y1) Ct12.num <- num.deriv2(log.partition, eta_y1, eta_y2) Ct22.num <- local({ ct22 <- num.deriv2(function(eta_y2) log.partition(eta_y1, eta_y2), eta_y2) # TODO: Why does this need to be symmetrized?! dim(ct22) <- rep(prod(p), 4) (1 / 4) * ( aperm(ct22, c(3, 4, 2, 1)) + aperm(ct22, c(4, 3, 2, 1)) + aperm(ct22, c(3, 4, 1, 2)) + aperm(ct22, c(4, 3, 1, 2)) ) }) # # (estimated) sample estimates of the sufficient statistic convariance blocks # T1 <- mat(X, r + 1L) # T2 <- rowKronecker(T1, T1) # Ct11.est <- cov(T1) # Ct12.est <- cov(T1, T2) # Ct22.est <- cov(T2, T2) tolerance <- 1e-3 expect_equal(c(Et1.ana), c(Et1.num), tolerance = tolerance) # expect_equal(c(Et1.ana), c(Et1.est), tolerance = 0.05, scale = 1) # expect_equal(c(Et1.num), c(Et1.est), tolerance = 0.05, scale = 1) expect_equal(c(Et2.ana), c(Et2.num), tolerance = tolerance) # expect_equal(c(Et2.ana), c(Et2.est), tolerance = 0.05, scale = 1) # expect_equal(c(Et2.num), c(Et2.est), tolerance = 0.05, scale = 1) expect_equal(c(Ct11.ana), c(Ct11.num), tolerance = tolerance) # expect_equal(c(Ct11.ana), c(Ct11.est), tolerance = 0.05, scale = 1) # expect_equal(c(Ct11.num), c(Ct11.est), tolerance = 0.05, scale = 1) expect_equal(c(Ct12.ana), c(Ct12.num), tolerance = tolerance) # expect_equal(c(Ct12.ana), c(Ct12.est), tolerance = 0.05, scale = 1) # expect_equal(c(Ct12.num), c(Ct12.est), tolerance = 0.05, scale = 1) expect_equal(c(Ct22.ana), c(Ct22.num), tolerance = tolerance) # expect_equal(c(Ct22.ana), c(Ct22.est), tolerance = 0.05, scale = 1) # expect_equal(c(Ct22.num), c(Ct22.est), tolerance = 0.05, scale = 1) })