tensor_predictors/tensorPredictors/tests/testthat/test-TensorNormalFamily.R

187 lines
6.8 KiB
R

test_that("Tensor-Normal Score", {
# setup dimensions
n <- 11L
p <- c(3L, 5L, 4L)
q <- c(2L, 7L, 3L)
# create "true" GLM parameters
eta1 <- array(rnorm(prod(p)), dim = p)
alphas <- Map(matrix, Map(rnorm, p * q), p)
Omegas <- Map(function(pj) {
solve(0.5^abs(outer(seq_len(pj), seq_len(pj), `-`)))
}, p)
params <- list(eta1, alphas, Omegas)
# compute tensor normal parameters from GLM parameters
Deltas <- Map(solve, Omegas)
mu <- mlm(eta1, Deltas)
# sample some test data
sample.axis <- length(p) + 1L
Fy <- array(rnorm(n * prod(q)), dim = c(q, n))
X <- mlm(Fy, Map(`%*%`, Deltas, alphas)) + rtensornorm(n, mu, Deltas, sample.axis)
# Create a GLM family object
family <- make.gmlm.family("normal")
# first unpack the family object
log.likelihood <- family$log.likelihood
grad <- family$grad
# compare numeric gradient against the analytic gradient provided by the
# family at the initial parameters
grad.num <- do.call(function(eta1, alphas, Omegas) {
list(
"Dl(eta1)" = num.deriv.function(function(eta1) {
log.likelihood(X, Fy, eta1, alphas, Omegas)
}, eta1),
"Dl(alphas)" = Map(function(j) num.deriv.function(function(alpha_j) {
alphas[[j]] <- alpha_j
log.likelihood(X, Fy, eta1, alphas, Omegas)
}, alphas[[j]]), seq_along(alphas)),
"Dl(Omegas)" = Map(function(j) num.deriv.function(function(Omega_j) {
Omegas[[j]] <- Omega_j
log.likelihood(X, Fy, eta1, alphas, Omegas)
}, Omegas[[j]], sym = TRUE), seq_along(Omegas))
)
}, params)
grad.ana <- do.call(grad, c(list(X, Fy), params))
expect_equal(RMap(c, grad.ana), grad.num, tolerance = 1e-6)
# test for correct dimensions
ana.dims <- list(
"Dl(eta1)" = p,
"Dl(alphas)" = Map(c, p, q),
"Dl(Omegas)" = Map(c, p, p)
)
expect_identical(RMap(dim, grad.ana), ana.dims)
})
test_that("Tensor-Normal moments of the sufficient statistic t(X)", {
# config
# # (estimated) sample size for sample estimates
# n <- 250000 # sample estimates are too unreliable for testing purposes
p <- c(2L, 3L)
r <- length(p)
# setup tensor normal parameters (NO dependence of Fy, set to zero)
mu <- array(runif(prod(p), min = -0.5, max = 0.5), dim = p) # = mu_y
Deltas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p)
# compute GMLM parameters (NO alphas as Fy is zero)
Omegas <- Map(solve, Deltas)
eta1 <- mlm(mu, Omegas)
# natural parameters of the tensor normal
eta_y1 <- eta1 # + mlm(Fy, alphas) = + 0
eta_y2 <- -0.5 * Reduce(`%x%`, rev(Omegas))
# # draw a sample
# X <- rtensornorm(n, mu, Deltas, r + 1L)
# tensor normal log-partition function given eta_y
log.partition <- function(eta_y1, eta_y2) {
# eta_y1 as "model" matrix of dimensions `n by p` where `n` might be `1`
if (length(eta_y1)^2 == length(eta_y2)) {
pp <- length(eta_y1)
dim(eta_y1) <- c(1L, pp)
} else {
eta_y1 <- mat(eta_y1, r + 1L)
pp <- ncol(eta_y1)
}
# treat eta_y2 as square matrix
if (!is.matrix(eta_y2)) {
dim(eta_y2) <- c(pp, pp)
}
# evaluate log-partiton function in terms of natural parameters
-0.25 * pp * mean((eta_y1 %*% solve(eta_y2)) * eta_y1) - 0.5 * log(det(-2 * eta_y2))
}
# (analytic) first and second (un-centered) moment of the tensor normal
# Eti = E[ti(X) | Y = y]
Et1.ana <- mu
Et2.ana <- Reduce(`%x%`, rev(Deltas)) + outer(c(mu), c(mu))
# (numeric) derivative of the log-partition function with respect to the natural
# parameters of the exponential family form of the tensor normal
Et1.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y1)
Et2.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y2)
# # (estimated) estimate from sample
# Et1.est <- rowMeans(X, dims = r)
# Et2.est <- colMeans(rowKronecker(mat(X, r + 1L), mat(X, r + 1L)))
# (analytic) convariance blocks of the sufficient statistic
# Ctij = Cov(ti(X), tj(X) | Y = y) for i, j = 1, 2
Ct11.ana <- Reduce(`%x%`, rev(Deltas))
Ct12.ana <- local({
# Analytic solution / `vec(mu %x% Delta) + vec(mu' %x% Delta)`
ct12 <- c(mu) %x% Reduce(`%x%`, rev(Deltas))
# and symmetrize!
dim(ct12) <- rep(prod(p), 3)
ct12 + aperm(ct12, c(3, 1, 2))
})
Ct22.ana <- local({
Delta <- Reduce(`%x%`, rev(Deltas)) # Ct11.ana
ct22 <- (2 * Delta + 4 * outer(c(mu), c(mu))) %x% Delta
# TODO: What does this symmetrization do exactly? And why?!
dim(ct22) <- rep(prod(p), 4)
(1 / 4) * (
aperm(ct22, c(2, 3, 1, 4)) +
aperm(ct22, c(2, 3, 4, 1)) +
aperm(ct22, c(3, 2, 1, 4)) +
aperm(ct22, c(3, 2, 4, 1))
)
})
# (numeric) second derivative of the log-partition function
Ct11.num <- num.deriv2(function(eta_y1) log.partition(eta_y1, eta_y2), eta_y1)
Ct12.num <- num.deriv2(log.partition, eta_y1, eta_y2)
Ct22.num <- local({
ct22 <- num.deriv2(function(eta_y2) log.partition(eta_y1, eta_y2), eta_y2)
# TODO: Why does this need to be symmetrized?!
dim(ct22) <- rep(prod(p), 4)
(1 / 4) * (
aperm(ct22, c(3, 4, 2, 1)) +
aperm(ct22, c(4, 3, 2, 1)) +
aperm(ct22, c(3, 4, 1, 2)) +
aperm(ct22, c(4, 3, 1, 2))
)
})
# # (estimated) sample estimates of the sufficient statistic convariance blocks
# T1 <- mat(X, r + 1L)
# T2 <- rowKronecker(T1, T1)
# Ct11.est <- cov(T1)
# Ct12.est <- cov(T1, T2)
# Ct22.est <- cov(T2, T2)
tolerance <- 1e-3
expect_equal(c(Et1.ana), c(Et1.num), tolerance = tolerance)
# expect_equal(c(Et1.ana), c(Et1.est), tolerance = 0.05, scale = 1)
# expect_equal(c(Et1.num), c(Et1.est), tolerance = 0.05, scale = 1)
expect_equal(c(Et2.ana), c(Et2.num), tolerance = tolerance)
# expect_equal(c(Et2.ana), c(Et2.est), tolerance = 0.05, scale = 1)
# expect_equal(c(Et2.num), c(Et2.est), tolerance = 0.05, scale = 1)
expect_equal(c(Ct11.ana), c(Ct11.num), tolerance = tolerance)
# expect_equal(c(Ct11.ana), c(Ct11.est), tolerance = 0.05, scale = 1)
# expect_equal(c(Ct11.num), c(Ct11.est), tolerance = 0.05, scale = 1)
expect_equal(c(Ct12.ana), c(Ct12.num), tolerance = tolerance)
# expect_equal(c(Ct12.ana), c(Ct12.est), tolerance = 0.05, scale = 1)
# expect_equal(c(Ct12.num), c(Ct12.est), tolerance = 0.05, scale = 1)
expect_equal(c(Ct22.ana), c(Ct22.num), tolerance = tolerance)
# expect_equal(c(Ct22.ana), c(Ct22.est), tolerance = 0.05, scale = 1)
# expect_equal(c(Ct22.num), c(Ct22.est), tolerance = 0.05, scale = 1)
})