187 lines
6.8 KiB
R
187 lines
6.8 KiB
R
test_that("Tensor-Normal Score", {
|
|
# setup dimensions
|
|
n <- 11L
|
|
p <- c(3L, 5L, 4L)
|
|
q <- c(2L, 7L, 3L)
|
|
|
|
# create "true" GLM parameters
|
|
eta1 <- array(rnorm(prod(p)), dim = p)
|
|
alphas <- Map(matrix, Map(rnorm, p * q), p)
|
|
Omegas <- Map(function(pj) {
|
|
solve(0.5^abs(outer(seq_len(pj), seq_len(pj), `-`)))
|
|
}, p)
|
|
params <- list(eta1, alphas, Omegas)
|
|
|
|
# compute tensor normal parameters from GLM parameters
|
|
Deltas <- Map(solve, Omegas)
|
|
mu <- mlm(eta1, Deltas)
|
|
|
|
# sample some test data
|
|
sample.axis <- length(p) + 1L
|
|
Fy <- array(rnorm(n * prod(q)), dim = c(q, n))
|
|
X <- mlm(Fy, Map(`%*%`, Deltas, alphas)) + rtensornorm(n, mu, Deltas, sample.axis)
|
|
|
|
# Create a GLM family object
|
|
family <- make.gmlm.family("normal")
|
|
|
|
# first unpack the family object
|
|
log.likelihood <- family$log.likelihood
|
|
grad <- family$grad
|
|
|
|
# compare numeric gradient against the analytic gradient provided by the
|
|
# family at the initial parameters
|
|
grad.num <- do.call(function(eta1, alphas, Omegas) {
|
|
list(
|
|
"Dl(eta1)" = num.deriv.function(function(eta1) {
|
|
log.likelihood(X, Fy, eta1, alphas, Omegas)
|
|
}, eta1),
|
|
"Dl(alphas)" = Map(function(j) num.deriv.function(function(alpha_j) {
|
|
alphas[[j]] <- alpha_j
|
|
log.likelihood(X, Fy, eta1, alphas, Omegas)
|
|
}, alphas[[j]]), seq_along(alphas)),
|
|
"Dl(Omegas)" = Map(function(j) num.deriv.function(function(Omega_j) {
|
|
Omegas[[j]] <- Omega_j
|
|
log.likelihood(X, Fy, eta1, alphas, Omegas)
|
|
}, Omegas[[j]], sym = TRUE), seq_along(Omegas))
|
|
)
|
|
}, params)
|
|
grad.ana <- do.call(grad, c(list(X, Fy), params))
|
|
|
|
expect_equal(RMap(c, grad.ana), grad.num, tolerance = 1e-6)
|
|
|
|
# test for correct dimensions
|
|
ana.dims <- list(
|
|
"Dl(eta1)" = p,
|
|
"Dl(alphas)" = Map(c, p, q),
|
|
"Dl(Omegas)" = Map(c, p, p)
|
|
)
|
|
|
|
expect_identical(RMap(dim, grad.ana), ana.dims)
|
|
})
|
|
|
|
test_that("Tensor-Normal moments of the sufficient statistic t(X)", {
|
|
# config
|
|
# # (estimated) sample size for sample estimates
|
|
# n <- 250000 # sample estimates are too unreliable for testing purposes
|
|
p <- c(2L, 3L)
|
|
r <- length(p)
|
|
|
|
# setup tensor normal parameters (NO dependence of Fy, set to zero)
|
|
mu <- array(runif(prod(p), min = -0.5, max = 0.5), dim = p) # = mu_y
|
|
Deltas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p)
|
|
|
|
# compute GMLM parameters (NO alphas as Fy is zero)
|
|
Omegas <- Map(solve, Deltas)
|
|
eta1 <- mlm(mu, Omegas)
|
|
|
|
# natural parameters of the tensor normal
|
|
eta_y1 <- eta1 # + mlm(Fy, alphas) = + 0
|
|
eta_y2 <- -0.5 * Reduce(`%x%`, rev(Omegas))
|
|
|
|
# # draw a sample
|
|
# X <- rtensornorm(n, mu, Deltas, r + 1L)
|
|
|
|
# tensor normal log-partition function given eta_y
|
|
log.partition <- function(eta_y1, eta_y2) {
|
|
# eta_y1 as "model" matrix of dimensions `n by p` where `n` might be `1`
|
|
if (length(eta_y1)^2 == length(eta_y2)) {
|
|
pp <- length(eta_y1)
|
|
dim(eta_y1) <- c(1L, pp)
|
|
} else {
|
|
eta_y1 <- mat(eta_y1, r + 1L)
|
|
pp <- ncol(eta_y1)
|
|
}
|
|
|
|
# treat eta_y2 as square matrix
|
|
if (!is.matrix(eta_y2)) {
|
|
dim(eta_y2) <- c(pp, pp)
|
|
}
|
|
|
|
# evaluate log-partiton function in terms of natural parameters
|
|
-0.25 * pp * mean((eta_y1 %*% solve(eta_y2)) * eta_y1) - 0.5 * log(det(-2 * eta_y2))
|
|
}
|
|
|
|
|
|
# (analytic) first and second (un-centered) moment of the tensor normal
|
|
# Eti = E[ti(X) | Y = y]
|
|
Et1.ana <- mu
|
|
Et2.ana <- Reduce(`%x%`, rev(Deltas)) + outer(c(mu), c(mu))
|
|
|
|
# (numeric) derivative of the log-partition function with respect to the natural
|
|
# parameters of the exponential family form of the tensor normal
|
|
Et1.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y1)
|
|
Et2.num <- num.deriv(log.partition(eta_y1, eta_y2), eta_y2)
|
|
|
|
# # (estimated) estimate from sample
|
|
# Et1.est <- rowMeans(X, dims = r)
|
|
# Et2.est <- colMeans(rowKronecker(mat(X, r + 1L), mat(X, r + 1L)))
|
|
|
|
# (analytic) convariance blocks of the sufficient statistic
|
|
# Ctij = Cov(ti(X), tj(X) | Y = y) for i, j = 1, 2
|
|
Ct11.ana <- Reduce(`%x%`, rev(Deltas))
|
|
Ct12.ana <- local({
|
|
# Analytic solution / `vec(mu %x% Delta) + vec(mu' %x% Delta)`
|
|
ct12 <- c(mu) %x% Reduce(`%x%`, rev(Deltas))
|
|
# and symmetrize!
|
|
dim(ct12) <- rep(prod(p), 3)
|
|
ct12 + aperm(ct12, c(3, 1, 2))
|
|
})
|
|
Ct22.ana <- local({
|
|
Delta <- Reduce(`%x%`, rev(Deltas)) # Ct11.ana
|
|
ct22 <- (2 * Delta + 4 * outer(c(mu), c(mu))) %x% Delta
|
|
|
|
# TODO: What does this symmetrization do exactly? And why?!
|
|
dim(ct22) <- rep(prod(p), 4)
|
|
(1 / 4) * (
|
|
aperm(ct22, c(2, 3, 1, 4)) +
|
|
aperm(ct22, c(2, 3, 4, 1)) +
|
|
aperm(ct22, c(3, 2, 1, 4)) +
|
|
aperm(ct22, c(3, 2, 4, 1))
|
|
)
|
|
})
|
|
|
|
# (numeric) second derivative of the log-partition function
|
|
Ct11.num <- num.deriv2(function(eta_y1) log.partition(eta_y1, eta_y2), eta_y1)
|
|
Ct12.num <- num.deriv2(log.partition, eta_y1, eta_y2)
|
|
Ct22.num <- local({
|
|
ct22 <- num.deriv2(function(eta_y2) log.partition(eta_y1, eta_y2), eta_y2)
|
|
# TODO: Why does this need to be symmetrized?!
|
|
dim(ct22) <- rep(prod(p), 4)
|
|
(1 / 4) * (
|
|
aperm(ct22, c(3, 4, 2, 1)) +
|
|
aperm(ct22, c(4, 3, 2, 1)) +
|
|
aperm(ct22, c(3, 4, 1, 2)) +
|
|
aperm(ct22, c(4, 3, 1, 2))
|
|
)
|
|
})
|
|
|
|
# # (estimated) sample estimates of the sufficient statistic convariance blocks
|
|
# T1 <- mat(X, r + 1L)
|
|
# T2 <- rowKronecker(T1, T1)
|
|
# Ct11.est <- cov(T1)
|
|
# Ct12.est <- cov(T1, T2)
|
|
# Ct22.est <- cov(T2, T2)
|
|
|
|
|
|
tolerance <- 1e-3
|
|
expect_equal(c(Et1.ana), c(Et1.num), tolerance = tolerance)
|
|
# expect_equal(c(Et1.ana), c(Et1.est), tolerance = 0.05, scale = 1)
|
|
# expect_equal(c(Et1.num), c(Et1.est), tolerance = 0.05, scale = 1)
|
|
|
|
expect_equal(c(Et2.ana), c(Et2.num), tolerance = tolerance)
|
|
# expect_equal(c(Et2.ana), c(Et2.est), tolerance = 0.05, scale = 1)
|
|
# expect_equal(c(Et2.num), c(Et2.est), tolerance = 0.05, scale = 1)
|
|
|
|
expect_equal(c(Ct11.ana), c(Ct11.num), tolerance = tolerance)
|
|
# expect_equal(c(Ct11.ana), c(Ct11.est), tolerance = 0.05, scale = 1)
|
|
# expect_equal(c(Ct11.num), c(Ct11.est), tolerance = 0.05, scale = 1)
|
|
|
|
expect_equal(c(Ct12.ana), c(Ct12.num), tolerance = tolerance)
|
|
# expect_equal(c(Ct12.ana), c(Ct12.est), tolerance = 0.05, scale = 1)
|
|
# expect_equal(c(Ct12.num), c(Ct12.est), tolerance = 0.05, scale = 1)
|
|
|
|
expect_equal(c(Ct22.ana), c(Ct22.num), tolerance = tolerance)
|
|
# expect_equal(c(Ct22.ana), c(Ct22.est), tolerance = 0.05, scale = 1)
|
|
# expect_equal(c(Ct22.num), c(Ct22.est), tolerance = 0.05, scale = 1)
|
|
})
|