ising_conditional_fit <- function(X, Y, ..., callback = NULL) { # get and check dimensions n <- if (is.null(nrow(Y))) length(Y) else nrow(Y) p <- ncol(X) q <- if (is.null(ncol(Y))) attr(Y, "p") else ncol(Y) # check dimensions stopifnot(nrow(X) == n) ### Initial value estimate # SVD of the predictor covariance estimate `Sigma = U_Sigma D_Sigma U_Sigma'` SigmaSVD <- La.svd(cov(X), min(p, q), 0) # Estimate `pi` as the single and two way effect means (approx conditional # probabilities through the marginal probability estimate) pi <- mean.mvbinary(Y, twoway = TRUE) # convert conditional probabilities into natural parameters (log-odds) theta <- ising_theta_from_cond_prob(pi) # convert natural parameters `theta` to square matrix form `Theta` Theta <- matrix(NA, q, q) Theta[lower.tri(diag(q), diag = TRUE)] <- theta Theta[upper.tri(diag(q))] <- t(Theta)[upper.tri(diag(q))] Theta <- (0.5 + diag(0.5, q, q)) * Theta # SVD of `Theta` ThetaSVD <- La.svd(Theta, min(p, q), 0) # Finally, initial `alpha` parameter estimate # `alpha_0 = U_Sigma D_Sigma^-1/2 D_Theta^1/2 U_Theta'` alpha <- with(list(S = SigmaSVD, T = ThetaSVD), { S$u %*% diag(sqrt(T$d[seq_len(min(p, q))] / S$d[seq_len(min(p, q))])) %*% t(T$u) }) ### Optimize log-likelihood for `alpha` tensorPredictors::NAGD( fun.loss = function(alpha) -ising_conditional_log_likelihood(alpha, X, Y), fun.grad = function(alpha) -ising_conditional_score(alpha, X, Y), params = alpha, ..., callback = callback ) } n <- 1000 p <- 7 q <- 9 alpha.true <- matrix(rnorm(p * q), p) X <- matrix(runif(n * p), n) theta <- function(alpha, x) { Theta <- crossprod(crossprod(x, alpha)) diag(Theta) <- 0.5 * diag(Theta) 2 * Theta[lower.tri(diag(ncol(alpha)), diag = TRUE)] } # sample Y ~ P( . | X = x) for x in X Y <- apply(X, 1, function(x) ising_sample(1, theta(alpha.true, x))) attr(Y, "p") <- as.integer(q) alpha.est <- ising_conditional_fit(X, Y, callback = function(iter, alpha) { cat(sprintf( "%4d - diff: %12.4f - ll: %12.4f\n", iter, min(norm(alpha - alpha.true, "F"), norm(alpha + alpha.true, "F")), ising_conditional_log_likelihood(alpha, X, Y) )) }) ## par(mfrow = c(3, 3), mar = c(2, 2, 1, 1)) for (i in 1:9) { tensorPredictors::matrixImage( flip(vech.inv(theta(alpha.true, X[i, ]))), main = paste(round(range(theta(alpha.true, X[i, ])), 3), collapse = " ") ) } par(mfrow = c(3, 3), mar = c(2, 2, 1, 1)) for (i in 1:9) { P <- flip(vech.inv(ising_cond_probs(theta(alpha.true, X[i, ])))) tensorPredictors::matrixImage( round(P, 5), P < .Machine$double.eps | 1 < P + .Machine$double.eps ) } par(mfrow = c(3, 3), mar = c(2, 2, 1, 1)) for (i in 1:3) { tensorPredictors::matrixImage( alpha.true ) tensorPredictors::matrixImage( flip(vech.inv(theta(alpha.true, X[i, ]))), main = paste(round(range(theta(alpha.true, X[i, ])), 3), collapse = " ") ) P <- flip(vech.inv(ising_cond_probs(theta(alpha.true, X[i, ])))) tensorPredictors::matrixImage( round(P, 5), P < .Machine$double.eps | 1 < P + .Machine$double.eps ) }