364 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			364 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
library(mvbernoulli)
 | 
						|
 | 
						|
printMVBinary <- function(Y) {
 | 
						|
    Y <- array(as.integer(Y), dim = dim(Y))
 | 
						|
    eventIndex <- seq_len(nrow(Y))
 | 
						|
    eventNr <- apply(Y, 1, function(y) sum(y * 2^(rev(seq_len(p)) - 1)))
 | 
						|
    dimnames(Y) <- list(
 | 
						|
        "Index/Event" = paste(eventIndex, eventNr, sep = "/"),
 | 
						|
        "Bit Index" = as.character(rev(seq_len(p)) - 1)
 | 
						|
    )
 | 
						|
    print.table(Y, zero.print = ".")
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
n <- 100
 | 
						|
p <- 6
 | 
						|
 | 
						|
(theta <- rnorm(p * (p + 1) / 2))
 | 
						|
 | 
						|
pi <- ising_cond_probs(theta)
 | 
						|
all.equal(
 | 
						|
    theta,
 | 
						|
    ising_theta_from_cond_prob(pi)
 | 
						|
)
 | 
						|
 | 
						|
tensorPredictors::matrixImage({
 | 
						|
    Theta <- matrix(NA, p, p)
 | 
						|
    Theta[lower.tri(Theta, diag = TRUE)] <- theta
 | 
						|
    Theta[upper.tri(Theta)] <- t(Theta)[upper.tri(Theta)]
 | 
						|
    Theta
 | 
						|
}, main = expression(paste("natural Params ", Theta)))
 | 
						|
tensorPredictors::matrixImage({
 | 
						|
    PI <- matrix(NA, p, p)
 | 
						|
    PI[lower.tri(PI, diag = TRUE)] <- ising_cond_probs(theta)
 | 
						|
    PI[upper.tri(PI)] <- t(PI)[upper.tri(PI)]
 | 
						|
    PI
 | 
						|
}, main = expression(paste("Conditional Probs. P(", Y[i], " = ", Y[j], " = 1", " | ", Y[-i - j], " = ", 0, ")")))
 | 
						|
tensorPredictors::matrixImage({
 | 
						|
    MAR <- matrix(NA, p, p)
 | 
						|
    MAR[lower.tri(MAR, diag = TRUE)] <- ising_marginal_probs(theta)
 | 
						|
    MAR[upper.tri(MAR)] <- t(MAR)[upper.tri(MAR)]
 | 
						|
    MAR
 | 
						|
}, main = expression(paste("Marginal Probs. P(", Y[i], " = ", Y[j], " = 1)")))
 | 
						|
 | 
						|
Y <- matrix(sample(c(TRUE, FALSE), n * p, replace = TRUE), n)
 | 
						|
printMVBinary(Y)
 | 
						|
 | 
						|
 | 
						|
allY <- function(p) {
 | 
						|
    events <- c(FALSE, TRUE)
 | 
						|
    for (. in seq_len(p - 1)) {
 | 
						|
        events <- rbind(
 | 
						|
            cbind(FALSE, events),
 | 
						|
            cbind( TRUE, events)
 | 
						|
        )
 | 
						|
    }
 | 
						|
    events
 | 
						|
}
 | 
						|
printMVBinary(allY(p))
 | 
						|
 | 
						|
G <- ising_score(theta, Y)
 | 
						|
# Numeric gradiend
 | 
						|
log.likelihood <- function(theta, Y) {
 | 
						|
    p <- ncol(Y)
 | 
						|
    # check sizes
 | 
						|
    stopifnot(p * (p + 1) == 2 * length(theta))
 | 
						|
    # and reverse column order
 | 
						|
    # this is needed as internally the left are the high bits (high index) and
 | 
						|
    # the right are the low bits (low index) which means for matching indices
 | 
						|
    # we need to reverse the column order
 | 
						|
    Y <- Y[, rev(seq_len(p)), drop = FALSE]
 | 
						|
    # calc scaling factor
 | 
						|
    sum_0 <- sum(exp(
 | 
						|
        theta %*% apply(allY(p), 1, function(y) outer(y, y, `&`))[lower.tri(diag(p), diag = TRUE), ]
 | 
						|
    ))
 | 
						|
    # evaluate log likelihood
 | 
						|
    -log(sum_0) + mean(
 | 
						|
        theta %*% apply(Y, 1, function(y) outer(y, y, `&`))[lower.tri(diag(p), diag = TRUE), ]
 | 
						|
    )
 | 
						|
}
 | 
						|
 | 
						|
G.num <- local({
 | 
						|
    h <- 1e-6
 | 
						|
    mapply(function(i) {
 | 
						|
        delta <- h * (seq_along(theta) == i)
 | 
						|
        (log.likelihood(theta + delta, Y) - log.likelihood(theta - delta, Y)) / (2 * h)
 | 
						|
    }, seq_along(theta))
 | 
						|
})
 | 
						|
 | 
						|
data.frame(G, G.num)
 | 
						|
 | 
						|
 | 
						|
for (n in c(2, 7, 12, 13, 14)) {
 | 
						|
    for (p in 1:4) {
 | 
						|
        cat(sprintf("%6d / %6d\n", sum(mapply(choose, n, 0:p)), nrSubSets(n, p)))
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
p <- 5
 | 
						|
(A <- tcrossprod(apply(allY(p), 1, function(y) outer(y, y, `&`)[lower.tri(diag(p), diag = TRUE)])))
 | 
						|
 | 
						|
print.table(B <- ising_fisher_info(theta), zero.print = ".")
 | 
						|
 | 
						|
all.equal(A[lower.tri(A, TRUE)], B[lower.tri(B, TRUE)])
 | 
						|
 | 
						|
ising_fisher_info.R <- function(theta, p) {
 | 
						|
    stopifnot(2 * length(theta) == p * (p + 1))
 | 
						|
 | 
						|
    Y <- allY(p)
 | 
						|
 | 
						|
    # Ising model scaling factor for `P(Y = y) = p_0 exp(vech(y y')' theta)`
 | 
						|
    sum_0 <- sum(apply(Y, 1, function(y) {
 | 
						|
        vechYY <- outer(y, y, `&`)[lower.tri(diag(p), diag = TRUE)]
 | 
						|
        exp(sum(vechYY * theta))
 | 
						|
    }))
 | 
						|
    p_0 <- 1 / sum_0
 | 
						|
 | 
						|
    # E[vech(Y Y')]
 | 
						|
    EvechYY <- p_0 * rowSums(apply(Y, 1, function(y) {
 | 
						|
        vechYY <- outer(y, y, `&`)[lower.tri(diag(p), diag = TRUE)]
 | 
						|
        exp(sum(vechYY * theta)) * vechYY
 | 
						|
    }))
 | 
						|
 | 
						|
    # E[vech(Y Y') vech(Y Y')']
 | 
						|
    EvechYYvechYY <- p_0 * matrix(rowSums(apply(Y, 1, function(y) {
 | 
						|
        vechYY <- outer(y, y, `&`)[lower.tri(diag(p), diag = TRUE)]
 | 
						|
        exp(sum(vechYY * theta)) * outer(vechYY, vechYY)
 | 
						|
    })), p * (p + 1) / 2)
 | 
						|
 | 
						|
    # Cov(vech(Y Y'), vech(Y Y')) = E[vech(Y Y') vech(Y Y')'] - E[vech(Y Y')] E[vech(Y Y')]'
 | 
						|
    EvechYYvechYY - outer(EvechYY, EvechYY)
 | 
						|
}
 | 
						|
 | 
						|
all.equal(
 | 
						|
    ising_fisher_info.R(theta, p),
 | 
						|
    ising_fisher_info(theta)
 | 
						|
)
 | 
						|
 | 
						|
p <- 10
 | 
						|
theta <- rnorm(p * (p + 1) / 2)
 | 
						|
microbenchmark::microbenchmark(
 | 
						|
    ising_fisher_info.R(theta, p),
 | 
						|
    ising_fisher_info(theta)
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
ising_fisher_scoring <- function(Y) {
 | 
						|
    # initial estimate (guess)
 | 
						|
    ltri <- which(lower.tri(diag(p), diag = TRUE))
 | 
						|
    theta <- ising_theta_from_cond_prob(rowMeans(apply(Y, 1, function(y) outer(y, y, `&`)[ltri])))
 | 
						|
 | 
						|
    print(theta)
 | 
						|
 | 
						|
    ll <- log.likelihood(theta, Y)
 | 
						|
 | 
						|
    # iterate Fisher scoring
 | 
						|
    for (iter in 1:20) {
 | 
						|
        theta <- theta + solve(ising_fisher_info(theta), ising_score(theta, Y))
 | 
						|
 | 
						|
        ll <- c(ll, log.likelihood(theta, Y))
 | 
						|
 | 
						|
        cat("ll: ", tail(ll, 1), "\n")
 | 
						|
    }
 | 
						|
 | 
						|
    theta
 | 
						|
}
 | 
						|
ising_fisher_scoring(Y)
 | 
						|
 | 
						|
 | 
						|
 | 
						|
microbenchmark::microbenchmark(
 | 
						|
    cov.mvbinary(Y),        # double copy (TODO: change MVBinary conversion/SEXP binding)
 | 
						|
    cov(Y),                 # call the next expr. through default args
 | 
						|
    .Call(stats:::C_cov, Y, NULL, na.method = 4L, FALSE)
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
################################################################################
 | 
						|
###                         Conditional Ising Model                          ###
 | 
						|
################################################################################
 | 
						|
n <- 1000
 | 
						|
p <- 10
 | 
						|
q <- 10
 | 
						|
 | 
						|
alpha <- matrix(rnorm(p * q), p)
 | 
						|
X <- matrix(rnorm(n * p), n)
 | 
						|
theta <- function(alpha, x) {
 | 
						|
    Theta <- crossprod(crossprod(x, alpha))
 | 
						|
    diag(Theta) <- 0.5 * diag(Theta)
 | 
						|
    2 * Theta[lower.tri(diag(ncol(alpha)), diag = TRUE)]
 | 
						|
}
 | 
						|
# sample Y ~ P( . | X = x) for x in X
 | 
						|
system.time(Y <- apply(X, 1, function(x) ising_sample(1, theta(alpha, x))))
 | 
						|
attr(Y, "p") <- as.integer(q)
 | 
						|
class(Y) <- "mvbinary"
 | 
						|
 | 
						|
# For the numeric gradient comparison
 | 
						|
allY <- function(p) {
 | 
						|
    events <- c(FALSE, TRUE)
 | 
						|
    for (. in seq_len(p - 1)) {
 | 
						|
        events <- rbind(
 | 
						|
            cbind(FALSE, events),
 | 
						|
            cbind( TRUE, events)
 | 
						|
        )
 | 
						|
    }
 | 
						|
    events
 | 
						|
}
 | 
						|
ising_conditional_log_likelihood.R <- function(alpha, X, Y) {
 | 
						|
    # convert Y to a binary matrix
 | 
						|
    Y <- as.mvbmatrix(Y)
 | 
						|
    #retrieve dimensions
 | 
						|
    n <- nrow(X)
 | 
						|
    p <- ncol(X)
 | 
						|
    q <- ncol(Y)
 | 
						|
    # check dimensions
 | 
						|
    stopifnot({
 | 
						|
        nrow(Y) == n
 | 
						|
        all(dim(alpha) == c(p, q))
 | 
						|
    })
 | 
						|
 | 
						|
    # setup reused internal variables
 | 
						|
    vech_index <- which(lower.tri(diag(q), diag = TRUE))
 | 
						|
    aaY <- apply(allY(q), 1, function(y) outer(y, y, `&`))[vech_index, ]
 | 
						|
 | 
						|
    # sum over all observations
 | 
						|
    ll <- 0
 | 
						|
    for (i in seq_len(n)) {
 | 
						|
        # Theta = alpha' x x' alpha
 | 
						|
        Theta <- crossprod(crossprod(X[i, ], alpha))
 | 
						|
        # theta = vech((2 1_q 1_q' - I_q) o Theta)
 | 
						|
        theta <- ((2 - diag(q)) * Theta)[vech_index]
 | 
						|
 | 
						|
        # scaling factor `p_0^-1 = sum_y exp(vech(y y')' theta)`
 | 
						|
        sum_0 <- sum(exp(theta %*% aaY))
 | 
						|
 | 
						|
        print(log(sum_0))
 | 
						|
 | 
						|
        # evaluate log likelihood
 | 
						|
        ll <- ll + sum(theta * outer(Y[i, ], Y[i, ], `&`)[vech_index]) - log(sum_0)
 | 
						|
    }
 | 
						|
 | 
						|
    ll / n
 | 
						|
}
 | 
						|
# numeric gradiend (score of the log-likelihood)
 | 
						|
ising_conditional_score.R <- function(alpha, X, Y, h = 1e-6) {
 | 
						|
    matrix(mapply(function(i) {
 | 
						|
        delta <- h * (seq_along(alpha) == i)
 | 
						|
        (ising_conditional_log_likelihood.R(alpha + delta, X, Y) -
 | 
						|
            ising_conditional_log_likelihood.R(alpha - delta, X, Y)) / (2 * h)
 | 
						|
    }, seq_along(alpha)), nrow(alpha))
 | 
						|
}
 | 
						|
 | 
						|
stopifnot(all.equal(
 | 
						|
    ising_conditional_log_likelihood.R(alpha, X, Y),
 | 
						|
    ising_conditional_log_likelihood(alpha, X, Y)
 | 
						|
))
 | 
						|
microbenchmark::microbenchmark(
 | 
						|
    ising_conditional_log_likelihood.R(alpha, X, Y),
 | 
						|
    ising_conditional_log_likelihood(alpha, X, Y)
 | 
						|
)
 | 
						|
 | 
						|
stopifnot(all.equal(
 | 
						|
    ising_conditional_score.R(alpha, X, Y),
 | 
						|
    ising_conditional_score(alpha, X, Y)
 | 
						|
))
 | 
						|
microbenchmark::microbenchmark(
 | 
						|
    ising_conditional_score.R(alpha, X, Y),
 | 
						|
    ising_conditional_score(alpha, X, Y)
 | 
						|
)
 | 
						|
 | 
						|
################################################################################
 | 
						|
###                       Fit Conditional Ising Model                        ###
 | 
						|
################################################################################
 | 
						|
 | 
						|
ising_conditional_fit <- function(X, Y, ..., callback = NULL) {
 | 
						|
    # get and check dimensions
 | 
						|
    n <- if (is.null(nrow(Y))) length(Y) else nrow(Y)
 | 
						|
    p <- ncol(X)
 | 
						|
    q <- if (is.null(ncol(Y))) attr(Y, "p") else ncol(Y)
 | 
						|
    # check dimensions
 | 
						|
    stopifnot(nrow(X) == n)
 | 
						|
 | 
						|
    ### Initial value estimate
 | 
						|
    # SVD of the predictor covariance estimate `Sigma = U_Sigma D_Sigma U_Sigma'`
 | 
						|
    SigmaSVD <- La.svd(cov(X), min(p, q), 0)
 | 
						|
 | 
						|
    # Estimate `pi` as the single and two way effect means (approx conditional
 | 
						|
    # probabilities through the marginal probability estimate)
 | 
						|
    pi <- mean.mvbinary(Y, twoway = TRUE)
 | 
						|
 | 
						|
    # convert conditional probabilities into natural parameters (log-odds)
 | 
						|
    theta <- ising_theta_from_cond_prob(pi)
 | 
						|
 | 
						|
    # convert natural parameters `theta` to square matrix form `Theta`
 | 
						|
    Theta <- matrix(NA, q, q)
 | 
						|
    Theta[lower.tri(diag(q), diag = TRUE)] <- theta
 | 
						|
    Theta[upper.tri(diag(q))] <- t(Theta)[upper.tri(diag(q))]
 | 
						|
    Theta <- (0.5 + diag(0.5, q, q)) * Theta
 | 
						|
 | 
						|
    # SVD of `Theta`
 | 
						|
    ThetaSVD <- La.svd(Theta, min(p, q), 0)
 | 
						|
 | 
						|
    # Finally, initial `alpha` parameter estimate
 | 
						|
    #   `alpha_0 = U_Sigma D_Sigma^-1/2 D_Theta^1/2 U_Theta'`
 | 
						|
    alpha <- with(list(S = SigmaSVD, T = ThetaSVD), {
 | 
						|
        S$u %*% diag(sqrt(T$d[seq_len(min(p, q))] / S$d[seq_len(min(p, q))])) %*% t(T$u)
 | 
						|
    })
 | 
						|
 | 
						|
    ### Optimize log-likelihood for `alpha`
 | 
						|
    tensorPredictors::NAGD(
 | 
						|
        fun.loss = function(alpha) -ising_conditional_log_likelihood(alpha, X, Y),
 | 
						|
        fun.grad = function(alpha) -ising_conditional_score(alpha, X, Y),
 | 
						|
        params = alpha,
 | 
						|
        ...,
 | 
						|
        callback = callback
 | 
						|
    )
 | 
						|
}
 | 
						|
 | 
						|
n <- 1000
 | 
						|
p <- 7
 | 
						|
q <- 9
 | 
						|
 | 
						|
alpha.true <- matrix(rnorm(p * q), p)
 | 
						|
X <- matrix(rnorm(n * p), n)
 | 
						|
theta <- function(alpha, x) {
 | 
						|
    Theta <- crossprod(crossprod(x, alpha))
 | 
						|
    diag(Theta) <- 0.5 * diag(Theta)
 | 
						|
    2 * Theta[lower.tri(diag(ncol(alpha)), diag = TRUE)]
 | 
						|
}
 | 
						|
# sample Y ~ P( . | X = x) for x in X
 | 
						|
Y <- apply(X, 1, function(x) ising_sample(1, theta(alpha.true, x)))
 | 
						|
attr(Y, "p") <- as.integer(q)
 | 
						|
 | 
						|
max.iter <- 100L
 | 
						|
ising_conditional_fit(X, Y, max.iter = max.iter, callback = function(iter, alpha) {
 | 
						|
    cat(sprintf(
 | 
						|
        "%4d/%4d - diff: %12.4f - ll: %12.4f\n",
 | 
						|
        iter, max.iter,
 | 
						|
        min(norm(alpha - alpha.true, "F"), norm(alpha + alpha.true, "F")),
 | 
						|
        ising_conditional_log_likelihood(alpha, X, Y)
 | 
						|
    ))
 | 
						|
})
 | 
						|
 | 
						|
 | 
						|
ising_conditional_log_likelihood(alpha.true, X, Y)
 | 
						|
ising_conditional_log_likelihood.R(alpha.true, X, Y)
 | 
						|
 | 
						|
for (. in 1:10) {
 | 
						|
    print(ising_conditional_log_likelihood(matrix(rnorm(p * q), p, q), X, Y))
 | 
						|
}
 | 
						|
 | 
						|
YY <- as.mvbmatrix(Y)
 | 
						|
microbenchmark::microbenchmark(
 | 
						|
    mean.mvbinary(Y, twoway = TRUE),
 | 
						|
    rowMeans(apply(YY, 1, function(y) outer(y, y, `&`)))[lower.tri(diag(q), diag = TRUE)]
 | 
						|
)
 | 
						|
 | 
						|
par(mfrow = c(2, 2))
 | 
						|
tensorPredictors::matrixImage(alpha)
 | 
						|
tensorPredictors::matrixImage(alpha.true)
 | 
						|
tensorPredictors::matrixImage(alpha)
 | 
						|
tensorPredictors::matrixImage(-alpha.true)
 |