#include // R to C++ binding library #include // `std::exp` #include #include #include #include #include // mainly for debugging reasons #include "types.h" // MVBinary (Multivariate Binary Data) #include "bit_utils.h" // uint32_t, ... and the `bit*` functions #include "int_utils.h" // isqrt, ilog, ... #include "threadPool.h" /******************************************************************************/ /*** Ising model ***/ /******************************************************************************/ /** * The Ising model (as a special case of the Multi-Variate Bernoulli) has its * probability mass function (pmf) for a `p` dim. binary vector `y` defined as * * P(Y = y | theta) = p_0(theta) exp(T(y)' theta) * * with the parameter vector `theta` and a statistic `T` of `y`. The real valued * parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic * `T` has the same dimensions as a binary vector given by * * T(y) = vech(y y'). * * TODO: continue */ // [[Rcpp::export(rng = false)]] double ising_log_odds_sum(uint32_t y, const VechView& theta) { // Collects the result `T(theta)' y` (basically the sum of the log odds in // the parameter vector `theta` with a single or two way interaction in `y` double log_odds = 0.0; // Iterate over all bits in the event `y` for (; y; y &= y - 1) { // get LSB index int i = bitScanLS(y); // the single effect index in the parameter vector `theta` int base_index = (i * (2 * theta.dim() + 1 - i)) / 2; // add single effect log odds log_odds += theta[base_index]; // For all (remaining) other effects add the two way interaction odds for (uint32_t b = y & (y - 1); b; b &= b - 1) { log_odds += theta[base_index + bitScanLS(b) - i]; } } return log_odds; } //' Ising model scaling factor `p_0(theta)` for the Ising model //' // [[Rcpp::export(rng = false)]] double ising_prob0(const VechView& theta) { // Value of the event `(1, ..., 1)` const uint32_t max_event = (static_cast(1) << theta.dim()) - 1; // Accumulates `p_0(theta)^-1` double sum_0 = 1.0; // sum up all event (except `(0, .., 0)`, considured as initial value `1`) for (uint32_t a = 1; a <= max_event; ++a) { sum_0 += exp(ising_log_odds_sum(a, theta)); } // scaling factor `p_0(theta) = sum_0^-1` return 1.0 / sum_0; } //' Ising model probabilities for every event, this returns a vector of size `2^p` //' with indices corresponding to the events binary representation. //' //' Note: the R indexing leads to adding +1 to every index. //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_probs(const VechView& theta) { // setup probability vector Rcpp::NumericVector probs(1 << theta.dim()); // Value of the event `(1, ..., 1)` const uint32_t max_event = (static_cast(1) << theta.dim()) - 1; // Accumulates `p_0(theta)^-1` double sum_0 = 1.0; // set prob for the zero event to `1`, scaled later with all the other events probs[0] = 1.0; // sum up all event (except `(0, .., 0)`, considured as initial value `1`) for (uint32_t a = 1; a <= max_event; ++a) { // set and accumulate (unscaled) probabilites sum_0 += (probs[a] = exp(ising_log_odds_sum(a, theta))); } // finish scaling factor calculation `p_0(theta) = 1 / sum_0` double prob_0 = 1.0 / sum_0; // scale probabilites for (auto& prob : probs) { prob *= prob_0; } return probs; } //' Computes the zero conditioned probabilities //' P(Y_i = 1 | Y_-i = 0) //' and //' P(Y_i = 1, Y_j = 1 | Y_-i = 0) //' from the natural parameters `theta` with matching indexing. //' //' This is the inverse function of `ising_theta_from_cond_prob` //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_cond_probs(const VechView& theta) { // setup result of the same size as theta Rcpp::NumericVector pi(theta.size()); // set random variable dimension std::size_t p = theta.dim(); // iterate all single effects for (std::size_t i = 0; i < p; ++i) { // compute probs for single effects std::size_t base_index = (i * (2 * p + 1 - i)) / 2; double exp_i = exp(theta[base_index]); // the probability `P(Y_i = 1 | Y_-i = 0)` pi[base_index] = exp_i / (1 + exp_i); // iterate over bigger indexed components interacting with the `i`th one for (std::size_t j = i + 1; j < p; ++j) { // again compute exp(theta_j) double exp_j = exp(theta[(j * (2 * p + 1 - j)) / 2]); // as well as the two way exponent double exp_ij = exp(theta[base_index + (j - i)]); // two way consitional probability `P(Y_i = Y_j = 1 | Y_-i,-j = 0)` pi[base_index + (j - i)] = (exp_i * exp_j * exp_ij) / (1.0 + exp_i + exp_j + exp_i * exp_j * exp_ij); } } return pi; } //' Computes the expectation of `Y` under the Ising model with natural parameter //' `theta` given component wise by //' //' E Y_i = P(Y_i = 1) //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_expectation(const VechView& theta) { const std::size_t p = theta.dim(); // dim of `Y` const uint32_t max_event = static_cast(-1) >> (32 - p); double sum_0 = 1.0; // accumulates `p_0(theta)^-1` Rcpp::NumericVector mu(p, 0.0); // `mu = E Y` // iterate all 2^p events (except the zero event) for (uint32_t y = 1; y <= max_event; ++y) { // dot product `vech(y y')' theta` for current event double dot = 0; // iterate all bits in `y` for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effects of `y` dot += theta[base_index]; // iterate over all (higher indexed) interactions and add then for (uint32_t b = a & (a - 1); b; b &= b - 1) { dot += theta[base_index + bitScanLS(b) - i]; } } // compute (unscaled) event `y` probability `exp(T(y)' theta)` double prob_y = exp(dot); sum_0 += prob_y; // add current (unscalled) probability to all components of set bits for (uint32_t a = y; a; a &= a - 1) { mu[bitScanLS(a)] += prob_y; } } // finalize `E[Y]` by scaling with `p_0 = sum_0^-1` double prob_0 = 1.0 / sum_0; for (auto& mui : mu) { mui *= prob_0; } return mu; } //' Computes the covariance (second centered moment) of `Y` under the Ising model //' with natural parameter `theta`. //' //' cov(Y, Y) = E[Y Y'] - E[Y] E[Y]' //' // [[Rcpp::export(rng = false)]] Rcpp::NumericMatrix ising_cov(const VechView& theta) { const std::size_t p = theta.dim(); // dim of `Y` const uint32_t max_event = static_cast(-1) >> (32 - p); double sum_0 = 1.0; // accumulates `p_0(theta)^-1` Rcpp::NumericMatrix cov(p, p); // iterate over all 2^p events (except the zero event) for (std::size_t y = 1; y <= max_event; ++y) { // dot product `vech(y y')' theta` for current event double dot = 0; // iterate all bits in `y` for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effects of `y` dot += theta[base_index]; // iterate over all (higher indexed) interactions and add then for (uint32_t b = a & (a - 1); b; b &= b - 1) { dot += theta[base_index + bitScanLS(b) - i]; } } // compute (unscaled) event `y` probability `exp(T(y)' theta)` double prob_y = exp(dot); sum_0 += prob_y; // sum E[Y, Y'] but still unscaled for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); for (uint32_t b = y; b; b &= b - 1) { int j = bitScanLS(b); cov[i * p + j] += prob_y; } } } // finish computing `E[Y Y']` by scaling with `p_0 = sum_0^-1` double prob_0 = 1.0 / sum_0; for (auto& covij : cov) { covij *= prob_0; } // subtract outer product of expectation `-= E[Y] E[Y]'` const auto mu = ising_expectation(theta); // `mu = E[Y]` for (std::size_t j = 0; j < p; ++j) { for (std::size_t i = 0; i < p; ++i) { cov[j * p + i] -= mu[i] * mu[j]; } } return cov; } //' Computes the single and two way effect marginal probabilities //' //' P(Y_i = 1) //' and //' P(Y_i Y_j = 1) //' //' In its vectorized form this function computes E[vech(Y Y')] //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_marginal_probs(const VechView& theta) { // Step 0: Setup (and validate) variables/parameters const std::size_t p = theta.dim(); if (p != theta.dim()) { Rcpp::stop("Parameter dimension does not match data dimension"); } const uint32_t max_event = static_cast(-1) >> (32 - p); double sum_0 = 1.0; // accumulates p_0(theta)^-1 Rcpp::NumericVector score(theta.size(), 0.0); // grad l(theta) // Step 1: Calc `-n E[vech(Y Y')]` where the sum over `y` is the sum over // all possible events `y in {0, 1}^p`. for (uint32_t y = 1; y <= max_event; ++y) { // sum of `T(y)' theta` for current instance `y` double log_odds = 0; // iterate all bits in `y` for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effects of `y` log_odds += theta[base_index]; // iterate over all (higher indexed) interactions and add then for (uint32_t b = a & (a - 1); b; b &= b - 1) { log_odds += theta[base_index + bitScanLS(b) - i]; } } // compute (unscaled) event `y` probability `exp(T(y)' theta)` double prob_y = exp(log_odds); sum_0 += prob_y; // at this point we know the (unscaled) probability of the event `y` // which needs to be added to the gradient at the set bit positions // of `y` corresponding to single and interaction effects for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; score[base_index] += prob_y; for (uint32_t b = a & (a - 1); b; b &= b - 1) { score[base_index + bitScanLS(b) - i] += prob_y; } } } // finalize `-E[vech(Y Y')]` by scaling with `-p_0 = -sum_0^-1` double prob_0 = 1.0 / sum_0; for (auto& s : score) { s *= prob_0; } return score; } //' Natural parameters from the sufficient conditional probability statistic `pi` //' //' Computes the natural parameters `theta` of the Ising model from zero //' conditioned probabilites for single and two way effects. //' //' This is the inverse function of `ising_cond_prob_from_theta` //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_theta_from_cond_prob(const VechView& pi) { // initialize natural parameters vector theta Rcpp::NumericVector theta(pi.size()); // check if given probabilities are in the ragne [0, 1] if (std::any_of(pi.begin(), pi.end(), [](const double prob) { return (prob < 0.0) || (1.0 < prob); })) { Rcpp::stop("`pi` must contain only elements in the range [0, 1]"); } // get random variable dimension std::size_t p = pi.dim(); // iterate all single effects for (std::size_t i = 0; i < p; ++i) { // compute single effect theta_i std::size_t base_index = (i * (2 * p + 1 - i)) / 2; theta[base_index] = log(pi[base_index] / (1.0 - pi[base_index])); // iterate all higher indexed interactions with the currecnt effect for (std::size_t j = i + 1; j < p; ++j) { theta[base_index + (j - i)] = log( ((1 - pi(i) * pi(j)) * pi(i, j)) / (pi(i) * pi(j) * (1 - pi(i, j))) ); } } return theta; } //' Computes the log-likelihood at natural parameters `theta` of the Ising model //' given a set of observations `Y` //' //' l(theta) = log(p_0(theta)) + n^-1 sum_i vech(y_i y_i')' theta //' // [[Rcpp::export(rng = false)]] double ising_log_likelihood(const VechView& theta, const MVBinary& Y) { // sum the log odds `sum_i vech(y_i y_i')' theta` double sum = 0.0; for (const uint32_t y : Y) { sum += ising_log_odds_sum(y, theta); } // add scaling factor `log(p_0(theta))` return log(ising_prob0(theta)) + (sum / static_cast(Y.size())); } //' Computes the Score of the Ising model, this is the gradiend of the (mean) //' log-likelihood with respect to the natural parameters //' //' grad l(theta) = -E[vech(Y Y')] + n^-1 sum_i vech(y_i y_i') //' // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_score(const VechView& theta, const MVBinary& Y) { const std::size_t p = theta.dim(); // Step 1: compute -E[vech(Y Y')] (data independent part) auto score = ising_marginal_probs(theta); for (auto& s : score) { s *= -1.0; } // Step 2: Add data dependend part `mean_i vech(y_i y_i')` const double n_inv = 1.0 / static_cast(Y.size()); for (const uint32_t y : Y) { // start by iterating the single effects in `y` (LSB) for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effects score[base_index] += n_inv; // and the two way interaction effects for (uint32_t b = a & (a - 1); b; b &= b - 1) { score[base_index + bitScanLS(b) - i] += n_inv; } } } return score; } /** * Overload of `ising_score` for a single observation `y` */ Rcpp::NumericVector ising_score(const VechView& theta, const uint32_t y, const std::size_t p ) { // Step 1: compute -E[vech(Y Y')] (data independent part) auto score = ising_marginal_probs(theta); for (auto& s : score) { s *= -1.0; } // Step 2: Add data dependend part `vech(y y')` // start by iterating the single effects in `y` (LSB) for (uint32_t a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effects score[base_index] += 1.0; // and the two way interaction effects for (uint32_t b = a & (a - 1); b; b &= b - 1) { score[base_index + bitScanLS(b) - i] += 1.0; } } return score; } /** * Computes the log-likelihood at natural parameters `theta` of the Ising model * given a set of observations `Y` * * l(alpha) = n^-1 sum_i (log(p_0(alpha, x_i)) + (x_i' alpha y_i)^2) * = n^-1 sum_i (log(p_0(theta_alpha(x_i))) + vech(y_i y_i')'theta_alpha(x_i)) */ // [[Rcpp::export(rng = false)]] double ising_conditional_log_likelihood(const Rcpp::NumericMatrix& alpha, const Rcpp::NumericMatrix& X, const MVBinary& Y ) { // get probem dimensions const std::size_t p = alpha.nrow(); const std::size_t q = alpha.ncol(); // check parameter dimensions if (X.nrow() != Y.size() || X.ncol() != p || Y.dim() != q) { Rcpp::stop("Parameter dimension missmatch"); } // natural parameter for the conditional Ising model // `theta_alpha(x) = vech((2 1_q 1_q' - I_q) o Theta_alpha(x))` // with `o` denoting the Hadamart product. VechView theta(q); // temp inbetween variables Rcpp::NumericVector z(q); // sum over all observations double ll = 0.0; for (std::size_t i = 0; i < Y.size(); ++i) { // get i'th observation (response `y` and predictor `x`) const uint32_t y = Y[i]; const auto x = X.row(i); // compute natural parameter from `alpha` with current predictor `x` // First `z = alpha' x_i` and `s = y_i' alpha' x_i` double s = 0.0; for (std::size_t j = 0; j < q; ++j) { z[j] = 0.0; for (std::size_t k = 0; k < p; ++k) { z[j] += x[k] * alpha[j * p + k]; } s += static_cast(y & (1 << j)) * z[j]; } // and then `vech` of the outer product `z z'` with off diagonal // elements multiplied by `2`. (See: `Theta` to `theta` relation) // Explicitly (`theta = theta_alpha(x)`): // `theta = vech((2 1_q 1_q' - I_q) o (alpha' x x' alpha))` // where `o` is the Hadamard pdoruct. for (std::size_t j = 0; j < q; ++j) { theta(j, j) = z[j] * z[j]; for (std::size_t k = j + 1; k < q; ++k) { theta(j, k) = 2.0 * z[j] * z[k]; } } // add to log-lilelihood sum ll += log(ising_prob0(theta)) + s * s; } return ll / static_cast(Y.size()); } /** * Comutes the Score for the conditional Ising model * * P(Y = y | X = x) = p_0(alpha) exp(y' Theta_alpha(x) y) * * with the parameter relation * * Theta_alpha(x) = alpha' x x' alpha. * * The computed Score has the form * * grad l(alpha) = * 2 n^-1 sum_i x_i x_i' alpha (-E[vec(Y Y') | X = x_i] + vec(y_i y_i')) */ // [[Rcpp::export(rng = false)]] Rcpp::NumericVector ising_conditional_score(const Rcpp::NumericMatrix& alpha, const Rcpp::NumericMatrix& X, const MVBinary& Y ) { // get probem dimensions const std::size_t p = alpha.nrow(); const std::size_t q = alpha.ncol(); // check parameter dimensions if (X.nrow() != Y.size() || X.ncol() != p || Y.dim() != q) { Rcpp::stop("Parameter dimension missmatch"); } // setup the Score (default zero initialized) Rcpp::NumericMatrix score(p, q); // natural parameter for the conditional Ising model // `theta_alpha(x) = vech((2 1_q 1_q' - I_q) o Theta_alpha(x))` // with `o` denoting the Hadamart product. VechView theta(q); // temp inbetween variables Rcpp::NumericVector z(q); // for each observation (iter observation index i = 0, ..., n - 1) for (std::size_t i = 0; i < Y.size(); ++i) { // get i'th observation (response `y` and predictor `x`) const uint32_t y = Y[i]; const auto x = X.row(i); // compute natural parameter from `alpha` with current predictor `x` // First `z = alpha' x` for (std::size_t j = 0; j < q; ++j) { z[j] = 0.0; for (std::size_t k = 0; k < p; ++k) { z[j] += x[k] * alpha[j * p + k]; } } // and then `vech` of the outer product `z z'` with off diagonal // elements multiplied by `2`. (See: `Theta` to `theta` relation) // Explicitly (`theta = theta_alpha(x)`): // `theta = vech((2 1_q 1_q' - I_q) o (alpha' x x' alpha))` // where `o` is the Hadamard pdoruct. for (std::size_t j = 0; j < q; ++j) { theta(j, j) = z[j] * z[j]; for (std::size_t k = j + 1; k < q; ++k) { theta(j, k) = 2.0 * z[j] * z[k]; } } // With `theta_alpha(x)` compute the classic Ising model Score at // `theta = theta_alpha(x)` for a single observation `y`. // `S = grad_theta l` const auto S = ising_score(theta, y, q); // convert classic ising model Score to conditional Ising model Score // by using: // `grad_alpha l = 2 x x' alpha vec^-1(D_q grad_theta l)` // where `D_q` is the duplication matrix. // For each column of `grad_alpha l` for (std::size_t k = 0; k < q; ++k) { // compute the `k`th vector matrix product `(z' S)_k` component double zS_k = 0.0; for (std::size_t l = 0; l < q; ++l) { zS_k += z[l] * S[theta.index(std::min(k, l), std::max(k, l))]; } // and now add the `k`th column `x (z' S)_k` to the Score for (std::size_t j = 0; j < p; ++j) { score[k * p + j] += x[j] * zS_k; } } } return (2.0 / static_cast(X.nrow())) * score; } // TODO: Drop or rewrite to test/fix issue with R/Rcpp multithreading issue, // Apparently I can NOT use Rcpp in conjunction with multiple threads // as the garbage collector stack gets out of sync. // SEE: https://stackoverflow.com/questions/42119609/how-to-handle-mismatching-in-calling-convention-in-r-and-c-using-rcpp // // NOTE: The below algorithm implements the _old_ `theta` to `eta` relation // // // [[Rcpp::export(rng = false)]] // Rcpp::NumericVector ising_conditional_score_mt(const Rcpp::NumericMatrix& alpha, // const Rcpp::NumericMatrix& X, const MVBinary& Y // ) { // // get probem dimensions // const std::size_t p = alpha.nrow(); // const std::size_t q = alpha.ncol(); // // check parameter dimensions // if (X.nrow() != Y.size() || X.ncol() != p || Y.dim() != q) { // Rcpp::stop("Parameter dimension missmatch"); // } // // setup the Score (default zero initialized) // Rcpp::NumericMatrix score(p, q); // // setup a thread pool to perform per observation computations in parallel // ThreadPool pool; // // for each observation setup a task to compute the Score this observation // // Score and then add them up // for (std::size_t i = 0; i < Y.size(); ++i) { // // push task to the thread pool, they are executed as soon as possible // pool.push([&alpha, &X, &Y, i, p, q, /*out*/ &score]() { // // get i'th observation (response `y` and predictor `x`) // const uint32_t y = Y[i]; // const auto x = X.row(i); // // natural parameter for the conditional Ising model // // `theta_alpha(x) = vech((2 1_q 1_q' - I_q) o Theta_alpha(x))` // // with `o` denoting the Hadamart product. // VechView theta(q); // // temp inbetween variables // Rcpp::NumericVector z(q); // // compute natural parameter from `alpha` with current predictor `x` // // First `z = alpha' x` // for (std::size_t j = 0; j < q; ++j) { // z[j] = 0.0; // for (std::size_t k = 0; k < p; ++k) { // z[j] += x[k] * alpha[j * p + k]; // } // } // // and then `vech` of the outer product `z z'` with off diagonal // // elements multiplied by `2`. (See: `Theta` to `theta` relation) // // Explicitly (`theta = theta_alpha(x)`): // // `theta = vech((2 1_q 1_q' - I_q) o (alpha' x x' alpha))` // // where `o` is the Hadamard pdoruct. // for (std::size_t j = 0; j < q; ++j) { // theta(j, j) = z[j] * z[j]; // for (std::size_t k = j + 1; k < q; ++k) { // theta(j, k) = 2.0 * z[j] * z[k]; // } // } // // With `theta_alpha(x)` compute the classic Ising model Score at // // `theta = theta_alpha(x)` for a single observation `y`. // // `S = grad_theta l` // const auto S = ising_score(theta, y, q); // // convert classic ising model Score to conditional Ising model Score // // by using: // // `grad_alpha l = 2 x x' alpha vec^-1(D_q grad_theta l)` // // where `D_q` is the duplication matrix. // // For each column of `grad_alpha l` // for (std::size_t k = 0; k < q; ++k) { // // compute the `k`th vector matrix product `(z' S)_k` component // double zS_k = 0.0; // for (std::size_t l = 0; l < q; ++l) { // zS_k += z[l] * S[theta.index(std::min(k, l), std::max(k, l))]; // } // // and now add the `k`th column `x (z' S)_k` to the Score // // Add to output score, `+=` should be atomic (TODO: check this!!!) // for (std::size_t j = 0; j < p; ++j) { // score[k * p + j] += x[j] * zS_k; // } // } // }); // } // return (2.0 / static_cast(X.nrow())) * score; // } //' Fisher information for the natural parameters `theta` under the Ising model. //' //' I(theta) = -E(H l(theta) | theta) = Cov(vech(Y Y'), vech(Y Y') | theta) //' //' where `H l(theta)` is the Hessian of the log-likelihood `l(theta)` defined as //' //' l(theta) = n^-1 prod_i P(Y = y_i | theta) //' = log(p_0(theta)) - mean_i exp(vech(y_i y_i')' theta) //' //' Note that the Fisher information does _not_ depend on data. //' // [[Rcpp::export(rng = false)]] Rcpp::NumericMatrix ising_fisher_info(const VechView& theta) { // Step 0: Setup (and validate) variables/parameters const std::size_t p = theta.dim(); const std::size_t pp = p * (p + 1) / 2; const uint32_t max_event = static_cast(-1) >> (32 - p); double sum_0 = 1.0; // accumulates p_0(theta)^-1 // Initialize result covariance matrix (zero initialized) Rcpp::NumericMatrix cov(pp, pp); // Compute (unscaled) lower triag part of `E(vech(Y Y') vech(Y Y')' | theta)` for (uint32_t y = 1; y <= max_event; ++y) { // Compute (unscaled) probability `P(Y = y | theta)` const double prob = exp(ising_log_odds_sum(y, theta)); // and add to scaling factor accumulator sum_0 += prob; // Iterate Fisher information column indices (set bits in vech(y y')) // Those are the LSB lower triag indices of the `a, b` LSBs for (uint32_t a = y; a; a &= a - 1) { const std::size_t i = bitScanLS(a); const std::size_t ti = (i * (2 * p + 1 - i)) / 2; for (uint32_t b = a; b; b &= b - 1) { // Fisher info column index const std::size_t col = ti + (bitScanLS(b) - i); // Fill (lower triangular) Fisher information row positions // by computing the remaining (higher indexed) of vech(y y') // via the LSB bits of `c, d` // TODO: Slight inaccuracy with doing a bit to much work for (uint32_t c = a; c; c &= c - 1) { const std::size_t j = bitScanLS(c); const std::size_t tj = (j * (2 * p + 1 - j)) / 2; for (uint32_t d = c; d; d &= d - 1) { // Fisher info row index (lower triag part) const std::size_t row = tj + (bitScanLS(d) - j); // add (unscaled) probability for current event effects cov[row + pp * col] += prob; } } } } } // finish scaling factor const double p_0 = 1.0 / sum_0; // Scale and mirrow lower triag part of `E(vech(Y Y') vech(Y Y')' | theta)` for (std::size_t col = 0; col < pp; col++) { for (std::size_t row = col; row < pp; ++row) { cov[col + pp * row] = (cov[row + pp * col] *= p_0); } } // Subtract outer product of the expectation // `cov -= E(vech(Y Y') | theta) E(vech(Y Y') | theta)'` auto score = ising_marginal_probs(theta); std::transform(cov.begin(), cov.end(), Rcpp::outer(score, score, std::multiplies()).begin(), cov.begin(), std::minus()); return cov; } //' Samples from the Ising model given the natural parameters `theta` //' // [[Rcpp::export(rng = true)]] MVBinary ising_sample(const std::size_t n, const VechView& theta, const std::size_t warmup = 1000 ) { const std::size_t p = theta.dim(); const std::size_t max_event = static_cast(-1) >> (32 - p); // for small dimensions we use Rcpp::sample if (p < 5) { // setup complete probability vector for every possible event const auto probs = ising_probs(theta); // sample from all possible events given probabilities for each const auto events = Rcpp::sample(1 << p, n, true, probs, false); return MVBinary(events.begin(), events.end(), p); } // Else: "non trivial implementation" case: Gibbs sampling // RNG for continuous uniform `U[0, 1]` auto runif = Rcpp::stats::UnifGenerator(0, 1); // conditional probability `P(Y_i = 1 | Y_-i = y_-i) = exp_i / (1 + exp_i)` // where `exp_i = exp(theta_i + sum_{j != i} y_j theta_{i,j})` auto icond_prob = [&theta](const std::size_t i, const uint32_t y) { double log_odds = theta(i); for (uint32_t a = y & ~(static_cast(1) << i); a; a &= a - 1) { const std::size_t j = bitScanLS(a); log_odds += theta(std::min(i, j), std::max(i, j)); } return 1.0 / (1.0 + exp(-log_odds)); }; // Reserve result data set MVBinary events(n, p); // repeat untill `n` samples are drawn while (events.size() < n) { // Initialize sample as vector of independent bernoulli draws with // component wise probability `P(Y_i = 1 | Y_-i = 0)` uint32_t y = 0; for (std::size_t i = 0; i < p; ++i) { y |= static_cast((runif() * (1.0 + exp(-theta(i)))) < 1.0) << i; } // repeated cyclic updating for (std::size_t rep = 0; rep < warmup; ++rep) { for (std::size_t i = 0; i < p; ++i) { // event with the `i`th component `1` and all others `0` const uint32_t ei = static_cast(1) << i; // sample `i`th bit from `Bernoulli(P(Y_i = 1 | Y_-i = y_-i))` y = (y & ~ei) | (ei * (runif() < icond_prob(i, y))); } } // add generated sample to result set events.push_back(y); } return events; }