/** * Implements statistics like `mean`, `cov` and alike for MVBinary data */ #include // R to C++ binding library #include #include "bit_utils.h" // uint32_t, ... and the `bit*` functions #include "types.h" // MVBinary (Multivariate Binary Data) //' Converts a logical matrix to a multi variate bernoulli dataset //' // [[Rcpp::export(rng = false, name = "as.mvbinary")]] MVBinary as_mvbinary(const MVBinary& Y) { return Y; } //' Converts a Multivariate binary data set into a logical matrix //' // [[Rcpp::export(rng = false, name = "as.mvbmatrix")]] Rcpp::LogicalMatrix as_mvbmatrix(const MVBinary& Y) { Rcpp::LogicalMatrix mat(Y.nrow(), Y.ncol()); for (std::size_t i = 0; i < Y.nrow(); ++i) { for (uint32_t a = Y[i]; a; a &= a - 1) { mat[bitScanLS(a) * Y.nrow() + i] = true; } } return mat; } //' Mean for a multi variate bernoulli dataset `MVBinary` //' //' mean_i y_i # twoway = false (only single effects) //' //' or //' //' mean_i vech(y_i y_i') # twoway = true (with two-way interactions) //' // [[Rcpp::export(rng = false, name = "mean.mvbinary")]] Rcpp::NumericVector mean_mvbinary(const MVBinary& Y, const bool twoway = false) { if (!twoway) { // mean initialized as `p` dim zero vector Rcpp::NumericVector mean(Y.dim()); // setup scaling factor `1 / n` const double inv_n = 1.0 / static_cast(Y.size()); // iterate all events for (const auto& y : Y) { // and add set features for (auto a = y; a; a &= a - 1) { mean[bitScanLS(a)] += inv_n; } } return mean; } else { // Including two-way interactions Rcpp::NumericVector mean(Y.dim() * (Y.dim() + 1) / 2); // get binary vector dimension const int p = Y.dim(); // iterate all events for (const auto& y : Y) { // iterate event features for (auto a = y; a; a &= a - 1) { int i = bitScanLS(a); int base_index = (i * (2 * p + 1 - i)) / 2; // add single effect mean[base_index] += 1.0; // iterate event two way effects for (auto b = a & (a - 1); b; b &= b - 1) { // and add the two way effect mean[base_index + bitScanLS(b) - i] += 1.0; } } } // counts scaled by sample size return mean / static_cast(Y.size()); } } //' Covariance for multi variate binary data `MVBinary` //' //' cov(Y) = (n - 1)^-1 sum_i (y_i - mean(Y)) (y_i - mean(Y))' //' // [[Rcpp::export(rng = false, name = "cov.mvbinary")]] Rcpp::NumericMatrix cov_mvbinary(const MVBinary& Y) { // get random variable dimension const std::size_t p = Y.dim(); // initialize covariance (default zero initialized) Rcpp::NumericMatrix cov(p, p); // step 1: compute the mean (in reversed internal order) const auto mean = mean_mvbinary(Y); // iterate all events in `Y` for (const auto& y : Y) { for (std::size_t j = 0; j < p; ++j) { for (std::size_t i = 0; i < p; ++i) { cov[i + p * j] += (static_cast(y & (1 << i)) - mean[i]) * (static_cast(y & (1 << j)) - mean[j]); } } } // scale by `1 / (n - 1)` const double inv_nm1 = 1.0 / static_cast(Y.size() - 1); std::transform(cov.begin(), cov.end(), cov.begin(), [inv_nm1](const double c) { return c * inv_nm1; }); return cov; }