tensor_predictors/mvbernoulli/src/stats.cpp

116 lines
3.6 KiB
C++

/**
* Implements summary statistics such as `mean` and `cov` for MVBinary data
*/
#include <Rcpp.h> // R to C++ binding library
#include <algorithm>
#include <limits> // std::numeric_limits
#include <vector>
#include "bit_utils.h" // uint32_t, ... and the `bit*` functions
#include "types.h" // MVBinary (Multivariate Binary Data)
// On the C++ side this is an identity: the body only returns a copy of `Y`.
// NOTE(review): the logical-matrix -> `MVBinary` conversion named in the title
// is presumably performed by the Rcpp argument/return conversion declared for
// `MVBinary` (see "types.h") -- confirm there.
//' Converts a logical matrix to a multivariate Bernoulli dataset
//'
// [[Rcpp::export(rng = false, name = "as.mvbinary")]]
MVBinary as_mvbinary(const MVBinary& Y) { return Y; }
//' Converts a multivariate binary dataset into a logical matrix
//'
// [[Rcpp::export(rng = false, name = "as.mvbmatrix")]]
Rcpp::LogicalMatrix as_mvbmatrix(const MVBinary& Y) {
    // Number of events; it is both the row count of the result and the
    // column stride of its linear storage.
    const auto n_rows = Y.nrow();

    // Freshly sized Rcpp matrices are zero filled, so every entry starts out
    // as `FALSE` and only the set bits have to be written.
    Rcpp::LogicalMatrix result(n_rows, Y.ncol());

    for (std::size_t row = 0; row < n_rows; ++row) {
        // Visit the set bits of the event from lowest to highest.
        uint32_t bits = Y[row];
        while (bits) {
            // The bit position is the column; `col * n_rows + row` is the
            // linear (column after column) index of entry (row, col).
            const auto col = bitScanLS(bits);
            result[col * n_rows + row] = true;
            // Clear the lowest set bit.
            bits &= bits - 1;
        }
    }

    return result;
}
//' Mean for a multivariate Bernoulli dataset `MVBinary`
//'
//' mean_i y_i # twoway = false (only single effects)
//'
//' or
//'
//' mean_i vech(y_i y_i') # twoway = true (with two-way interactions)
//'
//' @param Y dataset of binary events.
//' @param twoway if `TRUE`, the two-way interactions are included.
//' @return numeric vector of length `p` (`twoway = FALSE`) or
//'  `p (p + 1) / 2` (`twoway = TRUE`), where `p` is the dimension of `Y`.
//'  For an empty dataset all entries are `NaN`.
//'
// [[Rcpp::export(rng = false, name = "mean.mvbinary")]]
Rcpp::NumericVector mean_mvbinary(const MVBinary& Y, const bool twoway = false) {
    // binary vector dimension `p` and sample size `n`
    const int p = Y.dim();
    const double n = static_cast<double>(Y.size());

    // Both branches count occurrences first and divide by `n` exactly once.
    // Integer valued counts are exact in double precision, so the result is
    // the correctly rounded relative frequency and the single effects agree
    // bit for bit between `twoway = false` and `twoway = true` (adding `1 / n`
    // once per event would accumulate rounding errors instead).
    if (!twoway) {
        // counts initialized as `p` dim zero vector
        Rcpp::NumericVector counts(p);
        // iterate all events
        for (const auto& y : Y) {
            // visit every set feature; `a &= a - 1` clears the lowest set bit
            // and `bitScanLS(a)` is taken to be the index of that bit
            for (auto a = y; a; a &= a - 1) {
                counts[bitScanLS(a)] += 1.0;
            }
        }
        // counts scaled by sample size
        return counts / n;
    }

    // Including two-way interactions: one entry per pair `i <= j`
    Rcpp::NumericVector counts(p * (p + 1) / 2);
    // iterate all events
    for (const auto& y : Y) {
        // iterate event features
        for (auto a = y; a; a &= a - 1) {
            const int i = bitScanLS(a);
            // start of the block holding the pairs `(i, i), ..., (i, p - 1)`;
            // the preceding blocks have lengths `p, p - 1, ..., p - i + 1`
            const int base_index = (i * (2 * p + 1 - i)) / 2;
            // add single effect
            counts[base_index] += 1.0;
            // iterate the set features `j > i` of the same event
            for (auto b = a & (a - 1); b; b &= b - 1) {
                // and add the two way effect `(i, j)`
                counts[base_index + bitScanLS(b) - i] += 1.0;
            }
        }
    }
    // counts scaled by sample size
    return counts / n;
}
//' Covariance for multivariate binary data `MVBinary`
//'
//' cov(Y) = (n - 1)^-1 sum_i (y_i - mean(Y)) (y_i - mean(Y))'
//'
//' @param Y dataset of binary events.
//' @return `p` by `p` numeric matrix, where `p` is the dimension of `Y`.
//'  With fewer than two events all entries are `NaN`.
//'
// [[Rcpp::export(rng = false, name = "cov.mvbinary")]]
Rcpp::NumericMatrix cov_mvbinary(const MVBinary& Y) {
    // get random variable dimension
    const std::size_t p = Y.dim();
    // initialize covariance (default zero initialized)
    Rcpp::NumericMatrix cov(p, p);

    // The `1 / (n - 1)` scaling is undefined for fewer than two events. Return
    // `NaN` explicitly instead of relying on `0 * (1 / 0)` for `n == 1` or on
    // the wrap around of an unsigned `n - 1` for `n == 0`.
    if (Y.size() < 2) {
        std::fill(cov.begin(), cov.end(),
                  std::numeric_limits<double>::quiet_NaN());
        return cov;
    }

    // step 1: compute the mean of the single effects
    const auto mean = mean_mvbinary(Y);

    // step 2: accumulate the outer products of the centered events
    std::vector<double> centered(p);
    for (const auto& y : Y) {
        // Center the event once instead of once per matrix entry. The bit is
        // extracted with an unsigned shift of the event itself; `1 << i` would
        // shift a signed `int` and overflow for `i == 31`.
        for (std::size_t i = 0; i < p; ++i) {
            const bool is_set = ((y >> i) & 1u) != 0;
            centered[i] = static_cast<double>(is_set) - mean[i];
        }
        // entry `(i, j)` is stored at linear index `i + p * j`
        for (std::size_t j = 0; j < p; ++j) {
            for (std::size_t i = 0; i < p; ++i) {
                cov[i + p * j] += centered[i] * centered[j];
            }
        }
    }

    // step 3: scale by `1 / (n - 1)` (subtraction done in double precision)
    const double inv_nm1 = 1.0 / (static_cast<double>(Y.size()) - 1.0);
    std::transform(cov.begin(), cov.end(), cov.begin(),
                   [inv_nm1](const double c) { return c * inv_nm1; });

    return cov;
}