116 lines
3.6 KiB
C++
116 lines
3.6 KiB
C++
/**
|
|
* Implements statistics like `mean`, `cov` and alike for MVBinary data
|
|
*/
|
|
#include <Rcpp.h> // R to C++ binding library
|
|
#include <algorithm>
|
|
|
|
#include "bit_utils.h" // uint32_t, ... and the `bit*` functions
|
|
#include "types.h" // MVBinary (Multivariate Binary Data)
|
|
|
|
//' Converts a logical matrix to a multi variate bernoulli dataset
|
|
//'
|
|
// [[Rcpp::export(rng = false, name = "as.mvbinary")]]
|
|
MVBinary as_mvbinary(const MVBinary& Y) { return Y; }
|
|
|
|
//' Converts a Multivariate binary data set into a logical matrix
|
|
//'
|
|
// [[Rcpp::export(rng = false, name = "as.mvbmatrix")]]
|
|
Rcpp::LogicalMatrix as_mvbmatrix(const MVBinary& Y) {
|
|
Rcpp::LogicalMatrix mat(Y.nrow(), Y.ncol());
|
|
|
|
for (std::size_t i = 0; i < Y.nrow(); ++i) {
|
|
for (uint32_t a = Y[i]; a; a &= a - 1) {
|
|
mat[bitScanLS(a) * Y.nrow() + i] = true;
|
|
}
|
|
}
|
|
|
|
return mat;
|
|
}
|
|
|
|
//' Mean for a multi variate bernoulli dataset `MVBinary`
|
|
//'
|
|
//' mean_i y_i # twoway = false (only single effects)
|
|
//'
|
|
//' or
|
|
//'
|
|
//' mean_i vech(y_i y_i') # twoway = true (with two-way interactions)
|
|
//'
|
|
// [[Rcpp::export(rng = false, name = "mean.mvbinary")]]
|
|
Rcpp::NumericVector mean_mvbinary(const MVBinary& Y, const bool twoway = false) {
|
|
if (!twoway) {
|
|
// mean initialized as `p` dim zero vector
|
|
Rcpp::NumericVector mean(Y.dim());
|
|
|
|
// setup scaling factor `1 / n`
|
|
const double inv_n = 1.0 / static_cast<double>(Y.size());
|
|
|
|
// iterate all events
|
|
for (const auto& y : Y) {
|
|
// and add set features
|
|
for (auto a = y; a; a &= a - 1) {
|
|
mean[bitScanLS(a)] += inv_n;
|
|
}
|
|
}
|
|
|
|
return mean;
|
|
} else {
|
|
// Including two-way interactions
|
|
Rcpp::NumericVector mean(Y.dim() * (Y.dim() + 1) / 2);
|
|
|
|
// get binary vector dimension
|
|
const int p = Y.dim();
|
|
|
|
// iterate all events
|
|
for (const auto& y : Y) {
|
|
// iterate event features
|
|
for (auto a = y; a; a &= a - 1) {
|
|
int i = bitScanLS(a);
|
|
int base_index = (i * (2 * p + 1 - i)) / 2;
|
|
// add single effect
|
|
mean[base_index] += 1.0;
|
|
// iterate event two way effects
|
|
for (auto b = a & (a - 1); b; b &= b - 1) {
|
|
// and add the two way effect
|
|
mean[base_index + bitScanLS(b) - i] += 1.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// counts scaled by sample size
|
|
return mean / static_cast<double>(Y.size());
|
|
}
|
|
}
|
|
|
|
//' Covariance for multi variate binary data `MVBinary`
|
|
//'
|
|
//' cov(Y) = (n - 1)^-1 sum_i (y_i - mean(Y)) (y_i - mean(Y))'
|
|
//'
|
|
// [[Rcpp::export(rng = false, name = "cov.mvbinary")]]
|
|
Rcpp::NumericMatrix cov_mvbinary(const MVBinary& Y) {
|
|
// get random variable dimension
|
|
const std::size_t p = Y.dim();
|
|
|
|
// initialize covariance (default zero initialized)
|
|
Rcpp::NumericMatrix cov(p, p);
|
|
|
|
// step 1: compute the mean (in reversed internal order)
|
|
const auto mean = mean_mvbinary(Y);
|
|
|
|
// iterate all events in `Y`
|
|
for (const auto& y : Y) {
|
|
for (std::size_t j = 0; j < p; ++j) {
|
|
for (std::size_t i = 0; i < p; ++i) {
|
|
cov[i + p * j] += (static_cast<bool>(y & (1 << i)) - mean[i])
|
|
* (static_cast<bool>(y & (1 << j)) - mean[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
// scale by `1 / (n - 1)`
|
|
const double inv_nm1 = 1.0 / static_cast<double>(Y.size() - 1);
|
|
std::transform(cov.begin(), cov.end(), cov.begin(),
|
|
[inv_nm1](const double c) { return c * inv_nm1; });
|
|
|
|
return cov;
|
|
}
|