tensor_predictors/tensorPredictors/src/bit_utils.h

186 lines
5.5 KiB
C

#ifndef INCLUDE_GUARD_BIT_UTILS_H
#define INCLUDE_GUARD_BIT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
#if (defined(__GNUC__) && defined(__BMI2__))
#include <x86intrin.h>
#include <bmi2intrin.h> // _pdep_u32
#endif
#ifdef _MSC_VER
#include <intrin.h> // _BitScanReverse
#endif
/**
* Computes the parity of a word (0 for even bit count and 1 otherwise)
*/
#ifdef __GNUC__
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
#else
static inline int bitParity32(uint32_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
static inline int bitParity64(uint64_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
#endif
/**
* Counts the number of set bits (`1`s in binary) in the number `x`
*/
#ifdef __GNUC__ /* POPulation COUNT */
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
#else
static inline int bitCount32(uint32_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
static inline int bitCount64(uint64_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
#endif
/**
* Gets the index of the LSB (least significant bit)
*
* @condition `x != 0`, for `x == 0` undefined behaviour
*/
#ifdef __GNUC__
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
#elif _MSC_VER
static inline int bitScanLS32(uint32_t x) {
unsigned long bsr;
_BitScanReverse(&bsr, x);
return 31 - bsr;
}
static inline int bitScanLS64(uint64_t x) {
unsigned long bsr;
_BitScanReverse64(&bsr, x);
return 63 - bsr;
}
#else
static inline int bitScanLS32(uint32_t x) {
int ctz = 0; // result storing the Count of Trailing Zeros
bool empty; // boolean variable storing if a bit has not found (search area is empty)
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint32_t)( 1)));
return ctz;
}
static inline int bitScanLS64(uint64_t x) {
int ctz = 0;
bool empty;
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
x >>= 32 * empty;
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint64_t)( 1)));
return ctz;
}
#endif
/**
* Parallel DEPosit (aka PDEP)
*
* Writes the `val` bits into the positions of the set bits in `mask`.
*
* Example:
* val: **** **** **** 1.1.
* mask: 1... 1... 1... 1...
* res: 1... .... 1... ....
*/
#if (defined(__GNUC__) && defined(__BMI2__))
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
return _pdep_u32(val, mask);
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
return _pdep_u64(val, mask);
}
#else
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
uint32_t res = 0;
for (uint32_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
uint64_t res = 0;
for (uint64_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
#endif
/**
* Gets the next lexicographically ordered permutation of an n-bit word.
*
* Let `val` be a bit-word with `n` bits set, then this procedire computes a
* `n` bit word wich is the next element in the lexicographically ordered
* sequence of `n` bit words. For example
*
* val -> bitNextPerm(val)
* 00010011 -> 00010101
* 00010101 -> 00010110
* 00010110 -> 00011001
* 00011001 -> 00011010
* 00011010 -> 00011100
* 00011100 -> 00100011
*
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
*
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
*/
static inline uint32_t bitNextPerm32(uint32_t val) {
// Sets all least significant 0-bits of val to 1
uint32_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
}
static inline uint64_t bitNextPerm64(uint64_t val) {
// Sets all least significant 0-bits of val to 1
uint64_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
}
#endif /* BIT_UTILS_INCLUDE_GUARD_H */