#ifndef INCLUDE_GUARD_BIT_UTILS_H #define INCLUDE_GUARD_BIT_UTILS_H #include // uint32_t, uint64_t #if (defined(__GNUC__) && defined(__BMI2__)) #include #include // _pdep_u32 #endif #ifdef _MSC_VER #include // _BitScanReverse #endif /** * Computes the parity of a word (0 for even bit count and 1 otherwise) */ #ifdef __GNUC__ static inline int bitParity32(uint32_t x) { return __builtin_parity(x); } static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); } #else static inline int bitParity32(uint32_t x) { int p = (x != 0); while (x &= x - 1) { p = !p; } return p; } static inline int bitParity64(uint64_t x) { int p = (x != 0); while (x &= x - 1) { p = !p; } return p; } #endif /** * Counts the number of set bits (`1`s in binary) in the number `x` */ #ifdef __GNUC__ /* POPulation COUNT */ static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); } static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); } #else static inline int bitCount32(uint32_t x) { int count = 0; // counts set bits for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x return count; } static inline int bitCount64(uint64_t x) { int count = 0; // counts set bits for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x return count; } #endif /** * Gets the index of the LSB (least significant bit) * * @condition `x != 0`, for `x == 0` undefined behaviour */ #ifdef __GNUC__ static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); } #elif _MSC_VER static inline int bitScanLS32(uint32_t x) { unsigned long bsr; _BitScanReverse(&bsr, x); return 31 - bsr; } static inline int bitScanLS64(uint64_t x) { unsigned long bsr; _BitScanReverse64(&bsr, x); return 63 - bsr; } #else static inline int bitScanLS32(uint32_t x) { int ctz = 0; // result storing the Count of Trailing Zeros bool empty; // boolean variable storing if a bit has not found (search area is empty) // logarithmic search for LSB bit index (-1) ctz += (empty = !(x & (uint32_t)(65535))) << 4; x >>= 16 * empty; ctz += (empty = !(x & (uint32_t)( 255))) << 3; x >>= 8 * empty; ctz += (empty = !(x & (uint32_t)( 15))) << 2; x >>= 4 * empty; ctz += (empty = !(x & (uint32_t)( 3))) << 1; x >>= 2 * empty; ctz += (empty = !(x & (uint32_t)( 1))); return ctz; } static inline int bitScanLS64(uint64_t x) { int ctz = 0; bool empty; // logarithmic search for LSB bit index (-1) ctz += (empty = !(x & (uint64_t)(4294967295))) << 5; x >>= 32 * empty; ctz += (empty = !(x & (uint64_t)( 65535))) << 4; x >>= 16 * empty; ctz += (empty = !(x & (uint64_t)( 255))) << 3; x >>= 8 * empty; ctz += (empty = !(x & (uint64_t)( 15))) << 2; x >>= 4 * empty; ctz += (empty = !(x & (uint64_t)( 3))) << 1; x >>= 2 * empty; ctz += (empty = !(x & (uint64_t)( 1))); return ctz; } #endif /** * Parallel DEPosit (aka PDEP) * * Writes the `val` bits into the positions of the set bits in `mask`. * * Example: * val: **** **** **** 1.1. * mask: 1... 1... 1... 1... * res: 1... .... 1... .... */ #if (defined(__GNUC__) && defined(__BMI2__)) static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) { return _pdep_u32(val, mask); } static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) { return _pdep_u64(val, mask); } #else static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) { uint32_t res = 0; for (uint32_t pos = 1; mask; pos <<= 1) { if (val & pos) { res |= mask & -mask; } mask &= mask - 1; } return res; } static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) { uint64_t res = 0; for (uint64_t pos = 1; mask; pos <<= 1) { if (val & pos) { res |= mask & -mask; } mask &= mask - 1; } return res; } #endif /** * Gets the next lexicographically ordered permutation of an n-bit word. * * Let `val` be a bit-word with `n` bits set, then this procedire computes a * `n` bit word wich is the next element in the lexicographically ordered * sequence of `n` bit words. For example * * val -> bitNextPerm(val) * 00010011 -> 00010101 * 00010101 -> 00010110 * 00010110 -> 00011001 * 00011001 -> 00011010 * 00011010 -> 00011100 * 00011100 -> 00100011 * * @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS` * * see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation */ static inline uint32_t bitNextPerm32(uint32_t val) { // Sets all least significant 0-bits of val to 1 uint32_t t = val | (val - 1); // Next set to 1 the most significant bit to change, // set to 0 the least significant ones, and add the necessary 1 bits. return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1)); } static inline uint64_t bitNextPerm64(uint64_t val) { // Sets all least significant 0-bits of val to 1 uint64_t t = val | (val - 1); // Next set to 1 the most significant bit to change, // set to 0 the least significant ones, and add the necessary 1 bits. return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1)); } #endif /* BIT_UTILS_INCLUDE_GUARD_H */