tensor_predictors/tensorPredictors/src/R_api.h

#ifndef INCLUDE_GUARD_R_API_H
#define INCLUDE_GUARD_R_API_H

// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
#define R_NO_REMAP

#include <stdint.h> // uint32_t, uint64_t, ...

#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>

#ifndef FCONE
#define FCONE
#endif

// NULL pointer (not defined memory, array, vector, ...)
#ifndef NULL
#define NULL ((void*)0)
#endif

// Truth values
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

// Convenience convertion function similar to `asInteger`, `asReal`, ...
#define NA_UNSIGNED (-((unsigned long)1))
static inline unsigned long asUnsigned(SEXP _val) {
    int val = Rf_asInteger(_val);
    if (val == NA_INTEGER || val < 0) {
        return NA_UNSIGNED;
    }
    return (unsigned long)val;
}


// Remap BLAS and LAPACK bindings (being consistent with my own interface and I
// don't like to pass scalars by reference (memory address))
/** y <- a x + y */
static inline void axpy(
    const int dim,
    const double a,
    const double* x, const int incx,
          double* y, const int incy
) {
    F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
}

/** Scale a 1d array `x <- a x` */
static inline void scale(
    const int dim,
    const double a,
          double *x, const int incx
) {
    F77_CALL(dscal)(&dim, &a, x, &incx);
}

/** Dot product */
static inline double dot(
    const int dim,
    const double* x, const int incx,
    const double* y, const int incy
) {
    return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
}

/** 1d array linear combination `z <- a x + b y` */
// TODO: optimize! (iff needed?, to optimize of at all?)
static inline void lincomb(
    const int dim,
    const double a, const double* x, const int incx,
    const double b, const double* y, const int incy,
                          double* z, const int incz
) {
    for (int i = 0; i < dim; ++i) {
        z[i * incz] = a * x[i * incx] + b * y[i * incy];
    }
}


/**
 * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
 *
 * With parameterized seed/state this custom PRGN can be used in a thread save!
 *
 * For single threaded operations the PRNG provided by `R` are prefered. But they
 * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
 * application.
 *
 * See TODO: ...https://en.wikipedia.org/wiki/Xorshift
 *              SchachHoernchen
 */
static inline uint64_t rot64(uint64_t val, int shift) {
    return (val << shift) | (val >> (64 - shift));
}

// (internal) PRGN state/seed type
typedef uint64_t rng_seed_t[4];

// Hookup the PRNG via its seed to R's random number generation utilities
static inline void init_seed(rng_seed_t seed) {
    GetRNGstate();
    for (size_t i = 0; i < 4; ++i) {
        seed[i] = 0;
        for (size_t j = 0; j < 64; ++j) {
            seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
        }
    }
    PutRNGstate();
}

// PRNG of the Xorshift family
// The least significant 32 bits are not reliable, use most significant 32 bits
static inline uint64_t rand_u64(rng_seed_t seed) {
    uint64_t e = seed[0] - rot64(seed[1], 7);
    seed[0] = seed[1] ^ rot64(seed[1], 13);
    seed[1] = seed[2] + rot64(seed[3], 37);
    seed[2] = seed[3] + e;
    seed[3] = e + seed[0];
    return seed[3];
}

// With external supplied seed, every thread can have its own seed and as such
// we can use this as a thread save alternative to R's `unif_rand()`.
static inline double unif_rand_thrd(rng_seed_t seed) {
    return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
}


#endif /* INCLUDE_GUARD_R_API_H */