tensor_predictors/tensorPredictors/src/ising_m2.c

/** Methods computing or estimating the second moment of the Ising model given
 * the natural parameters of the p.m.f. in exponential family form. That is
 *      f(x) = p0(params) exp(vech(x x')' params)
 * where `params` are the natural parameters. The scaling constant `p0(params)`
 * is also the zero event `x = (0, ..., 0)` probability.
 *
 * Three different version are provided.
 * - exact: Exact method computing the true second moment `E[X X']` given the
 *      parameters `params`. This method has a runtime of `O(2^dim)` where
 *      `dim` is the dimension of the random variable `X`. This means it is
 *      infeasible for bigger (like 25 and above) values if `dim`
 * - MC: Monte-Carlo method used in case the exact method is not feasible.
 * - MC Thrd: Multi-Threaded Monte-Carlo method.
 *
 * The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
 * dimensional numeric vector containing the log-odds. The indexing schema into
 * the parameters corresponding to the log-odds of single or two way interactions
 * with indicex `i, j` are according to the half-vectorization schema
 *
 *  i = 4'th row                  => Symmetry                       => Half-Vectorized Storage
 *
 * [ *    *    *    *    *    * ]    [ *    *    *    *    *    * ]   [ *    *    *   15    *    * ]
 * [ *    *    *    *    *    * ]    [ *    *    *    *    *    * ]   [ *    *   12   16    * ]
 * [ *    *    *    *    *    * ]    [ *    *    *    *    *    * ]   [ *    8    *   17 ]
 * [ 3    8   12   15   16   17 ]    [ 3    8   12   15    *    * ]   [ 3    *    * ]
 * [ *    *    *    *    *    * ]    [ *    *    *   16    *    * ]   [ *    * ]
 * [ *    *    *    *    *    * ]    [ *    *    *   17    *    * ]   [ * ]
 *
 */
#include "R_api.h"
#include "bit_utils.h"
#include "int_utils.h"
#include "ising_MCMC.h"

#ifndef __STDC_NO_THREADS__
    #include <threads.h>
#endif

////////////////////////////////////////////////////////////////////////////////
// TODO: read                                                                 //
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
// and
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
// as well as do the following PROPERLLY
// TODO: make specialized version using the parameters in given sym mat form  //
//       similar to `ising_sample()`                                          //
////////////////////////////////////////////////////////////////////////////////


// void ising_m2_exact_mat(
//     const size_t dim, const size_t ldA, const double* A,
//     double* M2
// ) {
//     double sum_0 = 1.0;

//     const uint32_t max_event = (uint32_t)1 << dim;

//     (void)memset(M2, 0, dim * dim * sizeof(double));

//     for (uint32_t X = 1; X < max_event; ++X) {
//         // Evaluate quadratic form `X' A X` using symmetry of `A`
//         double XtAX = 0.0;
//         for (uint32_t Y = X; Y; Y &= Y - 1) {
//             const int i = bitScanLS32(Y);
//             XtAX += A[i * (ldA + 1)];
//             for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
//                 XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
//             }
//         }

//         const double prob_X = exp(XtAX);
//         sum_0 += prob_X;

//         for (uint32_t Y = X; Y; Y &= Y - 1) {
//             const int i = bitScanLS32(Y);
//             for (uint32_t Z = Y; Z; Z &= Z - 1) {
//                 M2[i * dim + bitScanLS32(Z)] += prob_X;
//             }
//         }
//     }

//     const double prob_0 = 1.0 / sum_0;
//     for (size_t j = 0; j < dim; ++j) {
//         for (size_t i = 0; i < j; ++i) {
//             M2[j * dim + i] = M2[i * dim + j];
//         }
//         for (size_t i = j; i < dim; ++i) {
//             M2[i * dim + j] *= prob_0;
//         }
//     }
// }

double ising_m2_exact(const size_t dim, const double* params, double* M2) {
    // Accumulator of sum of all (unscaled) probabilities
    double sum_0 = 1.0;

    // max event (actually upper bound)
    const uint32_t max_event = (uint32_t)1 << dim;

    // Length of parameters
    const size_t len = dim * (dim + 1) / 2;

    // Initialize M2 to zero
    (void)memset(M2, 0, len * sizeof(double));

    // Iterate all `2^dim` binary vectors `X`
    for (uint32_t X = 1; X < max_event; ++X) {
        // Dot product `<vech(X X'), params>`
        double dot_X = 0.0;
        // all bits in `X`
        for (uint32_t Y = X; Y; Y &= Y - 1) {
            const int i = bitScanLS32(Y);
            const int I = (i * (2 * dim - 1 - i)) / 2;
            // add single and two way interaction log odds
            for (uint32_t Z = Y; Z; Z &= Z - 1) {
                dot_X += params[I + bitScanLS32(Z)];
            }
        }

        // (unscaled) probability of current event `X`
        const double prob_X = exp(dot_X);
        sum_0 += prob_X;

        // Accumulate set bits probability for the first end second moment `E[X X']`
        for (uint32_t Y = X; Y; Y &= Y - 1) {
            const int i = bitScanLS32(Y);
            const int I = (i * (2 * dim - 1 - i)) / 2;
            for (uint32_t Z = Y; Z; Z &= Z - 1) {
                M2[I + bitScanLS32(Z)] += prob_X;
            }
        }
    }

    // Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
    const double prob_0 = 1.0 / sum_0;
    for (size_t i = 0; i < len; ++i) {
        M2[i] *= prob_0;
    }

    return prob_0;
}


// Monte-Carlo method using Gibbs sampling for the second moment
double ising_m2_MC(
    /*   options */ const size_t nr_samples, const size_t warmup,
    /* dimension */ const size_t dim,
    /*   vectors */ const double* params, double* M2
) {

    // Length of `params` and `M2`
    const size_t len = (dim * (dim + 1)) / 2;

    // Dirty trick (reuse output memory for precise counting)
    _Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
    uint64_t* counts = (uint64_t*)M2;

    // Initialize counts to zero
    (void)memset(counts, 0, len * sizeof(uint64_t));

    // Allocate memory to store Markov-Chain value
    int* X = (int*)R_alloc(dim, sizeof(int));

    // Create/Update R's internal PRGN state
    GetRNGstate();

    // Spawn chains
    for (size_t sample = 0; sample < nr_samples; ++sample) {
        // Draw random sample `X ~ Ising(params)`
        ising_mcmc_vech(warmup, dim, params, X);

        // Accumulate component counts
        uint64_t* count = counts;
        for (size_t i = 0; i < dim; ++i) {
            for (size_t j = i; j < dim; ++j) {
                *(count++) += X[i] & X[j];
            }
        }
    }

    // Write R's internal PRNG state back (if needed)
    PutRNGstate();

    // Compute means from counts
    for (size_t i = 0; i < len; ++i) {
        M2[i] = (double)counts[i] / (double)(nr_samples);
    }

    return -1.0;    // TODO: also compute prob_0
}


// in case the compile supports standard C threads
#ifndef __STDC_NO_THREADS__

// Worker thread to calling thread communication struct
typedef struct thrd_data {
    rng_seed_t seed;        // Pseudo-Random-Number-Generator seed/state value
    size_t nr_samples;      // Nr. of samples this worker handles
    size_t warmup;          // Monte-Carlo Chain burne-in length
    size_t dim;             // Random variable dimension
    const double* params;   // Ising model parameters
    int* X;                 // Working memory to store current binary sample
    uint32_t* counts;       // (output) count of single and two way interactions
    double prob_0;          // (output) zero event probability estimate
} thrd_data_t;

// Worker thread function
int thrd_worker(thrd_data_t* data) {
    // Extract data as thread local variables (for convenience)
    const size_t dim = data->dim;
    int* X = data->X;

    // Initialize counts to zero
    (void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));

    // Spawn Monte-Carlo Chains (one foe every sample)
    for (size_t sample = 0; sample < data->nr_samples; ++sample) {
        // Draw random sample `X ~ Ising(params)`
        ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);

        // Accumulate component counts
        uint32_t* count = data->counts;
        for (size_t i = 0; i < dim; ++i) {
            for (size_t j = i; j < dim; ++j) {
                *(count++) += X[i] & X[j];
            }
        }
    }

    return 0;
}

double ising_m2_MC_thrd(
    /*   options */ const size_t nr_samples, const size_t warmup,
                    const size_t nr_threads,
    /* dimension */ const size_t dim,
    /*   vectors */ const double* params, double* M2
) {
    // Length of `params` and `M2`
    const size_t len = (dim * (dim + 1)) / 2;

    // Allocate working and self memory for worker threads
    int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
    uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
    thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
    thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));

    // Provide instruction wor worker and dispatch them
    for (size_t tid = 0; tid < nr_threads; ++tid) {
        // Every thread needs its own PRNG seed!
        init_seed(threads_data[tid].seed);
        // divide work among workers (more or less) equaly with the first worker
        // (tid = 0) having the additional remainder of divided work (`nr_samples`)
        threads_data[tid].nr_samples = nr_samples / nr_threads + (
            tid ? 0 : nr_samples % nr_threads
        );
        threads_data[tid].warmup = warmup;
        threads_data[tid].dim = dim;
        threads_data[tid].params = params;
        // Every worker gets its disjint working memory
        threads_data[tid].X = &Xs[dim * tid];
        threads_data[tid].counts = &counts[len * tid];
        // dispatch the worker
        thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
    }

    // Join (Wait for) all worker
    for (size_t tid = 0; tid < nr_threads; ++tid) {
        thrd_join(threads[tid], NULL);
    }

    // Accumulate worker results into first (tid = 0) worker counts result
    for (size_t tid = 1; tid < nr_threads; ++tid) {
        for (size_t i = 0; i < len; ++i) {
            counts[i] += counts[tid * len + i];
        }
    }

    // convert discreat counts into means
    for (size_t i = 0; i < len; ++i) {
        M2[i] = (double)counts[i] / (double)nr_samples;
    }

    return -2.0;    // TODO: also compute prob_0
}

#endif /* !__STDC_NO_THREADS__ */


/* Implementation of `.Call` entry point */
////////////////////////////////////////////////////////////////////////////////
// TODO: make specialized version using the parameters in given sym mat form  //
//       similar to `ising_sample()`                                          //
////////////////////////////////////////////////////////////////////////////////
extern SEXP R_ising_m2(
    SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
) {
    // Extract and validate arguments
    size_t protect_count = 0;
    if (!Rf_isReal(_params)) {
        _params = PROTECT(Rf_coerceVector(_params, REALSXP));
        protect_count++;
    }

    // Depending on parameters given in symmetric matrix form or natural
    // or natrual parameters in the half-vectorized memory layout
    size_t dim;
    double* params;
    if (Rf_isMatrix(_params)) {
        if (Rf_nrows(_params) != Rf_ncols(_params)) {
            Rf_error("Invalid 'params' value, exected square matrix");
        }
        // Convert to natural parameters
        dim = Rf_nrows(_params);
        params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));

        double* A = REAL(_params);
        for (size_t j = 0, I = 0; j < dim; ++j) {
            for (size_t i = j; i < dim; ++i, ++I) {
                params[I] = (1 + (i != j)) * A[i + j * dim];
            }
        }
    } else {
        // Get Ising random variable dimension `dim` from "half-vectorized" parameters
        // vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
        dim = invTriag(Rf_length(_params));
        if (!dim) {
            Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
                " `p` is the dimension of the random variable");
        }
        params = REAL(_params);
    }

    // Determin the method to use and validate if possible
    const int use_MC = Rf_asLogical(_use_MC);
    if (use_MC == NA_LOGICAL) {
        Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
    }

    // Allocate result vector
    SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
    ++protect_count;

    // asside computed zero event probability (inverse partition function), the
    // scaling factor for the Ising model p.m.f.
    double prob_0 = -1.0;

    if (use_MC) {
        // Convert and validate arguments for the Monte-Carlo methods
        const size_t nr_samples = asUnsigned(_nr_samples);
        if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
            Rf_error("Invalid 'nr_samples' value, expected pos. integer");
        }
        const size_t warmup = asUnsigned(_warmup);
        if (warmup == NA_UNSIGNED) {
            Rf_error("Invalid 'warmup' value, expected non-negative integer");
        }
        const size_t nr_threads = asUnsigned(_nr_threads);
        if (nr_threads == 0 || nr_threads > 256) {
            Rf_error("Invalid 'nr_thread' value");
        }

        if (nr_threads == 1) {
            // Single threaded Monte-Carlo method
            prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
        } else {
            // Multi-Threaded Monte-Carlo method if provided, otherwise use
            // the single threaded version with a warning
#ifdef __STDC_NO_THREADS__
                Rf_warning("Multi-Threading NOT supported, using fallback.");
                prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
#else
                prob_0 = ising_m2_MC_thrd(
                    nr_samples, warmup, nr_threads,
                    dim, params, REAL(_M2)
                );
#endif
        }
    } else {
        // Exact method (ignore other arguments), only validate dimension
        if (25 < dim) {
            Rf_error("Dimension '%d' too big for exact method (max 24)", dim);
        }

        // and call the exact method
        prob_0 = ising_m2_exact(dim, params, REAL(_M2));
    }

    // Set log-lokelihood as an attribute to the computed second moment
    SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
    ++protect_count;
    Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);

    // release SEPXs to the garbage collector
    UNPROTECT(protect_count);

    return _M2;
}
dev 2023-11-14 13:35:43 +00:00			`/** Methods computing or estimating the second moment of the Ising model given`
			`* the natural parameters of the p.m.f. in exponential family form. That is`
			`* f(x) = p0(params) exp(vech(x x')' params)`
			* where `params` are the natural parameters. The scaling constant `p0(params)`
			* is also the zero event `x = (0, ..., 0)` probability.
			`*`
			`* Three different version are provided.`
			* - exact: Exact method computing the true second moment `E[X X']` given the
			* parameters `params`. This method has a runtime of `O(2^dim)` where
			* `dim` is the dimension of the random variable `X`. This means it is
			* infeasible for bigger (like 25 and above) values if `dim`
			`* - MC: Monte-Carlo method used in case the exact method is not feasible.`
			`* - MC Thrd: Multi-Threaded Monte-Carlo method.`
			`*`
			* The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
			`* dimensional numeric vector containing the log-odds. The indexing schema into`
			`* the parameters corresponding to the log-odds of single or two way interactions`
			* with indicex `i, j` are according to the half-vectorization schema
			`*`
			`* i = 4'th row => Symmetry => Half-Vectorized Storage`
			`*`
			`* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]`
			`* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]`
			`* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]`
			`* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]`
			`* [ * * * * * * ] [ * * * 16 * * ] [ * * ]`
			`* [ * * * * * * ] [ * * * 17 * * ] [ * ]`
			`*`
			`*/`
			`#include "R_api.h"`
			`#include "bit_utils.h"`
			`#include "int_utils.h"`
			`#include "ising_MCMC.h"`

			`#ifndef __STDC_NO_THREADS__`
			`#include <threads.h>`
			`#endif`

			`////////////////////////////////////////////////////////////////////////////////`
			`// TODO: read //`
			`// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/`
			`// and`
			`// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html`
			`// as well as do the following PROPERLLY`
			`// TODO: make specialized version using the parameters in given sym mat form //`
			// similar to `ising_sample()` //
			`////////////////////////////////////////////////////////////////////////////////`


			`// void ising_m2_exact_mat(`
			`// const size_t dim, const size_t ldA, const double* A,`
			`// double* M2`
			`// ) {`
			`// double sum_0 = 1.0;`

			`// const uint32_t max_event = (uint32_t)1 << dim;`

			`// (void)memset(M2, 0, dim * dim * sizeof(double));`

			`// for (uint32_t X = 1; X < max_event; ++X) {`
			// // Evaluate quadratic form `X' A X` using symmetry of `A`
			`// double XtAX = 0.0;`
			`// for (uint32_t Y = X; Y; Y &= Y - 1) {`
			`// const int i = bitScanLS32(Y);`
			`// XtAX += A[i * (ldA + 1)];`
			`// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {`
			`// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];`
			`// }`
			`// }`

			`// const double prob_X = exp(XtAX);`
			`// sum_0 += prob_X;`

			`// for (uint32_t Y = X; Y; Y &= Y - 1) {`
			`// const int i = bitScanLS32(Y);`
			`// for (uint32_t Z = Y; Z; Z &= Z - 1) {`
			`// M2[i * dim + bitScanLS32(Z)] += prob_X;`
			`// }`
			`// }`
			`// }`

			`// const double prob_0 = 1.0 / sum_0;`
			`// for (size_t j = 0; j < dim; ++j) {`
			`// for (size_t i = 0; i < j; ++i) {`
			`// M2[j * dim + i] = M2[i * dim + j];`
			`// }`
			`// for (size_t i = j; i < dim; ++i) {`
			`// M2[i * dim + j] *= prob_0;`
			`// }`
			`// }`
			`// }`

			`double ising_m2_exact(const size_t dim, const double* params, double* M2) {`
			`// Accumulator of sum of all (unscaled) probabilities`
			`double sum_0 = 1.0;`

			`// max event (actually upper bound)`
			`const uint32_t max_event = (uint32_t)1 << dim;`

			`// Length of parameters`
			`const size_t len = dim * (dim + 1) / 2;`

			`// Initialize M2 to zero`
			`(void)memset(M2, 0, len * sizeof(double));`

			// Iterate all `2^dim` binary vectors `X`
			`for (uint32_t X = 1; X < max_event; ++X) {`
			// Dot product `<vech(X X'), params>`
			`double dot_X = 0.0;`
			// all bits in `X`
			`for (uint32_t Y = X; Y; Y &= Y - 1) {`
			`const int i = bitScanLS32(Y);`
			`const int I = (i * (2 * dim - 1 - i)) / 2;`
			`// add single and two way interaction log odds`
			`for (uint32_t Z = Y; Z; Z &= Z - 1) {`
			`dot_X += params[I + bitScanLS32(Z)];`
			`}`
			`}`

			// (unscaled) probability of current event `X`
			`const double prob_X = exp(dot_X);`
			`sum_0 += prob_X;`

			// Accumulate set bits probability for the first end second moment `E[X X']`
			`for (uint32_t Y = X; Y; Y &= Y - 1) {`
			`const int i = bitScanLS32(Y);`
			`const int I = (i * (2 * dim - 1 - i)) / 2;`
			`for (uint32_t Z = Y; Z; Z &= Z - 1) {`
			`M2[I + bitScanLS32(Z)] += prob_X;`
			`}`
			`}`
			`}`

			`// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)`
			`const double prob_0 = 1.0 / sum_0;`
			`for (size_t i = 0; i < len; ++i) {`
			`M2[i] *= prob_0;`
			`}`

			`return prob_0;`
			`}`


			`// Monte-Carlo method using Gibbs sampling for the second moment`
			`double ising_m2_MC(`
			`/* options */ const size_t nr_samples, const size_t warmup,`
			`/* dimension */ const size_t dim,`
			`/* vectors / const double params, double* M2`
			`) {`

			// Length of `params` and `M2`
			`const size_t len = (dim * (dim + 1)) / 2;`

			`// Dirty trick (reuse output memory for precise counting)`
			`_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");`
			`uint64_t* counts = (uint64_t*)M2;`

			`// Initialize counts to zero`
			`(void)memset(counts, 0, len * sizeof(uint64_t));`

			`// Allocate memory to store Markov-Chain value`
			`int* X = (int*)R_alloc(dim, sizeof(int));`

			`// Create/Update R's internal PRGN state`
			`GetRNGstate();`

			`// Spawn chains`
			`for (size_t sample = 0; sample < nr_samples; ++sample) {`
			// Draw random sample `X ~ Ising(params)`
			`ising_mcmc_vech(warmup, dim, params, X);`

			`// Accumulate component counts`
			`uint64_t* count = counts;`
			`for (size_t i = 0; i < dim; ++i) {`
			`for (size_t j = i; j < dim; ++j) {`
			`*(count++) += X[i] & X[j];`
			`}`
			`}`
			`}`

			`// Write R's internal PRNG state back (if needed)`
			`PutRNGstate();`

			`// Compute means from counts`
			`for (size_t i = 0; i < len; ++i) {`
			`M2[i] = (double)counts[i] / (double)(nr_samples);`
			`}`

			`return -1.0; // TODO: also compute prob_0`
			`}`


			`// in case the compile supports standard C threads`
			`#ifndef __STDC_NO_THREADS__`

			`// Worker thread to calling thread communication struct`
			`typedef struct thrd_data {`
			`rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value`
			`size_t nr_samples; // Nr. of samples this worker handles`
			`size_t warmup; // Monte-Carlo Chain burne-in length`
			`size_t dim; // Random variable dimension`
			`const double* params; // Ising model parameters`
			`int* X; // Working memory to store current binary sample`
			`uint32_t* counts; // (output) count of single and two way interactions`
			`double prob_0; // (output) zero event probability estimate`
			`} thrd_data_t;`

			`// Worker thread function`
			`int thrd_worker(thrd_data_t* data) {`
			`// Extract data as thread local variables (for convenience)`
			`const size_t dim = data->dim;`
			`int* X = data->X;`

			`// Initialize counts to zero`
			`(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));`

			`// Spawn Monte-Carlo Chains (one foe every sample)`
			`for (size_t sample = 0; sample < data->nr_samples; ++sample) {`
			// Draw random sample `X ~ Ising(params)`
			`ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);`

			`// Accumulate component counts`
			`uint32_t* count = data->counts;`
			`for (size_t i = 0; i < dim; ++i) {`
			`for (size_t j = i; j < dim; ++j) {`
			`*(count++) += X[i] & X[j];`
			`}`
			`}`
			`}`

			`return 0;`
			`}`

			`double ising_m2_MC_thrd(`
			`/* options */ const size_t nr_samples, const size_t warmup,`
			`const size_t nr_threads,`
			`/* dimension */ const size_t dim,`
			`/* vectors / const double params, double* M2`
			`) {`
			// Length of `params` and `M2`
			`const size_t len = (dim * (dim + 1)) / 2;`

			`// Allocate working and self memory for worker threads`
			`int* Xs = (int)R_alloc(dim nr_threads, sizeof(int));`
			`uint32_t* counts = (uint32_t)R_alloc(len nr_threads, sizeof(uint32_t));`
			`thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t));`
			`thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));`

			`// Provide instruction wor worker and dispatch them`
			`for (size_t tid = 0; tid < nr_threads; ++tid) {`
			`// Every thread needs its own PRNG seed!`
			`init_seed(threads_data[tid].seed);`
			`// divide work among workers (more or less) equaly with the first worker`
			// (tid = 0) having the additional remainder of divided work (`nr_samples`)
			`threads_data[tid].nr_samples = nr_samples / nr_threads + (`
			`tid ? 0 : nr_samples % nr_threads`
			`);`
			`threads_data[tid].warmup = warmup;`
			`threads_data[tid].dim = dim;`
			`threads_data[tid].params = params;`
			`// Every worker gets its disjint working memory`
			`threads_data[tid].X = &Xs[dim * tid];`
			`threads_data[tid].counts = &counts[len * tid];`
			`// dispatch the worker`
			`thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);`
			`}`

			`// Join (Wait for) all worker`
			`for (size_t tid = 0; tid < nr_threads; ++tid) {`
			`thrd_join(threads[tid], NULL);`
			`}`

			`// Accumulate worker results into first (tid = 0) worker counts result`
			`for (size_t tid = 1; tid < nr_threads; ++tid) {`
			`for (size_t i = 0; i < len; ++i) {`
			`counts[i] += counts[tid * len + i];`
			`}`
			`}`

			`// convert discreat counts into means`
			`for (size_t i = 0; i < len; ++i) {`
			`M2[i] = (double)counts[i] / (double)nr_samples;`
			`}`

			`return -2.0; // TODO: also compute prob_0`
			`}`

			`#endif /* !__STDC_NO_THREADS__ */`




			/* Implementation of `.Call` entry point */
			`////////////////////////////////////////////////////////////////////////////////`
			`// TODO: make specialized version using the parameters in given sym mat form //`
			// similar to `ising_sample()` //
			`////////////////////////////////////////////////////////////////////////////////`
			`extern SEXP R_ising_m2(`
			`SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads`
			`) {`
			`// Extract and validate arguments`
			`size_t protect_count = 0;`
			`if (!Rf_isReal(_params)) {`
			`_params = PROTECT(Rf_coerceVector(_params, REALSXP));`
			`protect_count++;`
			`}`

			`// Depending on parameters given in symmetric matrix form or natural`
			`// or natrual parameters in the half-vectorized memory layout`
			`size_t dim;`
			`double* params;`
			`if (Rf_isMatrix(_params)) {`
			`if (Rf_nrows(_params) != Rf_ncols(_params)) {`
			`Rf_error("Invalid 'params' value, exected square matrix");`
			`}`
			`// Convert to natural parameters`
			`dim = Rf_nrows(_params);`
			`params = (double)R_alloc(dim (dim + 1) / 2, sizeof(double));`

			`double* A = REAL(_params);`
			`for (size_t j = 0, I = 0; j < dim; ++j) {`
			`for (size_t i = j; i < dim; ++i, ++I) {`
			`params[I] = (1 + (i != j)) * A[i + j * dim];`
			`}`
			`}`
			`} else {`
			// Get Ising random variable dimension `dim` from "half-vectorized" parameters
			// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
			`dim = invTriag(Rf_length(_params));`
			`if (!dim) {`
			Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
			" `p` is the dimension of the random variable");
			`}`
			`params = REAL(_params);`
			`}`

			`// Determin the method to use and validate if possible`
			`const int use_MC = Rf_asLogical(_use_MC);`
			`if (use_MC == NA_LOGICAL) {`
			`Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");`
			`}`

			`// Allocate result vector`
			`SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));`
			`++protect_count;`

			`// asside computed zero event probability (inverse partition function), the`
			`// scaling factor for the Ising model p.m.f.`
			`double prob_0 = -1.0;`

			`if (use_MC) {`
			`// Convert and validate arguments for the Monte-Carlo methods`
			`const size_t nr_samples = asUnsigned(_nr_samples);`
			`if (nr_samples == 0 \|\| nr_samples == NA_UNSIGNED) {`
			`Rf_error("Invalid 'nr_samples' value, expected pos. integer");`
			`}`
			`const size_t warmup = asUnsigned(_warmup);`
			`if (warmup == NA_UNSIGNED) {`
			`Rf_error("Invalid 'warmup' value, expected non-negative integer");`
			`}`
			`const size_t nr_threads = asUnsigned(_nr_threads);`
			`if (nr_threads == 0 \|\| nr_threads > 256) {`
			`Rf_error("Invalid 'nr_thread' value");`
			`}`

			`if (nr_threads == 1) {`
			`// Single threaded Monte-Carlo method`
			`prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));`
			`} else {`
			`// Multi-Threaded Monte-Carlo method if provided, otherwise use`
			`// the single threaded version with a warning`
			`#ifdef __STDC_NO_THREADS__`
			`Rf_warning("Multi-Threading NOT supported, using fallback.");`
			`prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));`
			`#else`
			`prob_0 = ising_m2_MC_thrd(`
			`nr_samples, warmup, nr_threads,`
			`dim, params, REAL(_M2)`
			`);`
			`#endif`
			`}`
			`} else {`
			`// Exact method (ignore other arguments), only validate dimension`
			`if (25 < dim) {`
			`Rf_error("Dimension '%d' too big for exact method (max 24)", dim);`
			`}`

			`// and call the exact method`
			`prob_0 = ising_m2_exact(dim, params, REAL(_M2));`
			`}`

			`// Set log-lokelihood as an attribute to the computed second moment`
			`SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));`
			`++protect_count;`
			`Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);`

			`// release SEPXs to the garbage collector`
			`UNPROTECT(protect_count);`

			`return _M2;`
			`}`