#include "cve.h"

/**
 * Computes the row sums of a matrix `A`.
 *
 * The rows are processed in blocks of at most `CVE_MEM_CHUNK_SIZE` rows; for
 * each block all columns are accumulated before moving on to the next block
 * (presumably so the active part of `sum` stays cache resident -- the chunk
 * size is defined outside this file).
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param sum Pointer to output row sums of size `nrow`. For a matrix without
 *  columns (`ncol <= 0`) all row sums are set to zero.
 */
void rowSums(const double *A, const int nrow, const int ncol, double *sum) {
    int i, j, col, block_size, block_size_i;
    const double *A_col;
    double *sum_block;

    if (nrow <= 0) {
        return;
    }
    // An empty sum is zero; never read from a matrix without elements.
    if (ncol <= 0) {
        memset(sum, 0, (size_t)nrow * sizeof(double));
        return;
    }

    block_size = (nrow > CVE_MEM_CHUNK_SIZE) ? CVE_MEM_CHUNK_SIZE : nrow;

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Take everything that is left, limited to the max block size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }

        // First column of the current block and its part of the output.
        A_col = A + i;
        sum_block = sum + i;

        // Initialize with the first column of the block,
        for (j = 0; j < block_size_i; ++j) {
            sum_block[j] = A_col[j];
        }
        // and add the remaining columns. Counting columns (instead of
        // comparing against an end pointer) keeps `A_col` inside `A`.
        for (col = 1; col < ncol; ++col) {
            A_col += nrow;
            for (j = 0; j < block_size_i; ++j) {
                sum_block[j] += A_col[j];
            }
        }
    }
}

/**
 * Computes the column sums of a matrix `A`.
 *
 * Each column is summed in groups of four elements followed by the remaining
 * `nrow % 4` elements. The grouping determines the floating point summation
 * order and must be kept to reproduce results bit by bit.
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param colSums Pointer to output column sums of size `ncol`.
 */
void colSums(const double *A, const int nrow, const int ncol, double *colSums) {
    int i, j;
    const int nrowb = 4 * (nrow / 4); // 4 * floor(nrow / 4)
    double colSum;

    for (j = 0; j < ncol; ++j) {
        colSum = 0.0;
        // Groups of four elements,
        for (i = 0; i < nrowb; i += 4) {
            colSum += A[i] + A[i + 1] + A[i + 2] + A[i + 3];
        }
        // then the (at most three) left over elements.
        for (; i < nrow; ++i) {
            colSum += A[i];
        }
        colSums[j] = colSum;

        // Advance to the next column.
        A += nrow;
    }
}

/**
 * Computes the row sums of the element wise squares of a matrix `A`, that is
 * `sum[i] = A[i, 0]^2 + A[i, 1]^2 + ... + A[i, ncol - 1]^2`.
 *
 * Uses the same row blocking as `rowSums`.
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param sum Pointer to output row square sums of size `nrow`. For a matrix
 *  without columns (`ncol <= 0`) all entries are set to zero.
 */
void rowSquareSums(const double *A, const int nrow, const int ncol,
                   double *sum) {
    int i, j, col, block_size, block_size_i;
    const double *A_col;
    double *sum_block;

    if (nrow <= 0) {
        return;
    }
    // An empty sum is zero; never read from a matrix without elements.
    if (ncol <= 0) {
        memset(sum, 0, (size_t)nrow * sizeof(double));
        return;
    }

    block_size = (nrow > CVE_MEM_CHUNK_SIZE) ? CVE_MEM_CHUNK_SIZE : nrow;

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Take everything that is left, limited to the max block size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }

        // First column of the current block and its part of the output.
        A_col = A + i;
        sum_block = sum + i;

        // Initialize with the squared first column of the block,
        for (j = 0; j < block_size_i; ++j) {
            sum_block[j] = A_col[j] * A_col[j];
        }
        // and add the squares of the remaining columns. Counting columns
        // (instead of comparing against an end pointer) keeps `A_col`
        // inside `A`.
        for (col = 1; col < ncol; ++col) {
            A_col += nrow;
            for (j = 0; j < block_size_i; ++j) {
                sum_block[j] += A_col[j] * A_col[j];
            }
        }
    }
}

/**
 * Computes the row sums of a symmetric `nrow x nrow` matrix with constant
 * diagonal, given in packed form.
 *
 * `Avec` holds the `nrow * (nrow - 1) / 2` elements strictly below the
 * diagonal, stored column by column (all `i > j` for `j = 0`, then for
 * `j = 1`, ...). Each stored element contributes to the sum of row `i` and,
 * by symmetry, to the sum of row `j`.
 *
 * @param Avec Pointer to the packed off-diagonal elements.
 * @param nrow Number of rows (and columns) of the symmetric matrix.
 * @param diag Value of every diagonal element.
 * @param sum Pointer to output row sums of size `nrow`.
 */
void rowSumsSymVec(const double *Avec, const int nrow, const double diag,
                   double *sum) {
    int i, j;

    // Nothing to compute (and no valid size for `memset`) without rows.
    if (nrow <= 0) {
        return;
    }

    // Start each row sum with its diagonal element.
    if (diag == 0.0) {
        memset(sum, 0, (size_t)nrow * sizeof(double));
    } else {
        for (i = 0; i < nrow; ++i) {
            sum[i] = diag;
        }
    }

    // Add every off-diagonal element to both rows it belongs to.
    for (j = 0; j < nrow; ++j) {
        for (i = j + 1; i < nrow; ++i, ++Avec) {
            sum[j] += *Avec;
            sum[i] += *Avec;
        }
    }
}

/**
 * Computes all pairwise differences between the rows of a matrix `X`.
 *
 * For every column `l` and every row pair `i < j` the difference
 * `X[i, l] - X[j, l]` is written. The pairs of one column are stored
 * consecutively in the order `(0, 1), (0, 2), ..., (1, 2), ...`, followed by
 * the pairs of the next column.
 *
 * @param X Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `X`.
 * @param ncol Number of columns of `X`.
 * @param diffs Pointer to output of size `ncol * nrow * (nrow - 1) / 2`.
 */
void rowDiffs(const double* X, const int nrow, const int ncol, double *diffs) {
    int i, j, l;

    // Running pointers avoid `int` index arithmetic which may overflow for
    // large inputs.
    for (l = 0; l < ncol; ++l, X += nrow) {
        for (i = 0; i < nrow; ++i) {
            for (j = i + 1; j < nrow; ++j) {
                *diffs = X[i] - X[j];
                ++diffs;
            }
        }
    }
}

/**
 * Computes for every pair of rows of `X` the sum of squared differences over
 * all columns, that is `sum[k] = sum_l (X[i, l] - X[j, l])^2` where `k`
 * enumerates the row pairs `i < j` in the order
 * `(0, 1), (0, 2), ..., (1, 2), ...`.
 *
 * @param X Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `X`.
 * @param ncol Number of columns of `X`.
 * @param sum Pointer to output of size `nrow * (nrow - 1) / 2`. Zero filled
 *  before accumulation.
 */
void rowDiffSquareSums(const double* X, const int nrow, const int ncol,
                       double *sum) {
    int i, j, l;
    double tmp;
    double *pair;

    // Fewer than two rows means there are no pairs and nothing to write.
    if (nrow < 2) {
        return;
    }

    // Pair count computed in `size_t`; `nrow * (nrow - 1)` overflows `int`
    // already for `nrow > 46341`.
    memset(sum, 0,
           (((size_t)nrow * (size_t)(nrow - 1)) / 2) * sizeof(double));

    for (l = 0; l < ncol; ++l, X += nrow) {
        // Restart at the first pair for every column.
        pair = sum;
        for (i = 0; i < nrow; ++i) {
            for (j = i + 1; j < nrow; ++j) {
                tmp = X[i] - X[j];
                *pair += tmp * tmp;
                ++pair;
            }
        }
    }
}