#include // for `mem*` functions. #include "config.h" #include "sums.h" void rowSums(const double *A, const int nrow, const int ncol, double *sum) { int i, j, block_size, block_size_i; const double *A_block = A; const double *A_end = A + nrow * ncol; if (nrow > CVE_MEM_CHUNK_SIZE) { block_size = CVE_MEM_CHUNK_SIZE; } else { block_size = nrow; } // Iterate `(block_size_i, ncol)` submatrix blocks. for (i = 0; i < nrow; i += block_size_i) { // Reset `A` to new block beginning. A = A_block; // Take block size of eveything left and reduce to max size. block_size_i = nrow - i; if (block_size_i > block_size) { block_size_i = block_size; } // Compute first blocks column, for (j = 0; j < block_size_i; ++j) { sum[j] = A[j]; } // and sum the following columns to the first one. for (A += nrow; A < A_end; A += nrow) { for (j = 0; j < block_size_i; ++j) { sum[j] += A[j]; } } // Step one block forth. A_block += block_size_i; sum += block_size_i; } } void colSums(const double *A, const int nrow, const int ncol, double *sum) { int j; double *sum_end = sum + ncol; memset(sum, 0, sizeof(double) * ncol); for (; sum < sum_end; ++sum) { for (j = 0; j < nrow; ++j) { *sum += A[j]; } A += nrow; } } void rowSquareSums(const double *A, const int nrow, const int ncol, double *sum) { int i, j, block_size, block_size_i; const double *A_block = A; const double *A_end = A + nrow * ncol; if (nrow < CVE_MEM_CHUNK_SIZE) { block_size = nrow; } else { block_size = CVE_MEM_CHUNK_SIZE; } // Iterate `(block_size_i, ncol)` submatrix blocks. for (i = 0; i < nrow; i += block_size_i) { // Reset `A` to new block beginning. A = A_block; // Take block size of eveything left and reduce to max size. block_size_i = nrow - i; if (block_size_i > block_size) { block_size_i = block_size; } // Compute first blocks column, for (j = 0; j < block_size_i; ++j) { sum[j] = A[j] * A[j]; } // and sum the following columns to the first one. for (A += nrow; A < A_end; A += nrow) { for (j = 0; j < block_size_i; ++j) { sum[j] += A[j] * A[j]; } } // Step one block forth. A_block += block_size_i; sum += block_size_i; } } void rowSumsSymVec(const double *Avec, const int nrow, const double diag, double *sum) { int i, j; if (diag == 0.0) { memset(sum, 0, nrow * sizeof(double)); } else { for (i = 0; i < nrow; ++i) { sum[i] = diag; } } for (j = 0; j < nrow; ++j) { for (i = j + 1; i < nrow; ++i, ++Avec) { sum[j] += *Avec; sum[i] += *Avec; } } }