114 lines
3.1 KiB
C
114 lines
3.1 KiB
C
#include <string.h> // for `mem*` functions.
|
|
|
|
#include "config.h"
|
|
#include "sums.h"
|
|
|
|
void rowSums(const double *A, const int nrow, const int ncol,
|
|
double *sum) {
|
|
int i, j, block_size, block_size_i;
|
|
const double *A_block = A;
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
if (nrow > CVE_MEM_CHUNK_SIZE) {
|
|
block_size = CVE_MEM_CHUNK_SIZE;
|
|
} else {
|
|
block_size = nrow;
|
|
}
|
|
|
|
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
|
for (i = 0; i < nrow; i += block_size_i) {
|
|
// Reset `A` to new block beginning.
|
|
A = A_block;
|
|
// Take block size of eveything left and reduce to max size.
|
|
block_size_i = nrow - i;
|
|
if (block_size_i > block_size) {
|
|
block_size_i = block_size;
|
|
}
|
|
// Compute first blocks column,
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
sum[j] = A[j];
|
|
}
|
|
// and sum the following columns to the first one.
|
|
for (A += nrow; A < A_end; A += nrow) {
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
sum[j] += A[j];
|
|
}
|
|
}
|
|
// Step one block forth.
|
|
A_block += block_size_i;
|
|
sum += block_size_i;
|
|
}
|
|
}
|
|
|
|
// Computes the sum of every column of a matrix stored column by column.
//
// Column `c` occupies the `nrow` consecutive doubles starting at
// `A + c * nrow`; its total is written to `sum[c]`.
//
// @param A    matrix entries, `nrow * ncol` doubles.
// @param nrow number of rows (entries per column).
// @param ncol number of columns.
// @param sum  output of `ncol` doubles, fully overwritten.
void colSums(const double *A, const int nrow, const int ncol,
             double *sum) {
    int col, row;
    double total;

    // `A` is advanced by one column per outer iteration.
    for (col = 0; col < ncol; ++col, A += nrow) {
        // Every column total starts from zero.
        total = 0.0;
        for (row = 0; row < nrow; ++row) {
            total += A[row];
        }
        sum[col] = total;
    }
}
|
|
|
|
void rowSquareSums(const double *A, const int nrow, const int ncol,
|
|
double *sum) {
|
|
int i, j, block_size, block_size_i;
|
|
const double *A_block = A;
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
if (nrow < CVE_MEM_CHUNK_SIZE) {
|
|
block_size = nrow;
|
|
} else {
|
|
block_size = CVE_MEM_CHUNK_SIZE;
|
|
}
|
|
|
|
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
|
for (i = 0; i < nrow; i += block_size_i) {
|
|
// Reset `A` to new block beginning.
|
|
A = A_block;
|
|
// Take block size of eveything left and reduce to max size.
|
|
block_size_i = nrow - i;
|
|
if (block_size_i > block_size) {
|
|
block_size_i = block_size;
|
|
}
|
|
// Compute first blocks column,
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
sum[j] = A[j] * A[j];
|
|
}
|
|
// and sum the following columns to the first one.
|
|
for (A += nrow; A < A_end; A += nrow) {
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
sum[j] += A[j] * A[j];
|
|
}
|
|
}
|
|
// Step one block forth.
|
|
A_block += block_size_i;
|
|
sum += block_size_i;
|
|
}
|
|
}
|
|
|
|
// Accumulates sums over a packed list of pairwise values.
//
// `Avec` is consumed sequentially and holds one value for every index pair
// `(j, i)` with `j < i < nrow`, ordered by `j` first and `i` second, i.e.
// `nrow * (nrow - 1) / 2` values in total. Each value is added to both
// `sum[j]` and `sum[i]`.
// NOTE(review): going by the name, `Avec` is presumably the packed
// off-diagonal part of a symmetric `nrow x nrow` matrix and `diag` its
// common diagonal entry, which makes `sum` the row sums - confirm with the
// callers.
//
// @param Avec packed pair values as described above.
// @param nrow number of sums to compute.
// @param diag start value of every sum.
// @param sum  output of `nrow` doubles, fully overwritten.
void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum) {
    int lead, other;
    double value, acc;
    // Any `diag` comparing equal to zero (including `-0.0`) starts the sums
    // at positive zero.
    const double start = (diag == 0.0) ? 0.0 : diag;

    for (other = 0; other < nrow; ++other) {
        sum[other] = start;
    }

    for (lead = 0; lead < nrow; ++lead) {
        // `sum[lead]` is not touched by the inner loop (`other > lead`), so
        // it can be carried in a local and stored back afterwards.
        acc = sum[lead];
        for (other = lead + 1; other < nrow; ++other) {
            value = *Avec++;
            acc += value;
            sum[other] += value;
        }
        sum[lead] = acc;
    }
}
|