2
0
Fork 0
CVE/CVE_C/src/rowColOp.c

161 lines
4.3 KiB
C

#include "cve.h"
/**
 * Computes the row sums of a matrix `A`.
 *
 * The rows are processed in blocks of at most `CVE_MEM_CHUNK_SIZE` rows. For
 * every block the columns are visited one after another and accumulated into
 * the corresponding stretch of `sum`.
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param sum Pointer to output row sums of size `nrow`. If `ncol <= 0` all
 *  `nrow` entries are set to zero (empty sum) and `A` is not read.
 */
void rowSums(const double *A, const int nrow, const int ncol,
             double *sum) {
    int i, j, col, block_size, block_size_i;
    const double *A_col;
    double *sum_block;

    // A matrix without columns has no elements that may be read.
    if (ncol <= 0) {
        for (i = 0; i < nrow; ++i) {
            sum[i] = 0.0;
        }
        return;
    }

    // NOTE(review): assumes `CVE_MEM_CHUNK_SIZE` is positive, otherwise the
    // block loop below would not advance -- confirm in the project header.
    if (nrow > CVE_MEM_CHUNK_SIZE) {
        block_size = CVE_MEM_CHUNK_SIZE;
    } else {
        block_size = nrow;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }

        // Start of the current block in the first column and in the output.
        A_col = A + i;
        sum_block = sum + i;

        // The first column of the block initializes the sums,
        for (j = 0; j < block_size_i; ++j) {
            sum_block[j] = A_col[j];
        }
        // and the remaining columns are added on top. Columns are counted
        // by index so that `A_col` never moves past the end of `A`.
        for (col = 1; col < ncol; ++col) {
            A_col += nrow;
            for (j = 0; j < block_size_i; ++j) {
                sum_block[j] += A_col[j];
            }
        }
    }
}
/**
 * Computes the column sums of a matrix `A`.
 *
 * Each column is summed four elements at a time, followed by the up to three
 * remaining elements of that column.
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param colSums Pointer to output column sums of size `ncol`.
 */
void colSums(const double *A, const int nrow, const int ncol,
             double *colSums) {
    // Largest multiple of 4 not exceeding `nrow`.
    const int unrolled = nrow - nrow % 4;
    const double *col = A;
    int c;

    for (c = 0; c < ncol; ++c, col += nrow) {
        double acc = 0.0;
        int r = 0;

        // Groups of four consecutive column elements.
        while (r < unrolled) {
            acc += col[r]
                 + col[r + 1]
                 + col[r + 2]
                 + col[r + 3];
            r += 4;
        }
        // Left over elements of the column.
        while (r < nrow) {
            acc += col[r];
            ++r;
        }

        colSums[c] = acc;
    }
}
/**
 * Computes the row-wise sums of squared elements of a matrix `A`, that is
 * `sum[i] = A[i, 0]^2 + A[i, 1]^2 + ... + A[i, ncol - 1]^2`.
 *
 * The rows are processed in blocks of at most `CVE_MEM_CHUNK_SIZE` rows. For
 * every block the columns are visited one after another and accumulated into
 * the corresponding stretch of `sum`.
 *
 * @param A Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `A`.
 * @param ncol Number of columns of `A`.
 * @param sum Pointer to output row square sums of size `nrow`. If
 *  `ncol <= 0` all `nrow` entries are set to zero (empty sum) and `A` is not
 *  read.
 */
void rowSquareSums(const double *A,
                   const int nrow, const int ncol,
                   double *sum) {
    int i, j, col, block_size, block_size_i;
    const double *A_col;
    double *sum_block;

    // A matrix without columns has no elements that may be read.
    if (ncol <= 0) {
        for (i = 0; i < nrow; ++i) {
            sum[i] = 0.0;
        }
        return;
    }

    // NOTE(review): assumes `CVE_MEM_CHUNK_SIZE` is positive, otherwise the
    // block loop below would not advance -- confirm in the project header.
    if (nrow > CVE_MEM_CHUNK_SIZE) {
        block_size = CVE_MEM_CHUNK_SIZE;
    } else {
        block_size = nrow;
    }

    // Iterate `(block_size_i, ncol)` submatrix blocks.
    for (i = 0; i < nrow; i += block_size_i) {
        // Take block size of everything left and reduce to max size.
        block_size_i = nrow - i;
        if (block_size_i > block_size) {
            block_size_i = block_size;
        }

        // Start of the current block in the first column and in the output.
        A_col = A + i;
        sum_block = sum + i;

        // The first column of the block initializes the square sums,
        for (j = 0; j < block_size_i; ++j) {
            sum_block[j] = A_col[j] * A_col[j];
        }
        // and the squares of the remaining columns are added on top. Columns
        // are counted by index so that `A_col` never moves past the end of
        // `A`.
        for (col = 1; col < ncol; ++col) {
            A_col += nrow;
            for (j = 0; j < block_size_i; ++j) {
                sum_block[j] += A_col[j] * A_col[j];
            }
        }
    }
}
/**
 * Computes the row sums of a symmetric `nrow x nrow` matrix with constant
 * diagonal, given by its off-diagonal elements in packed form.
 *
 * `Avec` is consumed sequentially for the index pairs `(i, j)` with
 * `j < i < nrow`, where `j` is the outer and `i` the inner running index.
 * Every element is added to both `sum[j]` and `sum[i]`.
 *
 * @param Avec Pointer to the packed off-diagonal elements, of which
 *  `nrow * (nrow - 1) / 2` are read.
 * @param nrow Number of rows (and columns) of the symmetric matrix.
 * @param diag Value every row sum is initialized with (the diagonal value).
 * @param sum Pointer to output row sums of size `nrow`.
 */
void rowSumsSymVec(const double *Avec, const int nrow,
                   const double diag,
                   double *sum) {
    int row, col;

    // Initialize all sums with the diagonal contribution.
    if (diag != 0.0) {
        for (row = 0; row < nrow; ++row) {
            sum[row] = diag;
        }
    } else {
        memset(sum, 0, nrow * sizeof(double));
    }

    // Each packed element belongs to position `(row, col)` as well as to
    // its mirrored position `(col, row)`.
    for (col = 0; col < nrow; ++col) {
        for (row = col + 1; row < nrow; ++row) {
            sum[col] += *Avec;
            sum[row] += *Avec;
            ++Avec;
        }
    }
}
/**
 * Computes all pairwise differences between the rows of a matrix `X`,
 * separately for every column.
 *
 * For each column `l` (in order) and each row pair `(i, j)` with
 * `i < j < nrow` (`i` outer, `j` inner index) the value `X[i, l] - X[j, l]`
 * is appended to `diffs`.
 *
 * @param X Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `X`.
 * @param ncol Number of columns of `X`.
 * @param diffs Pointer to the output, `ncol * nrow * (nrow - 1) / 2`
 *  elements are written.
 */
void rowDiffs(const double *X, const int nrow, const int ncol,
              double *diffs) {
    const double *col = X;  // Start of the current column.
    double *out = diffs;    // Next output position.
    int c, a, b;

    for (c = 0; c < ncol; ++c, col += nrow) {
        for (a = 0; a < nrow; ++a) {
            for (b = a + 1; b < nrow; ++b) {
                *out = col[a] - col[b];
                ++out;
            }
        }
    }
}
/**
 * Computes for every pair of rows of `X` the sum of squared element
 * differences over all columns.
 *
 * The row pairs `(i, j)` with `i < j < nrow` are enumerated with `i` as the
 * outer and `j` as the inner index; for the `k`-th pair
 * `sum[k] = (X[i, 0] - X[j, 0])^2 + ... + (X[i, ncol - 1] - X[j, ncol - 1])^2`.
 *
 * @param X Pointer to col-major matrix elements, size is `nrow * ncol`.
 * @param nrow Number of rows of `X`.
 * @param ncol Number of columns of `X`.
 * @param sum Pointer to the output of size `nrow * (nrow - 1) / 2`. Nothing
 *  is written if `nrow < 2`; all entries are zero if `ncol <= 0`.
 */
void rowDiffSquareSums(const double *X, const int nrow, const int ncol,
                       double *sum) {
    int i, j, l;
    size_t k, npairs;
    const double *Xcol = X;
    double tmp;

    // Fewer than two rows means there is no pair and nothing to write.
    if (nrow < 2) {
        return;
    }

    // The pair count is computed in `size_t`: as an `int` product
    // `nrow * (nrow - 1)` overflows for `nrow > 46341` (32 bit `int`) even
    // though the halved result would still be representable.
    npairs = ((size_t)nrow * (size_t)(nrow - 1)) / 2;
    memset(sum, 0, npairs * sizeof(double));

    // Accumulate the squared differences column by column. The column
    // pointer is advanced instead of computing `l * nrow` in `int`.
    for (l = 0; l < ncol; ++l, Xcol += nrow) {
        k = 0;
        for (i = 0; i < nrow; ++i) {
            for (j = i + 1; j < nrow; ++j, ++k) {
                tmp = Xcol[i] - Xcol[j];
                sum[k] += tmp * tmp;
            }
        }
    }
}