109 lines
3.8 KiB
C
109 lines
3.8 KiB
C
#include "cve.h"
|
|
|
|
/*
 * Blocked element-wise kernel shared by every operator of `rowSweep`.
 *
 * Expands inside `rowSweep` and uses its `A`, `C`, `v`, `nrow`, `ncol` and
 * `block_size` directly. Rows are processed in chunks of at most `block_size`
 * so that the chunk of `v` is reused for every column before moving on.
 *
 * The column pointers are advanced only *between* columns (never after the
 * last one), so no pointer further than the end of `A`/`C` is ever formed and
 * no `nrow * ncol` product (which could overflow `int`) is needed.
 */
#define ROW_SWEEP_BLOCKED(OP)                                               \
    do {                                                                    \
        int rows;                                                           \
        for (int row0 = 0; row0 < nrow; row0 += rows) {                     \
            const double *a = A + row0;                                     \
            double *c = C + row0;                                           \
            const double *v_blk = v + row0;                                 \
            rows = nrow - row0;                                             \
            if (rows > block_size) {                                        \
                rows = block_size;                                          \
            }                                                               \
            for (int col = 0; col < ncol; ++col) {                          \
                for (int r = 0; r < rows; ++r) {                            \
                    c[r] = a[r] OP v_blk[r];                                \
                }                                                           \
                if (col + 1 < ncol) {                                       \
                    a += nrow;                                              \
                    c += nrow;                                              \
                }                                                           \
            }                                                               \
        }                                                                   \
    } while (0)

/**
 * Sweeps the vector `v` over the columns of `A`:
 *
 *     C[i, j] = A[i, j] <op> v[i]    for i = 0..nrow-1, j = 0..ncol-1
 *
 * Element (i, j) of `A` and `C` is addressed as `i + j * nrow`, i.e. each
 * column is a contiguous run of `nrow` doubles.
 *
 * @param A    input matrix holding `nrow * ncol` doubles.
 * @param nrow number of rows of `A` and `C` (and length of `v`).
 * @param ncol number of columns of `A` and `C`.
 * @param op   operator selector; only the first character is inspected and
 *             must be one of '+', '-', '*' or '/'. For any other character
 *             nothing is written to `C`.
 * @param v    vector of length `nrow`.
 * @param C    output matrix with the same layout as `A`. Each element is
 *             read from `A` before the same position of `C` is written, so
 *             passing `C == A` computes the result in place.
 */
void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C) {
    // Nothing to compute for an empty matrix.
    if (nrow <= 0 || ncol <= 0) {
        return;
    }

    // Small chunk size because 3 vectors (A, v, C blocks) share the cache.
    const int block_size = (nrow > CVE_MEM_CHUNK_SMALL) ? CVE_MEM_CHUNK_SMALL
                                                        : nrow;

    // Dispatch once, outside the hot loops.
    switch (*op) {
    case '+':
        ROW_SWEEP_BLOCKED(+);
        break;
    case '-':
        ROW_SWEEP_BLOCKED(-);
        break;
    case '*':
        ROW_SWEEP_BLOCKED(*);
        break;
    case '/':
        ROW_SWEEP_BLOCKED(/);
        break;
    default:
        // Unknown operator: leave `C` untouched.
        break;
    }
}

#undef ROW_SWEEP_BLOCKED
|