#include "cve.h"

#include <stddef.h>

/**
 * Apply one element-wise operator to a contiguous run of `len` doubles:
 *
 *     dst[k] = src[k] <op> w[k]    for k = 0 .. len - 1
 *
 * `dst` may be the same array as `src` (each element is read before it is
 * written). An operator other than '+', '-', '*' or '/' leaves `dst`
 * untouched.
 */
static void rowSweepSegment(const double *src, const double *w, double *dst,
                            const size_t len, const char op) {
    size_t k;

    switch (op) {
    case '+':
        for (k = 0; k < len; ++k) {
            dst[k] = src[k] + w[k];
        }
        break;
    case '-':
        for (k = 0; k < len; ++k) {
            dst[k] = src[k] - w[k];
        }
        break;
    case '*':
        for (k = 0; k < len; ++k) {
            dst[k] = src[k] * w[k];
        }
        break;
    case '/':
        for (k = 0; k < len; ++k) {
            dst[k] = src[k] / w[k];
        }
        break;
    default:
        break;
    }
}

/**
 * Sweep the vector `v` over every column of the matrix `A`:
 *
 *     C[i, j] = A[i, j] <op> v[i]    for i = 0 .. nrow - 1, j = 0 .. ncol - 1
 *
 * `A` and `C` are stored column by column (column `j` starts at offset
 * `j * nrow`). The rows are processed in blocks of at most
 * `CVE_MEM_CHUNK_SMALL` rows; within a block all columns are visited before
 * moving on, so the same slice of `v` is reused for every column.
 *
 * @param A    input matrix, `nrow * ncol` doubles.
 * @param nrow number of rows of `A` and `C` (and length of `v`).
 * @param ncol number of columns of `A` and `C`.
 * @param op   pointer to the operator character: '+', '-', '*' or '/'.
 *             Any other character makes the call a no-op.
 * @param v    vector of length `nrow`.
 * @param C    output matrix, `nrow * ncol` doubles; may be the same array
 *             as `A`.
 *
 * Non-positive `nrow` or `ncol` results in no work being done.
 * NOTE(review): `CVE_MEM_CHUNK_SMALL` is assumed to be a positive integer
 * constant provided by "cve.h" -- confirm.
 */
void rowSweep(const double *A, const int nrow, const int ncol,
              const char* op,
              const double *v, // vector of length nrow
              double *C) {
    size_t rows, cols, max_block, row, col, len, offset;
    char fun;

    fun = *op;
    if (fun != '+' && fun != '-' && fun != '*' && fun != '/') {
        return; // Unsupported operator: nothing to compute.
    }
    if (nrow <= 0 || ncol <= 0) {
        return; // Empty matrix.
    }

    rows = (size_t)nrow;
    cols = (size_t)ncol;

    // Small block size because three vectors (A, v, C) share the cache.
    if (nrow > CVE_MEM_CHUNK_SMALL) {
        max_block = (size_t)CVE_MEM_CHUNK_SMALL;
    } else {
        max_block = rows;
    }

    // Iterate `(len, ncol)` sub-matrix blocks.
    for (row = 0; row < rows; row += len) {
        // Row count of the current block (the last one may be shorter).
        len = rows - row;
        if (len > max_block) {
            len = max_block;
        }

        // Offsets are computed in `size_t` from the untouched base pointers,
        // so no pointer is ever formed outside `A` / `C` and the index
        // arithmetic cannot overflow `int`.
        for (col = 0; col < cols; ++col) {
            offset = col * rows + row;
            rowSweepSegment(A + offset, v + row, C + offset, len, fun);
        }
    }
}