2
0
Fork 0
CVE/CVarE/src/cve_subroutines.c

174 lines
5.8 KiB
C

#include "cve.h"
/**
* Applies the requested kernel element-wise.
*
* @param A matrix to apply the kernel to.
* @param h bandwidth parameter.
* @param kernel the kernel to be used.
* @param B (in/out) matrix `A` with element-wise applied kernel.
*
* @returns ether the passed `B`, or a new created matrix if `B` was NULL.
*/
mat* applyKernel(const mat* A, const double h, kernel kernel, mat* B) {
int i, nn = A->nrow * A->ncol;
int nn4 = 4 * (nn / 4);
double scale;
const double * restrict a;
double * restrict b;
if (!B) {
B = matrix(A->nrow, A->ncol);
} else if (nn != B->nrow * B->ncol) {
// TODO: error handling!
}
a = A->elem;
b = B->elem;
switch (kernel) {
case gauss:
scale = -0.5 / (h * h);
for (i = 0; i < nn4; i += 4) {
b[i] = exp(scale * a[i] * a[i]);
b[i + 1] = exp(scale * a[i + 1] * a[i + 1]);
b[i + 2] = exp(scale * a[i + 2] * a[i + 2]);
b[i + 3] = exp(scale * a[i + 3] * a[i + 3]);
}
for (; i < nn; ++i) {
b[i] = exp(scale * a[i] * a[i]);
}
break;
default:
// TODO: error handling!
break;
}
return B;
}
/**
* Scaling matrix `S` defined as
* s_{i j} = (L_i - (Y_j - y1_i)^2) * d_{i j} * w_{i j}
*
* Mapping of vectors `L`, `y1` and `Y` combinations.
*
* Y[j]
* ------ j ----->
* s[0] s[n] . . . s[jn] . . . s[(j-1)n]
* s[1] s[n+1] . . . s[jn+1] . . . s[(j-1)n+1]
* | . . . . . .
* | . . . . . .
* . . . . . .
* L[i], y1[i] i s[i] s[n+i] . . . s[jn+i] . . . s[(j-1)n+i]
* . . . . . .
* | . . . . . .
* v . . . . . .
* s[n-1] s[2n-1] . . . s[n-1] . . . s[nn-1]
*
* @param L per sample loss vector of (length `n`).
* @param Y responses (length `n`).
* @param y1 weighted responses (length `n`).
* @param D distance matrix (dim. `n x n`).
* @param W weight matrix (dim. `n x n`).
* @param kernel the kernel to be used.
*
* @returns passed matrix `S` if not NULL, then a new `n x n` matrix is created.
*
* @example Basically equivalent to the following R function.
* r_LS <- function(L, Y, y1, D, W) {
* # get dimension
* n <- length(L)
* # Indices
* i <- rep(1:n, n)
* j <- rep(1:n, each = n)
* # Compute S
* matrix(L[i] - (Y[j] - y1[i])^2, n) * W * D
* }
*
* @details mapping for indices used in blocked implementation.
*
* n ..... Dimensions of `S`, a `n x n` matrix.
* B ..... BLOCK_SIZE
* rB .... block reminder, aka rB = B % n.
*
* Y[j]
* 0 ----- j -----> n
* 0 +--------------------+
* | ' |
* ' | k B x n |
* ' | v |
* ' +--------------------+
* L[i], y1[i] i | ' |
* ' | k B x n |
* ' | v |
* v +--------------------+
* | k rB x n |
* n +--------------------+
*/
// TODO: fix: cache misses in Y?!
mat* adjacence(const mat *mat_L, const mat *mat_Fy, const mat *mat_y1,
const mat *mat_D, const mat *mat_W, kernel kernel,
mat *mat_S) {
int i, j, k, l, n = mat_L->nrow, m = mat_L->ncol;
int block_size, block_batch_size;
int max_size = 64 < n ? 64 : n; // Block Size set to 64
double Y_j, t0, t1, t2, t3; // internal temp. values.
double *Y, *L, *y1;
double *D, *W, *S;
if (!mat_S) {
mat_S = zero(n, n);
} else {
memset(mat_S->elem, 0, n * n * sizeof(double));
}
for (l = 0; l < m; ++l) {
Y = mat_Fy->elem + l * n;
L = mat_L->elem + l * n;
y1 = mat_y1->elem + l * n;
for (i = 0; i < n; i += block_size) {
/* Set blocks (left upper corner) */
S = mat_S->elem + i;
D = mat_D->elem + i;
W = mat_W->elem + i;
/* determine block size */
block_size = n - i;
if (block_size > max_size) {
block_size = max_size;
}
block_batch_size = 4 * (block_size / 4);
/* for each column in the block */
for (j = 0; j < n; ++j, S += n, D += n, W += n) {
Y_j = Y[j];
/* iterate over block rows */
for (k = 0; k < block_batch_size; k += 4) {
t0 = Y_j - y1[k];
t1 = Y_j - y1[k + 1];
t2 = Y_j - y1[k + 2];
t3 = Y_j - y1[k + 3];
S[k] += (L[k] - (t0 * t0)) * D[k] * W[k];
S[k + 1] += (L[k + 1] - (t1 * t1)) * D[k + 1] * W[k + 1];
S[k + 2] += (L[k + 2] - (t2 * t2)) * D[k + 2] * W[k + 2];
S[k + 3] += (L[k + 3] - (t3 * t3)) * D[k + 3] * W[k + 3];
}
for (; k < block_size; ++k) {
t0 = Y_j - y1[k];
S[k] += (L[k] - (t0 * t0)) * D[k] * W[k];
}
}
L += block_size;
y1 += block_size;
}
}
if (m > 1) {
S = mat_S->elem;
for (i = 0; i < n * n; ++i) {
S[i] /= m;
}
}
return mat_S;
}