2019-09-12 16:42:28 +00:00
|
|
|
#include <stdlib.h>
|
2019-09-16 09:15:51 +00:00
|
|
|
#include <string.h> // for `mem*` functions.
|
2019-09-12 16:42:28 +00:00
|
|
|
|
|
|
|
#include <R_ext/BLAS.h>
|
|
|
|
#include <R_ext/Lapack.h>
|
|
|
|
#include <R_ext/Error.h>
|
|
|
|
// #include <Rmath.h>
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
#include "benchmark.h"
|
2019-09-12 16:42:28 +00:00
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void rowSums(const double *A,
|
|
|
|
const int nrow, const int ncol,
|
|
|
|
double *sum) {
|
|
|
|
int i, j, block_size, block_size_i;
|
|
|
|
const double *A_block = A;
|
|
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
|
|
|
|
if (nrow > 508) {
|
|
|
|
block_size = 508;
|
|
|
|
} else {
|
|
|
|
block_size = nrow;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
|
|
|
for (i = 0; i < nrow; i += block_size_i) {
|
|
|
|
// Reset `A` to new block beginning.
|
|
|
|
A = A_block;
|
|
|
|
// Take block size of eveything left and reduce to max size.
|
|
|
|
block_size_i = nrow - i;
|
|
|
|
if (block_size_i > block_size) {
|
|
|
|
block_size_i = block_size;
|
|
|
|
}
|
|
|
|
// Copy blocks first column.
|
|
|
|
for (j = 0; j < block_size_i; j += 4) {
|
|
|
|
sum[j] = A[j];
|
|
|
|
sum[j + 1] = A[j + 1];
|
|
|
|
sum[j + 2] = A[j + 2];
|
|
|
|
sum[j + 3] = A[j + 3];
|
|
|
|
}
|
|
|
|
for (; j < block_size_i; ++j) {
|
|
|
|
sum[j] = A[j];
|
|
|
|
}
|
|
|
|
// Sum following columns to the first one.
|
|
|
|
for (A += nrow; A < A_end; A += nrow) {
|
|
|
|
for (j = 0; j < block_size_i; j += 4) {
|
|
|
|
sum[j] += A[j];
|
|
|
|
sum[j + 1] += A[j + 1];
|
|
|
|
sum[j + 2] += A[j + 2];
|
|
|
|
sum[j + 3] += A[j + 3];
|
|
|
|
}
|
|
|
|
for (; j < block_size_i; ++j) {
|
|
|
|
sum[j] += A[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Step one block forth.
|
|
|
|
A_block += block_size_i;
|
|
|
|
sum += block_size_i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void rowSumsV2(const double *A,
|
|
|
|
const int nrow, const int ncol,
|
|
|
|
double *sum) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j, block_size, block_size_i;
|
|
|
|
const double *A_block = A;
|
|
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
|
|
|
|
if (nrow > CVE_MEM_CHUNK_SIZE) {
|
|
|
|
block_size = CVE_MEM_CHUNK_SIZE;
|
|
|
|
} else {
|
|
|
|
block_size = nrow;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
|
|
|
for (i = 0; i < nrow; i += block_size_i) {
|
|
|
|
// Reset `A` to new block beginning.
|
|
|
|
A = A_block;
|
|
|
|
// Take block size of eveything left and reduce to max size.
|
|
|
|
block_size_i = nrow - i;
|
|
|
|
if (block_size_i > block_size) {
|
|
|
|
block_size_i = block_size;
|
|
|
|
}
|
|
|
|
// Compute first blocks column,
|
|
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
|
|
sum[j] = A[j];
|
|
|
|
}
|
|
|
|
// and sum the following columns to the first one.
|
|
|
|
for (A += nrow; A < A_end; A += nrow) {
|
|
|
|
for (j = 0; j < block_size_i; ++j) {
|
|
|
|
sum[j] += A[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Step one block forth.
|
|
|
|
A_block += block_size_i;
|
|
|
|
sum += block_size_i;
|
|
|
|
}
|
|
|
|
}
|
2019-10-18 07:06:36 +00:00
|
|
|
void rowSumsV3(const double *A,
|
|
|
|
const int nrow, const int ncol,
|
|
|
|
double *sum) {
|
|
|
|
int i, onei = 1;
|
|
|
|
double* ones = (double*)malloc(ncol * sizeof(double));
|
|
|
|
const double one = 1.0;
|
|
|
|
const double zero = 0.0;
|
2019-09-12 16:42:28 +00:00
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
for (i = 0; i < ncol; ++i) {
|
|
|
|
ones[i] = 1.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
matrixprod(A, nrow, ncol, ones, ncol, 1, sum);
|
|
|
|
free(ones);
|
|
|
|
}
|
2019-09-12 16:42:28 +00:00
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void colSums(const double *A, const int nrow, const int ncol,
|
|
|
|
double *sums) {
|
|
|
|
int i, j, nrowb = 4 * (nrow / 4); // 4 dividable nrow block, biggest 4*k <= nrow.
|
|
|
|
double sum;
|
|
|
|
|
|
|
|
for (j = 0; j < ncol; ++j) {
|
|
|
|
sum = 0.0;
|
|
|
|
for (i = 0; i < nrowb; i += 4) {
|
|
|
|
sum += A[i]
|
|
|
|
+ A[i + 1]
|
|
|
|
+ A[i + 2]
|
|
|
|
+ A[i + 3];
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
2019-10-18 07:06:36 +00:00
|
|
|
for (; i < nrow; ++i) {
|
|
|
|
sum += A[i];
|
|
|
|
}
|
|
|
|
*(sums++) = sum;
|
2019-09-12 16:42:28 +00:00
|
|
|
A += nrow;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void rowSquareSums(const double *A,
|
|
|
|
const int nrow, const int ncol,
|
|
|
|
double *sum) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j, block_size, block_size_i;
|
|
|
|
const double *A_block = A;
|
|
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
if (nrow > 508) {
|
|
|
|
block_size = 508;
|
2019-09-12 16:42:28 +00:00
|
|
|
} else {
|
2019-10-18 07:06:36 +00:00
|
|
|
block_size = nrow;
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Iterate `(block_size_i, ncol)` submatrix blocks.
|
|
|
|
for (i = 0; i < nrow; i += block_size_i) {
|
|
|
|
// Reset `A` to new block beginning.
|
|
|
|
A = A_block;
|
|
|
|
// Take block size of eveything left and reduce to max size.
|
|
|
|
block_size_i = nrow - i;
|
2019-10-18 07:06:36 +00:00
|
|
|
if (block_size_i > block_size) { // TODO: contains BUG!!! floor last one !!!
|
2019-09-12 16:42:28 +00:00
|
|
|
block_size_i = block_size;
|
2019-10-18 07:06:36 +00:00
|
|
|
} /// ...
|
|
|
|
// TODO:
|
|
|
|
// Copy blocks first column.
|
|
|
|
for (j = 0; j < block_size_i; j += 4) {
|
|
|
|
sum[j] = A[j] * A[j];
|
|
|
|
sum[j + 1] = A[j + 1] * A[j + 1];
|
|
|
|
sum[j + 2] = A[j + 2] * A[j + 2];
|
|
|
|
sum[j + 3] = A[j + 3] * A[j + 3];
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
2019-10-18 07:06:36 +00:00
|
|
|
for (; j < block_size_i; ++j) {
|
2019-09-12 16:42:28 +00:00
|
|
|
sum[j] = A[j] * A[j];
|
|
|
|
}
|
2019-10-18 07:06:36 +00:00
|
|
|
// Sum following columns to the first one.
|
2019-09-12 16:42:28 +00:00
|
|
|
for (A += nrow; A < A_end; A += nrow) {
|
2019-10-18 07:06:36 +00:00
|
|
|
for (j = 0; j < block_size_i; j += 4) {
|
|
|
|
sum[j] += A[j] * A[j];
|
|
|
|
sum[j + 1] += A[j + 1] * A[j + 1];
|
|
|
|
sum[j + 2] += A[j + 2] * A[j + 2];
|
|
|
|
sum[j + 3] += A[j + 3] * A[j + 3];
|
|
|
|
}
|
|
|
|
for (; j < block_size_i; ++j) {
|
2019-09-12 16:42:28 +00:00
|
|
|
sum[j] += A[j] * A[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Step one block forth.
|
|
|
|
A_block += block_size_i;
|
|
|
|
sum += block_size_i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void rowSumsSymVec(const double *Avec, const int nrow,
|
|
|
|
const double diag,
|
|
|
|
double *sum) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j;
|
|
|
|
|
2019-09-16 09:15:51 +00:00
|
|
|
if (diag == 0.0) {
|
2019-09-12 16:42:28 +00:00
|
|
|
memset(sum, 0, nrow * sizeof(double));
|
|
|
|
} else {
|
|
|
|
for (i = 0; i < nrow; ++i) {
|
2019-09-16 09:15:51 +00:00
|
|
|
sum[i] = diag;
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (j = 0; j < nrow; ++j) {
|
|
|
|
for (i = j + 1; i < nrow; ++i, ++Avec) {
|
|
|
|
sum[j] += *Avec;
|
|
|
|
sum[i] += *Avec;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-20 18:03:21 +00:00
|
|
|
#define ROW_SWEEP_ALG(op) \
|
|
|
|
/* Iterate `(block_size_i, ncol)` submatrix blocks. */ \
|
|
|
|
for (i = 0; i < nrow; i += block_size_i) { \
|
|
|
|
/* Set `A` and `C` to block beginning. */ \
|
|
|
|
A = A_block; \
|
|
|
|
C = C_block; \
|
|
|
|
/* Get current block's row size. */ \
|
|
|
|
block_size_i = nrow - i; \
|
|
|
|
if (block_size_i > block_size) { \
|
|
|
|
block_size_i = block_size; \
|
|
|
|
} \
|
|
|
|
/* Perform element wise operation for block. */ \
|
|
|
|
for (; A < A_end; A += nrow, C += nrow) { \
|
|
|
|
for (j = 0; j < block_size_i; ++j) { \
|
|
|
|
C[j] = (A[j]) op (v[j]); \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
/* Step one block forth. */ \
|
|
|
|
A_block += block_size_i; \
|
|
|
|
C_block += block_size_i; \
|
|
|
|
v += block_size_i; \
|
|
|
|
}
|
|
|
|
|
2019-09-12 16:42:28 +00:00
|
|
|
/* C[, j] = A[, j] * v for each j = 1 to ncol */
|
2019-10-18 07:06:36 +00:00
|
|
|
void rowSweep(const double *A, const int nrow, const int ncol,
|
|
|
|
const char* op,
|
|
|
|
const double *v, // vector of length nrow
|
|
|
|
double *C) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j, block_size, block_size_i;
|
|
|
|
const double *A_block = A;
|
|
|
|
double *C_block = C;
|
|
|
|
const double *A_end = A + nrow * ncol;
|
|
|
|
|
|
|
|
if (nrow > CVE_MEM_CHUNK_SMALL) { // small because 3 vectors in cache
|
|
|
|
block_size = CVE_MEM_CHUNK_SMALL;
|
|
|
|
} else {
|
|
|
|
block_size = nrow;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*op == '+') {
|
2019-11-20 18:03:21 +00:00
|
|
|
ROW_SWEEP_ALG(+)
|
2019-09-12 16:42:28 +00:00
|
|
|
} else if (*op == '-') {
|
2019-11-20 18:03:21 +00:00
|
|
|
ROW_SWEEP_ALG(-)
|
2019-09-12 16:42:28 +00:00
|
|
|
} else if (*op == '*') {
|
2019-11-20 18:03:21 +00:00
|
|
|
ROW_SWEEP_ALG(*)
|
2019-09-12 16:42:28 +00:00
|
|
|
} else if (*op == '/') {
|
2019-11-20 18:03:21 +00:00
|
|
|
ROW_SWEEP_ALG(/)
|
2019-09-12 16:42:28 +00:00
|
|
|
} else {
|
|
|
|
error("Got unknown 'op' (opperation) argument.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-16 09:15:51 +00:00
|
|
|
void transpose(const double *A, const int nrow, const int ncol, double* T) {
|
|
|
|
int i, j, len = nrow * ncol;
|
|
|
|
|
|
|
|
// Filling column-wise and accessing row-wise.
|
|
|
|
for (i = 0, j = 0; i < len; ++i, j += nrow) {
|
|
|
|
if (j >= len) {
|
|
|
|
j -= len - 1;
|
|
|
|
}
|
|
|
|
T[i] = A[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
// Symmetric Packed matrix vector product.
|
|
|
|
// Computes
|
|
|
|
// y <- Ax
|
|
|
|
// where A is supplied as packed lower triangular part of a symmetric
|
|
|
|
// matrix A. Meaning that `vecA` is `vec_ltri(A)`.
|
|
|
|
void sympMV(const double* vecA, const int nrow, const double* x, double* y) {
|
|
|
|
double one = 1.0;
|
|
|
|
double zero = 0.0;
|
|
|
|
int onei = 1;
|
|
|
|
|
|
|
|
F77_NAME(dspmv)("L", &nrow, &one, vecA, x, &onei, &zero, y, &onei);
|
|
|
|
}
|
|
|
|
|
|
|
|
void matrixprod(const double *A, const int nrowA, const int ncolA,
|
|
|
|
const double *B, const int nrowB, const int ncolB,
|
|
|
|
double *C) {
|
2019-09-12 16:42:28 +00:00
|
|
|
const double one = 1.0;
|
|
|
|
const double zero = 0.0;
|
|
|
|
|
|
|
|
// DGEMM with parameterization:
|
|
|
|
// C <- A %*% B
|
|
|
|
F77_NAME(dgemm)("N", "N", &nrowA, &ncolB, &ncolA,
|
|
|
|
&one, A, &nrowA, B, &nrowB,
|
|
|
|
&zero, C, &nrowA);
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void crossprod(const double *A, const int nrowA, const int ncolA,
|
|
|
|
const double *B, const int nrowB, const int ncolB,
|
|
|
|
double *C) {
|
2019-09-12 16:42:28 +00:00
|
|
|
const double one = 1.0;
|
|
|
|
const double zero = 0.0;
|
|
|
|
|
|
|
|
// DGEMM with parameterization:
|
|
|
|
// C <- t(A) %*% B
|
|
|
|
F77_NAME(dgemm)("T", "N", &ncolA, &ncolB, &nrowA,
|
|
|
|
&one, A, &nrowA, B, &nrowB,
|
|
|
|
&zero, C, &ncolA);
|
|
|
|
}
|
|
|
|
|
2019-11-20 18:03:21 +00:00
|
|
|
#define KRONECKER_ALG(op) \
|
|
|
|
for (j = 0; j < ncolA; ++j) { \
|
|
|
|
for (l = 0; l < ncolB; ++l) { \
|
|
|
|
colB = B + (l * nrowB); \
|
|
|
|
for (i = 0; i < nrowA; ++i) { \
|
|
|
|
for (k = 0; k < nrowB; ++k) { \
|
|
|
|
*(C++) = (A[i]) op (colB[k]); \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
A += nrowA; \
|
|
|
|
}
|
|
|
|
|
|
|
|
void kronecker(const double * restrict A, const int nrowA, const int ncolA,
|
|
|
|
const double * restrict B, const int nrowB, const int ncolB,
|
|
|
|
const char* op,
|
|
|
|
double * restrict C) {
|
|
|
|
int i, j, k, l;
|
|
|
|
const double *colB;
|
|
|
|
|
|
|
|
if (*op == '+') {
|
|
|
|
KRONECKER_ALG(+)
|
|
|
|
} else if (*op == '-') {
|
|
|
|
KRONECKER_ALG(-)
|
|
|
|
} else if (*op == '*') {
|
|
|
|
KRONECKER_ALG(*)
|
|
|
|
} else if (*op == '/') {
|
|
|
|
KRONECKER_ALG(/)
|
|
|
|
} else {
|
|
|
|
error("Got unknown 'op' (opperation) argument.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void nullProj(const double *B, const int nrowB, const int ncolB,
|
|
|
|
double *Q) {
|
2019-09-12 16:42:28 +00:00
|
|
|
const double minusOne = -1.0;
|
|
|
|
const double one = 1.0;
|
|
|
|
|
2019-11-20 18:03:21 +00:00
|
|
|
// Initialize Q as identity matrix.
|
2019-09-12 16:42:28 +00:00
|
|
|
memset(Q, 0, sizeof(double) * nrowB * nrowB);
|
|
|
|
double *Q_diag, *Q_end = Q + nrowB * nrowB;
|
|
|
|
for (Q_diag = Q; Q_diag < Q_end; Q_diag += nrowB + 1) {
|
|
|
|
*Q_diag = 1.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// DGEMM with parameterization:
|
2019-11-20 18:03:21 +00:00
|
|
|
// Q <- (-1.0 * B %*% t(B)) + Q
|
2019-09-12 16:42:28 +00:00
|
|
|
F77_NAME(dgemm)("N", "T", &nrowB, &nrowB, &ncolB,
|
|
|
|
&minusOne, B, &nrowB, B, &nrowB,
|
|
|
|
&one, Q, &nrowB);
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void rangePairs(const int from, const int to, int *pairs) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j;
|
|
|
|
for (i = from; i < to; ++i) {
|
|
|
|
for (j = i + 1; j < to; ++j) {
|
|
|
|
pairs[0] = i;
|
|
|
|
pairs[1] = j;
|
|
|
|
pairs += 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// A dence skwe-symmetric rank 2 update.
|
|
|
|
// Perform the update
|
|
|
|
// C := alpha (A * B^T - B * A^T) + beta C
|
2019-10-18 07:06:36 +00:00
|
|
|
void skewSymRank2k(const int nrow, const int ncol,
|
|
|
|
double alpha, const double *A, const double *B,
|
|
|
|
double beta,
|
|
|
|
double *C) {
|
2019-09-12 16:42:28 +00:00
|
|
|
F77_NAME(dgemm)("N", "T",
|
2019-10-18 07:06:36 +00:00
|
|
|
&nrow, &nrow, &ncol,
|
|
|
|
&alpha, A, &nrow, B, &nrow,
|
|
|
|
&beta, C, &nrow);
|
2019-09-12 16:42:28 +00:00
|
|
|
alpha *= -1.0;
|
|
|
|
beta = 1.0;
|
|
|
|
F77_NAME(dgemm)("N", "T",
|
2019-10-18 07:06:36 +00:00
|
|
|
&nrow, &nrow, &ncol,
|
|
|
|
&alpha, B, &nrow, A, &nrow,
|
|
|
|
&beta, C, &nrow);
|
|
|
|
}
|
|
|
|
// TODO: clarify
|
|
|
|
static inline double gaussKernel(const double x, const double scale) {
|
|
|
|
return exp(scale * x * x);
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: mutch potential for optimization!!!
|
2019-10-18 07:06:36 +00:00
|
|
|
static void weightedYandLoss(const int n,
|
|
|
|
const double *Y,
|
|
|
|
const double *vecD,
|
|
|
|
const double *vecW,
|
|
|
|
const double *colSums,
|
|
|
|
double *y1, double *L, double *vecS,
|
|
|
|
double *loss) {
|
2019-09-12 16:42:28 +00:00
|
|
|
int i, j, k, N = n * (n - 1) / 2;
|
|
|
|
double l;
|
|
|
|
|
|
|
|
for (i = 0; i < n; ++i) {
|
2019-10-18 07:06:36 +00:00
|
|
|
y1[i] = Y[i];
|
|
|
|
L[i] = Y[i] * Y[i];
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (k = j = 0; j < n; ++j) {
|
|
|
|
for (i = j + 1; i < n; ++i, ++k) {
|
2019-10-18 07:06:36 +00:00
|
|
|
y1[i] += Y[j] * vecW[k];
|
|
|
|
y1[j] += Y[i] * vecW[k];
|
|
|
|
L[i] += Y[j] * Y[j] * vecW[k];
|
|
|
|
L[j] += Y[i] * Y[i] * vecW[k];
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
for (i = 0; i < n; ++i) {
|
|
|
|
y1[i] /= colSums[i];
|
|
|
|
L[i] /= colSums[i];
|
|
|
|
}
|
|
|
|
|
2019-09-12 16:42:28 +00:00
|
|
|
l = 0.0;
|
|
|
|
for (i = 0; i < n; ++i) {
|
|
|
|
l += (L[i] -= y1[i] * y1[i]);
|
|
|
|
}
|
|
|
|
*loss = l / (double)n;
|
|
|
|
|
|
|
|
for (k = j = 0; j < n; ++j) {
|
|
|
|
for (i = j + 1; i < n; ++i, ++k) {
|
|
|
|
l = Y[j] - y1[i];
|
|
|
|
vecS[k] = (L[i] - (l * l)) / colSums[i];
|
|
|
|
l = Y[i] - y1[j];
|
|
|
|
vecS[k] += (L[j] - (l * l)) / colSums[j];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (k = 0; k < N; ++k) {
|
|
|
|
vecS[k] *= vecW[k] * vecD[k];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-18 07:06:36 +00:00
|
|
|
void grad(const int n, const int p, const int q,
|
|
|
|
const double *X,
|
|
|
|
const double *X_diff,
|
|
|
|
const double *Y,
|
|
|
|
const double *V,
|
|
|
|
const double h,
|
|
|
|
double *G, double *loss) {
|
2019-09-12 16:42:28 +00:00
|
|
|
// Number of X_i to X_j not trivial pairs.
|
|
|
|
int i, N = (n * (n - 1)) / 2;
|
|
|
|
double scale = -0.5 / h;
|
|
|
|
|
|
|
|
if (X_diff == (void*)0) {
|
|
|
|
// TODO: ...
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocate and compute projection matrix `Q = I_p - V * V^T`
|
|
|
|
double *Q = (double*)malloc(p * p * sizeof(double));
|
|
|
|
nullProj(V, p, q, Q);
|
|
|
|
|
|
|
|
// allocate and compute vectorized distance matrix with a temporary
|
|
|
|
// projection of `X_diff`.
|
|
|
|
double *vecD = (double*)malloc(N * sizeof(double));
|
|
|
|
double *X_proj;
|
|
|
|
if (p < 5) { // TODO: refine that!
|
|
|
|
X_proj = (double*)malloc(N * 5 * sizeof(double));
|
|
|
|
} else {
|
|
|
|
X_proj = (double*)malloc(N * p * sizeof(double));
|
|
|
|
}
|
|
|
|
matrixprod(X_diff, N, p, Q, p, p, X_proj);
|
|
|
|
rowSquareSums(X_proj, N, p, vecD);
|
|
|
|
|
|
|
|
// Apply kernel to distence vector for weights computation.
|
2019-10-18 07:06:36 +00:00
|
|
|
double *vecK = X_proj; // reuse memory area, no longer needed.
|
2019-09-12 16:42:28 +00:00
|
|
|
for (i = 0; i < N; ++i) {
|
2019-10-18 07:06:36 +00:00
|
|
|
vecK[i] = gaussKernel(vecD[i], scale);
|
2019-09-12 16:42:28 +00:00
|
|
|
}
|
|
|
|
double *colSums = X_proj + N; // still allocated!
|
2019-10-18 07:06:36 +00:00
|
|
|
rowSumsSymVec(vecK, n, 1.0, colSums); // rowSums = colSums cause Sym
|
2019-09-12 16:42:28 +00:00
|
|
|
|
|
|
|
// compute weighted responces of first end second momontum, aka y1, y2.
|
|
|
|
double *y1 = X_proj + N + n;
|
|
|
|
double *L = X_proj + N + (2 * n);
|
|
|
|
// Allocate X_diff scaling vector `vecS`, not in `X_proj` mem area because
|
|
|
|
// used symultanious to X_proj in final gradient computation.
|
|
|
|
double *vecS = (double*)malloc(N * sizeof(double));
|
2019-10-18 07:06:36 +00:00
|
|
|
weightedYandLoss(n, Y, vecD, vecK, colSums, y1, L, vecS, loss);
|
2019-09-12 16:42:28 +00:00
|
|
|
|
|
|
|
// compute the gradient using X_proj for intermidiate scaled X_diff.
|
|
|
|
rowSweep(X_diff, N, p, "*", vecS, X_proj);
|
|
|
|
// reuse Q which has the required dim (p, p).
|
|
|
|
crossprod(X_diff, N, p, X_proj, N, p, Q);
|
|
|
|
// Product with V
|
|
|
|
matrixprod(Q, p, p, V, p, q, G);
|
|
|
|
// And final scaling (TODO: move into matrixprod!)
|
|
|
|
scale = -2.0 / (((double)n) * h * h);
|
|
|
|
N = p * q;
|
|
|
|
for (i = 0; i < N; ++i) {
|
|
|
|
G[i] *= scale;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(vecS);
|
|
|
|
free(X_proj);
|
|
|
|
free(vecD);
|
|
|
|
free(Q);
|
|
|
|
}
|