#include <stdlib.h>
#include <math.h>

#include "sums.h"
#include "sweep.h"
#include "matrix.h"
#include "indexing.h"

// Gaussian kernel: `scale` carries the (negative) bandwidth factor,
// i.e. this evaluates exp(scale * x^2).
static inline double gaussKernel(const double x, const double scale) {
    return exp(scale * x * x);
}

// TODO: much potential for optimization!
static void weightedYandLoss(const int n,
                             const double *Y,
                             const double *vecD,
                             const double *vecW,
                             const double *colSums,
                             double *y1, double *L, double *vecS,
                             double *loss) {
    int i, j, k, N = n * (n - 1) / 2;
    double l;

    for (i = 0; i < n; ++i) {
        y1[i] = Y[i] / colSums[i];
        L[i]  = Y[i] * Y[i] / colSums[i];
    }

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            y1[i] += Y[j] * vecW[k] / colSums[i];
            y1[j] += Y[i] * vecW[k] / colSums[j];
            L[i]  += Y[j] * Y[j] * vecW[k] / colSums[i];
            L[j]  += Y[i] * Y[i] * vecW[k] / colSums[j];
        }
    }

    l = 0.0;
    for (i = 0; i < n; ++i) {
        l += (L[i] -= y1[i] * y1[i]);
    }
    *loss = l / (double)n;

    for (k = j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            l = Y[j] - y1[i];
            vecS[k]  = (L[i] - (l * l)) / colSums[i];
            l = Y[i] - y1[j];
            vecS[k] += (L[j] - (l * l)) / colSums[j];
        }
    }

    for (k = 0; k < N; ++k) {
        vecS[k] *= vecW[k] * vecD[k];
    }
}

void grad(const int n, const int p, const int q,
          const double *X,
          const double *X_diff,
          const double *Y,
          const double *V,
          const double h,
          double *G, double *loss) {
    // Number of non-trivial pairs (X_i, X_j), each counted once.
    int i, N = (n * (n - 1)) / 2;
    double scale = -0.5 / h;

    if (X_diff == NULL) {
        // TODO: compute `X_diff` from `X` when not supplied by the caller.
    }

    // Allocate and compute the projection matrix `Q = I_p - V * V^T`.
    double *Q = (double*)malloc(p * p * sizeof(double));
    nullProj(V, p, q, Q);

    // Allocate and compute the vectorized distance matrix from a temporary
    // projection of `X_diff`.
    double *vecD = (double*)malloc(N * sizeof(double));
    double *X_proj;
    if (p < 5) {
        // `X_proj` is reused below for `vecW` (N), `colSums` (n), `y1` (n)
        // and `L` (n); for small `p` a buffer of `N * p` may be too small,
        // so over-allocate. TODO: refine this bound.
        X_proj = (double*)malloc(N * 5 * sizeof(double));
    } else {
        X_proj = (double*)malloc(N * p * sizeof(double));
    }
    matrixprod(X_diff, N, p, Q, p, p, X_proj);
    rowSquareSums(X_proj, N, p, vecD);

    // Apply the kernel to the distance vector to obtain the weights.
    double *vecW = X_proj; // reuse memory area, no longer needed.
    for (i = 0; i < N; ++i) {
        vecW[i] = gaussKernel(vecD[i], scale);
    }
    double *colSums = X_proj + N; // still allocated!
    rowSumsSymVec(vecW, n, 1.0, colSums); // rowSums == colSums by symmetry.

    // Compute the weighted responses of the first and second moments,
    // aka `y1` and `L`.
    double *y1 = X_proj + N + n;
    double *L  = X_proj + N + (2 * n);
    // Allocate the `X_diff` scaling vector `vecS`; not placed in the
    // `X_proj` memory area because it is used simultaneously with `X_proj`
    // in the final gradient computation.
    double *vecS = (double*)malloc(N * sizeof(double));
    weightedYandLoss(n, Y, vecD, vecW, colSums, y1, L, vecS, loss);

    // Compute the gradient, using `X_proj` for the intermediate scaled `X_diff`.
    rowSweep(X_diff, N, p, "*", vecS, X_proj);
    // Reuse `Q`, which has the required dimension (p, p).
    crossprod(X_diff, N, p, X_proj, N, p, Q);
    // Product with `V`.
    matrixprod(Q, p, p, V, p, q, G);
    // Final scaling (TODO: move into matrixprod!).
    scale = -2.0 / (((double)n) * h * h);
    N = p * q;
    for (i = 0; i < N; ++i) {
        G[i] *= scale;
    }

    free(vecS);
    free(X_proj);
    free(vecD);
    free(Q);
}
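
/*
 * Usage sketch (not part of the library build; enable with -DGRAD_DEMO).
 * This is only an illustration of how `grad` might be called: the
 * column-major (R-style) storage of `X`, `V` and `X_diff`, the pair
 * ordering of `X_diff` (row k holds X_i - X_j for j < i, matching the
 * loop order above), and the bandwidth value are assumptions, not
 * something this file specifies.
 */
#ifdef GRAD_DEMO
#include <stdio.h>

int main(void) {
    const int n = 4, p = 2, q = 1;
    const int N = n * (n - 1) / 2; /* number of pairs with i > j */
    /* n x p data matrix, column-major (assumed layout). */
    double X[] = { 0.0, 1.0, 2.0, 3.0,   /* column 1 */
                   1.0, 0.5, 0.0, 2.0 }; /* column 2 */
    double Y[] = { 0.1, 0.9, 2.1, 2.9 }; /* response vector, length n */
    double V[] = { 0.6, 0.8 };           /* p x q with orthonormal column */
    double X_diff[N * p];                /* pairwise row differences */
    double G[p * q];
    double loss;
    int i, j, c, k;

    /* Build X_diff in the same pair order as the loops in grad(): for
     * each j, all i > j; entry (k, c) lives at X_diff[c * N + k]. */
    k = 0;
    for (j = 0; j < n; ++j) {
        for (i = j + 1; i < n; ++i, ++k) {
            for (c = 0; c < p; ++c) {
                X_diff[c * N + k] = X[c * n + i] - X[c * n + j];
            }
        }
    }

    grad(n, p, q, X, X_diff, Y, V, 0.5 /* bandwidth h, arbitrary */,
         G, &loss);

    printf("loss = %g\n", loss);
    for (i = 0; i < p * q; ++i) {
        printf("G[%d] = %g\n", i, G[i]);
    }
    return 0;
}
#endif /* GRAD_DEMO */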