#include // for R_CheckUserInterrupt #include "cve.h" void cve(const mat *X, const mat *Fy, const double h, const unsigned int method, const double momentum, const double tau_init, const double tol_init, const double slack, const double gamma, const int maxIter, const int attempts, mat *V, mat *L, SEXP logger, SEXP loggerEnv) { // TODO: param and dim. validation. int n = X->nrow, p = X->ncol, q = V->ncol; int attempt = 0, iter; double loss, loss_last, loss_best, err, tau; double tol = tol_init * sqrt((double)(2 * q)); double agility = -2.0 * (1.0 - momentum) / (h * h); double sumK; double c = agility / (double)n; // TODO: check parameters! dim, ... /* Create further intermediate or internal variables. */ mat *lvecD_e = (void*)0; mat *Fy_sq = (void*)0; mat *XV = (void*)0; mat *lvecD = (void*)0; // TODO: combine. aka in-place lvecToSym mat *D = (void*)0; // TODO: combine. aka in-place lvecToSym mat *lvecK = (void*)0; // TODO: combine. aka in-place lvecToSym mat *K = (void*)0; // TODO: combine. aka in-place lvecToSym mat *colSumsK = (void*)0; mat *rowSumsL = (void*)0; mat *W = (void*)0; mat *y1 = (void*)0; mat *y2 = (void*)0; mat *S = (void*)0; mat *tmp1 = (void*)0; mat *tmp2 = (void*)0; mat *G = (void*)0; mat *A = (void*)0; mat *V_tau = (void*)0; mat *V_best = (void*)0; mat *L_best = (void*)0; /* Allocate appropiate amount of working memory. */ int workLen = 2 * (p + 1) * p; if (workLen < n) { workLen = n; } double *workMem = (double*)R_alloc(workLen, sizeof(double)); lvecD_e = rowDiffSquareSums(X, lvecD_e); Fy_sq = hadamard(1.0, Fy, Fy, 0.0, Fy_sq); do { /* (Re)set learning rate. */ tau = tau_init; /* Check if start value for `V` was supplied. */ if (attempts > 0) { /* Sample start value from stiefel manifold. */ V = rStiefel(p, q, V, workMem); } /* Embed X_i's in V space */ XV = matrixprod(1.0, X, V, 0.0, XV); /* Compute embedded distances */ lvecD = lincomb(1.0, lvecD_e, -1.0, rowDiffSquareSums(XV, lvecD)); /* Apply kernel to distances. */ lvecK = applyKernel(lvecD, h, gauss, lvecK); /* Transform lower vectors lvecD, lvecK into sym. matrices. */ D = lvecToSym(lvecD, 0.0, D); K = lvecToSym(lvecK, 1.0, K); /* Compute column sums of kernel matrix K */ colSumsK = colSums(K, colSumsK); /* Normalize K columns to obtain weight matrix W */ W = colApply(K, '/', colSumsK, W); /* first and second order weighted responces */ y1 = matrixprod(1.0, W, Fy, 0.0, y1); y2 = matrixprod(1.0, W, Fy_sq, 0.0, y2); /* Compute losses */ L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L)); /* Compute initial loss */ if (method == weighted) { colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK); sumK = sum(colSumsK); if (L->ncol == 1) { loss_last = dot(L, '*', colSumsK) / sumK; } else { loss_last = dot(rowSums(L, rowSumsL), '*', colSumsK) / sumK; } c = agility / sumK; /* Calculate the scaling matrix S */ S = laplace(adjacence(L, Fy, y1, D, K, gauss, S), workMem); } else { /* simple */ loss_last = mean(L); /* Calculate the scaling matrix S */ S = laplace(adjacence(L, Fy, y1, D, W, gauss, S), workMem); } /* Gradient */ tmp1 = matrixprod(1.0, S, X, 0.0, tmp1); tmp2 = crossprod(1.0, X, tmp1, 0.0, tmp2); G = matrixprod(c, tmp2, V, 0.0, G); if (logger) { callLogger(logger, loggerEnv, attempt, /* iter <- 0L */ -1, L, V, G, loss_last, /* err <- NA */ -1.0, tau); } /* Compute Skew-Symmetric matrix `A` used in Cayley transform. * `A <- tau * (G V^T - V G^T) + 0 * A`*/ A = skew(tau, G, V, 0.0, A); for (iter = 0; iter < maxIter; ++iter) { /* Before Starting next iteration check if the Uer has requested an * interupt (aka. ^C, or "Stop" button). * If interrupted the algorithm will be exited here and everything * will be discharted! */ R_CheckUserInterrupt(); /* Move `V` along the gradient direction. */ V_tau = cayleyTransform(A, V, V_tau, workMem); /* Embed X_i's in V space */ XV = matrixprod(1.0, X, V_tau, 0.0, XV); /* Compute embedded distances */ lvecD = lincomb(1.0, lvecD_e, -1.0, rowDiffSquareSums(XV, lvecD)); /* Apply kernel to distances. */ lvecK = applyKernel(lvecD, h, gauss, lvecK); /* Transform lower vectors lvecD, lvecK into sym. matrices. */ D = lvecToSym(lvecD, 0.0, D); K = lvecToSym(lvecK, 1.0, K); /* Compute column sums of kernel matrix K */ colSumsK = colSums(K, colSumsK); /* Normalize K columns to obtain weight matrix W */ W = colApply(K, '/', colSumsK, W); /* first and second order weighted responces */ y1 = matrixprod(1.0, W, Fy, 0.0, y1); y2 = matrixprod(1.0, W, Fy_sq, 0.0, y2); /* Compute losses */ L = hadamard(-1.0, y1, y1, 1.0, copy(y2, L)); /* Compute loss */ if (method == weighted) { colSumsK = elemApply(colSumsK, '-', 1.0, colSumsK); sumK = sum(colSumsK); if (L->ncol == 1) { loss = dot(L, '*', colSumsK) / sumK; } else { loss = dot(rowSums(L, rowSumsL), '*', colSumsK) / sumK; } } else { /* simple */ loss = mean(L); } /* Check if step is appropriate, iff not reduce learning rate. */ if ((loss - loss_last) > loss_last * slack) { tau *= gamma; iter -= 1; A = elemApply(A, '*', gamma, A); // scale A by gamma continue; } else { tau /= gamma; } /* Compute error, use workMem. */ err = dist(V, V_tau); /* Shift next step to current step and store loss to last. */ V = copy(V_tau, V); loss_last = loss; if (logger) { callLogger(logger, loggerEnv, attempt, iter, L, V, G, loss, err, tau); } /* Check Break condition. */ if (err < tol || iter + 1 >= maxIter) { break; } if (method == weighted) { /* Calculate the scaling matrix S */ S = laplace(adjacence(L, Fy, y1, D, K, gauss, S), workMem); c = agility / sumK; // n removed previousely } else { /* simple */ /* Calculate the scaling matrix S */ S = laplace(adjacence(L, Fy, y1, D, W, gauss, S), workMem); } /* Gradient */ tmp1 = matrixprod(1.0, S, X, 0.0, tmp1); tmp2 = crossprod(1.0, X, tmp1, 0.0, tmp2); G = matrixprod(c, tmp2, V, momentum, G); /* Compute Skew-Symmetric matrix `A` used in Cayley transform. * `A <- tau * (G V^T - V G^T) + 0 * A`*/ A = skew(tau, G, V, 0.0, A); } /* Check if current attempt improved previous ones */ if (attempt == 0 || loss < loss_best) { loss_best = loss; V_best = copy(V, V_best); L_best = copy(L, L_best); } } while (++attempt < attempts); V = copy(V_best, V); L = copy(L_best, L); }