#include "cve.h" /** * Creates a matrix. * * @param nrow number of rows. * @param ncol number of columns. * * @returns an uninitialized `nrow x ncol` matrix. * * @details matrix elements are NOT initialized, its content is "random". */ mat* matrix(const int nrow, const int ncol) { mat* M = (mat*)R_alloc(1, sizeof(mat)); M->nrow = nrow; M->ncol = ncol; M->origin = (void*)0; M->elem = (double*)R_alloc(nrow * ncol, sizeof(double)); return M; } /** * Creates a zero matrix. * * @param nrow number of rows. * @param ncol number of columns. * * @returns a `nrow x ncol` matrix with all elements equal 0. */ mat* zero(const int nrow, const int ncol) { mat* M = matrix(nrow, ncol); /* Set all emements zero */ memset(M->elem, 0, nrow * ncol * sizeof(double)); return M; } /** * Copies elements form `src` to `dest` matrix. * * @param dest (in/out) destination matrix. * @param src source matrix. * * @returns passed dest matrix. */ mat* copy(mat *src, mat *dest) { if (!dest) { dest = matrix(src->nrow, src->ncol); } else if (src->nrow != dest->nrow || src->ncol != dest->ncol) { // TODO: error handling! } memcpy(dest->elem, src->elem, dest->nrow * dest->ncol * sizeof(double)); return dest; } /** * Sum of all elements. * * @param A matrix. * * @returns the sum of elements of `A`. */ double sum(const mat* A) { int i, nn = A->nrow * A->ncol; int nn4 = 4 * (nn / 4); double *a = A->elem; double s = 0.0; for (i = 0; i < nn4; i += 4) { s += a[i] + a[i + 1] + a[i + 2] + a[i + 3]; } for (; i < nn; ++i) { s += a[i]; } return s; } /** * Mean of all elements. * * @param A matrix. * * @returns the mean of elements of `A`. */ double mean(const mat* A) { int i, nn = A->nrow * A->ncol; int nn4 = 4 * (nn / 4); double *a = A->elem; double s = 0.0; for (i = 0; i < nn4; i += 4) { s += a[i] + a[i + 1] + a[i + 2] + a[i + 3]; } for (; i < nn; ++i) { s += a[i]; } return s / (double)nn; } #define CVE_DOT_ALG(op) \ for (i = 0; i < nn4; i += 4) { \ s += ((a[i]) op (b[i])) \ + ((a[i + 1]) op (b[i + 1])) \ + ((a[i + 2]) op (b[i + 2])) \ + ((a[i + 3]) op (b[i + 3])); \ } \ for (; i < nn; ++i) { \ s += (a[i]) op (b[i]); \ } /** * Element-wise sum of `A` and `op(B)`. * * sum(A + B), for op = '+', * sum(A * B), for op = '*', * sum(A - B), for op = '-', * sum(A / B), for op = '/'. * * @param A matrix. * @param op one of '+', '-', '*' or '/'. * @param B matrix of the same size as `A`. * * @returns sum of element-wise products. */ double dot(const mat *A, const char op, const mat *B) { int i, nn = A->nrow * A->ncol; int nn4 = 4 * (nn / 4); double *a = A->elem; double *b = B->elem; double s = 0.0; if (B->nrow * B->ncol != nn) { // TODO: error handling! } switch (op) { case '+': CVE_DOT_ALG(+) break; case '-': CVE_DOT_ALG(-) break; case '*': CVE_DOT_ALG(*) break; case '/': CVE_DOT_ALG(/) break; default: // TODO: error handling! break; } return s; } /** * Computes the sub-space distances according * * dist <- norm(A %*% t(A) - B %*% t(B), 'F') * * @param A semi-orthogonal matrix of dim. `n x m` such that `n < m`. * @param B semi-orthogonal matrix of same dimensions as `A`. * * @details The semi-orthogonality will NOT be validated! * * @returns dist as describet above. */ // TODO: optimize! double dist(const mat *A, const mat *B) { int i, j, k, n = A->nrow, m = A->ncol; double tmp1, tmp2, sum; double *a = A->elem, *b = B->elem; if (A->nrow != B->nrow || A->ncol != B->ncol) { // TODO: error handling! } sum = 0.0; for (i = 0; i < n; ++i) { for (j = 0; j < n; ++j) { tmp1 = tmp2 = 0.0; for (k = 0; k < m; ++k) { tmp1 += a[k * n + i] * a[k * n + j]; tmp2 += b[k * n + i] * b[k * n + j]; } sum += (tmp1 - tmp2) * (tmp1 - tmp2); } } return sqrt(sum); } /** * Sum of squared elements. * * @param A matrix. * * @returns the sum of squared elements of `A`. */ double squareSum(const mat* A) { int i, nn = A->nrow * A->ncol; int nn4 = 4 * (nn / 4); double *a = A->elem; double s = 0.0; for (i = 0; i < nn4; i += 4) { s += (a[i] * a[i]) + (a[i + 1] * a[i + 1]) + (a[i + 2] * a[i + 2]) + (a[i + 3] * a[i + 3]); } for (; i < nn; ++i) { s += a[i] * a[i]; } return s; } /** * Row sums of a matrix `A`. * * @param A matrix of size `n x m`. * @param sums vector of length `n`. * * @returns sums if not NULL, else a new creates vector of length `n`. */ mat* rowSums(const mat *A, mat *sums) { int i, j, block_size, block_size_i, nrow = A->nrow, ncol = A->ncol; const double *a; const double *A_block = A->elem; const double *A_end = A->elem + nrow * ncol; double *sum; if (!sums) { sums = zero(nrow, 1); } sum = sums->elem; if (nrow > CVE_MEM_CHUNK_SIZE) { block_size = CVE_MEM_CHUNK_SIZE; } else { block_size = nrow; } // Iterate `(block_size_i, ncol)` submatrix blocks. for (i = 0; i < nrow; i += block_size_i) { // Reset `A` to new block beginning. a = A_block; // Take block size of eveything left and reduce to max size. block_size_i = nrow - i; if (block_size_i > block_size) { block_size_i = block_size; } // Compute first blocks column, for (j = 0; j < block_size_i; ++j) { sum[j] = a[j]; } // and sum the following columns to the first one. for (a += nrow; a < A_end; a += nrow) { for (j = 0; j < block_size_i; ++j) { sum[j] += a[j]; } } // Step one block forth. A_block += block_size_i; sum += block_size_i; } return sums; } /** * Column sums of a matrix `A`. * * @param A matrix of size `n x m`. * @param sums (out) vector of length `m`. * * @returns sums if not NULL, else a new creates vector of length `m`. * * @details As a vector this the result `sums` must have matrix dim. `m x 1`. * Meaning that the result is a row vector. */ mat* colSums(const mat *A, mat *sums) { int i, j, nrow = A->nrow, ncol = A->ncol; int nrowb = 4 * (nrow / 4); // 4 * floor(nrow / 4) double *a = A->elem; double sum; if (!sums) { sums = matrix(ncol, 1); } for (j = 0; j < ncol; ++j) { sum = 0.0; for (i = 0; i < nrowb; i += 4) { sum += a[i] + a[i + 1] + a[i + 2] + a[i + 3]; } for (; i < nrow; ++i) { sum += a[i]; } sums->elem[j] = sum; a += nrow; } return sums; } /** * Sum of squared row differences. * * d_k = \sum_{l} (X_{i l} - X_{j l})^2 * * For a input matrix `X` of dimension `n x p` the result is a vector of length * `n * (n - 1) / 2` (aka number of all pairs of `n` elements). The relation * between `i, j` and `k` is: * * k = j n + i - j (j + 1) / 2, for 0 <= j < i < n * * @param X input matrix of dim. `n x p`. * @param lvecD (out) output vector of length `n * (n - 1) / 2`. * * @returns lvecD if not NULL, otherwise a new created vector. * * @details this computes the packed vectorized lower triangular part of the * distance matrix of `X`, aka lvec(D). */ mat* rowDiffSquareSums(const mat *X, mat *lvecD) { int i, j, k, l, nrow = X->nrow, ncol = X->ncol; const double *x; double *d, diff; if (!lvecD) { lvecD = zero(nrow * (nrow - 1) / 2, 1); } else if (nrow * (nrow - 1) == 2 * lvecD->nrow && lvecD->ncol == 1) { memset(lvecD->elem, 0, (nrow * (nrow - 1) / 2) * sizeof(double)); } else { // TODO: error handling! } d = lvecD->elem; for (l = 0; l < ncol; ++l) { x = X->elem + l * nrow; for (j = k = 0; j < nrow; ++j) { for (i = j + 1; i < nrow; ++i, ++k) { diff = x[i] - x[j]; d[k] += diff * diff; } } } return lvecD; } #define COLAPPLY_ALG(op) \ for (i = 0; i < nrow; i += block_size) { \ /* Set a, c to block's left upper corner and b to block height */ \ a = A->elem + i; \ b = B->elem + i; \ c = C->elem + i; \ /* Get block size as everyting left, then restrict size. */ \ block_size = nrow - i; \ if (block_size > max_size) { \ block_size = max_size; \ } \ /* remove reminders by 4 from block_size */ \ block_batch_size = 4 * (block_size / 4); \ /* Stay on i-index "height" and move "right" in j-direction */ \ for (j = 0; j < ncol; ++j, a += nrow, c += nrow) { \ /* Iterate over the block column */ \ for (k = 0; k < block_batch_size; k += 4) { \ c[k] = a[k] op b[k]; \ c[k + 1] = a[k + 1] op b[k + 1]; \ c[k + 2] = a[k + 2] op b[k + 2]; \ c[k + 3] = a[k + 3] op b[k + 3]; \ } \ for (; k < block_size; ++k) { \ c[k] = a[k] op b[k]; \ } \ } \ } /* C[, j] = A[, j] op v for each j = 1 to ncol with op as one of +, -, *, / */ /** * Elment-wise operation for each row to vector. * C <- A op B * using recycling (column-major order) with `op` ether '+', '-', '*' or '/'. * * @param A matrix of size `n x m`. * @param op type of operation, ether '+', '-', '*' or '/'. * @param B vector or length `m` to be sweeped over rows. * @param C (in/out) result after row sweep. * * @returns Passed (overwritten) `C` or new created if `C` is NULL. */ mat* colApply(mat *A, const char op, mat *B, mat *C) { int i, j, k, nrow = A->nrow, ncol = A->ncol; int max_size = CVE_MEM_CHUNK_SMALL < nrow ? CVE_MEM_CHUNK_SMALL : nrow; int block_size, block_batch_size; const double * restrict a, * restrict b; double * restrict c; /* Check if B length is the same as count A's rows. */ if (nrow != B->nrow) { // TODO: error handling! } /* Check for existance or shape of C */ if (!C) { C = matrix(nrow, ncol); } else if (C->nrow != nrow || C->ncol != ncol) { // TODO: error handling! } switch (op) { case '+': COLAPPLY_ALG(+) break; case '-': COLAPPLY_ALG(-) break; case '*': COLAPPLY_ALG(*) break; case '/': // TODO: check division by zero COLAPPLY_ALG(/) break; default: // TODO: error handling: break; } return C; } #undef COLAPPLY_ALG /** * Element-wise applies `op` with scalar. * * B <- A + scalar, for op = '+', * B <- A - scalar, for op = '-', * B <- A * scalar, for op = '*', * B <- A / scalar, for op = '/'. * * @param A matrix. * @param op one of '+', '-', '*' or '/'. * @param scalar scalar as right hand side of operation. * @param B matrix of the same size as `A`. May even `A`. */ mat* elemApply(mat *A, const char op, const double scalar, mat *B) { int i, n = A->nrow * A->ncol; double *a, *b; if (!B) { B = matrix(A->nrow, A->ncol); } if (n != B->nrow * B->ncol || (op == '/' && scalar == 0.0)) { // TODO: error handling! } a = A->elem; b = B->elem; switch (op) { case '+': for (i = 0; i < n; ++i) { b[i] = a[i] + scalar; } break; case '-': for (i = 0; i < n; ++i) { b[i] = a[i] - scalar; } break; case '*': for (i = 0; i < n; ++i) { b[i] = a[i] * scalar; } break; case '/': for (i = 0; i < n; ++i) { b[i] = a[i] / scalar; } break; default: break; } return B; } /** * Linear combination of A with B, * B <- alpha A + beta B * * @param alpha scaling factor for `A`. * @param A (in) first matrix. * @param beta scaling factor for `B`. * @param B (in/out) second matrix of the same length as `A`. * * @returns passed vector `B`. * * @details It is NOT allowed to pass NULL. * Also the number of elements must match, but it is legal to pass matrices of * different shape, the result may be unwanted but leagel. */ mat* lincomb(const double alpha, const mat *A, const double beta, mat *B) { int i, nn = A->nrow * A->ncol; int nnb = 4 * (nn / 4); double *restrict a = A->elem, *restrict b = B->elem; if (nn != B->nrow * B->ncol) { // TODO: error handling! } for (i = 0; i < nnb; i += 4) { b[i] = alpha * a[i] + beta * b[i]; b[i + 1] = alpha * a[i + 1] + beta * b[i + 1]; b[i + 2] = alpha * a[i + 2] + beta * b[i + 2]; b[i + 3] = alpha * a[i + 3] + beta * b[i + 3]; } for (; i < nn; ++i) { b[i] = alpha * a[i] + beta * b[i]; } return B; } /** * Matrix matrix product. * C <- alpha * A %*% B + beta * C * * @param alpha scaling factor for product of `A` with `B`. * @param A (in) matrix of dimension `n x k`. * @param B (in) matrix of dimension `k x m`. * @param beta scaling factor for original values in target matrix. * @param C (in/out) matrix of dimension `n x m` or NULL. * * @details if `C` is NULL, a new matrix of dimension `n x m` is created `beta` * is set to 0. * * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. */ mat* matrixprod(const double alpha, const mat *A, const mat *B, double beta, mat* C) { /* Check dimensions. */ if (A->ncol != B->nrow) { // TODO: error handling! } /* Check if `C` is given. */ if (!C) { /* with `beta` zero, DGEMM ensures to overwrite `C`. */ beta = 0.0; /* Create uninitialized matrix `C`. */ C = matrix(A->nrow, B->ncol); } else { /* Check target dimensions. */ if (C->nrow != A->nrow || C->ncol != B->ncol) { // TODO: error handling! } } /* DGEMM ... Dence GEneralized Matrix Matrix level 3 BLAS routine */ F77_NAME(dgemm)("N", "N", &(A->nrow), &(B->ncol), &(A->ncol), &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), &beta, C->elem, &(C->nrow)); return C; } /** * Cross-product. * C <- alpha * t(A) %*% B + beta * C * C <- alpha * crossprod(A, B) + beta * C * * @param alpha scaling factor for product of `A` with `B`. * @param A (in) matrix of dimension `k x n`. * @param B (in) matrix of dimension `k x m`. * @param beta scaling factor for original values in target matrix. * @param C (in/out) matrix of dimension `n x m` or NULL. * * @details if `C` is NULL, a new matrix of dimension `n x m` is created `beta` * is set to 0. * * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. */ mat* crossprod(const double alpha, const mat *A, const mat *B, double beta, mat* C) { /* Check dimensions. */ if (A->nrow != B->nrow) { // TODO: error handling! } /* Check if `C` is given. */ if (!C) { /* with `beta` zero, DGEMM ensures to overwrite `C`. */ beta = 0.0; /* Create uninitialized matrix `C`. */ C = matrix(A->ncol, B->ncol); } else { /* Check target dimensions. */ if (C->nrow != A->ncol || C->ncol != B->ncol) { // TODO: error handling! } } /* DGEMM ... Dence GEneralized Matrix Matrix level 3 BLAS routine */ F77_NAME(dgemm)("T", "N", &(A->ncol), &(B->ncol), &(A->nrow), &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), &beta, C->elem, &(C->nrow)); return C; } /** * Hadamard product (Schur product or element-wise product). * * C <- alpha * A * B + beta * C * * @param alpha scaling factor for `A o B`. * @param A matrix. * @param B matrix of same dimensions as `A`. * @param beta scaling factor for `C`. * @param C (in/out) matrix of same dimensions as `A` or NULL. * * @returns Passed C, or if C is NULL a new created matrix. */ mat* hadamard(const double alpha, const mat* A, const mat* B, double beta, mat* C) { int i, nn = A->nrow * A->ncol; double *a, *b, *c; if (A->nrow != B->nrow || A->ncol != B->ncol) { // TODO: error handling! } if (!C) { beta = 0.0; C = zero(A->nrow, A->ncol); } else if (A->nrow != C->nrow || A->ncol != C->ncol) { // TOD0: error handling! } a = A->elem; b = B->elem; c = C->elem; if (alpha == 1.0 && beta == 0.0) { for (i = 0; i < nn; ++i) { c[i] = a[i] * b[i]; } } else { for (i = 0; i < nn; ++i) { c[i] = alpha * a[i] * b[i] + beta * c[i]; } } return C; } /** * Skew-Symmetric rank-2 matrixprod: * * C <- alpha * (A %*% t(B) - B %*% t(A)) + beta * C * * @param alpha first scaling factor. * @param A (in) matrix of dimension `n x k`. * @param B (in) matrix of dimension `n x k`. * @param beta scaling factor for original values in target matrix. * @param C (in/out) matrix of dimension `n x n` or NULL. * * @details if `C` is NULL, a new matrix of dimension `n x n` is created, `beta` * is set to 0. * * @returns given `C` or a new created matrix if `C` was NULL or NULL on error. */ mat* skew(double alpha, mat *A, mat *B, double beta, mat *C) { if (A->nrow != B->nrow || A->ncol != B->ncol) { // TODO: error handling! } if (!C) { beta = 0.0; C = matrix(A->nrow, A->nrow); } else if (C->nrow != A->nrow || C->ncol != A->nrow) { // TODO: error handling! } F77_NAME(dgemm)("N", "T", &(C->nrow), &(C->ncol), &(A->ncol), &alpha, A->elem, &(A->nrow), B->elem, &(B->nrow), &beta, C->elem, &(C->nrow)); alpha *= -1.0; beta = 1.0; F77_NAME(dgemm)("N", "T", &(C->nrow), &(C->ncol), &(B->ncol), &alpha, B->elem, &(B->nrow), A->elem, &(A->nrow), &beta, C->elem, &(C->nrow)); return C; } /** * Integer square root. * isqrt(n) = floor(sqrt(n)) * which is the biggest positive integer such that its square is smaller or * equal to `n`. * * @param n positive integer. * * @returns integer square root of `n`. */ static int isqrt(const int n) { int x = 1, y; if (n < 0) { // TODO: error handling! } else if (n < 4) { return 1; } else if (n < 9) { // Only required for n <= 4. return 2; } while (1) { y = (x + n / x) / 2; if (x == y || x + 1 == y) { return x; } x = y; } } /** * Vector to symmetric matrix transform. * * Given a vector `A` of length `N = n * (n - 1) / 2` a symmetric matrix `B` * of dimension `n x n` is created such that (0-indexed) * * b_{i j} = b_{j i} = a_k, for k = j * n + i - j * (j + 1) / 2 * * with 0 <= j < i < n. The main diagonal elements are set to `diag`. * * @example The vector `A = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)^T` with * `diag = -1` results in * * ( -1 0 1 2 3 ) * ( 0 -1 4 5 6 ) * B = ( 1 4 -1 7 8 ) * ( 2 5 7 -1 9 ) * ( 3 6 8 9 -1 ) * * @param A (in) source vector of length `N = n * (n - 1) / 2`. * @param B (in/out) destination matrix of dim. `n x n`. * @param diag value of all diagonal elements. * * @returns passed matrix `B` or new created if `B` is NULL. */ mat* lvecToSym(const mat* A, const double diag, mat* B) { int i, j, k, l, n, N = A->nrow; int max_size, block_size; double *a, *b, *L, *U; if (!B) { n = (1 + isqrt(1 + 8 * N)) / 2; if (n * (n - 1) != 2 * N) { // TODO: error handling! } B = matrix(n, n); } else { n = B->nrow; if (n * (n - 1) != 2 * N || n != B->ncol) { // TODO: error handling! } } /* Copy consecutive values of `A` to lower triangular part of `B`. */ a = A->elem; for (j = k = 0; j < n; ++j) { b = B->elem + j * n; b[j] = diag; for (i = j + 1; i < n; ++i, ++k) { b[i] = a[k]; } } /* Mirror along the main diagonal, aka transposed copy of lower to upper. */ b = B->elem; max_size = BLOCK_SIZE * (n / BLOCK_SIZE); /* Iterate over blocked columns */ for (j = 0; j < n; j += BLOCK_SIZE) { /* determine height of the block (for reminding partial blocks) */ block_size = j < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); /* Set diagonal block (left upper corner on the main diagonal) */ L = b + (j * n + j); // diagonal block. /* Transpose block on main diagonal */ for (l = 0; l < block_size; ++l) { for (k = l + 1; k < block_size; ++k) { L[k * n + l] = L[l * n + k]; } } /* Iterate complete blocks below previous halve block */ for (i = j + BLOCK_SIZE; i < n; i += BLOCK_SIZE) { /* determine height of the block (for reminding partial blocks) */ block_size = i < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); /* Set lower (L, below diag) and upper (U, above diag) blocks */ L = b + j * n + i; U = b + i * n + j; /* Transpose lower (L) to upper (U) complete block. */ for (l = 0; l < BLOCK_SIZE; ++l) { for (k = 0; k < block_size; ++k) { U[k * n + l] = L[l * n + k]; } } } } return B; } /** * In place transformation into symmetric matrix as * A <- diag(colSums(A + t(A))) - (A + t(A)) * * @param A (in/out) square matrix. * @param workMem (out) double[n] vector as working memory. Will be storing the * diagonal elements of the result matrix. Must have at least length `n`. * * @returns laplace transformed input `A`. * * @details The workMem parameter is required as working memory. */ mat* laplace(mat *A, double *workMem) { int i, j, k, l, n = A->nrow; int max_size = BLOCK_SIZE * (n / BLOCK_SIZE); int block_size; double *a = A->elem; double *L, *U; /* Check if A is square */ if (n != A->ncol) { // TODO: error handling! } /* Ckeck if working memory is supplied. */ if (!workMem) { workMem = (double*)R_alloc(n, sizeof(double)); } /* init working memory */ memset(workMem, 0, n * sizeof(double)); /* Iterate over column slices */ for (j = 0; j < n; j += BLOCK_SIZE) { /* determine height of the block (for reminding partial blocks) */ block_size = j < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); /* Set diagonal block (left upper corner on the main diagonal) */ L = a + (j * n + j); // D block in description. /* Transpose block on main diagonal */ for (l = 0; l < block_size; ++l) { for (k = l + 1; k < block_size; ++k) { workMem[j + l] += L[l * n + k] = L[k * n + l] = -(L[l * n + k] + L[k * n + l]); } } /* Iterate complete blocks below previous halve block */ for (i = j + BLOCK_SIZE; i < n; i += BLOCK_SIZE) { /* determine height of the block (for reminding partial blocks) */ block_size = i < max_size ? BLOCK_SIZE : (n % BLOCK_SIZE); /* Set lower (L, below diag) and upper (U, above diag) blocks */ L = a + j * n + i; U = a + i * n + j; /* Transpose lower (L) to upper (U) complete block. */ for (l = 0; l < BLOCK_SIZE; ++l) { for (k = 0; k < block_size; ++k) { workMem[j + l] += L[l * n + k] = U[k * n + l] = -(L[l * n + k] + U[k * n + l]); } } } } /* Sum remaining upper diagonal column sum parts and set diagonal */ for (j = 0; j < n; ++j) { for (i = 0; i < j; ++i) { workMem[j] += a[i]; } a[j] = -workMem[j]; a += n; } return A; } /** Cayley transformation of matrix `B` using a Skew-Symmetric matrix `A`. * * C = (I + A)^-1 (I - A) B * * by solving the following linear equation: * * (I + A) C = (I - A) B ==> C = (I + A)^-1 (I - A) B * \_____/ \_____/ * IpA C = ImA B * \_______/ * IpA C = Y ==> C = IpA^-1 Y * * @param A Skew-Symmetric matrix of dimension `(n, n)`. * @param B Matrix of dimensions `(n, m)` with `m <= n`. * @param C Matrix of dimensions `(n, m)` with `m <= n`. * @param workMem working memory array of length at least `2 p^2 + p`. * @return Transformed matrix `C`. * @note This opperation is equivalent to the R expression: * solve(diag(1, n) + A) %*% (diag(1, n) - A) %*% B * or * solve(diag(1, n) + A, (diag(1, n) - A) %*% B) */ mat* cayleyTransform(mat *A, mat *B, mat *C, double *workMem) { int i, info, pp = A->nrow * A->nrow; double zero = 0.0, one = 1.0; // TODO: validate dimensions!!! // TODO: calrify a bit ^^ if (!C) { C = matrix(B->nrow, B->ncol); } else if (B->nrow != C->nrow || B->ncol != C->ncol) { // TODO: error handling! } /* Allocate row permutation array used by `dgesv` */ int *ipiv = (int*)workMem; /* NOTE: workMem offset, NOT ipiv offset! There may be a bit space left out * but the working memory is required elsewhere anyway. It's impotant to * have an appropriate beginning cause if may occure the case that the * memory addressing faily due to size differences between int and double * leading to an illegal double* address. */ double *IpA = workMem + A->nrow; /* Create Matrix IpA = I + A (I plus A) */ memcpy(IpA, A->elem, A->nrow * A->ncol * sizeof(double)); for (i = 0; i < pp; i += A->nrow + 1) { IpA[i] += 1.0; // +1 to diagonal elements. } /* Create Matrix ImA = I - A (I minus A) */ double *ImA = IpA + pp; double *a = A->elem; for (i = 0; i < pp; ++i) { ImA[i] = -a[i]; } for (i = 0; i < pp; i += A->nrow + 1) { ImA[i] += 1.0; // +1 to diagonal elements. } /* Y as matrix-matrix product of ImA and B: * Y = 1 * ImA * B + 0 * Y */ F77_CALL(dgemm)("N", "N", &(A->nrow), &(B->ncol), &(A->nrow), &one, ImA, &(A->nrow), B->elem, &(B->nrow), &zero, C->elem, &(C->nrow)); /* Solve system IpA Y = C for Y (and store result in C). * aka. C = IpA^-1 X */ F77_CALL(dgesv)(&(A->nrow), &(B->ncol), IpA, &(A->nrow), ipiv, C->elem, &(A->nrow), &info); if (info) { error("[ cayleyTransform ] error in dgesv - info %d", info); } return C; }