library(microbenchmark)

dyn.load("benchmark.so")


## rowSum* .call --------------------------------------------------------------
rowSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSums', PACKAGE = 'benchmark', M)
}
rowSumsV2.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV2', PACKAGE = 'benchmark', M)
}
rowSumsV3.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSumsV3', PACKAGE = 'benchmark', M)
}
colSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_colSums', PACKAGE = 'benchmark', M)
}
rowSquareSums.c <- function(M) {
    stopifnot(
        is.matrix(M),
        is.numeric(M)
    )
    if (!is.double(M)) {
        M <- matrix(as.double(M), nrow = nrow(M))
    }

    .Call('R_rowSquareSums', PACKAGE = 'benchmark', M)
}
rowSumsSymVec.c <- function(vecA, nrow, diag = 0.0) {
    stopifnot(
        is.vector(vecA),
        is.numeric(vecA),
        is.numeric(diag),
        nrow * (nrow - 1) == length(vecA) * 2
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    .Call('R_rowSumsSymVec', PACKAGE = 'benchmark',
          vecA, as.integer(nrow), as.double(diag))
}
rowSweep.c <- function(A, v, op = '-') {
    stopifnot(
        is.matrix(A),
        is.numeric(v)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.vector(v) || !is.double(v)) {
        v <- as.double(v)
    }
    stopifnot(
        nrow(A) == length(v),
        op %in% c('+', '-', '*', '/')
    )

    .Call('R_rowSweep', PACKAGE = 'benchmark', A, v, op)
}

## row*, col* tests ------------------------------------------------------------
n <- 3000
M <- matrix(runif(n * 12), n, 12)
stopifnot(
    all.equal(rowSums(M^2), rowSums.c(M^2)),
    all.equal(colSums(M), colSums.c(M)),
    all.equal(rowSums(M), rowSumsV2.c(M)),
    all.equal(rowSums(M), rowSumsV3.c(M))
)
microbenchmark(
    rowSums     = rowSums(M),
    rowSums.c   = rowSums.c(M),
    rowSumsV2.c = rowSumsV2.c(M),
    rowSumsV3.c = rowSumsV3.c(M)
)
microbenchmark(
    rowSums     = rowSums(M^2),
    rowSums.c   = rowSums.c(M^2),
    rowSqSums.c = rowSquareSums.c(M)
)
microbenchmark(
    colSums   = colSums(M),
    colSums.c = colSums.c(M)
)

sum = rowSums(M)
stopifnot(all.equal(
    sweep(M, 1, sum, FUN = `/`),
    rowSweep.c(M, sum, '/') # Col-Normalize)
), all.equal(
    sweep(M, 1, sum, FUN = `/`),
    M / sum
))
microbenchmark(
    sweep = sweep(M, 1, sum, FUN = `/`),
    M / sum,
    rowSweep.c = rowSweep.c(M, sum, '/') # Col-Normalize)
)

# Create symmetric matrix with constant diagonal entries.
nrow <- 200
diag <- 1.0
Sym <- tcrossprod(runif(nrow))
diag(Sym) <- diag
# Get vectorized lower triangular part of `Sym` matrix.
SymVec <- Sym[lower.tri(Sym)]
stopifnot(all.equal(
    rowSums(Sym),
    rowSumsSymVec.c(SymVec, nrow, diag)
))
microbenchmark(
    rowSums = rowSums(Sym),
    rowSums.c = rowSums.c(Sym),
    rowSumsSymVec.c = rowSumsSymVec.c(SymVec, nrow, diag)
)

## Matrix-Matrix opperation .call ---------------------------------------------
transpose.c <- function(A) {
    stopifnot(
        is.matrix(A), is.numeric(A)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow(A), ncol(A))
    }

    .Call('R_transpose', PACKAGE = 'benchmark', A)
}

sympMV.c <- function(vecA, x) {
    stopifnot(
        is.vector(vecA), is.numeric(vecA),
        is.vector(x), is.numeric(x),
        length(x) * (length(x) + 1) == 2 * length(vecA)
    )
    if (!is.double(vecA)) {
        vecA <- as.double(vecA)
    }
    if (!is.double(x)) {
        x <- as.double(x)
    }

    .Call('R_sympMV', PACKAGE = 'benchmark', vecA, x)
}

matrixprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        ncol(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_matrixprod', PACKAGE = 'benchmark', A, B)
}
crossprod.c <- function(A, B) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        nrow(A) == nrow(B)
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_crossprod', PACKAGE = 'benchmark', A, B)
}
kronecker.c <- function(A, B, op = '*') {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        is.character(op), op %in% c('*', '+', '/', '-')
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_kronecker', PACKAGE = 'benchmark', A, B, op)
}
skewSymRank2k.c <- function(A, B, alpha = 1, beta = 0) {
    stopifnot(
        is.matrix(A), is.numeric(A),
        is.matrix(B), is.numeric(B),
        all(dim(A) == dim(B)),
        is.numeric(alpha), length(alpha) == 1L,
        is.numeric(beta), length(beta) == 1L
    )
    if (!is.double(A)) {
        A <- matrix(as.double(A), nrow = nrow(A))
    }
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_skewSymRank2k', PACKAGE = 'benchmark', A, B,
          as.double(alpha), as.double(beta))
}

## Matrix-Matrix opperation tests ---------------------------------------------
n <- 200
k <- 100
m <- 300

A <- matrix(runif(n * k), n, k)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(t(A), transpose.c(A))
)
microbenchmark(
    t(A),
    transpose.c(A)
)

Sym <- tcrossprod(runif(n))
vecSym <- Sym[lower.tri(Sym, diag = T)]
x <- runif(n)
stopifnot(all.equal(
    as.double(Sym %*% x),
    sympMV.c(vecSym, x)
))
microbenchmark(
    Sym %*% x,
    sympMV.c = sympMV.c(vecSym, x)
)

stopifnot(
    all.equal(A %*% B, matrixprod.c(A, B))
)
microbenchmark(
    "%*%" = A %*% B,
    matrixprod.c = matrixprod.c(A, B)
)

A <- matrix(runif(k * n), k, n)
B <- matrix(runif(k * m), k, m)
stopifnot(
    all.equal(crossprod(A, B), crossprod.c(A, B))
)
microbenchmark(
    crossprod = crossprod(A, B),
    crossprod.c = crossprod.c(A, B)
)

n <- 100L
m <- 12L
p <- 11L
q <- 10L
A <- matrix(runif(n * m), n, m)
B <- matrix(runif(p * q), p, q)

stopifnot(all.equal(
    kronecker(A, B),
    kronecker.c(A, B)
))
microbenchmark(
    kronecker = kronecker(A, B),
    kronecker.c = kronecker.c(A, B)
)

n <- 12
k <- 11
A <- matrix(runif(n * k), n, k)
B <- matrix(runif(n * k), n, k)
stopifnot(all.equal(
    A %*% t(B) - B %*% t(A), skewSymRank2k.c(A, B)
))
microbenchmark(
    A %*% t(B) - B %*% t(A),
    skewSymRank2k.c(A, B)
)

## Orthogonal projection onto null space .Call --------------------------------
nullProj.c <- function(B) {
    stopifnot(
        is.matrix(B), is.numeric(B)
    )
    if (!is.double(B)) {
        B <- matrix(as.double(B), nrow = nrow(B))
    }

    .Call('R_nullProj', PACKAGE = 'benchmark', B)
}
## Orthogonal projection onto null space tests --------------------------------
p <- 12
q <- 10
V <- qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q)))

# Projection matrix onto `span(V)`
Q <- diag(1, p) - tcrossprod(V, V)
stopifnot(
    all.equal(Q, nullProj.c(V))
)
microbenchmark(
    nullProj = diag(1, p) - tcrossprod(V, V),
    nullProj.c = nullProj.c(V)
)

# ## Kronecker optimizations ----------------------------------------------------
# library(microbenchmark)

# dist.1 <- function(X_diff, Q) {
#     rowSums((X_diff %*% Q)^2)
# }
# dist.2 <- function(X, Q) {
#     ones <- rep(1, nrow(X))
#     proj <- X %*% Q
#     rowSums((kronecker(proj, ones) - kronecker(ones, proj))^2)
# }

# n <- 400L
# p <- 12L
# k <- 2L
# q <- p - k

# X <- matrix(rnorm(n * p), n, p)
# Q <- diag(1, p) - tcrossprod(rnorm(p))
# ones <- rep(1, n)
# X_diff <- kronecker(X, ones) - kronecker(ones, X)

# stopifnot(all.equal(dist.1(X_diff, Q), dist.2(X, Q)))

# microbenchmark(
#     dist.1(X_diff, Q),
#     dist.2(X, Q),
#     times = 10L
# )
#     # if (!persistent) {
#     #     pair.index <- elem.pairs(seq(n))
#     #     i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
#     #     j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
#     #     lower <- ((i - 1) * n) + j
#     #     upper <- ((j - 1) * n) + i
#     #     X_diff <- X[i, , drop = F] - X[j, , drop = F]
#     # }

#     # # Projection matrix onto `span(V)`
#     # Q <- diag(1, p) - tcrossprod(V, V)
#     # # Vectorized distance matrix `D`.
#     # vecD <- rowSums((X_diff %*% Q)^2)


# ## WIP for gradient. ----------------------------------------------------------

grad.c <- function(X, X_diff, Y, V, h) {
    stopifnot(
        is.matrix(X), is.double(X),
        is.matrix(X_diff), is.double(X_diff),
            ncol(X_diff) == ncol(X), nrow(X_diff) == nrow(X) * (nrow(X) - 1) / 2,
        is.vector(Y) || (is.matrix(Y) && pmin(dim(Y)) == 1L), is.double(Y),
            length(Y) == nrow(X),
        is.matrix(V), is.double(V),
            nrow(V) == ncol(X),
        is.vector(h), is.numeric(h), length(h) == 1
    )

    .Call('R_grad', PACKAGE = 'benchmark',
          X, X_diff, as.double(Y), V, as.double(h));
}

elem.pairs <- function(elements) {
    # Number of elements to match.
    n <- length(elements)
    # Create all combinations.
    pairs <- rbind(rep(elements, each=n), rep(elements, n))
    # Select unique combinations without self interaction.
    return(pairs[, pairs[1, ] < pairs[2, ]])
}
grad <- function(X, Y, V, h, persistent = TRUE) {
    n <- nrow(X)
    p <- ncol(X)

    if (!persistent) {
        pair.index <- elem.pairs(seq(n))
        i <- pair.index[, 1] # `i` indices of `(i, j)` pairs
        j <- pair.index[, 2] # `j` indices of `(i, j)` pairs
        lower <- ((i - 1) * n) + j
        upper <- ((j - 1) * n) + i
        X_diff <- X[i, , drop = F] - X[j, , drop = F]
    }

    # Projection matrix onto `span(V)`
    Q <- diag(1, p) - tcrossprod(V, V)
    # Vectorized distance matrix `D`.
    vecD <- rowSums((X_diff %*% Q)^2)

    # Create Kernel matrix (aka. apply kernel to distances)
    K <- matrix(1, n, n) # `exp(0) == 1`
    K[lower] <- exp((-0.5 / h) * vecD^2) # Set lower tri. part
    K[upper] <- t(K)[upper] # Mirror lower tri. to upper

    # Weighted `Y` momentums
    colSumsK <- colSums(K)
    y1 <- (K %*% Y) / colSumsK
    y2 <- (K %*% Y^2) / colSumsK
    # Per example loss `L(V, X_i)`
    L <- y2 - y1^2

    # Compute scaling vector `vecS` for `X_diff`.
    tmp <- kronecker(matrix(y1, n, 1), matrix(Y, 1, n), `-`)^2
    tmp <- as.vector(L) - tmp
    tmp <- tmp * K / colSumsK
    vecS <- (tmp + t(tmp))[lower] * vecD

    G <- crossprod(X_diff, X_diff * vecS) %*% V
    G <- (-2 / (n * h^2)) * G
    return(G)
}
rStiefel <- function(p, q) {
    return(qr.Q(qr(matrix(rnorm(p * q, 0, 1), p, q))))
}

n <- 200
p <- 12
q <- 10

X <- matrix(runif(n * p), n, p)
Y <- runif(n)
V <- rStiefel(p, q)
h <- 0.1

pair.index <- elem.pairs(seq(n))
i <- pair.index[1, ] # `i` indices of `(i, j)` pairs
j <- pair.index[2, ] # `j` indices of `(i, j)` pairs
lower <- ((i - 1) * n) + j
upper <- ((j - 1) * n) + i
X_diff <- X[i, , drop = F] - X[j, , drop = F]

stopifnot(all.equal(
    grad(X, Y, V, h),
    grad.c(X, X_diff, Y, V, h)
))
microbenchmark(
    grad = grad(X, Y, V, h),
    grad.c = grad.c(X, X_diff, Y, V, h)
)