@@ -0,0 +1,26 @@
LaTeX/
**/data/*
**/results/*
.vscode/
# Generated man files in R package (auto-generated by Roxygen)
NNSDR/man/
# Large Files / Data Files
*.png
*.csv
*.Rdata
*.zip
*.tar.gz
*.BAK
# LaTeX - build/database/... files
*.log
*.aux
*.bbl
*.blg
*.out
@@ -0,0 +1,13 @@
Package: NNSDR
Type: Package
Title: Fusing Neural Networks with Sufficient Dimension Reduction
Version: 0.1
Date: 2021-04-19
Author: Daniel Kapla [aut]
Maintainer: Daniel Kapla <daniel@kapla.at>
Description: Combining the Outer Product of Gradients (OPG) with Neural
    Networks for estimation of the mean subspace.
License: GPL-3
Depends: methods, tensorflow (>= 2.2.0)
Encoding: UTF-8
RoxygenNote: 7.0.2
@@ -0,0 +1,18 @@
# Generated by roxygen2: do not edit by hand

S3method(coef,nnsdr)
S3method(summary,nnsdr)
export(dataset)
export(dist.grassmann)
export(dist.subspace)
export(get.script)
export(nnsdr)
export(parse.args)
export(reinitialize_weights)
export(reset_optimizer)
exportClasses(nnsdr)
import(methods)
import(stats)
import(tensorflow)
importFrom(stats,rbinom)
importFrom(stats,rnorm)
@@ -0,0 +1,8 @@
#' Mean Subspace estimation with Neural Nets
#'
#' Package for simulations using Neural Nets for Mean Subspace estimation.
#'
#' @author Daniel Kapla
#'
#' @docType package
"_PACKAGE"
@@ -0,0 +1,18 @@
#' Extracts the OPG or refined reduction coefficients from an nnsdr class
#' instance
#'
#' @param object nnsdr class instance.
#' @param type specifies if the OPG or the Refinement estimate is requested.
#'  One of `Refinement` or `OPG`, the default is `Refinement`.
#' @param ... ignored.
#'
#' @return matrix of the requested reduction coefficients.
#'
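#' @examples
#' \donttest{# usage sketch, requires a trained model (and a working
#' # TensorFlow installation)
#' ds <- dataset('M1')
#' model <- nnsdr$new(input_shapes = list(x = ncol(ds$X)), d = 1L)
#' model$fit(ds$X, ds$Y, epochs = 10L)
#' coef(model)                  # Refinement estimate (default)
#' coef(model, type = 'OPG')    # OPG estimate
#' }
#'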
#' @method coef nnsdr
#' @export
coef.nnsdr <- function(object, type, ...) {
    if (missing(type)) {
        object$coef()
    } else {
        object$coef(type)
    }
}
@@ -0,0 +1,287 @@
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' NNSDR:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' NNSDR:::rmvnorm(20, mu = c(3, -1, 2))
#'
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }
    return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma))
}

#' Multivariate t Distribution.
#'
#' Random generation from the multivariate t distribution (Student's t
#' distribution).
#'
#' @param n number of samples.
#' @param mu mean vector.
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the
#'  degrees of freedom \eqn{\nu} are bigger than 2, the covariance of the
#'  samples is
#'  \deqn{var(X) = \Sigma\frac{\nu}{\nu - 2}.}
#' @param df degrees of freedom \eqn{\nu}.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' NNSDR:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' NNSDR:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' NNSDR:::rmvt(20, mu = c(3, -1, 2), df = 3)
#'
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }
    if (df == Inf) {
        Z <- 1
    } else {
        Z <- sqrt(df / rchisq(n, df))
    }
    return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n))
}

#' Generalized Normal Distribution.
#'
#' Random generation for the generalized normal distribution.
#'
#' @param n number of generated samples.
#' @param mu mean.
#' @param alpha first shape parameter.
#' @param beta second shape parameter.
#'
#' @return numeric array of length \eqn{n}.
#'
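#' @examples
#' # heavy tailed (beta < 2) and light tailed (beta > 2) samples
#' NNSDR:::rgnorm(20, mu = 0, alpha = 1, beta = 0.5)
#' NNSDR:::rgnorm(20, alpha = 1, beta = 4)
#'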
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
    if (alpha <= 0 || beta <= 0) {
        stop("alpha and beta must be positive.")
    }
    lambda <- (1 / alpha)^beta
    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}

#' Laplace Distribution.
#'
#' Random generation for the Laplace distribution.
#'
#' @param n number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
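#' @examples
#' NNSDR:::rlaplace(20)
#' NNSDR:::rlaplace(20, mu = 1, sd = 2)
#'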
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
    U <- runif(n, -0.5, 0.5)
    scale <- sd / sqrt(2)
    return(mu - scale * sign(U) * log(1 - 2 * abs(U)))
}

#' Generates test datasets.
#'
#' Provides sample datasets.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, ..., \code{"M9"}.
#'  Alternatively, just the dataset number 1-9.
#' @param n number of samples.
#' @param p dimension of the random variable \eqn{X}.
#' @param sd standard deviation of the error term \eqn{\epsilon}.
#' @param ... additional parameters, only for "M2" (namely \code{pmix} and
#'  \code{lambda}), see below.
#'
#' @return List with elements
#' \describe{
#'   \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#'   \item{Y}{response.}
#'   \item{B}{the dim-reduction matrix.}
#'   \item{name}{name of the dataset (the \code{name} parameter).}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1, ..., p} for a subspace dimension of \eqn{k = 1} with a
#' default of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as a generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter chosen such that the standard
#' deviation of \eqn{\epsilon} equals \code{sd} (default 0.5).
#' @section M2:
#' The predictors are distributed as
#' \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}
#' with \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z ~ 2 Binom(pmix) - 1}
#' where \eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 1} with a default of \eqn{n = 100}
#' data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X ~ N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1, ..., p} for a subspace dimension of \eqn{k = 2} with a
#' default of \eqn{n = 200} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X ~ U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200}
#' data points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7:
#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X ~ t_3(I_p)} where
#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees
#' of freedom, for a subspace dimension of \eqn{k = 4} with a default of
#' \eqn{n = 400} data points.
#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
#' where \eqn{\epsilon} is Laplace distributed (a generalized normal
#' distribution with location 0 and shape-parameter 1) with the
#' scale-parameter chosen such that \eqn{Var(\epsilon) = 0.25}.
#' @section M8:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X ~ U([0, 1]^p)}
#' for a subspace dimension of \eqn{k = 4} with defaults of \eqn{n = 600},
#' \eqn{p = 20} and \code{sd = 5}. \eqn{b_1 = e_1, b_2 = e_2, b_3 = e_3}, and
#' \eqn{b_4 = 2 e_4 + e_5}. \eqn{Y} follows the "Friedman" model
#' \deqn{Y = 10 sin(\pi (b_1'X)(b_2'X)) + 20 (b_3'X - 0.5)^2 + 5 b_4'X + \epsilon}
#' where \eqn{\epsilon\sim N(0, sd^2)}{\epsilon ~ N(0, sd^2)}
#' (see "Local Linear Forests" <arXiv:1807.11408>).
#' @section M9:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X ~ N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 2} with a default of \eqn{n = 300}
#' data points. \eqn{b_1 = e_1, b_2 = e_2}, and \eqn{Y} is given by the
#' heteroskedastic model \deqn{Y = b_1'X + (0.5 + b_2'X)^2 \epsilon}
#' where \eqn{\epsilon} is standard normal.
#'
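#' @examples
#' # draw a sample of dataset "M1" and inspect its structure
#' ds <- dataset("M1")
#' str(ds)
#' # the defaults for sample size and error standard deviation can be altered
#' ds <- dataset("M4", n = 500, sd = 0.25)
#' # "M2" accepts the additional parameters `pmix` and `lambda`
#' ds <- dataset("M2", pmix = 0.3, lambda = 1)
#'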
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
    name <- toupper(name)
    if (nchar(name) == 1) { name <- paste0("M", name) }
    if (name == "M1") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        beta <- 0.5
        Y <- cos(X %*% B) + rgnorm(n, 0,
            alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
            beta = beta
        )
    } else if (name == "M2") {
        if (missing(n)) { n <- 100 }
        params <- list(...)
        pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
        lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        Z <- 2 * rbinom(n, 1, pmix) - 1
        X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
        Y <- cos(X %*% B) + rnorm(n, 0, sd)
    } else if (name == "M3") {
        if (missing(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- matrix(rnorm(n * p), n)
        Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
    } else if (name == "M4") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
        )
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        XB <- X %*% B
        Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
    } else if (name == "M5") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1, 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3), rep(0, p - 6))
        ) / sqrt(6)
        X <- matrix(runif(n * p), n)
        XB <- X %*% B
        Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
    } else if (name == "M6") {
        if (missing(n)) { n <- 200 }
        # B ... `p x 3`, the columns are e_1, e_2 and e_p
        B <- diag(p)[, -(3:(p - 1))]
        X <- matrix(rnorm(n * p), n)
        Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
    } else if (name == "M7") {
        if (missing(n)) { n <- 400 }
        # B ... `p x 4`, the columns are e_1, e_2, e_3 and e_p
        B <- diag(p)[, -(4:(p - 1))]
        # "R"andom "M"ulti"V"ariate "S"tudent
        X <- rmvt(n = n, sigma = diag(p), df = 3)
        XB <- X %*% B
        Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4])
        Y <- Y + rlaplace(n, 0, sd)
    } else if (name == "M8") {
        # see: "Local Linear Forests" <arXiv:1807.11408>
        if (missing(n)) { n <- 600 }
        if (missing(p)) { p <- 20 } # 10 and 50 in "Local Linear Forests"
        if (missing(sd)) { sd <- 5 } # or 20
        B <- diag(1, p, 4)
        B[, 4] <- c(0, 0, 0, 2, 1, rep(0, p - 5))
        X <- matrix(runif(n * p), n, p)
        XB <- X %*% B
        Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 +
            5 * XB[, 4] + rnorm(n, sd = sd)
    } else if (name == "M9") {
        if (missing(n)) { n <- 300 }
        X <- matrix(rnorm(n * p), n, p)
        Y <- X[, 1] + (0.5 + X[, 2])^2 * rnorm(n)
        B <- diag(1, p, 2)
    } else {
        stop("Got unknown dataset name.")
    }
    return(list(X = X, Y = as.matrix(Y), B = B, name = name))
}
@@ -0,0 +1,24 @@
#' Grassmann distance
#'
#' @param A,B Basis matrices as representations of elements of the Grassmann
#'  manifold.
#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal (if
#'  false, both arguments are `qr` decomposed).
#' @param tol passed to `qr`, ignored if `is.ortho` is `TRUE`.
#'
#' @seealso
#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between
#' subspaces of different dimensions" <arXiv:1407.0900>
#'
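#' @return numeric, the Grassmann distance between the subspaces spanned by
#'  `A` and `B`.
#'
#' @examples
#' # identical subspaces spanned by permuted bases have distance 0
#' A <- diag(1, 4, 2)         # span(e1, e2) in R^4
#' B <- diag(1, 4, 2)[, 2:1]  # same subspace, basis permuted
#' dist.grassmann(A, B, is.ortho = TRUE)
#'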
#' @export
dist.grassmann <- function(A, B, is.ortho = FALSE, tol = 1e-7) {
    if (!is.ortho) {
        A <- qr.Q(qr(A, tol))
        B <- qr.Q(qr(B, tol))
    } else {
        A <- as.matrix(A)
        B <- as.matrix(B)
    }
    sqrt(sum(acos(pmin(La.svd(crossprod(A, B), 0L, 0L)$d, 1))^2))
}
@@ -0,0 +1,37 @@
#' Subspace distance
#'
#' @param A,B Basis matrices as representations of elements of the Grassmann
#'  manifold.
#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal. If
#'  false, the projection matrices are computed as
#'  \deqn{P_A = A (A' A)^{-1} A'}
#'  otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity.
#' @param normalize Boolean to specify if the distance shall be normalized,
#'  meaning that the maximal distance is scaled to \eqn{1} independent of the
#'  dimensions.
#'
#' @seealso
#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between
#' subspaces of different dimensions" <arXiv:1407.0900>
#'
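#' @return numeric, the (possibly normalized) distance between the subspaces
#'  spanned by `A` and `B`.
#'
#' @examples
#' A <- diag(1, 6, 2)          # span(e1, e2) in R^6
#' B <- diag(1, 6, 2)[, 2:1]   # same subspace, basis permuted
#' dist.subspace(A, B, is.ortho = TRUE)                    # 0
#' C <- diag(1, 6, 6)[, 3:4]   # orthogonal subspace span(e3, e4)
#' dist.subspace(A, C, is.ortho = TRUE, normalize = TRUE)  # 1
#'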
#' @export
dist.subspace <- function(A, B, is.ortho = FALSE, normalize = FALSE) {
    if (!is.matrix(A)) A <- as.matrix(A)
    if (!is.matrix(B)) B <- as.matrix(B)
    if (is.ortho) {
        PA <- tcrossprod(A, A)
        PB <- tcrossprod(B, B)
    } else {
        PA <- A %*% solve(t(A) %*% A, t(A))
        PB <- B %*% solve(t(B) %*% B, t(B))
    }
    if (normalize) {
        rankSum <- ncol(A) + ncol(B)
        c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum))
    } else {
        c <- sqrt(2)
    }
    c * norm(PA - PB, type = "F")
}
@@ -0,0 +1,12 @@
#' Gets the `Rscript` file name
#'
#' @note Only relevant in scripts run via `Rscript` (useless in interactive
#'  `R` sessions).
#'
#' @return character vector holding the script file name, or empty
#'  (`character(0)`) when not invoked via `Rscript`.
#'
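#' @examples
#' # inside a script run as `Rscript simulation.R` this returns
#' # "simulation.R"; in an interactive session it returns `character(0)`.
#' get.script()
#'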
#' @export
get.script <- function() {
    args <- commandArgs()
    sub('--file=', '', args[startsWith(args, '--file=')])
}
@@ -0,0 +1,279 @@
# Suppress TensorFlow C++ log messages (INFO, WARNING and ERROR).
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3")
#' Build MLP
#'
#' Constructs and compiles a Multi-Layer Perceptron, optionally with a linear
#' reduction layer prepended to the first input.
#'
#' @param input_shapes named list of the shapes of the model inputs.
#' @param d dimension of the reduction layer (number of its units).
#' @param name model name.
#' @param add_reduction Boolean, if `TRUE` a linear reduction layer (with a
#'  polar projection kernel constraint) is prepended to the first input.
#' @param output_shape number of units of the output layer.
#' @param hidden_units number of units per hidden layer (one hidden layer is
#'  added per element).
#' @param activation activation function of the hidden layers.
#' @param dropout dropout rate of the dropout layer added after each hidden
#'  layer, no dropout layers are added if zero.
#' @param loss loss function, passed to `compile`.
#' @param optimizer optimizer, passed to `compile`.
#' @param metrics metrics, passed to `compile`.
#' @param trainable_reduction Boolean, specifies if the reduction layer
#'  weights are trainable.
#'
#' @return a compiled `keras` model.
#'
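#' @examples
#' \donttest{# a sketch of internal usage (requires a working TensorFlow
#' # installation)
#' mlp <- NNSDR:::build.MLP(
#'     input_shapes = list(x = 7L), d = 2L,
#'     name = 'example', add_reduction = TRUE
#' )
#' mlp$summary()
#' }
#'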
#' @import tensorflow
#' @keywords internal
build.MLP <- function(input_shapes, d, name, add_reduction,
    output_shape = 1L,
    hidden_units = 512L,
    activation = 'relu',
    dropout = 0.4,
    loss = 'MSE',
    optimizer = 'RMSProp',
    metrics = NULL,
    trainable_reduction = TRUE
) {
    K <- tf$keras
    inputs <- Map(K$layers$Input,
        shape = as.integer(input_shapes), # drops names (concatenate key error)
        name = if (is.null(names(input_shapes))) "" else names(input_shapes)
    )
    mlp_inputs <- if (add_reduction) {
        reduction <- K$layers$Dense(
            units = d,
            use_bias = FALSE,
            kernel_constraint = function(w) { # polar projection
                lhs <- tf$linalg$sqrtm(tf$matmul(w, w, transpose_a = TRUE))
                tf$transpose(tf$linalg$solve(lhs, tf$transpose(w)))
            },
            trainable = trainable_reduction,
            name = 'reduction'
        )(inputs[[1]])
        c(reduction, inputs[-1])
    } else {
        inputs
    }
    out <- if (length(inputs) == 1) {
        mlp_inputs[[1]]
    } else {
        K$layers$concatenate(mlp_inputs, axis = 1L, name = 'input_mlp')
    }
    for (i in seq_along(hidden_units)) {
        out <- K$layers$Dense(units = hidden_units[i], activation = activation,
            name = paste0('hidden', i))(out)
        if (dropout > 0)
            out <- K$layers$Dropout(rate = dropout, name = paste0('dropout', i))(out)
    }
    out <- K$layers$Dense(units = output_shape, name = 'output')(out)
    mlp <- K$models$Model(inputs = inputs, outputs = out, name = name)
    mlp$compile(loss = loss, optimizer = optimizer, metrics = metrics)
    mlp
}
#' Base Neural Network model class
#'
#' @examples
#' model <- nnsdr$new(
#'     input_shapes = list(x = 7L),
#'     d = 2L, hidden_units = 128L
#' )
#'
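#' \donttest{# continue by fitting to a synthetic dataset (requires a working
#' # TensorFlow installation)
#' ds <- dataset('M1')
#' model <- nnsdr$new(input_shapes = list(x = ncol(ds$X)), d = 1L)
#' model$fit(ds$X, ds$Y, epochs = c(20L, 50L), initializer = 'fromOPG')
#' dist.subspace(ds$B, coef(model), normalize = TRUE)
#' }
#'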
#' @import methods tensorflow
#' @export nnsdr
#' @exportClass nnsdr
nnsdr <- setRefClass('nnsdr',
    fields = list(
        config = 'list',
        nn.opg = 'ANY',
        nn.ref = 'ANY',
        history.opg = 'ANY',
        history.ref = 'ANY',
        B.opg = 'ANY',
        B.ref = 'ANY',
        history = function() {
            if (is.null(.self$history.opg))
                return(NULL)
            history <- data.frame(
                .self$history.opg,
                model = factor('OPG'),
                epoch = seq_len(nrow(.self$history.opg))
            )
            if (!is.null(.self$history.ref))
                history <- rbind(history, data.frame(
                    .self$history.ref,
                    model = factor('Refinement'),
                    epoch = seq_len(nrow(.self$history.ref))
                ))
            history
        }
    ),
    methods = list(
        initialize = function(input_shapes, d, output_shape = 1L, ...) {
            # Set configuration.
            .self$config <- c(list(
                input_shapes = input_shapes,
                d = as.integer(d),
                output_shape = output_shape
            ), list(...))
            # Build OPG (Step 1) and Refinement (Step 2) Neural Networks.
            .self$nn.opg <- do.call(build.MLP, c(.self$config, list(
                name = 'OPG', add_reduction = FALSE
            )))
            .self$nn.ref <- do.call(build.MLP, c(.self$config, list(
                name = 'Refinement', add_reduction = TRUE
            )))
            # Set initial history field values. If and only if the `history.*`
            # fields are `NULL`, then the Nets are NOT trained.
            .self$history.opg <- NULL
            .self$history.ref <- NULL
            # Set (not yet available) reduction estimates.
            .self$B.opg <- NULL
            .self$B.ref <- NULL
        },
        fit = function(inputs, output, epochs = 1L, batch_size = 32L,
            initializer = c('random', 'fromOPG'), ..., verbose = 0L
        ) {
            if (is.list(inputs)) {
                inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
            } else {
                inputs <- list(tf$cast(inputs, dtype = 'float32'))
            }
            initializer <- match.arg(initializer)
            # Check for OPG history (Step 1), if available skip it.
            if (is.null(.self$history.opg)) {
                # Fit OPG Net and store training history.
                hist <- .self$nn.opg$fit(inputs, output, ...,
                    epochs = as.integer(head(epochs, 1)),
                    batch_size = as.integer(head(batch_size, 1)),
                    verbose = as.integer(verbose)
                )
                .self$history.opg <- as.data.frame(hist$history)
            } else if (verbose > 0) {
                cat("OPG already trained -> skip OPG training.\n")
            }
            # Compute the OPG estimate of the reduction matrix 'B'.
            # Always computed, since different inputs change the estimate.
            with(tf$GradientTape() %as% tape, {
                tape$watch(inputs[[1]])
                out <- .self$nn.opg(inputs)
            })
            G <- as.matrix(tape$gradient(out, inputs[[1]]))
            B <- eigen(var(G), symmetric = TRUE)$vectors
            B <- B[, 1:.self$config$d, drop = FALSE]
            .self$B.opg <- B
            # Check if the Refinement Net needs to be initialized.
            if (is.null(.self$history.ref)) {
                # Set the Reduction layer.
                .self$nn.ref$get_layer('reduction')$set_weights(list(B))
                # Check initialization (for 'random' keep the random initialization).
                if (initializer == 'fromOPG') {
                    # Initialize Refinement Net weights from the OPG Net.
                    W <- as.array(.self$nn.opg$get_layer('hidden1')$kernel)
                    W <- rbind(
                        t(B) %*% W[1:nrow(B), , drop = FALSE],
                        W[-(1:nrow(B)), , drop = FALSE]
                    )
                    b <- as.array(.self$nn.opg$get_layer('hidden1')$bias)
                    .self$nn.ref$get_layer('hidden1')$set_weights(list(W, b))
                    # Get layer names with weights to be initialized from `nn.opg`.
                    # These are the output layer and all hidden layers except the first.
                    layer.names <- Filter(function(name) {
                        if (name == 'output') {
                            TRUE
                        } else if (name == 'hidden1') {
                            FALSE
                        } else {
                            startsWith(name, 'hidden')
                        }
                    }, lapply(.self$nn.opg$layers, `[[`, 'name'))
                    # Copy `nn.opg` weights to `nn.ref`.
                    for (name in layer.names) {
                        .self$nn.ref$get_layer(name)$set_weights(lapply(
                            .self$nn.opg$get_layer(name)$weights, as.array
                        ))
                    }
                }
            } else if (verbose > 0) {
                cat("Refinement Net already trained -> continue training.\n")
            }
            # Fit (or continue fitting) the Refinement Net.
            hist <- .self$nn.ref$fit(inputs, output, ...,
                epochs = as.integer(tail(epochs, 1)),
                batch_size = as.integer(tail(batch_size, 1)),
                verbose = as.integer(verbose)
            )
            .self$history.ref <- rbind(
                .self$history.ref,
                as.data.frame(hist$history)
            )
            # Extract the refined reduction estimate.
            .self$B.ref <- .self$nn.ref$get_layer('reduction')$get_weights()[[1]]
            invisible(NULL)
        },
        predict = function(inputs) {
            # Issue a warning if the Refinement model (Step 2) used for
            # prediction is not trained.
            if (is.null(.self$history.ref))
                warning('Refinement model not trained.')
            if (is.list(inputs)) {
                inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
            } else {
                inputs <- list(tf$cast(inputs, dtype = 'float32'))
            }
            output <- .self$nn.ref(inputs)
            if (is.list(output)) {
                if (length(output) == 1L) {
                    as.array(output[[1]])
                } else {
                    Map(as.array, output)
                }
            } else {
                as.array(output)
            }
        },
        coef = function(type = c('Refinement', 'OPG')) {
            type <- match.arg(type)
            if (type == 'Refinement') {
                .self$B.ref
            } else {
                .self$B.opg
            }
        },
        reset = function(reset = c('both', 'Refinement')) {
            reset <- match.arg(reset)
            if (reset == 'both') {
                reinitialize_weights(.self$nn.opg)
                reset_optimizer(.self$nn.opg$optimizer)
                .self$history.opg <- NULL
                .self$B.opg <- NULL
            }
            reinitialize_weights(.self$nn.ref)
            reset_optimizer(.self$nn.ref$optimizer)
            .self$history.ref <- NULL
            .self$B.ref <- NULL
        },
        summary = function() {
            .self$nn.opg$summary()
            cat('\n')
            .self$nn.ref$summary()
        }
    )
)
nnsdr$lock('config')
@@ -0,0 +1,35 @@
#' Parses script arguments.
#'
#' @param defaults list of default parameters. The names of the provided
#'  defaults define the allowed parameters and their types.
#' @param args arguments to parse; if missing, the `Rscript` command line
#'  arguments are taken.
#'
#' @return named list of parsed parameters, the `defaults` overwritten by the
#'  supplied arguments.
#'
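#' @examples
#' # explicitly supplied arguments (in a script, omit `args` to read the
#' # command line arguments passed to `Rscript`)
#' parse.args(
#'     defaults = list(n = 100L, methods = c("OPG", "Refinement")),
#'     args = c("--n=500", "--methods=OPG")
#' )
#'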
#' @export
parse.args <- function(defaults, args) {
    if (missing(args))
        args <- commandArgs(trailingOnly = TRUE)
    if (length(args) == 0)
        return(defaults)
    args <- strsplit(sub('--', '', args, fixed = TRUE), '=')
    values <- Map(`[`, args, 2)
    names(values) <- unlist(Map(`[`, args, 1))
    if (!all(names(values) %in% names(defaults)))
        stop('Found unknown script parameter')
    for (i in seq_along(defaults)) {
        name <- names(defaults)[i]
        if (name %in% names(values)) {
            value <- unlist(strsplit(values[[name]], ',', fixed = TRUE))
            value <- eval(call(paste0('as.', typeof(defaults[[name]])), value))
            defaults[[name]] <- value
        }
    }
    defaults
}
@@ -0,0 +1,42 @@
#' Re-initialize model weights.
#'
#' An in-place model re-initialization. Intended for simulations to avoid
#' rebuilding the same model architecture for multiple simulation runs.
#'
#' @param model A `keras` model.
#'
#' @return No return value, the model weights are re-initialized in-place.
#'
#' @seealso https://github.com/keras-team/keras/issues/341
#' @examples
#' # library(tensorflow) # v2
#' K <- tf$keras
#' model <- K$models$Sequential(list(
#'     K$layers$Dense(units = 7L, input_shape = list(3L)),
#'     K$layers$Dense(units = 1L)
#' ))
#' model$compile(loss = 'MSE', optimizer = K$optimizers$RMSprop())
#'
#' model$weights
#' reinitialize_weights(model)
#' model$weights
#'
#' @export
reinitialize_weights <- function(model) {
    for (layer in model$layers) {
        # Unwrap wrapped layers (e.g. `TimeDistributed`).
        if (any(endsWith(class(layer), 'Wrapper')))
            layer <- layer$layer
        # Re-initialize kernel and bias weight variables using the layer's
        # own initializers.
        for (var in layer$weights) {
            if (any(grep('/recurrent_kernel:', var$name, fixed = TRUE))) {
                var$assign(layer$recurrent_initializer(var$shape, var$dtype))
            } else if (any(grep('/kernel:', var$name, fixed = TRUE))) {
                var$assign(layer$kernel_initializer(var$shape, var$dtype))
            } else if (any(grep('/bias:', var$name, fixed = TRUE))) {
                var$assign(layer$bias_initializer(var$shape, var$dtype))
            } else {
                stop("Unknown initialization for variable ", var$name)
            }
        }
    }
}
@@ -0,0 +1,39 @@
#' Reset TensorFlow optimizer.
#'
#' @param optimizer a \pkg{tensorflow} optimizer instance
#'
#' @return No return value, the optimizer state is reset in-place.
#'
#' @examples
#' # create example toy data
#' x <- matrix(rnorm(300), 100, 3)
#' y <- rnorm(100)
#'
#' # library(tensorflow) # v2
#' K <- tf$keras
#' model <- K$models$Sequential(list(
#'     K$layers$Dense(units = 7L, input_shape = list(3L)),
#'     K$layers$Dense(units = 1L)
#' ))
#' model$compile(loss = 'MSE', optimizer = 'RMSprop')
#'
#' \donttest{
#' model$fit(x, y) # fit the model
#' }
#'
#' reinitialize_weights(model)
#' reset_optimizer(model$optimizer)
#'
#' \donttest{
#' model$fit(x, y) # fit the model again, completely independent of the first fit
#' }
#'
#' @note Works properly for Adam and RMSprop (other optimizers are not tested!)
#' @note See the sources and search for `_create_slots` and `add_slot`.
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/rmsprop.py
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/adam.py
#'
#' @export
reset_optimizer <- function(optimizer) {
    # Zero out all optimizer slot variables (e.g. moment estimates).
    for (var in optimizer$variables()) {
        var$assign(tf$zeros_like(var))
    }
}
@@ -0,0 +1,12 @@
#' Summary for the OPG and Refinement model of an nnsdr class instance
#'
#' @param object nnsdr class instance.
#' @param ... ignored.
#'
#' @return No return value, prints a human readable summary.
#'
#' @method summary nnsdr
#' @export
summary.nnsdr <- function(object, ...) {
    object$summary()
}