diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a2bebd3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ + +LaTeX/ + +**/data/* +**/results/* + +.vscode/ + +# Generated man files in R package (auto generated by Roxygen) +NNSDR/man/ + +# Large Files / Data Files +*.pdf +*.png +*.csv +*.Rdata +*.zip +*.tar.gz +*.BAK + +# LaTeX - build/database/... files +*.log +*.aux +*.bbl +*.blg +*.out diff --git a/NNSDR/DESCRIPTION b/NNSDR/DESCRIPTION new file mode 100644 index 0000000..a3f0fa4 --- /dev/null +++ b/NNSDR/DESCRIPTION @@ -0,0 +1,13 @@ +Package: NNSDR +Type: Package +Title: Fusing Neural Networks with Sufficient Dimension Reduction +Version: 0.1 +Date: 2021-04-19 +Author: Daniel Kapla [aut] +Maintainer: Daniel Kapla +Description: Combining the Outer Product of Gradients (OPG) with Neural Networks + for estimation of the mean subspace. +License: GPL-3 +Depends: methods, tensorflow (>= 2.2.0) +Encoding: UTF-8 +RoxygenNote: 7.0.2 diff --git a/NNSDR/NAMESPACE b/NNSDR/NAMESPACE new file mode 100644 index 0000000..278c3ee --- /dev/null +++ b/NNSDR/NAMESPACE @@ -0,0 +1,18 @@ +# Generated by roxygen2: do not edit by hand + +S3method(coef,nnsdr) +S3method(summary,nnsdr) +export(dataset) +export(dist.grassmann) +export(dist.subspace) +export(get.script) +export(nnsdr) +export(parse.args) +export(reinitialize_weights) +export(reset_optimizer) +exportClasses(nnsdr) +import(methods) +import(stats) +import(tensorflow) +importFrom(stats,rbinom) +importFrom(stats,rnorm) diff --git a/NNSDR/R/NNSDR.R b/NNSDR/R/NNSDR.R new file mode 100644 index 0000000..883f163 --- /dev/null +++ b/NNSDR/R/NNSDR.R @@ -0,0 +1,8 @@ +#' Mean Subspace estimation with Neural Nets +#' +#' Package for simulations using Neural Nets for Mean Subspace estimation. +#' +#' @author Daniel Kapla +#' +#' @docType package +"_PACKAGE" diff --git a/NNSDR/R/coef_nnsdr.R b/NNSDR/R/coef_nnsdr.R new file mode 100644 index 0000000..9ad734b --- /dev/null +++ b/NNSDR/R/coef_nnsdr.R @@ -0,0 +1,18 @@ +#' Extracts the OPG or refined reduction coefficients from an nnsdr class instance +#' +#' @param object nnsdr class instance +#' @param type specifies if the OPG or Refinement estimate is requested. +#' One of `Refinement` or `OPG`, default is `Refinement`. +#' @param ... ignored. +#' +#' @return Matrix +#' +#' @method coef nnsdr +#' @export +coef.nnsdr <- function(object, type, ...) { + if (missing(type)) { + object$coef() + } else { + object$coef(type) + } +} diff --git a/NNSDR/R/datasets.R b/NNSDR/R/datasets.R new file mode 100644 index 0000000..d8c0019 --- /dev/null +++ b/NNSDR/R/datasets.R @@ -0,0 +1,287 @@ +#' Multivariate Normal Distribution. +#' +#' Random generation for the multivariate normal distribution. +#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)} +#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma covariance matrix. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. +#' +#' @examples +#' NNSDR:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2)) +#' NNSDR:::rmvnorm(20, mu = c(3, -1, 2)) +#' +#' @keywords internal +rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma)) +} + +#' Multivariate t Distribution. +#' +#' Random generation from the multivariate t distribution (student distribution).
+#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degree +#' \eqn{\nu} if bigger than 2 the created covariance is +#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}} +#' for \eqn{\nu > 2}. +#' @param df degree of freedom \eqn{\nu}. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. +#' +#' @examples +#' NNSDR:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3) +#' NNSDR:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3) +#' NNSDR:::rmvt(20, mu = c(3, -1, 2), df = 3) +#' +#' @keywords internal +rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + if (df == Inf) { + Z <- 1 + } else { + Z <- sqrt(df / rchisq(n, df)) + } + + return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n)) +} + +#' Generalized Normal Distribution. +#' +#' Random generation for generalized Normal Distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param alpha first shape parameter. +#' @param beta second shape parameter. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @keywords internal +rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) { + if (alpha <= 0 | beta <= 0) { + stop("alpha and beta must be positive.") + } + lambda <- (1 / alpha)^beta + scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta) + return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu) +} + +#' Laplace distribution +#' +#' Random generation for Laplace distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param sd standard deviation. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @keywords internal +rlaplace <- function(n = 1, mu = 0, sd = 1) { + U <- runif(n, -0.5, 0.5) + scale <- sd / sqrt(2) + + return(mu - scale * sign(U) * log(1 - 2 * abs(U))) +} + +#' Generates test datasets. +#' +#' Provides sample datasets. +#' The general model is given by: +#' \deqn{Y = g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, ..., \code{"M9"}. +#' Alternative just the dataset number 1-9. +#' @param n number of samples. +#' @param p Dimension of random variable \eqn{X}. +#' @param sd standard diviation for error term \eqn{\epsilon}. +#' @param ... Additional parameters only for "M2" (namely \code{pmix} and +#' \code{lambda}), see: below. +#' +#' @return List with elements +#' \itemize{ +#' \item{X}{data, a \eqn{n\times p}{n x p} matrix.} +#' \item{Y}{response.} +#' \item{B}{the dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } +#' +#' @section M1: +#' The predictors are distributed as +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with +#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is +#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is +#' distributed as generalized normal distribution with location 0, +#' shape-parameter 0.5, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.5}. +#' @section M2: +#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}. 
with +#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where +#' \eqn{1_p} is the \eqn{p}-dimensional vector of one's, for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. +#' Defaults for \code{pmix} is 0.3 and \code{lambda} defaults to 1. +#' @section M3: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is +#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. +#' @section M4: +#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)} +#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M5: +#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)} +#' where \eqn{U([0, 1]^p)} is the uniform distribution with +#' independent components on the \eqn{p}-dimensional hypercube for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M6: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data +#' point. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M7: +#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where +#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of +#' freedom, for a subspace dimension of \eqn{k = 4} with a default of +#' \eqn{n = 200} data points. +#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon} +#' where \eqn{\epsilon} is distributed as generalized normal distribution with +#' location 0, shape-parameter 1, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.25}. 
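+#'
+#' @examples
+#' # Illustrative sketch only: draw the "M1" model described above and inspect
+#' # the returned components (sizes follow the documented defaults).
+#' ds <- dataset("M1")
+#' dim(ds$X)   # 100 x 20 predictor matrix
+#' dim(ds$B)   # 20 x 1 true reduction; Y depends on X only through X %*% B
+#' plot(ds$X %*% ds$B, ds$Y)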
+#' +#' @import stats +#' @importFrom stats rnorm rbinom +#' @export +dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) { + name <- toupper(name) + if (nchar(name) == 1) { name <- paste0("M", name) } + + if (name == "M1") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + beta <- 0.5 + Y <- cos(X %*% B) + rgnorm(n, 0, + alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)), + beta = beta + ) + } else if (name == "M2") { + if (missing(n)) { n <- 100 } + params <- list(...) + pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix } + lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + Z <- 2 * rbinom(n, 1, pmix) - 1 + X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n) + Y <- cos(X %*% B) + rnorm(n, 0, sd) + } else if (name == "M3") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- matrix(rnorm(n * p), n) + Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd) + } else if (name == "M4") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1 / sqrt(6), 6), rep(0, p - 6)), + c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6)) + ) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + XB <- X %*% B + Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd) + } else if (name == "M5") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1, 6), rep(0, p - 6)), + c(rep(c(1, -1), 3), rep(0, p - 6)) + ) / sqrt(6) + X <- matrix(runif(n * p), n) + XB <- X %*% B + Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd) + } else if (name == "M6") { + if (missing(n)) { n <- 200 } + # B ... `p x 3` + B <- diag(p)[, -(3:(p - 1))] + X <- matrix(rnorm(n * p), n) + Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd) + } else if (name == "M7") { + if (missing(n)) { n <- 400 } + # B ... `p x 4` + B <- diag(p)[, -(4:(p - 1))] + # "R"andom "M"ulti"V"ariate "S"tudent + X <- rmvt(n = n, sigma = diag(p), df = 3) + XB <- X %*% B + Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4]) + Y <- Y + rlaplace(n, 0, sd) + } else if (name == "M8") { + # see: "Local Linear Forests" + if (missing(n)) { n <- 600 } + if (missing(p)) { p <- 20 } # 10 and 50 in "Local Linear Forests" + if (missing(sd)) { sd <- 5 } # or 20 + + B <- diag(1, p, 4) + B[, 4] <- c(0, 0, 0, 2, 1, rep(0, p - 5)) + + X <- matrix(runif(n * p), n, p) + XB <- X %*% B + Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 + 5 * XB[, 4] + rnorm(n, sd = sd) + Y <- as.matrix(Y) + } else if (name == "M9") { + if (missing(n)) { n <- 300 } + X <- matrix(rnorm(n * p), n, p) + Y <- X[, 1] + (0.5 + X[, 2])^2 * rnorm(n) + B <- diag(1, p, 2) + } else { + stop("Got unknown dataset name.") + } + + return(list(X = X, Y = as.matrix(Y), B = B, name = name)) +} diff --git a/NNSDR/R/dist_grassmann.R b/NNSDR/R/dist_grassmann.R new file mode 100644 index 0000000..7aabfa2 --- /dev/null +++ b/NNSDR/R/dist_grassmann.R @@ -0,0 +1,24 @@ +#' Grassmann distance +#' +#' @param A,B Basis matrices as representations of elements of the Grassmann +#' manifold. +#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal (if +#' false, both arguments are `qr` decomposed) +#' @param tol passed to `qr`, ignored if `is.ortho` is `true`. +#' +#' @seealso +#' K. Ye and L.-H. 
Lim (2016) "Schubert varieties and distances between +#' subspaces of different dimensions" +#' +#' @export +dist.grassmann <- function(A, B, is.ortho = FALSE, tol = 1e-7) { + if (!is.ortho) { + A <- qr.Q(qr(A, tol)) + B <- qr.Q(qr(B, tol)) + } else { + A <- as.matrix(A) + B <- as.matrix(B) + } + + sqrt(sum(acos(pmin(La.svd(crossprod(A, B), 0L, 0L)$d, 1))^2)) +} diff --git a/NNSDR/R/dist_subspace.R b/NNSDR/R/dist_subspace.R new file mode 100644 index 0000000..933a4cf --- /dev/null +++ b/NNSDR/R/dist_subspace.R @@ -0,0 +1,37 @@ +#' Subspace distance +#' +#' @param A,B Basis matrices as representations of elements of the Grassmann +#' manifold. +#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal. If +#' false, the projection matrices are computed as +#' \deqn{P_A = A (A' A)^{-1} A'} +#' otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity. +#' @param normalize Boolean to specify if the distance shall be normalized. +#' Meaning, the maximal distance scaled to be \eqn{1} independent of dimensions. +#' +#' @seealso +#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between +#' subspaces of different dimensions" +#' +#' @export +dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) { + if (!is.matrix(A)) A <- as.matrix(A) + if (!is.matrix(B)) B <- as.matrix(B) + + if (is.ortho) { + PA <- tcrossprod(A, A) + PB <- tcrossprod(B, B) + } else { + PA <- A %*% solve(t(A) %*% A, t(A)) + PB <- B %*% solve(t(B) %*% B, t(B)) + } + + if (normalize) { + rankSum <- ncol(A) + ncol(B) + c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum)) + } else { + c <- sqrt(2) + } + + c * norm(PA - PB, type = "F") +} diff --git a/NNSDR/R/get_script.R b/NNSDR/R/get_script.R new file mode 100644 index 0000000..4fbba66 --- /dev/null +++ b/NNSDR/R/get_script.R @@ -0,0 +1,12 @@ + +#' Gets the `Rscript` file name +#' +#' @note only relevant in scripts (useless in interactive `R` sessions) +#' +#' @return character array of the file name or empty +#' +#' @export +get.script <- function() { + args <- commandArgs() + sub('--file=', '', args[startsWith(args, '--file=')]) +} diff --git a/NNSDR/R/nnsdr_class.R b/NNSDR/R/nnsdr_class.R new file mode 100644 index 0000000..59ada70 --- /dev/null +++ b/NNSDR/R/nnsdr_class.R @@ -0,0 +1,279 @@ +Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") + + + +#' Build MLP +#' +#' @param input_shapes TODO: +#' @param d TODO: +#' @param name TODO: +#' @param add_reduction TODO: +#' @param hidden_units TODO: +#' @param activation TODO: +#' @param dropout TODO: +#' @param loss TODO: +#' @param optimizer TODO: +#' @param metrics TODO: +#' @param trainable_reduction TODO: +#' +#' @import tensorflow +#' @keywords internal +build.MLP <- function(input_shapes, d, name, add_reduction, + output_shape = 1L, + hidden_units = 512L, + activation = 'relu', + dropout = 0.4, + loss = 'MSE', + optimizer = 'RMSProp', + metrics = NULL, + trainable_reduction = TRUE +) { + K <- tf$keras + + inputs <- Map(K$layers$Input, + shape = as.integer(input_shapes), # drops names (concatenate key error) + name = if (is.null(names(input_shapes))) "" else names(input_shapes) + ) + + mlp_inputs <- if (add_reduction) { + reduction <- K$layers$Dense( + units = d, + use_bias = FALSE, + kernel_constraint = function(w) { # polar projection + lhs <- tf$linalg$sqrtm(tf$matmul(w, w, transpose_a = TRUE)) + tf$transpose(tf$linalg$solve(lhs, tf$transpose(w))) + }, + trainable = trainable_reduction, + name = 'reduction' + )(inputs[[1]]) + + c(reduction, inputs[-1]) + } else { + inputs + } + + out <- 
if (length(inputs) == 1) { + mlp_inputs[[1]] + } else { + K$layers$concatenate(mlp_inputs, axis = 1L, name = 'input_mlp') + } + for (i in seq_along(hidden_units)) { + out <- K$layers$Dense(units = hidden_units[i], activation = activation, + name = paste0('hidden', i))(out) + if (dropout > 0) + out <- K$layers$Dropout(rate = dropout, name = paste0('dropout', i))(out) + } + out <- K$layers$Dense(units = output_shape, name = 'output')(out) + + mlp <- K$models$Model(inputs = inputs, outputs = out, name = name) + mlp$compile(loss = loss, optimizer = optimizer, metrics = metrics) + + mlp +} + +#' Base Neuronal Network model class +#' +#' @examples +#' model <- nnsdr$new( +#' input_shapes = list(x = 7L), +#' d = 2L, hidden_units = 128L +#' ) +#' +#' @import methods tensorflow +#' @export nnsdr +#' @exportClass nnsdr +nnsdr <- setRefClass('nnsdr', + fields = list( + config = 'list', + nn.opg = 'ANY', + nn.ref = 'ANY', + history.opg = 'ANY', + history.ref = 'ANY', + B.opg = 'ANY', + B.ref = 'ANY', + history = function() { + if (is.null(.self$history.opg)) + return(NULL) + + history <- data.frame( + .self$history.opg, + model = factor('OPG'), + epoch = seq_len(nrow(.self$history.opg)) + ) + + if (!is.null(.self$history.ref)) + history <- rbind(history, data.frame( + .self$history.ref, + model = factor('Refinement'), + epoch = seq_len(nrow(.self$history.ref)) + )) + + history + } + ), + + methods = list( + initialize = function(input_shapes, d, output_shape = 1L, ...) { + # Set configuration. + .self$config <- c(list( + input_shapes = input_shapes, + d = as.integer(d), + output_shape = output_shape + ), list(...)) + + # Build OPG (Step 1) and Refinement (Step 2) Neuronal Networks + .self$nn.opg <- do.call(build.MLP, c(.self$config, list( + name = 'OPG', add_reduction = FALSE + ))) + .self$nn.ref <- do.call(build.MLP, c(.self$config, list( + name = 'Refinement', add_reduction = TRUE + ))) + + # Set initial history field values. If and only if the `history.*` + # fields are `NULL`, then the Nets are NOT trained. + .self$history.opg <- NULL + .self$history.ref <- NULL + + # Set (not jet available) reduction estimates + .self$B.opg <- NULL + .self$B.ref <- NULL + }, + + fit = function(inputs, output, epochs = 1L, batch_size = 32L, + initializer = c('random', 'fromOPG'), ..., verbose = 0L + ) { + if (is.list(inputs)) { + inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32') + } else { + inputs <- list(tf$cast(inputs, dtype = 'float32')) + } + initializer <- match.arg(initializer) + + # Check for OPG history (Step 1), if available skip it. + if (is.null(.self$history.opg)) { + # Fit OPG Net and store training history. + hist <- .self$nn.opg$fit(inputs, output, ..., + epochs = as.integer(head(epochs, 1)), + batch_size = as.integer(head(batch_size, 1)), + verbose = as.integer(verbose) + ) + .self$history.opg <- as.data.frame(hist$history) + } else if (verbose > 0) { + cat("OPG already trained -> skip OPG training.\n") + } + + # Compute OPG estimate of the Reduction matrix 'B'. + # Always compute, different inputs change the estimate. + with(tf$GradientTape() %as% tape, { + tape$watch(inputs[[1]]) + out <- .self$nn.opg(inputs) + }) + G <- as.matrix(tape$gradient(out, inputs[[1]])) + B <- eigen(var(G), symmetric = TRUE)$vectors + B <- B[, 1:.self$config$d, drop = FALSE] + .self$B.opg <- B + + # Check for need to initialize the Refinement Net. 
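+            # If the Refinement Net has not been trained yet, seed its 'reduction'
+            # layer with the OPG estimate `B`. With `initializer = 'fromOPG'` the
+            # first hidden layer kernel of the OPG Net is additionally projected
+            # onto the reduced space (`t(B) %*% W`) and all remaining hidden and
+            # output layer weights are copied, so the Refinement Net starts out
+            # close to the function fitted in the OPG step.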
+ if (is.null(.self$history.ref)) { + # Set Reduction layer + .self$nn.ref$get_layer('reduction')$set_weights(list(B)) + + # Check initialization (for random keep random initialization) + if (initializer == 'fromOPG') { + # Initialize Refinement Net weights from the OPG Net. + W <- as.array(.self$nn.opg$get_layer('hidden1')$kernel) + W <- rbind( + t(B) %*% W[1:nrow(B), , drop = FALSE], + W[-(1:nrow(B)), , drop = FALSE] + ) + b <- as.array(.self$nn.opg$get_layer('hidden1')$bias) + .self$nn.ref$get_layer('hidden1')$set_weights(list(W, b)) + # Get layer names with weights to be initialized from `nn.opg` + # These are the output layer and all hidden layers except the first + layer.names <- Filter(function(name) { + if (name == 'output') { + TRUE + } else if (name == 'hidden1') { + FALSE + } else { + startsWith(name, 'hidden') + } + }, lapply(.self$nn.opg$layers, `[[`, 'name')) + # Copy `nn.opg` weights to `nn.ref` + for (name in layer.names) { + .self$nn.ref$get_layer(name)$set_weights(lapply( + .self$nn.opg$get_layer(name)$weights, as.array + )) + } + } + } else if (verbose > 0) { + cat("Refinement Net already trained -> continue training.\n") + } + + # Fit (or continue fitting) the Refinement Net. + hist <- .self$nn.ref$fit(inputs, output, ..., + epochs = as.integer(tail(epochs, 1)), + batch_size = as.integer(tail(batch_size, 1)), + verbose = as.integer(verbose) + ) + .self$history.ref <- rbind( + .self$history.ref, + as.data.frame(hist$history) + ) + # Extract refined reduction estimate + .self$B.ref <- .self$nn.ref$get_layer('reduction')$get_weights()[[1]] + + invisible(NULL) + }, + predict = function(inputs) { + # Issue warning if the Refinement model (Step 2) used for prediction + # is not trained. + if (is.null(.self$history.ref)) + warning('Refinement model not trained.') + + if (is.list(inputs)) { + inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32') + } else { + inputs <- list(tf$cast(inputs, dtype = 'float32')) + } + output <- .self$nn.ref(inputs) + + if (is.list(output)) { + if (length(output) == 1L) { + as.array(output[[1]]) + } else { + Map(as.array, output) + } + } else { + as.array(output) + } + }, + coef = function(type = c('Refinement', 'OPG')) { + type <- match.arg(type) + if (type == 'Refinement') { + .self$B.ref + } else { + .self$B.opg + } + }, + reset = function(reset = c('both', 'Refinement')) { + reset <- match.arg(reset) + if (reset == 'both') { + reinitialize_weights(.self$nn.opg) + reset_optimizer(.self$nn.opg$optimizer) + .self$history.opg <- NULL + .self$B.opg <- NULL + } + reinitialize_weights(.self$nn.ref) + reset_optimizer(.self$nn.ref$optimizer) + .self$history.ref <- NULL + .self$B.ref <- NULL + }, + summary = function() { + .self$nn.opg$summary() + cat('\n') + .self$nn.ref$summary() + } + ) +) +nnsdr$lock('config') diff --git a/NNSDR/R/parse_args.R b/NNSDR/R/parse_args.R new file mode 100644 index 0000000..56c14d7 --- /dev/null +++ b/NNSDR/R/parse_args.R @@ -0,0 +1,35 @@ + +#' Parses script arguments. +#' +#' @param defaults list of default parameters. Names of the provided defaults +#' define the allowed parameters. +#' @param args Arguments to parge, if missing the Rscript params are taken. 
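+#'
+#' @examples
+#' # Small sketch with made-up defaults (any names work, they only have to match
+#' # the names of `defaults`); values are coerced to the type of the default.
+#' parse.args(list(n = 100L, p = 20L), args = c('--n=200', '--p=10'))
+#' # Comma separated values become vectors, e.g. '--n=100,200' yields c(100L, 200L).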
+#' +#' @return calling script arguments of `Rscript` +#' +#' @export +parse.args <- function(defaults, args) { + if (missing(args)) + args <- commandArgs(trailingOnly = TRUE) + if (length((args)) == 0) + return(defaults) + + args <- strsplit(sub('--', '', args, fixed = TRUE), '=') + values <- Map(`[`, args, 2) + names(values) <- unlist(Map(`[`, args, 1)) + + if (!all(names(values) %in% names(defaults))) + stop('Found unknown script parameter') + + for (i in seq_along(defaults)) { + name <- names(defaults)[i] + + if (name %in% names(values)) { + value <- unlist(strsplit(values[[name]], ',', fixed = TRUE)) + value <- eval(call(paste0('as.', typeof(defaults[[name]])), value)) + defaults[[name]] <- value + } + } + + defaults +} diff --git a/NNSDR/R/reinitialize_weights.R b/NNSDR/R/reinitialize_weights.R new file mode 100644 index 0000000..2f8f4a2 --- /dev/null +++ b/NNSDR/R/reinitialize_weights.R @@ -0,0 +1,42 @@ + +#' Re-initialize model weights. +#' +#' An in-place model re-initialization. Intended for simulations to avoid +#' rebuilding the same model architecture for multiple simulation runs. +#' +#' @param model A `keras` model. +#' +#' @seealso https://github.com/keras-team/keras/issues/341 +#' @examples +#' # library(tensorflow) # v2 +#' K <- tf$keras +#' model <- K$models$Sequential(list( +#' K$layers$Dense(units = 7L, input_shape = list(3L)), +#' K$layers$Dense(units = 1L) +#' )) +#' model$compile(loss = 'MSE', optimizer = K$optimizers$RMSprop()) +#' +#' model$weights +#' reinitialize_weights(model) +#' model$weights +#' +#' @export +reinitialize_weights <- function(model) { + for (layer in model$layers) { + # Unwrap wrapped layers. + if (any(endsWith(class(layer), 'Wrapper'))) + layer <- layer$layer + # Re-initialize kernel and bias weight variables. + for (var in layer$weights) { + if (any(grep('/recurrent_kernel:', var$name, fixed = TRUE))) { + var$assign(layer$recurrent_initializer(var$shape, var$dtype)) + } else if (any(grep('/kernel:', var$name, fixed = TRUE))) { + var$assign(layer$kernel_initializer(var$shape, var$dtype)) + } else if (any(grep('/bias:', var$name, fixed = TRUE))) { + var$assign(layer$bias_initializer(var$shape, var$dtype)) + } else { + stop("Unknown initialization for variable ", var$name) + } + } + } +} diff --git a/NNSDR/R/reset_optimizer.R b/NNSDR/R/reset_optimizer.R new file mode 100644 index 0000000..fb6e698 --- /dev/null +++ b/NNSDR/R/reset_optimizer.R @@ -0,0 +1,39 @@ + +#' Reset TensorFlow optimizer. +#' +#' @param optimizer a \pkg{tensorflow} optimizer instance +#' +#' @examples +#' # Create example toy data +#' +#' # library(tensorflow) # v2 +#' K <- tf$keras +#' model <- K$models$Sequential(list( +#' K$layers$Dense(units = 7L, input_shape = list(3L)), +#' K$layers$Dense(units = 1L) +#' )) +#' model$compile(loss = 'MSE', optimizer = 'RMSprop') +#' +#' \donttest{ +#' model$fit(input) # Fit the model +#' } +#' +#' reinitialize_weights(model) +#' reset_optimizer(model$optimizer) +#' +#' \donttest{ +#' model$fit(input) # Fit the model again completely independent of the first fit. +#' } +#' +#' @note Works for Adam, RMSprop properly (other optimizes are not tested!) +#' @note see source and search for `_create_slots` and `add_slot`. 
+#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/rmsprop.py +#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/adam.py +#' +#' @export +reset_optimizer <- function(optimizer) { + for (var in optimizer$variables()) { + var$assign(tf$zeros_like(var)) + } +} diff --git a/NNSDR/R/summary_nnsdr.R b/NNSDR/R/summary_nnsdr.R new file mode 100644 index 0000000..5e65e20 --- /dev/null +++ b/NNSDR/R/summary_nnsdr.R @@ -0,0 +1,12 @@ +#' Summary for the OPG and Refinement model of an nnsdr class instance +#' +#' @param object nnsdr class instance +#' @param ... ignored. +#' +#' @return No return value, prints human readable summary. +#' +#' @method summary nnsdr +#' @export +summary.nnsdr <- function(object, ...) { + object$summary() +}
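
Taken together, the pieces above are intended to be used roughly as follows. This is a minimal end-to-end sketch, not part of the patch itself: it assumes a working tensorflow (>= 2.2.0) backend, and the hidden layer size, epoch counts and dataset choice are illustrative only.

library(NNSDR)

ds <- dataset("M1")                          # synthetic single-index data, see datasets.R

model <- nnsdr$new(
    input_shapes = list(x = ncol(ds$X)),     # one plain input of dimension p
    d = ncol(ds$B),                          # target reduction dimension
    hidden_units = 128L
)

# Step 1 (OPG Net) and Step 2 (Refinement Net) are fitted in one call; `epochs`
# may hold two values, one per step.
model$fit(ds$X, ds$Y, epochs = c(50L, 100L), initializer = 'fromOPG')

B.hat <- coef(model)                         # refined estimate ('reduction' layer weights)
B.opg <- coef(model, type = 'OPG')           # first-step OPG estimate

# The polar-projection kernel constraint keeps the refined estimate
# (numerically) semi-orthogonal.
round(crossprod(B.hat), 3)

# Compare estimated and true mean subspace.
dist.subspace(ds$B, B.hat, normalize = TRUE)
dist.subspace(ds$B, B.opg, normalize = TRUE)

# Reset weights, optimizer states and histories before the next simulation replication.
model$reset('both')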