diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a2bebd3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ + +LaTeX/ + +**/data/* +**/results/* + +.vscode/ + +# Generated man files in R package (auto generated by Roxygen) +NNSDR/man/ + +# Large Files / Data Files +*.pdf +*.png +*.csv +*.Rdata +*.zip +*.tar.gz +*.BAK + +# LaTeX - build/database/... files +*.log +*.aux +*.bbl +*.blg +*.out diff --git a/NNSDR/DESCRIPTION b/NNSDR/DESCRIPTION new file mode 100644 index 0000000..a3f0fa4 --- /dev/null +++ b/NNSDR/DESCRIPTION @@ -0,0 +1,13 @@ +Package: NNSDR +Type: Package +Title: Fusing Neural Networks with Sufficient Dimension Reduction +Version: 0.1 +Date: 2021-04-19 +Author: Daniel Kapla [aut] +Maintainer: Daniel Kapla +Description: Combining the Outer Product of Gradients (OPG) with Neural Networks + for estimation of the mean subspace. +License: GPL-3 +Depends: methods, tensorflow (>= 2.2.0) +Encoding: UTF-8 +RoxygenNote: 7.0.2 diff --git a/NNSDR/NAMESPACE b/NNSDR/NAMESPACE new file mode 100644 index 0000000..278c3ee --- /dev/null +++ b/NNSDR/NAMESPACE @@ -0,0 +1,18 @@ +# Generated by roxygen2: do not edit by hand + +S3method(coef,nnsdr) +S3method(summary,nnsdr) +export(dataset) +export(dist.grassmann) +export(dist.subspace) +export(get.script) +export(nnsdr) +export(parse.args) +export(reinitialize_weights) +export(reset_optimizer) +exportClasses(nnsdr) +import(methods) +import(stats) +import(tensorflow) +importFrom(stats,rbinom) +importFrom(stats,rnorm) diff --git a/NNSDR/R/NNSDR.R b/NNSDR/R/NNSDR.R new file mode 100644 index 0000000..883f163 --- /dev/null +++ b/NNSDR/R/NNSDR.R @@ -0,0 +1,8 @@ +#' Mean Subspace estimation with Neural Nets +#' +#' Package for simulations using Neural Nets for Mean Subspace estimation. +#' +#' @author Daniel Kapla +#' +#' @docType package +"_PACKAGE" diff --git a/NNSDR/R/coef_nnsdr.R b/NNSDR/R/coef_nnsdr.R new file mode 100644 index 0000000..9ad734b --- /dev/null +++ b/NNSDR/R/coef_nnsdr.R @@ -0,0 +1,18 @@ +#' Extracts the OPG or refined reduction coefficients from an nnsdr class instance +#' +#' @param object nnsdr class instance +#' @param type specifies if the OPG or Refinement estimate is requested. +#' One of `Refinement` or `OPG`, default is `Refinement`. +#' @param ... ignored. +#' +#' @return Matrix +#' +#' @method coef nnsdr +#' @export +coef.nnsdr <- function(object, type, ...) { + if (missing(type)) { + object$coef() + } else { + object$coef(type) + } +} diff --git a/NNSDR/R/datasets.R b/NNSDR/R/datasets.R new file mode 100644 index 0000000..d8c0019 --- /dev/null +++ b/NNSDR/R/datasets.R @@ -0,0 +1,287 @@ +#' Multivariate Normal Distribution. +#' +#' Random generation for the multivariate normal distribution. +#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)} +#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma covariance matrix. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. +#' +#' @examples +#' NNSDR:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2)) +#' NNSDR:::rmvnorm(20, mu = c(3, -1, 2)) +#' +#' @keywords internal +rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + return(rep(mu, each = n) + matrix(rnorm(n * p), n) %*% chol(sigma)) +} + +#' Multivariate t Distribution. +#' +#' Random generation from the multivariate t distribution (student distribution).
+#' +#' @param n number of samples. +#' @param mu mean +#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degree +#' \eqn{\nu} if bigger than 2 the created covariance is +#' \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}} +#' for \eqn{\nu > 2}. +#' @param df degree of freedom \eqn{\nu}. +#' +#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows. +#' +#' @examples +#' NNSDR:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3) +#' NNSDR:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3) +#' NNSDR:::rmvt(20, mu = c(3, -1, 2), df = 3) +#' +#' @keywords internal +rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) { + if (!missing(sigma)) { + p <- nrow(sigma) + } else if (!missing(mu)) { + mu <- matrix(mu, ncol = 1) + p <- nrow(mu) + } else { + stop("At least one of 'mu' or 'sigma' must be supplied.") + } + + if (df == Inf) { + Z <- 1 + } else { + Z <- sqrt(df / rchisq(n, df)) + } + + return(rmvnorm(n, sigma = sigma) * Z + rep(mu, each = n)) +} + +#' Generalized Normal Distribution. +#' +#' Random generation for generalized Normal Distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param alpha first shape parameter. +#' @param beta second shape parameter. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @keywords internal +rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) { + if (alpha <= 0 | beta <= 0) { + stop("alpha and beta must be positive.") + } + lambda <- (1 / alpha)^beta + scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta) + return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu) +} + +#' Laplace distribution +#' +#' Random generation for Laplace distribution. +#' +#' @param n Number of generated samples. +#' @param mu mean. +#' @param sd standard deviation. +#' +#' @return numeric array of length \eqn{n}. +#' +#' @keywords internal +rlaplace <- function(n = 1, mu = 0, sd = 1) { + U <- runif(n, -0.5, 0.5) + scale <- sd / sqrt(2) + + return(mu - scale * sign(U) * log(1 - 2 * abs(U))) +} + +#' Generates test datasets. +#' +#' Provides sample datasets. +#' The general model is given by: +#' \deqn{Y = g(B'X) + \epsilon} +#' +#' @param name One of \code{"M1"}, ..., \code{"M9"}. +#' Alternative just the dataset number 1-9. +#' @param n number of samples. +#' @param p Dimension of random variable \eqn{X}. +#' @param sd standard diviation for error term \eqn{\epsilon}. +#' @param ... Additional parameters only for "M2" (namely \code{pmix} and +#' \code{lambda}), see: below. +#' +#' @return List with elements +#' \itemize{ +#' \item{X}{data, a \eqn{n\times p}{n x p} matrix.} +#' \item{Y}{response.} +#' \item{B}{the dim-reduction matrix} +#' \item{name}{Name of the dataset (name parameter)} +#' } +#' +#' @section M1: +#' The predictors are distributed as +#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with +#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is +#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is +#' distributed as generalized normal distribution with location 0, +#' shape-parameter 0.5, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.5}. +#' @section M2: +#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}. 
with +#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where +#' \eqn{1_p} is the \eqn{p}-dimensional vector of one's, for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. +#' Defaults for \code{pmix} is 0.3 and \code{lambda} defaults to 1. +#' @section M3: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace +#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points. +#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' and \eqn{Y} is +#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is +#' standard normal. +#' @section M4: +#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)} +#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for +#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default +#' of \eqn{n = 100} data points. \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M5: +#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)} +#' where \eqn{U([0, 1]^p)} is the uniform distribution with +#' independent components on the \eqn{p}-dimensional hypercube for a subspace +#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points. +#' \eqn{p = 20}, +#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, +#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)} +#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M6: +#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)} +#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data +#' point. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon} +#' where \eqn{\epsilon} is standard normal. +#' @section M7: +#' The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where +#' \eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of +#' freedom, for a subspace dimension of \eqn{k = 4} with a default of +#' \eqn{n = 200} data points. +#' \eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where +#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space. +#' \eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon} +#' where \eqn{\epsilon} is distributed as generalized normal distribution with +#' location 0, shape-parameter 1, and the scale-parameter is chosen such that +#' \eqn{Var(\epsilon) = 0.25}. 
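+#'
+#' @examples
+#' # Illustrative sketch only: draw the "M1" model described above and inspect
+#' # the returned components (sizes follow the documented defaults).
+#' ds <- dataset("M1")
+#' dim(ds$X)   # 100 x 20 predictor matrix
+#' dim(ds$B)   # 20 x 1 true reduction; Y depends on X only through X %*% B
+#' plot(ds$X %*% ds$B, ds$Y)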
+#' +#' @import stats +#' @importFrom stats rnorm rbinom +#' @export +dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) { + name <- toupper(name) + if (nchar(name) == 1) { name <- paste0("M", name) } + + if (name == "M1") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + beta <- 0.5 + Y <- cos(X %*% B) + rgnorm(n, 0, + alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)), + beta = beta + ) + } else if (name == "M2") { + if (missing(n)) { n <- 100 } + params <- list(...) + pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix } + lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + Z <- 2 * rbinom(n, 1, pmix) - 1 + X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n) + Y <- cos(X %*% B) + rnorm(n, 0, sd) + } else if (name == "M3") { + if (missing(n)) { n <- 100 } + # B ... `p x 1` + B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1) + X <- matrix(rnorm(n * p), n) + Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd) + } else if (name == "M4") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1 / sqrt(6), 6), rep(0, p - 6)), + c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6)) + ) + X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`))) + XB <- X %*% B + Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd) + } else if (name == "M5") { + if (missing(n)) { n <- 200 } + # B ... `p x 2` + B <- cbind( + c(rep(1, 6), rep(0, p - 6)), + c(rep(c(1, -1), 3), rep(0, p - 6)) + ) / sqrt(6) + X <- matrix(runif(n * p), n) + XB <- X %*% B + Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd) + } else if (name == "M6") { + if (missing(n)) { n <- 200 } + # B ... `p x 3` + B <- diag(p)[, -(3:(p - 1))] + X <- matrix(rnorm(n * p), n) + Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd) + } else if (name == "M7") { + if (missing(n)) { n <- 400 } + # B ... `p x 4` + B <- diag(p)[, -(4:(p - 1))] + # "R"andom "M"ulti"V"ariate "S"tudent + X <- rmvt(n = n, sigma = diag(p), df = 3) + XB <- X %*% B + Y <- (XB[, 1]) * (XB[, 2])^2 + (XB[, 3]) * (XB[, 4]) + Y <- Y + rlaplace(n, 0, sd) + } else if (name == "M8") { + # see: "Local Linear Forests" + if (missing(n)) { n <- 600 } + if (missing(p)) { p <- 20 } # 10 and 50 in "Local Linear Forests" + if (missing(sd)) { sd <- 5 } # or 20 + + B <- diag(1, p, 4) + B[, 4] <- c(0, 0, 0, 2, 1, rep(0, p - 5)) + + X <- matrix(runif(n * p), n, p) + XB <- X %*% B + Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 + 5 * XB[, 4] + rnorm(n, sd = sd) + Y <- as.matrix(Y) + } else if (name == "M9") { + if (missing(n)) { n <- 300 } + X <- matrix(rnorm(n * p), n, p) + Y <- X[, 1] + (0.5 + X[, 2])^2 * rnorm(n) + B <- diag(1, p, 2) + } else { + stop("Got unknown dataset name.") + } + + return(list(X = X, Y = as.matrix(Y), B = B, name = name)) +} diff --git a/NNSDR/R/dist_grassmann.R b/NNSDR/R/dist_grassmann.R new file mode 100644 index 0000000..7aabfa2 --- /dev/null +++ b/NNSDR/R/dist_grassmann.R @@ -0,0 +1,24 @@ +#' Grassmann distance +#' +#' @param A,B Basis matrices as representations of elements of the Grassmann +#' manifold. +#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal (if +#' false, both arguments are `qr` decomposed) +#' @param tol passed to `qr`, ignored if `is.ortho` is `true`. +#' +#' @seealso +#' K. Ye and L.-H. 
Lim (2016) "Schubert varieties and distances between +#' subspaces of different dimensions" +#' +#' @export +dist.grassmann <- function(A, B, is.ortho = FALSE, tol = 1e-7) { + if (!is.ortho) { + A <- qr.Q(qr(A, tol)) + B <- qr.Q(qr(B, tol)) + } else { + A <- as.matrix(A) + B <- as.matrix(B) + } + + sqrt(sum(acos(pmin(La.svd(crossprod(A, B), 0L, 0L)$d, 1))^2)) +} diff --git a/NNSDR/R/dist_subspace.R b/NNSDR/R/dist_subspace.R new file mode 100644 index 0000000..933a4cf --- /dev/null +++ b/NNSDR/R/dist_subspace.R @@ -0,0 +1,37 @@ +#' Subspace distance +#' +#' @param A,B Basis matrices as representations of elements of the Grassmann +#' manifold. +#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal. If +#' false, the projection matrices are computed as +#' \deqn{P_A = A (A' A)^{-1} A'} +#' otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity. +#' @param normalize Boolean to specify if the distance shall be normalized. +#' Meaning, the maximal distance scaled to be \eqn{1} independent of dimensions. +#' +#' @seealso +#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between +#' subspaces of different dimensions" +#' +#' @export +dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) { + if (!is.matrix(A)) A <- as.matrix(A) + if (!is.matrix(B)) B <- as.matrix(B) + + if (is.ortho) { + PA <- tcrossprod(A, A) + PB <- tcrossprod(B, B) + } else { + PA <- A %*% solve(t(A) %*% A, t(A)) + PB <- B %*% solve(t(B) %*% B, t(B)) + } + + if (normalize) { + rankSum <- ncol(A) + ncol(B) + c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum)) + } else { + c <- sqrt(2) + } + + c * norm(PA - PB, type = "F") +} diff --git a/NNSDR/R/get_script.R b/NNSDR/R/get_script.R new file mode 100644 index 0000000..4fbba66 --- /dev/null +++ b/NNSDR/R/get_script.R @@ -0,0 +1,12 @@ + +#' Gets the `Rscript` file name +#' +#' @note only relevant in scripts (useless in interactive `R` sessions) +#' +#' @return character array of the file name or empty +#' +#' @export +get.script <- function() { + args <- commandArgs() + sub('--file=', '', args[startsWith(args, '--file=')]) +} diff --git a/NNSDR/R/nnsdr_class.R b/NNSDR/R/nnsdr_class.R new file mode 100644 index 0000000..59ada70 --- /dev/null +++ b/NNSDR/R/nnsdr_class.R @@ -0,0 +1,279 @@ +Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") + + + +#' Build MLP +#' +#' @param input_shapes TODO: +#' @param d TODO: +#' @param name TODO: +#' @param add_reduction TODO: +#' @param hidden_units TODO: +#' @param activation TODO: +#' @param dropout TODO: +#' @param loss TODO: +#' @param optimizer TODO: +#' @param metrics TODO: +#' @param trainable_reduction TODO: +#' +#' @import tensorflow +#' @keywords internal +build.MLP <- function(input_shapes, d, name, add_reduction, + output_shape = 1L, + hidden_units = 512L, + activation = 'relu', + dropout = 0.4, + loss = 'MSE', + optimizer = 'RMSProp', + metrics = NULL, + trainable_reduction = TRUE +) { + K <- tf$keras + + inputs <- Map(K$layers$Input, + shape = as.integer(input_shapes), # drops names (concatenate key error) + name = if (is.null(names(input_shapes))) "" else names(input_shapes) + ) + + mlp_inputs <- if (add_reduction) { + reduction <- K$layers$Dense( + units = d, + use_bias = FALSE, + kernel_constraint = function(w) { # polar projection + lhs <- tf$linalg$sqrtm(tf$matmul(w, w, transpose_a = TRUE)) + tf$transpose(tf$linalg$solve(lhs, tf$transpose(w))) + }, + trainable = trainable_reduction, + name = 'reduction' + )(inputs[[1]]) + + c(reduction, inputs[-1]) + } else { + inputs + } + + out <- 
if (length(inputs) == 1) { + mlp_inputs[[1]] + } else { + K$layers$concatenate(mlp_inputs, axis = 1L, name = 'input_mlp') + } + for (i in seq_along(hidden_units)) { + out <- K$layers$Dense(units = hidden_units[i], activation = activation, + name = paste0('hidden', i))(out) + if (dropout > 0) + out <- K$layers$Dropout(rate = dropout, name = paste0('dropout', i))(out) + } + out <- K$layers$Dense(units = output_shape, name = 'output')(out) + + mlp <- K$models$Model(inputs = inputs, outputs = out, name = name) + mlp$compile(loss = loss, optimizer = optimizer, metrics = metrics) + + mlp +} + +#' Base Neuronal Network model class +#' +#' @examples +#' model <- nnsdr$new( +#' input_shapes = list(x = 7L), +#' d = 2L, hidden_units = 128L +#' ) +#' +#' @import methods tensorflow +#' @export nnsdr +#' @exportClass nnsdr +nnsdr <- setRefClass('nnsdr', + fields = list( + config = 'list', + nn.opg = 'ANY', + nn.ref = 'ANY', + history.opg = 'ANY', + history.ref = 'ANY', + B.opg = 'ANY', + B.ref = 'ANY', + history = function() { + if (is.null(.self$history.opg)) + return(NULL) + + history <- data.frame( + .self$history.opg, + model = factor('OPG'), + epoch = seq_len(nrow(.self$history.opg)) + ) + + if (!is.null(.self$history.ref)) + history <- rbind(history, data.frame( + .self$history.ref, + model = factor('Refinement'), + epoch = seq_len(nrow(.self$history.ref)) + )) + + history + } + ), + + methods = list( + initialize = function(input_shapes, d, output_shape = 1L, ...) { + # Set configuration. + .self$config <- c(list( + input_shapes = input_shapes, + d = as.integer(d), + output_shape = output_shape + ), list(...)) + + # Build OPG (Step 1) and Refinement (Step 2) Neuronal Networks + .self$nn.opg <- do.call(build.MLP, c(.self$config, list( + name = 'OPG', add_reduction = FALSE + ))) + .self$nn.ref <- do.call(build.MLP, c(.self$config, list( + name = 'Refinement', add_reduction = TRUE + ))) + + # Set initial history field values. If and only if the `history.*` + # fields are `NULL`, then the Nets are NOT trained. + .self$history.opg <- NULL + .self$history.ref <- NULL + + # Set (not jet available) reduction estimates + .self$B.opg <- NULL + .self$B.ref <- NULL + }, + + fit = function(inputs, output, epochs = 1L, batch_size = 32L, + initializer = c('random', 'fromOPG'), ..., verbose = 0L + ) { + if (is.list(inputs)) { + inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32') + } else { + inputs <- list(tf$cast(inputs, dtype = 'float32')) + } + initializer <- match.arg(initializer) + + # Check for OPG history (Step 1), if available skip it. + if (is.null(.self$history.opg)) { + # Fit OPG Net and store training history. + hist <- .self$nn.opg$fit(inputs, output, ..., + epochs = as.integer(head(epochs, 1)), + batch_size = as.integer(head(batch_size, 1)), + verbose = as.integer(verbose) + ) + .self$history.opg <- as.data.frame(hist$history) + } else if (verbose > 0) { + cat("OPG already trained -> skip OPG training.\n") + } + + # Compute OPG estimate of the Reduction matrix 'B'. + # Always compute, different inputs change the estimate. + with(tf$GradientTape() %as% tape, { + tape$watch(inputs[[1]]) + out <- .self$nn.opg(inputs) + }) + G <- as.matrix(tape$gradient(out, inputs[[1]])) + B <- eigen(var(G), symmetric = TRUE)$vectors + B <- B[, 1:.self$config$d, drop = FALSE] + .self$B.opg <- B + + # Check for need to initialize the Refinement Net. 
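+            # If the Refinement Net has not been trained yet, seed its 'reduction'
+            # layer with the OPG estimate `B`. With `initializer = 'fromOPG'` the
+            # first hidden layer kernel of the OPG Net is additionally projected
+            # onto the reduced space (`t(B) %*% W`) and all remaining hidden and
+            # output layer weights are copied, so the Refinement Net starts out
+            # close to the function fitted in the OPG step.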
+ if (is.null(.self$history.ref)) { + # Set Reduction layer + .self$nn.ref$get_layer('reduction')$set_weights(list(B)) + + # Check initialization (for random keep random initialization) + if (initializer == 'fromOPG') { + # Initialize Refinement Net weights from the OPG Net. + W <- as.array(.self$nn.opg$get_layer('hidden1')$kernel) + W <- rbind( + t(B) %*% W[1:nrow(B), , drop = FALSE], + W[-(1:nrow(B)), , drop = FALSE] + ) + b <- as.array(.self$nn.opg$get_layer('hidden1')$bias) + .self$nn.ref$get_layer('hidden1')$set_weights(list(W, b)) + # Get layer names with weights to be initialized from `nn.opg` + # These are the output layer and all hidden layers except the first + layer.names <- Filter(function(name) { + if (name == 'output') { + TRUE + } else if (name == 'hidden1') { + FALSE + } else { + startsWith(name, 'hidden') + } + }, lapply(.self$nn.opg$layers, `[[`, 'name')) + # Copy `nn.opg` weights to `nn.ref` + for (name in layer.names) { + .self$nn.ref$get_layer(name)$set_weights(lapply( + .self$nn.opg$get_layer(name)$weights, as.array + )) + } + } + } else if (verbose > 0) { + cat("Refinement Net already trained -> continue training.\n") + } + + # Fit (or continue fitting) the Refinement Net. + hist <- .self$nn.ref$fit(inputs, output, ..., + epochs = as.integer(tail(epochs, 1)), + batch_size = as.integer(tail(batch_size, 1)), + verbose = as.integer(verbose) + ) + .self$history.ref <- rbind( + .self$history.ref, + as.data.frame(hist$history) + ) + # Extract refined reduction estimate + .self$B.ref <- .self$nn.ref$get_layer('reduction')$get_weights()[[1]] + + invisible(NULL) + }, + predict = function(inputs) { + # Issue warning if the Refinement model (Step 2) used for prediction + # is not trained. + if (is.null(.self$history.ref)) + warning('Refinement model not trained.') + + if (is.list(inputs)) { + inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32') + } else { + inputs <- list(tf$cast(inputs, dtype = 'float32')) + } + output <- .self$nn.ref(inputs) + + if (is.list(output)) { + if (length(output) == 1L) { + as.array(output[[1]]) + } else { + Map(as.array, output) + } + } else { + as.array(output) + } + }, + coef = function(type = c('Refinement', 'OPG')) { + type <- match.arg(type) + if (type == 'Refinement') { + .self$B.ref + } else { + .self$B.opg + } + }, + reset = function(reset = c('both', 'Refinement')) { + reset <- match.arg(reset) + if (reset == 'both') { + reinitialize_weights(.self$nn.opg) + reset_optimizer(.self$nn.opg$optimizer) + .self$history.opg <- NULL + .self$B.opg <- NULL + } + reinitialize_weights(.self$nn.ref) + reset_optimizer(.self$nn.ref$optimizer) + .self$history.ref <- NULL + .self$B.ref <- NULL + }, + summary = function() { + .self$nn.opg$summary() + cat('\n') + .self$nn.ref$summary() + } + ) +) +nnsdr$lock('config') diff --git a/NNSDR/R/parse_args.R b/NNSDR/R/parse_args.R new file mode 100644 index 0000000..56c14d7 --- /dev/null +++ b/NNSDR/R/parse_args.R @@ -0,0 +1,35 @@ + +#' Parses script arguments. +#' +#' @param defaults list of default parameters. Names of the provided defaults +#' define the allowed parameters. +#' @param args Arguments to parge, if missing the Rscript params are taken. 
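+#'
+#' @examples
+#' # Small sketch with made-up defaults (any names work, they only have to match
+#' # the names of `defaults`); values are coerced to the type of the default.
+#' parse.args(list(n = 100L, p = 20L), args = c('--n=200', '--p=10'))
+#' # Comma separated values become vectors, e.g. '--n=100,200' yields c(100L, 200L).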
+#' +#' @return calling script arguments of `Rscript` +#' +#' @export +parse.args <- function(defaults, args) { + if (missing(args)) + args <- commandArgs(trailingOnly = TRUE) + if (length((args)) == 0) + return(defaults) + + args <- strsplit(sub('--', '', args, fixed = TRUE), '=') + values <- Map(`[`, args, 2) + names(values) <- unlist(Map(`[`, args, 1)) + + if (!all(names(values) %in% names(defaults))) + stop('Found unknown script parameter') + + for (i in seq_along(defaults)) { + name <- names(defaults)[i] + + if (name %in% names(values)) { + value <- unlist(strsplit(values[[name]], ',', fixed = TRUE)) + value <- eval(call(paste0('as.', typeof(defaults[[name]])), value)) + defaults[[name]] <- value + } + } + + defaults +} diff --git a/NNSDR/R/reinitialize_weights.R b/NNSDR/R/reinitialize_weights.R new file mode 100644 index 0000000..2f8f4a2 --- /dev/null +++ b/NNSDR/R/reinitialize_weights.R @@ -0,0 +1,42 @@ + +#' Re-initialize model weights. +#' +#' An in-place model re-initialization. Intended for simulations to avoid +#' rebuilding the same model architecture for multiple simulation runs. +#' +#' @param model A `keras` model. +#' +#' @seealso https://github.com/keras-team/keras/issues/341 +#' @examples +#' # library(tensorflow) # v2 +#' K <- tf$keras +#' model <- K$models$Sequential(list( +#' K$layers$Dense(units = 7L, input_shape = list(3L)), +#' K$layers$Dense(units = 1L) +#' )) +#' model$compile(loss = 'MSE', optimizer = K$optimizers$RMSprop()) +#' +#' model$weights +#' reinitialize_weights(model) +#' model$weights +#' +#' @export +reinitialize_weights <- function(model) { + for (layer in model$layers) { + # Unwrap wrapped layers. + if (any(endsWith(class(layer), 'Wrapper'))) + layer <- layer$layer + # Re-initialize kernel and bias weight variables. + for (var in layer$weights) { + if (any(grep('/recurrent_kernel:', var$name, fixed = TRUE))) { + var$assign(layer$recurrent_initializer(var$shape, var$dtype)) + } else if (any(grep('/kernel:', var$name, fixed = TRUE))) { + var$assign(layer$kernel_initializer(var$shape, var$dtype)) + } else if (any(grep('/bias:', var$name, fixed = TRUE))) { + var$assign(layer$bias_initializer(var$shape, var$dtype)) + } else { + stop("Unknown initialization for variable ", var$name) + } + } + } +} diff --git a/NNSDR/R/reset_optimizer.R b/NNSDR/R/reset_optimizer.R new file mode 100644 index 0000000..fb6e698 --- /dev/null +++ b/NNSDR/R/reset_optimizer.R @@ -0,0 +1,39 @@ + +#' Reset TensorFlow optimizer. +#' +#' @param optimizer a \pkg{tensorflow} optimizer instance +#' +#' @examples +#' # Create example toy data +#' +#' # library(tensorflow) # v2 +#' K <- tf$keras +#' model <- K$models$Sequential(list( +#' K$layers$Dense(units = 7L, input_shape = list(3L)), +#' K$layers$Dense(units = 1L) +#' )) +#' model$compile(loss = 'MSE', optimizer = 'RMSprop') +#' +#' \donttest{ +#' model$fit(input) # Fit the model +#' } +#' +#' reinitialize_weights(model) +#' reset_optimizer(model$optimizer) +#' +#' \donttest{ +#' model$fit(input) # Fit the model again completely independent of the first fit. +#' } +#' +#' @note Works for Adam, RMSprop properly (other optimizes are not tested!) +#' @note see source and search for `_create_slots` and `add_slot`. 
+#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer_v2.py +#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/rmsprop.py +#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/adam.py +#' +#' @export +reset_optimizer <- function(optimizer) { + for (var in optimizer$variables()) { + var$assign(tf$zeros_like(var)) + } +} diff --git a/NNSDR/R/summary_nnsdr.R b/NNSDR/R/summary_nnsdr.R new file mode 100644 index 0000000..5e65e20 --- /dev/null +++ b/NNSDR/R/summary_nnsdr.R @@ -0,0 +1,12 @@ +#' Summary for the OPG and Refinement model of an nnsdr class instance +#' +#' @param object nnsdr class instance +#' @param ... ignored. +#' +#' @return No return value, prints human readable summary. +#' +#' @method summary nnsdr +#' @export +summary.nnsdr <- function(object, ...) { + object$summary() +}
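
Taken together, the pieces above are intended to be used roughly as follows. This is a minimal end-to-end sketch, not part of the patch itself: it assumes a working tensorflow (>= 2.2.0) backend, and the hidden layer size, epoch counts and dataset choice are illustrative only.

library(NNSDR)

ds <- dataset("M1")                          # synthetic single-index data, see datasets.R

model <- nnsdr$new(
    input_shapes = list(x = ncol(ds$X)),     # one plain input of dimension p
    d = ncol(ds$B),                          # target reduction dimension
    hidden_units = 128L
)

# Step 1 (OPG Net) and Step 2 (Refinement Net) are fitted in one call; `epochs`
# may hold two values, one per step.
model$fit(ds$X, ds$Y, epochs = c(50L, 100L), initializer = 'fromOPG')

B.hat <- coef(model)                         # refined estimate ('reduction' layer weights)
B.opg <- coef(model, type = 'OPG')           # first-step OPG estimate

# The polar-projection kernel constraint keeps the refined estimate
# (numerically) semi-orthogonal.
round(crossprod(B.hat), 3)

# Compare estimated and true mean subspace.
dist.subspace(ds$B, B.hat, normalize = TRUE)
dist.subspace(ds$B, B.opg, normalize = TRUE)

# Reset weights, optimizer states and histories before the next simulation replication.
model$reset('both')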