Compare commits
3 Commits
36d0d26418
...
0214823794
Author | SHA1 | Date |
---|---|---|
Daniel Kapla | 0214823794 | |
Daniel Kapla | 2c5a87ccfe | |
Daniel Kapla | 6e4f3d30a7 |
|
@ -0,0 +1,27 @@
|
||||||
|
|
||||||
|
LaTeX/
|
||||||
|
|
||||||
|
**/data/*
|
||||||
|
**/results/*
|
||||||
|
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# Generated man files in R package (auto generated by Roxygen)
|
||||||
|
NNSDR/man/
|
||||||
|
|
||||||
|
# Large Files / Data Files
|
||||||
|
*.pdf
|
||||||
|
*.png
|
||||||
|
*.csv
|
||||||
|
*.Rdata
|
||||||
|
*.zip
|
||||||
|
*.tar.gz
|
||||||
|
*.tar.xz
|
||||||
|
*.BAK
|
||||||
|
|
||||||
|
# LaTeX - build/database/... files
|
||||||
|
*.log
|
||||||
|
*.aux
|
||||||
|
*.bbl
|
||||||
|
*.blg
|
||||||
|
*.out
|
|
@ -0,0 +1,13 @@
|
||||||
|
Package: NNSDR
|
||||||
|
Type: Package
|
||||||
|
Title: Fusing Neuronal Networks with Sufficient Dimension Reduction
|
||||||
|
Version: 0.1
|
||||||
|
Date: 2021-04-19
|
||||||
|
Author: Daniel Kapla [aut]
|
||||||
|
Maintainer: Daniel Kapla <daniel@kapla.at>
|
||||||
|
Description: Combining the Outer Product of Gradients (OPG) with Neural Networks
|
||||||
|
for estimation of the mean subspace.
|
||||||
|
License: GPL-3
|
||||||
|
Depends: methods, tensorflow (>= 2.2.0)
|
||||||
|
Encoding: UTF-8
|
||||||
|
RoxygenNote: 7.0.2
|
|
@ -0,0 +1,18 @@
|
||||||
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
S3method(coef,nnsdr)
|
||||||
|
S3method(summary,nnsdr)
|
||||||
|
export(dataset)
|
||||||
|
export(dist.grassmann)
|
||||||
|
export(dist.subspace)
|
||||||
|
export(get.script)
|
||||||
|
export(nnsdr)
|
||||||
|
export(parse.args)
|
||||||
|
export(reinitialize_weights)
|
||||||
|
export(reset_optimizer)
|
||||||
|
exportClasses(nnsdr)
|
||||||
|
import(methods)
|
||||||
|
import(stats)
|
||||||
|
import(tensorflow)
|
||||||
|
importFrom(stats,rbinom)
|
||||||
|
importFrom(stats,rnorm)
|
|
@ -0,0 +1,8 @@
|
||||||
|
#' Mean Subspace estimation with Neural Nets
#'
#' Package for simulations using Neural Nets for Mean Subspace estimation.
#' Combines the Outer Product of Gradients (OPG) estimator with a refinement
#' network carrying an explicit reduction layer.
#'
#' @author Daniel Kapla
#'
#' @docType package
"_PACKAGE"
|
|
@ -0,0 +1,18 @@
|
||||||
|
#' Extracts the OPG or refined reduction coefficients from an nnsdr class instance
#'
#' @param object nnsdr class instance
#' @param type specifies if the OPG or Refinement estimate is requested.
#'  One of `Refinement` or `OPG`, default is `Refinement`.
#' @param ... ignored.
#'
#' @return Matrix
#'
#' @method coef nnsdr
#' @export
coef.nnsdr <- function(object, type, ...) {
    # Forward to the reference-class method. Only pass `type` through when
    # the caller actually supplied it, so the method's own default applies.
    if (missing(type)) object$coef() else object$coef(type)
}
|
|
@ -0,0 +1,263 @@
|
||||||
|
#' Multivariate Normal Distribution.
#'
#' Random generation for the multivariate normal distribution.
#' \deqn{X \sim N_p(\mu, \Sigma)}{X ~ N_p(\mu, \Sigma)}
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma covariance matrix.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' NNSDR:::rmvnorm(20, sigma = matrix(c(2, 1, 1, 2), 2))
#' NNSDR:::rmvnorm(20, mu = c(3, -1, 2))
#'
#' @keywords internal
rmvnorm <- function(n = 1, mu = rep(0, p), sigma = diag(p)) {
    # Deduce the dimension `p` from whichever of `mu` / `sigma` was supplied.
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    # Standard normal draws transformed by the Cholesky factor of `sigma`,
    # then shifted by the (row-replicated) mean.
    std_draws <- matrix(rnorm(n * p), n)
    return(rep(mu, each = n) + std_draws %*% chol(sigma))
}
|
||||||
|
|
||||||
|
#' Multivariate t Distribution.
#'
#' Random generation from multivariate t distribution (student distribution).
#'
#' @param n number of samples.
#' @param mu mean
#' @param sigma a \eqn{k\times k}{k x k} positive definite matrix. If the degree
#'  \eqn{\nu} if bigger than 2 the created covariance is
#'  \deqn{var(x) = \Sigma\frac{\nu}{\nu - 2}}
#'  for \eqn{\nu > 2}.
#' @param df degree of freedom \eqn{\nu}; `Inf` reduces to the normal case.
#'
#' @return a \eqn{n\times p}{n x p} matrix with samples in its rows.
#'
#' @examples
#' NNSDR:::rmvt(20, c(0, 1), matrix(c(3, 1, 1, 2), 2), 3)
#' NNSDR:::rmvt(20, sigma = matrix(c(2, 1, 1, 2), 2), df = 3)
#' NNSDR:::rmvt(20, mu = c(3, -1, 2), df = 3)
#'
#' @keywords internal
rmvt <- function(n = 1, mu = rep(0, p), sigma = diag(p), df = Inf) {
    # Deduce the dimension `p` from whichever of `mu` / `sigma` was supplied.
    if (!missing(sigma)) {
        p <- nrow(sigma)
    } else if (!missing(mu)) {
        mu <- matrix(mu, ncol = 1)
        p <- nrow(mu)
    } else {
        stop("At least one of 'mu' or 'sigma' must be supplied.")
    }

    # Chi-squared mixing factor; `df = Inf` degenerates to the normal case.
    # NOTE: `rchisq` is drawn BEFORE the normal samples to keep the RNG
    # stream order unchanged.
    scaling <- if (df == Inf) 1 else sqrt(df / rchisq(n, df))

    return(rmvnorm(n, sigma = sigma) * scaling + rep(mu, each = n))
}
|
||||||
|
|
||||||
|
#' Generalized Normal Distribution.
#'
#' Random generation for generalized Normal Distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param alpha first shape parameter, must be positive.
#' @param beta second shape parameter, must be positive.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @keywords internal
rgnorm <- function(n = 1, mu = 0, alpha = 1, beta = 1) {
    # Scalar short-circuit `||` (not vectorized `|`): `if ()` requires a
    # single logical and the parameters are scalars.
    if (alpha <= 0 || beta <= 0) {
        stop("alpha and beta must be positive.")
    }
    # Magnitude via the inverse CDF of a Gamma distribution, then a random
    # sign is attached (the distribution is symmetric around `mu`).
    lambda <- (1 / alpha)^beta
    scales <- qgamma(runif(n), shape = 1 / beta, scale = 1 / lambda)^(1 / beta)
    return(scales * ((-1)^rbinom(n, 1, 0.5)) + mu)
}
|
||||||
|
|
||||||
|
#' Laplace distribution
#'
#' Random generation for Laplace distribution.
#'
#' @param n Number of generated samples.
#' @param mu mean.
#' @param sd standard deviation.
#'
#' @return numeric array of length \eqn{n}.
#'
#' @keywords internal
rlaplace <- function(n = 1, mu = 0, sd = 1) {
    # Inverse-CDF sampling: a uniform on (-1/2, 1/2) mapped through the
    # Laplace quantile function with scale b = sd / sqrt(2), so that the
    # variance 2 b^2 equals sd^2.
    unif <- runif(n, -0.5, 0.5)
    b <- sd / sqrt(2)
    return(mu - b * sign(unif) * log(1 - 2 * abs(unif)))
}
|
||||||
|
|
||||||
|
#' Generates test datasets.
#'
#' Provides sample datasets.
#' The general model is given by:
#' \deqn{Y = g(B'X) + \epsilon}
#'
#' @param name One of \code{"M1"}, ..., \code{"M7"}.
#'  Alternatively just the dataset number 1-7.
#' @param n number of samples; \code{NULL} (the default) selects a
#'  dataset-specific default (see the sections below).
#' @param p Dimension of random variable \eqn{X}.
#' @param sd standard deviation for error term \eqn{\epsilon}.
#' @param ... Additional parameters only for "M2" (namely \code{pmix} and
#'  \code{lambda}), see: below.
#'
#' @return List with elements
#' \itemize{
#'    \item{X}{data, a \eqn{n\times p}{n x p} matrix.}
#'    \item{Y}{response.}
#'    \item{B}{the dim-reduction matrix}
#'    \item{name}{Name of the dataset (name parameter)}
#' }
#'
#' @section M1:
#' The predictors are distributed as
#' \eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
#' \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
#' of \eqn{n = 100} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
#' given as \deqn{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
#' distributed as generalized normal distribution with location 0,
#' shape-parameter 0.5, and the scale-parameter is chosen such that
#' \eqn{Var(\epsilon) = 0.5}.
#' @section M2:
#' The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)}. with
#' \eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where
#' \eqn{1_p} is the \eqn{p}-dimensional vector of one's, for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is \deqn{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' Defaults for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
#' @section M3:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace
#' dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
#' \eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' and \eqn{Y} is
#' \deqn{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
#' standard normal.
#' @section M4:
#' The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
#' with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
#' \eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
#' of \eqn{n = 200} data points. \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M5:
#' The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
#' where \eqn{U([0, 1]^p)} is the uniform distribution with
#' independent components on the \eqn{p}-dimensional hypercube for a subspace
#' dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
#' \eqn{p = 20},
#' \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
#' \eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
#' and \eqn{Y} is given as \deqn{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M6:
#' The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
#' for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
#' point. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
#' \eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
#' \eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
#' where \eqn{\epsilon} is standard normal.
#' @section M7: see "Local Linear Forests" <arXiv:1807.11408>
#'
#' @import stats
#' @importFrom stats rnorm rbinom
#' @export
dataset <- function(name = "M1", n = NULL, p = 20, sd = 0.5, ...) {
    name <- toupper(name)
    if (nchar(name) == 1) { name <- paste0("M", name) }

    # NOTE: `is.null(n)` (not `missing(n)`) so that an explicitly passed
    # `n = NULL` also selects the dataset-specific default sample size.
    if (name == "M1") {
        if (is.null(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        # Generalized normal noise with shape 0.5 scaled to variance `sd^2`.
        beta <- 0.5
        Y <- cos(X %*% B) + rgnorm(n, 0,
            alpha = sqrt(sd^2 * gamma(1 / beta) / gamma(3 / beta)),
            beta = beta
        )
    } else if (name == "M2") {
        if (is.null(n)) { n <- 100 }
        params <- list(...)
        pmix <- if (is.null(params$pmix)) { 0.3 } else { params$pmix }
        lambda <- if (is.null(params$lambda)) { 1 } else { params$lambda }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        Z <- 2 * rbinom(n, 1, pmix) - 1
        X <- matrix(rep(lambda * Z, p) + rnorm(n * p), n)
        Y <- cos(X %*% B) + rnorm(n, 0, sd)
    } else if (name == "M3") {
        if (is.null(n)) { n <- 100 }
        # B ... `p x 1`
        B <- matrix(c(rep(1 / sqrt(6), 6), rep(0, p - 6)), ncol = 1)
        X <- matrix(rnorm(n * p), n)
        Y <- 2 * log(2 + abs(X %*% B)) + rnorm(n, 0, sd)
    } else if (name == "M4") {
        if (is.null(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1 / sqrt(6), 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3) / sqrt(6), rep(0, p - 6))
        )
        X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, FUN = `-`)))
        XB <- X %*% B
        Y <- (XB[, 1]) / (0.5 + (XB[, 2] + 1.5)^2) + rnorm(n, 0, sd)
    } else if (name == "M5") {
        if (is.null(n)) { n <- 200 }
        # B ... `p x 2`
        B <- cbind(
            c(rep(1, 6), rep(0, p - 6)),
            c(rep(c(1, -1), 3), rep(0, p - 6))
        ) / sqrt(6)
        X <- matrix(runif(n * p), n)
        XB <- X %*% B
        Y <- cos(XB[, 1] * pi) * (XB[, 2] + 1)^2 + rnorm(n, 0, sd)
    } else if (name == "M6") {
        if (is.null(n)) { n <- 200 }
        # B ... `p x 3` (unit vectors e_1, e_2, e_p)
        B <- diag(p)[, -(3:(p - 1))]
        X <- matrix(rnorm(n * p), n)
        Y <- rowSums((X %*% B)^2) + rnorm(n, 0, sd)
    } else if (name == "M7") {
        # see: "Local Linear Forests" <arXiv:1807.11408>
        if (is.null(n)) { n <- 600 }
        # `p` and `sd` have non-NULL defaults, so `missing()` is appropriate.
        if (missing(p)) { p <- 20 } # 10 and 50 in "Local Linear Forests"
        if (missing(sd)) { sd <- 5 } # or 20

        B <- diag(1, p, 4)
        B[, 4] <- c(0, 0, 0, 2, 1, rep(0, p - 5))

        X <- matrix(runif(n * p), n, p)
        XB <- X %*% B
        Y <- 10 * sin(pi * XB[, 1] * XB[, 2]) + 20 * (XB[, 3] - 0.5)^2 + 5 * XB[, 4] + rnorm(n, sd = sd)
        Y <- as.matrix(Y)
    } else {
        stop("Got unknown dataset name.")
    }

    return(list(X = X, Y = as.matrix(Y), B = B, name = name))
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
#' Grassmann distance
#'
#' Geodesic distance between the subspaces spanned by `A` and `B`, computed
#' from the principal angles between them.
#'
#' @param A,B Basis matrices as representations of elements of the Grassmann
#'  manifold.
#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal (if
#'  false, both arguments are `qr` decomposed)
#' @param tol passed to `qr`, ignored if `is.ortho` is `true`.
#'
#' @seealso
#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between
#' subspaces of different dimensions" <arXiv:1407.0900>
#'
#' @export
dist.grassmann <- function(A, B, is.ortho = FALSE, tol = 1e-7) {
    if (is.ortho) {
        A <- as.matrix(A)
        B <- as.matrix(B)
    } else {
        # Orthonormalize the bases first.
        A <- qr.Q(qr(A, tol))
        B <- qr.Q(qr(B, tol))
    }

    # Singular values of A'B are the cosines of the principal angles; clamp
    # at 1 to guard against rounding noise before applying `acos`.
    cosines <- pmin(La.svd(crossprod(A, B), 0L, 0L)$d, 1)
    sqrt(sum(acos(cosines)^2))
}
|
|
@ -0,0 +1,37 @@
|
||||||
|
#' Subspace distance
#'
#' Distance between the subspaces spanned by `A` and `B`, measured as the
#' (scaled) Frobenius norm of the difference of the projection matrices.
#'
#' @param A,B Basis matrices as representations of elements of the Grassmann
#'  manifold.
#' @param is.ortho Boolean to specify if `A` and `B` are semi-orthogonal. If
#'  false, the projection matrices are computed as
#'  \deqn{P_A = A (A' A)^{-1} A'}
#'  otherwise just \eqn{P_A = A A'} since \eqn{A' A} is the identity.
#' @param normalize Boolean to specify if the distance shall be normalized.
#'  Meaning, the maximal distance scaled to be \eqn{1} independent of dimensions.
#'
#' @seealso
#' K. Ye and L.-H. Lim (2016) "Schubert varieties and distances between
#' subspaces of different dimensions" <arXiv:1407.0900>
#'
#' @export
dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) {
    A <- if (is.matrix(A)) A else as.matrix(A)
    B <- if (is.matrix(B)) B else as.matrix(B)

    # Projections onto span(A) and span(B).
    if (is.ortho) {
        PA <- tcrossprod(A, A)
        PB <- tcrossprod(B, B)
    } else {
        PA <- A %*% solve(t(A) %*% A, t(A))
        PB <- B %*% solve(t(B) %*% B, t(B))
    }

    # Scale factor (named `fac` to avoid masking base `c()`).
    fac <- if (normalize) {
        rank_sum <- ncol(A) + ncol(B)
        1 / sqrt(min(rank_sum, 2 * nrow(A) - rank_sum))
    } else {
        sqrt(2)
    }

    fac * norm(PA - PB, type = "F")
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
|
||||||
|
#' Gets the `Rscript` file name
#'
#' @note only relevant in scripts (useless in interactive `R` sessions)
#'
#' @return character array of the file name or empty
#'
#' @export
get.script <- function() {
    # `Rscript` passes the executed script as a `--file=<path>` argument.
    cli_args <- commandArgs()
    file_arg <- cli_args[startsWith(cli_args, '--file=')]
    sub('--file=', '', file_arg)
}
|
|
@ -0,0 +1,279 @@
|
||||||
|
# Suppress TensorFlow's C++ backend log output (notes/warnings) on load.
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#' Build MLP
#'
#' Constructs and compiles a Keras multi-layer perceptron, optionally with a
#' leading semi-orthogonal `'reduction'` layer applied to the first input.
#'
#' @param input_shapes integer vector (possibly named) of input dimensions;
#'  one `Input` layer is created per element, names (if any) become the
#'  layer names.
#' @param d number of units of the `'reduction'` layer (the reduction
#'  dimension), only used if `add_reduction` is true.
#' @param name model name passed to `keras$models$Model`.
#' @param add_reduction whether to prepend a bias-free `Dense` layer named
#'  `'reduction'` (kernel constrained to be semi-orthogonal via a polar
#'  projection) applied to the first input.
#' @param output_shape number of units of the final `'output'` layer.
#' @param hidden_units integer vector; one hidden `Dense` layer is created
#'  per element.
#' @param activation activation function of the hidden layers.
#' @param dropout dropout rate applied after each hidden layer, disabled
#'  if `<= 0`.
#' @param loss loss passed to `compile`.
#' @param optimizer optimizer passed to `compile`.
#' @param metrics metrics passed to `compile`.
#' @param trainable_reduction whether the `'reduction'` layer's kernel is
#'  trainable.
#'
#' @return a compiled `keras` model.
#'
#' @import tensorflow
#' @keywords internal
build.MLP <- function(input_shapes, d, name, add_reduction,
    output_shape = 1L,
    hidden_units = 512L,
    activation = 'relu',
    dropout = 0.4,
    loss = 'MSE',
    optimizer = 'RMSProp',
    metrics = NULL,
    trainable_reduction = TRUE
) {
    K <- tf$keras

    # One Keras `Input` per entry of `input_shapes`.
    inputs <- Map(K$layers$Input,
        shape = as.integer(input_shapes), # drops names (concatenate key error)
        name = if (is.null(names(input_shapes))) "" else names(input_shapes)
    )

    # Optionally map the FIRST input through the semi-orthogonal reduction.
    mlp_inputs <- if (add_reduction) {
        reduction <- K$layers$Dense(
            units = d,
            use_bias = FALSE,
            kernel_constraint = function(w) { # polar projection
                lhs <- tf$linalg$sqrtm(tf$matmul(w, w, transpose_a = TRUE))
                tf$transpose(tf$linalg$solve(lhs, tf$transpose(w)))
            },
            trainable = trainable_reduction,
            name = 'reduction'
        )(inputs[[1]])

        c(reduction, inputs[-1])
    } else {
        inputs
    }

    # Concatenate multiple inputs into one tensor for the MLP trunk.
    out <- if (length(inputs) == 1) {
        mlp_inputs[[1]]
    } else {
        K$layers$concatenate(mlp_inputs, axis = 1L, name = 'input_mlp')
    }
    # Hidden Dense (+ optional Dropout) stack, one layer per `hidden_units`.
    for (i in seq_along(hidden_units)) {
        out <- K$layers$Dense(units = hidden_units[i], activation = activation,
            name = paste0('hidden', i))(out)
        if (dropout > 0)
            out <- K$layers$Dropout(rate = dropout, name = paste0('dropout', i))(out)
    }
    out <- K$layers$Dense(units = output_shape, name = 'output')(out)

    mlp <- K$models$Model(inputs = inputs, outputs = out, name = name)
    mlp$compile(loss = loss, optimizer = optimizer, metrics = metrics)

    mlp
}
|
||||||
|
|
||||||
|
#' Base Neuronal Network model class
#'
#' Two-step sufficient dimension reduction estimator: an "OPG" net (Step 1,
#' without a reduction layer) provides a gradient-based estimate of the
#' reduction matrix `B`, which initializes a "Refinement" net (Step 2) that
#' carries an explicit semi-orthogonal `'reduction'` layer.
#'
#' @examples
#' model <- nnsdr$new(
#'     input_shapes = list(x = 7L),
#'     d = 2L, hidden_units = 128L
#' )
#'
#' @import methods tensorflow
#' @export nnsdr
#' @exportClass nnsdr
nnsdr <- setRefClass('nnsdr',
    fields = list(
        config = 'list',      # shared `build.MLP` configuration (locked below)
        nn.opg = 'ANY',       # Step 1 net (no reduction layer)
        nn.ref = 'ANY',       # Step 2 net (with 'reduction' layer)
        history.opg = 'ANY',  # OPG training history, `NULL` <=> not trained
        history.ref = 'ANY',  # Refinement training history, `NULL` <=> not trained
        B.opg = 'ANY',        # OPG estimate of the reduction matrix
        B.ref = 'ANY',        # refined estimate of the reduction matrix
        # Read-only accessor: combined per-epoch training history of both
        # nets as a single `data.frame` (or `NULL` before any training).
        history = function() {
            if (is.null(.self$history.opg))
                return(NULL)

            history <- data.frame(
                .self$history.opg,
                model = factor('OPG'),
                epoch = seq_len(nrow(.self$history.opg))
            )

            if (!is.null(.self$history.ref))
                history <- rbind(history, data.frame(
                    .self$history.ref,
                    model = factor('Refinement'),
                    epoch = seq_len(nrow(.self$history.ref))
                ))

            history
        }
    ),

    methods = list(
        # Builds both nets from the given configuration (forwarded to
        # `build.MLP`) and marks them as untrained.
        initialize = function(input_shapes, d, output_shape = 1L, ...) {
            # Set configuration.
            .self$config <- c(list(
                input_shapes = input_shapes,
                d = as.integer(d),
                output_shape = output_shape
            ), list(...))

            # Build OPG (Step 1) and Refinement (Step 2) Neuronal Networks
            .self$nn.opg <- do.call(build.MLP, c(.self$config, list(
                name = 'OPG', add_reduction = FALSE
            )))
            .self$nn.ref <- do.call(build.MLP, c(.self$config, list(
                name = 'Refinement', add_reduction = TRUE
            )))

            # Set initial history field values. If and only if the `history.*`
            # fields are `NULL`, then the Nets are NOT trained.
            .self$history.opg <- NULL
            .self$history.ref <- NULL

            # Set (not yet available) reduction estimates
            .self$B.opg <- NULL
            .self$B.ref <- NULL
        },

        # Two-step training: fits the OPG net (skipped if already trained),
        # derives the OPG reduction estimate from its input gradients, uses
        # it to initialize the Refinement net, then fits (or continues
        # fitting) the Refinement net. `epochs` / `batch_size` may be
        # length-two vectors: first entry for OPG, last for Refinement.
        fit = function(inputs, output, epochs = 1L, batch_size = 32L,
            initializer = c('random', 'fromOPG'), ..., verbose = 0L
        ) {
            # Normalize `inputs` to a list of float32 tensors.
            if (is.list(inputs)) {
                inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
            } else {
                inputs <- list(tf$cast(inputs, dtype = 'float32'))
            }
            initializer <- match.arg(initializer)

            # Check for OPG history (Step 1), if available skip it.
            if (is.null(.self$history.opg)) {
                # Fit OPG Net and store training history.
                hist <- .self$nn.opg$fit(inputs, output, ...,
                    epochs = as.integer(head(epochs, 1)),
                    batch_size = as.integer(head(batch_size, 1)),
                    verbose = as.integer(verbose)
                )
                .self$history.opg <- as.data.frame(hist$history)
            } else if (verbose > 0) {
                cat("OPG already trained -> skip OPG training.\n")
            }

            # Compute OPG estimate of the Reduction matrix 'B'.
            # Always compute, different inputs change the estimate.
            with(tf$GradientTape() %as% tape, {
                tape$watch(inputs[[1]])
                out <- .self$nn.opg(inputs)
            })
            G <- as.matrix(tape$gradient(out, inputs[[1]]))
            # Leading `d` eigenvectors of the gradient covariance span the
            # estimated subspace (classic OPG estimator).
            B <- eigen(var(G), symmetric = TRUE)$vectors
            B <- B[, 1:.self$config$d, drop = FALSE]
            .self$B.opg <- B

            # Check for need to initialize the Refinement Net.
            if (is.null(.self$history.ref)) {
                # Set Reduction layer
                .self$nn.ref$get_layer('reduction')$set_weights(list(B))

                # Check initialization (for random keep random initialization)
                if (initializer == 'fromOPG') {
                    # Initialize Refinement Net weights from the OPG Net. The
                    # first-input rows of the OPG kernel are projected through
                    # `B` since the Refinement Net sees reduced inputs.
                    W <- as.array(.self$nn.opg$get_layer('hidden1')$kernel)
                    W <- rbind(
                        t(B) %*% W[1:nrow(B), , drop = FALSE],
                        W[-(1:nrow(B)), , drop = FALSE]
                    )
                    b <- as.array(.self$nn.opg$get_layer('hidden1')$bias)
                    .self$nn.ref$get_layer('hidden1')$set_weights(list(W, b))
                    # Get layer names with weights to be initialized from `nn.opg`
                    # These are the output layer and all hidden layers except the first
                    layer.names <- Filter(function(name) {
                        if (name == 'output') {
                            TRUE
                        } else if (name == 'hidden1') {
                            FALSE
                        } else {
                            startsWith(name, 'hidden')
                        }
                    }, lapply(.self$nn.opg$layers, `[[`, 'name'))
                    # Copy `nn.opg` weights to `nn.ref`
                    for (name in layer.names) {
                        .self$nn.ref$get_layer(name)$set_weights(lapply(
                            .self$nn.opg$get_layer(name)$weights, as.array
                        ))
                    }
                }
            } else if (verbose > 0) {
                cat("Refinement Net already trained -> continue training.\n")
            }

            # Fit (or continue fitting) the Refinement Net.
            hist <- .self$nn.ref$fit(inputs, output, ...,
                epochs = as.integer(tail(epochs, 1)),
                batch_size = as.integer(tail(batch_size, 1)),
                verbose = as.integer(verbose)
            )
            .self$history.ref <- rbind(
                .self$history.ref,
                as.data.frame(hist$history)
            )
            # Extract refined reduction estimate
            .self$B.ref <- .self$nn.ref$get_layer('reduction')$get_weights()[[1]]

            invisible(NULL)
        },
        # Predicts with the Refinement net; converts outputs to R arrays.
        predict = function(inputs) {
            # Issue warning if the Refinement model (Step 2) used for prediction
            # is not trained.
            if (is.null(.self$history.ref))
                warning('Refinement model not trained.')

            if (is.list(inputs)) {
                inputs <- Map(tf$cast, as.list(inputs), dtype = 'float32')
            } else {
                inputs <- list(tf$cast(inputs, dtype = 'float32'))
            }
            output <- .self$nn.ref(inputs)

            # Unwrap single-element list outputs; tensors -> R arrays.
            if (is.list(output)) {
                if (length(output) == 1L) {
                    as.array(output[[1]])
                } else {
                    Map(as.array, output)
                }
            } else {
                as.array(output)
            }
        },
        # Returns the requested reduction estimate (`NULL` before training).
        coef = function(type = c('Refinement', 'OPG')) {
            type <- match.arg(type)
            if (type == 'Refinement') {
                .self$B.ref
            } else {
                .self$B.opg
            }
        },
        # Resets nets to their untrained state (weights, optimizer state,
        # history, estimates); `reset = 'Refinement'` keeps the OPG net.
        reset = function(reset = c('both', 'Refinement')) {
            reset <- match.arg(reset)
            if (reset == 'both') {
                reinitialize_weights(.self$nn.opg)
                reset_optimizer(.self$nn.opg$optimizer)
                .self$history.opg <- NULL
                .self$B.opg <- NULL
            }
            reinitialize_weights(.self$nn.ref)
            reset_optimizer(.self$nn.ref$optimizer)
            .self$history.ref <- NULL
            .self$B.ref <- NULL
        },
        # Prints the `keras` summaries of both nets.
        summary = function() {
            .self$nn.opg$summary()
            cat('\n')
            .self$nn.ref$summary()
        }
    )
)
# Prevent accidental modification of the configuration after construction.
nnsdr$lock('config')
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
#' Parses script arguments.
#'
#' Parses `--name=value` style arguments. Comma separated values yield
#' vector-valued parameters; each value is coerced to the type of the
#' corresponding default.
#'
#' @param defaults list of default parameters. Names of the provided defaults
#'  define the allowed parameters.
#' @param args Arguments to parse, if missing the `Rscript` command line
#'  arguments (`commandArgs(trailingOnly = TRUE)`) are taken.
#'
#' @return `defaults` with entries overwritten by the parsed arguments.
#'
#' @export
parse.args <- function(defaults, args) {
    if (missing(args))
        args <- commandArgs(trailingOnly = TRUE)
    if (length(args) == 0)
        return(defaults)

    # Split each "--name=value" argument into name/value. Only a LEADING
    # "--" is stripped so values may legally contain dashes.
    args <- strsplit(sub('^--', '', args), '=')
    values <- Map(`[`, args, 2)
    names(values) <- unlist(Map(`[`, args, 1))

    # Reject parameters not declared via `defaults`, naming the offenders.
    unknown <- setdiff(names(values), names(defaults))
    if (length(unknown) > 0)
        stop('Found unknown script parameter(s): ',
             paste(unknown, collapse = ', '))

    for (i in seq_along(defaults)) {
        name <- names(defaults)[i]

        if (name %in% names(values)) {
            # Comma separated values give vectors; coerce to the type of the
            # default via the matching `as.<typeof>` function.
            value <- unlist(strsplit(values[[name]], ',', fixed = TRUE))
            coerce <- match.fun(paste0('as.', typeof(defaults[[name]])))
            defaults[[name]] <- coerce(value)
        }
    }

    defaults
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
|
||||||
|
#' Re-initialize model weights.
#'
#' An in-place model re-initialization. Intended for simulations to avoid
#' rebuilding the same model architecture for multiple simulation runs.
#'
#' @param model A `keras` model.
#'
#' @seealso https://github.com/keras-team/keras/issues/341
#' @examples
#' # library(tensorflow) # v2
#' K <- tf$keras
#' model <- K$models$Sequential(list(
#'     K$layers$Dense(units = 7L, input_shape = list(3L)),
#'     K$layers$Dense(units = 1L)
#' ))
#' model$compile(loss = 'MSE', optimizer = K$optimizers$RMSprop())
#'
#' model$weights
#' reinitialize_weights(model)
#' model$weights
#'
#' @export
reinitialize_weights <- function(model) {
    for (layer in model$layers) {
        # Unwrap wrapped layers (class name ends in 'Wrapper').
        if (any(endsWith(class(layer), 'Wrapper')))
            layer <- layer$layer
        # Re-assign every weight variable from the initializer the layer was
        # built with. The variable's role is encoded in its name; `grepl`
        # (scalar logical) replaces the old `any(grep(...))` idiom.
        # NOTE: '/recurrent_kernel:' must be tested BEFORE '/kernel:' since
        # the latter pattern also matches recurrent kernels.
        for (var in layer$weights) {
            if (grepl('/recurrent_kernel:', var$name, fixed = TRUE)) {
                var$assign(layer$recurrent_initializer(var$shape, var$dtype))
            } else if (grepl('/kernel:', var$name, fixed = TRUE)) {
                var$assign(layer$kernel_initializer(var$shape, var$dtype))
            } else if (grepl('/bias:', var$name, fixed = TRUE)) {
                var$assign(layer$bias_initializer(var$shape, var$dtype))
            } else {
                stop("Unknown initialization for variable ", var$name)
            }
        }
    }
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
#' Reset TensorFlow optimizer.
#'
#' Sets every variable tracked by the optimizer to zero, so a subsequent
#' `fit` is independent of earlier training runs (in combination with
#' `reinitialize_weights`).
#'
#' @param optimizer a \pkg{tensorflow} optimizer instance
#'
#' @examples
#' # Create example toy data
#'
#' # library(tensorflow) # v2
#' K <- tf$keras
#' model <- K$models$Sequential(list(
#'     K$layers$Dense(units = 7L, input_shape = list(3L)),
#'     K$layers$Dense(units = 1L)
#' ))
#' model$compile(loss = 'MSE', optimizer = 'RMSprop')
#'
#' \donttest{
#' model$fit(input) # Fit the model
#' }
#'
#' reinitialize_weights(model)
#' reset_optimizer(model$optimizer)
#'
#' \donttest{
#' model$fit(input) # Fit the model again completely independent of the first fit.
#' }
#'
#' @note Works for Adam, RMSprop properly (other optimizers are not tested!)
#' @note see source and search for `_create_slots` and `add_slot`.
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/rmsprop.py
#' @seealso https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/optimizer_v2/adam.py
#'
#' @export
reset_optimizer <- function(optimizer) {
    # Zero each optimizer-tracked variable in place.
    for (state_var in optimizer$variables()) {
        state_var$assign(tf$zeros_like(state_var))
    }
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
#' Summary for the OPG and Refinement model of an nnsdr class instance
#'
#' Delegates to the instance's own \code{$summary()} method, which prints the
#' Keras summaries of both sub-models.
#'
#' @param object nnsdr class instance
#' @param ... ignored.
#'
#' @return No return value, prints human readable summary.
#'
#' @method summary nnsdr
#' @export
summary.nnsdr <- function(object, ...) {
    # Wrap in `invisible` so the delegate's return value is not auto-printed,
    # matching the documented "No return value" contract.
    invisible(object$summary())
}
|
|
@ -0,0 +1,79 @@
|
||||||
|
#!/usr/bin/env Rscript
## 10-fold cross-validation benchmark on the "Beijing Multi-Site Air-Quality"
## data comparing the `NNSDR` refinement estimator against a linear model and
## MARS. Per-fold MSE, response variance and timings go to a CSV log.
## data source: https://archive.ics.uci.edu/ml/datasets/Beijing+Multi-Site+Air-Quality+Data

library(mda)    # provides `mars`
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
    library(NNSDR)
})

## Configuration
d <- 4L                 # reduction dimension
epochs <- c(2L, 3L)     # training epochs (OPG, Refinement)

## Loading one site of the "Beijing Air Quality" data set
# FIX: the directory name previously used backslash-escaped spaces
# ('Beijing\ Multi\ ...'), which is an "unrecognized escape" parse error in R;
# plain spaces are correct. `pattern` is a regular expression, so anchor a
# literal ".csv" suffix instead of the shell glob '*.csv'.
files <- list.files('data/Beijing Multi Site Air Quality Data/',
    pattern = '\\.csv$', full.names = TRUE)
ds <- na.omit(Reduce(rbind, lapply(files, read.csv)))

## Create model matrix with dummy variables for factors (One-Hot encoded) for
## regression of PM2.5 (and dropping PM10)
X <- model.matrix(~ year + month + day + hour + SO2 + NO2 + CO + O3 + TEMP +
    PRES + DEWP + RAIN + wd + WSPM + station + 0, ds)
Y <- as.matrix(ds$PM2.5)

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
    input_shapes = list(x = ncol(X)),
    d = d, # Reduction dimension
    hidden_units = 512L,
    activation = 'relu'
)

## Open simulation log file, write simulation configuration and header
log <- file(format(Sys.time(), "results/Beijing_Air_Quality.csv"), "w", blocking = FALSE)
cat('# d = ', d, '\n# epochs = ', epochs[1], ',', epochs[2], '\n',
    'method,fold,mse,var(Y.test),time.user,time.system,time.elapsed\n',
    sep = '', file = log, append = TRUE)

## K-Fold Cross Validation
K <- 10
for (i in seq_len(K)) {
    ## Split into train/test sets.
    # The length-K logical mask is recycled over the rows, i.e. every K-th
    # observation (by position) is assigned to test fold `i`.
    train <- (1:K) != i
    X.train <- scale(X[train, ])
    Y.train <- Y[train, , drop = FALSE]
    # Apply the *training* centering/scaling to the test block.
    X.test <- scale(X[!train, ], center = attr(X.train, 'scaled:center'),
        scale = attr(X.train, 'scaled:scale'))
    Y.test <- Y[!train, , drop = FALSE]

    ## Training
    time <- system.time(nn$fit(X.train, Y.train, epochs = epochs, initializer = 'fromOPG'))

    mse <- mean((nn$predict(X.test) - Y.test)^2)
    cat('"nn.ref",', i, ',', mse, ',', c(var(Y.test)), ',',
        time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        sep = '', file = log, append = TRUE)

    ## Linear Model
    time <- system.time(lm.mod <- lm(y ~ ., data.frame(X.train, y = Y.train)))

    mse <- mean((predict(lm.mod, data.frame(X.test, y = Y.test)) - Y.test)^2)
    cat('"lm",', i, ',', mse, ',', c(var(Y.test)), ',',
        time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        sep = '', file = log, append = TRUE)

    ## MARS
    time <- system.time(mars.mod <- mars(X.train, Y.train))

    mse <- mean((predict(mars.mod, X.test) - Y.test)^2)
    cat('"mars",', i, ',', mse, ',', c(var(Y.test)), ',',
        time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        sep = '', file = log, append = TRUE)

    ## Reset model
    nn$reset()
}

## Finished, close simulation log file
close(log)
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env Rscript
## Leave-one-out cross-validation benchmark on the "Boston Housing" data set
## comparing the `NNSDR` refinement estimator against `MAVE` and `CVE`.
## Per-fold MSE values are appended to a CSV log under `results/`.

library(MAVE)
library(CVarE)
library(mlbench)    # provides the `BostonHousing` data set
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
    library(NNSDR)
})

## Configuration
d <- 1L     # reduction dimension

## Load Boston Housing data set
data('BostonHousing')
# Drop the binary factor `chas` so all remaining columns are numeric.
ds <- BostonHousing[, names(BostonHousing) != 'chas']

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
    input_shapes = list(x = ncol(ds) - 1L), # every column except response `medv`
    d = d, # Reduction dimension
    hidden_units = 512L,
    activation = 'relu'
)

## Open simulation log file, write simulation configuration and header
# NOTE(review): the file name contains no `format` placeholders, so `format`
# returns it unchanged -- presumably a timestamp pattern (as in the sim
# scripts) was intended; confirm.
log <- file(format(Sys.time(), "results/BostonHousing.csv"), "w", blocking = FALSE)
cat('# d = ', d, '\n', 'method,fold,mse\n', sep = '', file = log, append = TRUE)

## K-Fold Cross Validation
K <- nrow(ds) # with K == nrow(ds) -> LOO CV
for (i in 1:K) {
    # With K == nrow(ds) this selects exactly one test observation per fold.
    ds.train <- ds[(1:K) != i, ]
    ds.test <- ds[(1:K) == i, , drop = FALSE]
    X.train <- as.matrix(ds.train[, names(ds) != 'medv'])
    Y.train <- as.matrix(ds.train[, 'medv'])
    X.test <- as.matrix(ds.test[, names(ds) != 'medv'])
    Y.test <- as.matrix(ds.test[, 'medv'])

    ## Fit `DR` Neuronal Network model
    nn$fit(X.train, Y.train, epochs = c(200L, 400L), initializer = 'fromOPG')
    Y.pred <- nn$predict(X.test)
    mse <- mean((Y.pred - Y.test)^2)
    cat('"nn.ref",', i, ',', mse, '\n',
        sep = '', file = log, append = TRUE)

    ## MAVE as reference
    dr <- mave.compute(X.train, Y.train, method = 'meanMAVE', max.dim = d)
    Y.pred <- predict(dr, X.test, d)
    mse <- mean((Y.pred - Y.test)^2)
    cat('"mave",', i, ',', mse, '\n',
        sep = '', file = log, append = TRUE)

    ## and CVE
    # CVE works on standardized predictors; re-use the training center/scale
    # for the held-out observation.
    X.scaled <- scale(X.train)
    dr <- cve.call(X.scaled, Y.train, k = d)
    Y.pred <- predict(dr, scale(X.test,
            scale = attr(X.scaled, 'scaled:scale'),
            center = attr(X.scaled, 'scaled:center')),
        k = d)
    mse <- mean((Y.pred - Y.test)^2)
    cat('"cve",', i, ',', mse, '\n',
        sep = '', file = log, append = TRUE)

    ## Reset model
    nn$reset()
}

## Finished, close simulation log file
close(log)
|
|
@ -0,0 +1,86 @@
|
||||||
|
#!/usr/bin/env Rscript
## 10-fold cross-validation benchmark on the "House Price in King County, USA"
## data set comparing the `NNSDR` refinement estimator against `MAVE`.
## Per-fold MSE, response variance and timings go to a CSV log.
## data source: the `MAVE` R-package

library(MAVE)
library(CVarE)
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
    library(NNSDR)
})

## Configuration
d <- 1L # reduction dimension
epochs = c(50L, 100L) # training epochs (OPG, Refinement)
dropped <- c('id', 'date', 'zipcode') #, 'sqft_basement') # columns to be dropped

## Loading the "House Price in King County, USA" data set provided by MAVE
data('kc_house_data')
ds <- kc_house_data[, !(names(kc_house_data) %in% dropped)]

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
    input_shapes = list(x = ncol(ds) - 1L), # every column except response `price`
    d = d, # Reduction dimension
    hidden_units = 512L,
    activation = 'relu'
)

## Open simulation log file, write simulation configuration and header
# NOTE(review): the file name contains no `format` placeholders, so `format`
# returns it unchanged -- presumably a timestamp pattern (as in the sim
# scripts) was intended; confirm.
log <- file(format(Sys.time(), "results/kc_house_data.csv"), "w", blocking = FALSE)
cat('# d = ', d, '\n# epochs = ', epochs[1], ',', epochs[2], '\n',
    '# dropped = ', paste(dropped, collapse = ', '), '\n',
    'method,fold,mse,var(Y.test),time.user,time.system,time.elapsed\n',
    sep = '', file = log, append = TRUE)

## K-Fold Cross Validation
K <- 10
for (i in 1:K) {
    # The length-K logical mask is recycled over the rows, i.e. every K-th
    # observation (by position) belongs to test fold `i`.
    ds.train <- ds[(1:K) != i, ]
    ds.test <- ds[(1:K) == i, , drop = FALSE]
    X.train <- as.matrix(ds.train[, names(ds) != 'price'])
    Y.train <- as.matrix(ds.train[, 'price'])
    X.test <- as.matrix(ds.test[, names(ds) != 'price'])
    Y.test <- as.matrix(ds.test[, 'price'])

    ## Fit `DR` Neuronal Network model
    time <- system.time(nn$fit(X.train, Y.train, epochs = epochs, initializer = 'fromOPG'))

    mse <- mean((nn$predict(X.test) - Y.test)^2)
    cat('"nn.ref",', i, ',', mse, ',', c(var(Y.test)), ',',
        time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        sep = '', file = log, append = TRUE)

    ## `MAVE`
    time <- system.time(dr <- mave.compute(X.train, Y.train, method = 'meanMAVE', max.dim = d))

    # Sometimes the `mda` package fails -> predict with NA/NaN/Inf value error.
    # In that case log `NA` instead of aborting the whole cross validation.
    mse <- tryCatch(mean((predict(dr, X.test, d) - Y.test)^2),
        error = function(err) NA)
    cat('"mave",', i, ',', mse, ',', c(var(Y.test)), ',',
        time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        sep = '', file = log, append = TRUE)

    # Current implementation requires too much memory (CVarE v1.1). Run on `VSC`.
    # ## and CVE
    # X.scaled <- scale(X.train)
    # time <- system.time(dr <- cve.call(X.scaled, Y.train, k = d))

    # # Might have the same problem as MAVE since we use `mda` as well.
    # mse <- tryCatch({
    #     Y.pred <- predict(dr, scale(X.test,
    #             scale = attr(X.scaled, 'scaled:scale'),
    #             center = attr(X.scaled, 'scaled:center')),
    #         k = d)
    #     mean((Y.pred - Y.test)^2)
    # },
    # error = function(err) NA)
    # cat('"cve",', i, ',', mse, ',', c(var(Y.test)), ',',
    #     time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
    #     sep = '', file = log, append = TRUE)

    ## Reset model
    nn$reset()
}

## Finished, close simulation log file
close(log)
|
|
@ -0,0 +1,48 @@
|
||||||
|
library(MAVE)
|
||||||
|
library(CVarE)
|
||||||
|
library(NNSDR)
|
||||||
|
|
||||||
|
set.seed(797)

## Sample a synthetic regression data set.
##
## The first predictor column is nearly collinear with columns 2 and 3 while
## the response depends on column 4 only; `B` marks that column as the true
## one-dimensional reduction direction.
##
## Args:
##   n  - number of observations
##   p  - number of predictor columns
##   sd - standard deviation of the additive response noise
## Returns a list with predictors `X` (n x p), response `Y` (n x 1) and the
## true projection `B` (p x 1).
dataset <- function(n = 100, p = 10, sd = 0.5) {
    predictors <- matrix(rnorm(n * (p - 1)), n, p - 1)
    predictors <- cbind(-0.5 * (predictors[, 1] + predictors[, 2]) + 0.001 * rnorm(n),
                        predictors)
    projection <- diag(p)[, 4, drop = FALSE]
    response <- as.matrix(predictors[, 4]^2 + rnorm(n, 0, sd))
    list(X = predictors, Y = response, B = projection)
}
|
||||||
|
|
||||||
|
## Build the dimension-reduction network matching `dataset()` output (p = 10).
nn <- nnsdr$new(
    input_shapes = list(x = 10L),
    d = 1L, # Reduction dimension
    hidden_units = 512L,
    activation = 'relu',
    trainable_reduction = TRUE
)

## Collect subspace distances of all estimators over 10 replications.
sim <- data.frame(mave = rep(NA, 10), cve = NA, nn.opg = NA, nn.ref = NA)
for (i in 1:10) {
    cat(i, "/ 10\n")
    # FIX: use a plain local variable instead of `with()` combined with `<<-`
    # super-assignments, which silently depended on `sim` living in the global
    # environment. Results are identical, scoping is now explicit.
    ds <- dataset()

    ## Reference methods: MAVE and CVE.
    dr <- mave.compute(ds$X, ds$Y, method = 'meanMAVE', max.dim = 1)
    sim[i, 'mave'] <- dist.subspace(ds$B, coef(dr, 1), normalize = TRUE)

    dr <- cve.call(ds$X, ds$Y, k = 1)
    sim[i, 'cve'] <- dist.subspace(ds$B, coef(dr, 1), normalize = TRUE)

    ## NNSDR: record both the OPG stage and the refinement estimate.
    nn$fit(ds$X, ds$Y, epochs = c(200L, 400L), batch_size = 32L, initializer = 'fromOPG')
    sim[i, 'nn.opg'] <- dist.subspace(ds$B, coef(nn, 'OPG'), normalize = TRUE)
    sim[i, 'nn.ref'] <- dist.subspace(ds$B, coef(nn), normalize = TRUE)

    nn$reset()
}

## Aggregate: mean / median / sd of the subspace distances per estimator.
round(t(data.frame(
    mean = colMeans(sim),
    median = apply(sim, 2, median),
    sd = apply(sim, 2, sd)
)), 3)

# &$\mave$& $\cve$&$\nn_{512}$ \\
# mean & 0.917 & 0.164 & 0.101 \\
# median & 0.999 & 0.162 & 0.096 \\
# sd & 0.256 & 0.057 & 0.032 \\
|
|
@ -0,0 +1,98 @@
|
||||||
|
#!/usr/bin/env Rscript
## Repeated simulation comparing `MAVE`, `OPG`, `CVE` and the `NNSDR`
## estimators (OPG stage and refinement) on a synthetic data set. Each
## replication logs subspace/Grassmann distances and test MSE to a CSV file.

library(MAVE)
library(CVarE)
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
    library(NNSDR)
})

## Parse script parameters
args <- parse.args(defaults = list(
    # Simulation configuration
    reps = 100, # Number of replications
    dataset = '1', # Name (number) of the data set
    # Neuronal Net. structure/definitions
    hidden_units = 512L,
    activation = 'relu', # activation of the hidden layer, e.g. 'relu'
    trainable_reduction = TRUE,
    # Neuronal Net. training
    epochs = c(200L, 400L), # Number of training epochs for (`OPG`, Refinement)
    batch_size = 32L,
    initializer = 'fromOPG',
    seed = 1390L
))

## Generate reference data (gets re-sampled for each replication)
ds <- dataset(args$dataset) # Generates a list with `X`, `Y`, `B` and `name`

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
    input_shapes = list(x = ncol(ds$X)),
    d = ncol(ds$B), # true reduction dimension
    hidden_units = args$hidden_units,
    activation = args$activation,
    trainable_reduction = args$trainable_reduction
)

## Open simulation log file, write simulation configuration and header
# The `%Y%m%d_%H%M` placeholders time-stamp the log file name.
log <- file(format(Sys.time(), "results/sim_%Y%m%d_%H%M.csv"), "w", blocking = FALSE)
cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n',
    'method,replication,dist.subspace,dist.grassmann,mse\n',
    sep = '', file = log, append = TRUE)

## Set seed for sampling simulation data (NOT affecting the `NN` initialization)
set.seed(args$seed)

## Repeated simulation runs
for (rep in seq_len(args$reps)) {
    ## Re-sample seeded data for each simulation replication
    # `with` exposes `X`, `Y` and `B` of the freshly drawn training set.
    with(dataset(ds$name), {
        ## Sample test dataset
        ds.test <- dataset(ds$name, n = 1000)

        ## First the reference method `MAVE`
        dr <- mave(Y ~ X, method = "meanMAVE")
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2)
        cat('"mave",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '',
            file = log, append = TRUE)
        ## and the `OPG` method
        dr <- mave(Y ~ X, method = "meanOPG")
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2)
        cat('"opg",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '',
            file = log, append = TRUE)

        ## Next the `CVE` method
        dr <- cve(Y ~ X, k = ncol(B))
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, k = ncol(B)) - ds.test$Y)^2)
        cat('"cve",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '',
            file = log, append = TRUE)

        ## Fit `NNSDR` model
        nn$fit(X, Y, epochs = args$epochs,
           batch_size = args$batch_size, initializer = args$initializer)
        # `OPG` estimate (no predictions from the OPG stage -> mse is NA)
        d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(nn, 'OPG'))
        cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',', NA, '\n', sep = '',
            file = log, append = TRUE)
        # Refinement estimate
        d.sub <- dist.subspace(B, coef(nn), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(nn))
        mse <- mean((nn$predict(ds.test$X) - ds.test$Y)^2)
        cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', mse, '\n', sep = '',
            file = log, append = TRUE)
    })

    ## Reset model
    nn$reset()
}

## Finished, close simulation log file
close(log)
|
|
@ -0,0 +1,111 @@
|
||||||
|
#!/usr/bin/env Rscript
## "Big" repeated simulation (timed) comparing `MAVE`, `OPG`, `CVE` and the
## `NNSDR` estimators. Each replication logs subspace/Grassmann distances,
## test MSE and timings to a CSV file.

library(MAVE)
library(CVarE)
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
    library(NNSDR)
})

## Parse script parameters
args <- parse.args(defaults = list(
    # Simulation configuration
    reps = 10L, # Number of replications
    dataset = '6', # Name (number) of the data set
    # Neuronal Net. structure/definitions
    hidden_units = 512L,
    activation = 'relu',
    trainable_reduction = TRUE,
    # Neuronal Net. training
    epochs = c(200L, 400L), # Number of training epochs for (`OPG`, Refinement)
    batch_size = 32L,
    initializer = 'fromOPG',
    # Simulation data generation configuration
    seed = 1390L,
    n = 100L,
    p = 20L
))

## Generate reference data (gets re-sampled for each replication)
# Number of observations are irrelevant for the reference to generate a matching
# `NNSDR` estimator
ds <- dataset(args$dataset, n = 100L, p = args$p) # Generates a list with `X`, `Y`, `B` and `name`

## Build Dimension Reduction Neuronal Network model (matching the data)
nn <- nnsdr$new(
    input_shapes = list(x = ncol(ds$X)),
    d = ncol(ds$B), # true reduction dimension
    hidden_units = args$hidden_units,
    activation = args$activation,
    trainable_reduction = args$trainable_reduction
)

## Open simulation log file, write simulation configuration and header
log <- file(format(Sys.time(), "results/sim_big_%Y%m%d_%H%M.csv"), "w", blocking = FALSE)
cat(paste('#', names(args), args, sep = ' ', collapse = '\n'), '\n',
    'method,replication,dist.subspace,dist.grassmann,mse,time.user,time.system,time.elapsed\n',
    sep = '', file = log, append = TRUE)

## Set seed for sampling simulation data (NOT affecting the `NN` initialization)
# FIX: `seed` was parsed and written to the log header but `set.seed` was never
# called, so runs were not reproducible despite the logged seed.
set.seed(args$seed)

## Repeated simulation runs
for (rep in seq_len(args$reps)) {
    ## Re-sample seeded data for each simulation replication
    # `with` exposes `X`, `Y` and `B` of the freshly drawn training set.
    with(dataset(ds$name, n = args$n, p = args$p), {
        ## Sample test dataset
        ds.test <- dataset(ds$name, n = 1000L, p = args$p)

        ## First the reference method `MAVE`
        # To be fair for measuring the time, set `max.dim` to true reduction dimension
        # and with `screen = ncol(X)` screening is turned "off".
        time <- system.time(dr <- mave.compute(X, Y, max.dim = ncol(B),
            method = "meanMAVE", screen = ncol(X)))
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2)
        cat('"mave",', rep, ',', d.sub, ',', d.gra, ',', mse, ',',
            time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
            sep = '', file = log, append = TRUE)
        ## and the `OPG` method
        time <- system.time(dr <- mave.compute(X, Y, max.dim = ncol(B),
            method = "meanOPG", screen = ncol(X)))
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, dim = ncol(B)) - ds.test$Y)^2)
        cat('"opg",', rep, ',', d.sub, ',', d.gra, ',', mse, ',',
            time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
            sep = '', file = log, append = TRUE)

        ## Next the CVE method
        time <- system.time(dr <- cve.call(X, Y, k = ncol(B)))
        d.sub <- dist.subspace(B, coef(dr, ncol(B)), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(dr, ncol(B)))
        mse <- mean((predict(dr, ds.test$X, k = ncol(B)) - ds.test$Y)^2)
        cat('"cve",', rep, ',', d.sub, ',', d.gra, ',', mse, ',',
            time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
            sep = '', file = log, append = TRUE)

        ## Fit `DR` Neuronal Network model
        time <- system.time(nn$fit(X, Y, epochs = args$epochs,
            batch_size = args$batch_size, initializer = args$initializer))
        # OPG estimate (no predictions and no separate timing -> NA columns)
        d.sub <- dist.subspace(B, coef(nn, 'OPG'), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(nn, 'OPG'))
        cat('"nn.opg",', rep, ',', d.sub, ',', d.gra, ',NA,NA,NA,NA\n',
            sep = '', file = log, append = TRUE)
        # Refinement estimate
        d.sub <- dist.subspace(B, coef(nn), normalize = TRUE)
        d.gra <- dist.grassmann(B, coef(nn))
        mse <- mean((nn$predict(ds.test$X) - ds.test$Y)^2)
        cat('"nn.ref",', rep, ',', d.sub, ',', d.gra, ',', mse, ',',
            time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
            sep = '', file = log, append = TRUE)
    })

    ## Invoke the garbage collector
    gc()

    ## Reset model
    nn$reset()
}

## Finished, close simulation log file
close(log)
|
Loading…
Reference in New Issue