diff --git a/.gitignore b/.gitignore
index f5e8f7e..ca4e0d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,6 +96,8 @@ vignettes/*.pdf
 ## Further project development folders/files
 # General Work In Progress files
 wip/
+stash/
+simulations/
 
 # PDFs
 *.pdf
@@ -108,6 +110,8 @@ wip/
 mlda_analysis/
 References/
 
+*.csv
+*.csv.log
 
 # Images (except images used in LaTeX)
 *.png
diff --git a/LaTeX/GMLM.tex b/LaTeX/GMLM.tex
index 9f6e880..fbe31c4 100644
--- a/LaTeX/GMLM.tex
+++ b/LaTeX/GMLM.tex
@@ -59,7 +59,7 @@
 \DeclareMathOperator{\tr}{tr}
 \DeclareMathOperator{\var}{Var}
 \DeclareMathOperator{\cov}{Cov}
-\DeclareMathOperator{\Span}{Span}
+\DeclareMathOperator{\Span}{span}
 \DeclareMathOperator{\E}{\operatorname{\mathbb{E}}}
 % \DeclareMathOperator{\independent}{{\bot\!\!\!\bot}}
 \DeclareMathOperator*{\argmin}{{arg\,min}}
@@ -185,14 +185,50 @@ Illustration of dimensions
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Sufficient Dimension Reduction}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
+By Theorem~1 in \cite{sdr-BuraDuarteForzani2016} we have that
+\begin{displaymath}
+    \mat{R}(\ten{X}) = \t{\mat{B}}(\mat{t}(\ten{X}) - \E\mat{t}(\ten{X}))
+\end{displaymath}
+is a sufficient reduction, where $\mat{B}\in\mathbb{R}^{p(p + 1)\times q}$ with $\Span(\mat{B}) = \Span(\{\mat{\eta}_Y - \E_{Y}\mat{\eta}_Y : Y\in\mathcal{S}_Y\})$. Since $\E_Y\mat{\eta}_{Y,1} \equiv c_1\E[\overline{\ten{\eta}}_1 - \ten{F}_Y\times_{k\in[r]}\mat{\alpha}_k] = c_1 \overline{\ten{\eta}}_1$ because $\E_Y\ten{F}_Y = 0$, and because $\mat{\eta}_{Y,2}$ does not depend on $Y$ (despite the notation), we get
+\begin{displaymath}
+    \mat{\eta}_Y - \E_{Y}\mat{\eta}_Y = \begin{pmatrix}
+        \mat{\eta}_{Y,1} - \E_{Y}\mat{\eta}_{Y,1} \\
+        \mat{\eta}_{Y,2} - \E_{Y}\mat{\eta}_{Y,2}
+    \end{pmatrix} = \begin{pmatrix}
+        c_1\vec(\ten{F}_Y\times_{k\in[r]}\mat{\alpha}_k) \\
+        \mat{0}
+    \end{pmatrix}.
+\end{displaymath}
+Noting that
+\begin{displaymath}
+    c_1\vec(\ten{F}_Y\times_{k\in[r]}\mat{\alpha}_k)
+    = c_1\Big( \bigotimes_{k = r}^{1}\mat{\alpha}_k \Big)\vec(\ten{F}_Y)
+\end{displaymath}
+we get
+\begin{displaymath}
+    \mat{B} = \begin{pmatrix}
+        c_1 \bigotimes_{k = r}^{1}\mat{\alpha}_k \\
+        \mat{0}_{p^2\times q}
+    \end{pmatrix}.
+\end{displaymath}
+Simplifying leads to
+\begin{displaymath}
+    \t{\mat{B}}(\mat{t}(\ten{X}) - \E\mat{t}(\ten{X})) = c_1 \t{\Big( \bigotimes_{k = r}^{1}\mat{\alpha}_k \Big)}(\mat{t}_1(\ten{X}) - \E\mat{t}_1(\ten{X})).
+\end{displaymath}
+Now note that $\Span(\mat{A}) = \Span(c \mat{A})$ for any matrix $\mat{A}$ and non-zero scalar $c$, and recall the definition $\mat{t}_1(\ten{X}) = \vec{\ten{X}}$, which proves the following.
 \begin{theorem}[SDR]\label{thm:sdr}
-    A sufficient reduction for the regression $y\mid \ten{X}$ under the quadratic exponential family inverse regression model \todo{reg} is given by
+    A sufficient reduction for the regression $Y\mid \ten{X}$ under the quadratic exponential family inverse regression model \todo{reg} is given by
+    \begin{align*}
+        R(\ten{X})
+            &= \t{\mat{\beta}}(\vec{\ten{X}} - \E\vec{\ten{X}}) \\
+            &\equiv \ten{X}\times_{k\in[r]}\t{\mat{\alpha}_k}
+    \end{align*}
+    for a $p\times q$ dimensional matrix $\mat{\beta}$ given by
     \begin{displaymath}
-        R(\ten{X}) = \vec(\ten{X}\times_{k\in[r]}\mat{\Omega}_k\mat{\alpha}_k).
+        \mat{\beta} = \bigotimes_{k = r}^{1}\mat{\alpha}_k
     \end{displaymath}
-    \todo{type proof in appendix}
+    which satisfies $\Span(\mat{\beta}) = \Span(\{\mat{\eta}_{Y,1} - \E_{Y}\mat{\eta}_{Y,1} : Y\in\mathcal{S}_Y\})$.
 \end{theorem}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -336,7 +372,7 @@ which yields the following relation to the conditional Ising model parameters
 \begin{displaymath}
     \mat{\theta}_y = \mat{\theta}(\mat{\eta}_y) = \vech(\diag(\mat{\eta}_{y,1}) + (2_{p\times p} - \mat{I}_p) \odot \reshape{(p, p)}(\mat{\eta}_{y,2}))
 \end{displaymath}
-where the constants $c_1, c_2$ can be chosen arbitrary, as long as they are non-zero. The ``inverse'' link in then computed via the Ising model as the conditional expectation of all interactions
+where $c_1, c_2$ are non-zero known (modeled) constants between $0$ and $1$ such that $c_1 + c_2 = 1$. The ``inverse'' link is then computed via the Ising model as the conditional expectation of all interactions
 \begin{align*}
     \invlink_2(\mat{\eta}_y) \equiv \E_{\mat{\theta}_y}[\vec(\ten{X})\t{\vec(\ten{X})}\mid Y = y]
 \end{align*}
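Aside on the theorem above (not part of the patch): the stated equivalence between the vectorized form and the multi-linear form of the reduction is the usual vec/Kronecker identity. A minimal numerical sanity check, assuming the `mlm` multi-linear multiplication helper from the tensorPredictors package and toy dimensions:

    library(tensorPredictors)
    set.seed(1)
    alphas <- list(matrix(rnorm(4 * 2), 4, 2), matrix(rnorm(4 * 2), 4, 2))
    X <- array(rnorm(4 * 4), dim = c(4, 4))
    beta <- Reduce(`%x%`, rev(alphas))   # beta = alpha_2 %x% alpha_1
    lhs <- c(crossprod(beta, c(X)))      # beta' vec(X)
    rhs <- c(mlm(X, Map(t, alphas)))     # vec(X x_1 alpha_1' x_2 alpha_2')
    all.equal(lhs, rhs)                  # TRUE up to numerical error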
diff --git a/LaTeX/main.bib b/LaTeX/main.bib
index f5f984e..55d7d2f 100644
--- a/LaTeX/main.bib
+++ b/LaTeX/main.bib
@@ -71,6 +71,29 @@
     doi = {10.1007/s41060-020-00228-y}
 }
 
+@article{sdr-BuraDuarteForzani2016,
+    author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana},
+    title = {Sufficient Reductions in Regressions With Exponential Family Inverse Predictors},
+    journal = {Journal of the American Statistical Association},
+    volume = {111},
+    number = {515},
+    pages = {1313--1329},
+    year = {2016},
+    publisher = {Taylor \& Francis},
+    doi = {10.1080/01621459.2015.1093944}
+}
+
+@article{tsir-DingCook2015,
+    author = {Ding, Shanshan and Cook, R. Dennis},
+    title = {Tensor sliced inverse regression},
+    journal = {Journal of Multivariate Analysis},
+    volume = {133},
+    pages = {216--231},
+    year = {2015},
+    issn = {0047-259X},
+    doi = {10.1016/j.jmva.2014.08.015}
+}
+
 @article{lsir-PfeifferForzaniBura,
     author = {Pfeiffer, Ruth and Forzani, Liliana and Bura, Efstathia},
     year = {2012},
diff --git a/sim/ising.R b/sim/ising.R
new file mode 100644
index 0000000..b4a3d8c
--- /dev/null
+++ b/sim/ising.R
@@ -0,0 +1,203 @@
+library(tensorPredictors)
+library(mvbernoulli)
+
+set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
+
+### simulation configuration
+reps <- 100       # number of simulation replications
+max.iter <- 100   # maximum number of iterations for GMLM
+sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
+N <- 2000         # validation set size
+p <- c(4, 4)      # predictor dimensions (ONLY 4 by 4 allowed!)
+q <- c(2, 2)      # response dimensions (ONLY 2 by 2 allowed!)
+r <- length(p)
+# parameter configuration
+rho <- -0.55
+c1 <- 1
+c2 <- 1
+
+# initial consistency checks
+stopifnot(exprs = {
+    r == 2
+    all.equal(p, c(4, 4))
+    all.equal(q, c(2, 2))
+})
+
+### small helpers
+# 270 deg matrix layout rotation (90 deg clockwise)
+rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
+# Auto-Regression Covariance Matrix
+AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
+# Inverse of the AR matrix
+AR.inv <- function(rho, dim) {
+    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
+    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
+    A / (1 - rho^2)
+}
+# projection matrix `P_A` as a projection onto the span of `A`
+proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
+
+### setup Ising parameters (to get reasonable parameters)
+eta1 <- 0
+alphas <- Map(function(pj, qj) { # qj ignored, it's 2
+    linspace <- seq(-1, 1, length.out = pj)
+    matrix(c(linspace, linspace^2), pj, 2)
+}, p, q)
+Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
+
+# data sampling routine
+sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
+    # generate response (sample axis is last axis)
+    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
+    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
+    dim(Fy) <- c(2, 2, n)
+
+    # natural exponential family parameters
+    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
+    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
+
+    # conditional Ising model parameters
+    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
+    ltri <- which(lower.tri(eta_y2, diag = TRUE))
+    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
+    theta_y[diagonal, ] <- eta_y1
+
+    # sample X from the conditional distribution
+    X <- apply(theta_y, 2, ising_sample, n = 1)
+    # convert (from compressed integer vector) to array data
+    attr(X, "p") <- prod(p)
+    X <- t(as.mvbmatrix(X))
+    dim(X) <- c(p, n)
+    storage.mode(X) <- "double"
+
+    # permute axes to get the requested sample axis
+    if (sample.axis != r + 1L) {
+        perm <- integer(r + 1L)
+        perm[sample.axis] <- r + 1L
+        perm[-sample.axis] <- seq_len(r)
+        X <- aperm(X, perm)
+        Fy <- aperm(Fy, perm)
+    }
+
+    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
+}
+
+### Logging Errors and Warnings
+# Register a global warning and error handler for logging warnings/errors with
+# current simulation repetition session information to reproduce problems
+exceptionLogger <- function(ex) {
+    # retrieve current simulation repetition information
+    rep.info <- get("rep.info", envir = .GlobalEnv)
+    # setup an error log file with the same name as `file`
+    log <- paste0(rep.info$file, ".log")
+    # Write (append) condition message with reproduction info to the log
+    cat("\n\n------------------------------------------------------------\n",
+        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
+            rep.info$file, rep.info$n, rep.info$rep,
+            paste(rep.info$.Random.seed, collapse = ","),
+            as.character.error(ex)
+        ), sep = "", file = log, append = TRUE)
+    # add Traceback (see: `traceback()` which the following is adapted from)
+    n <- length(x <- .traceback(NULL, max.lines = -1L))
+    if (n == 0L) {
+        cat("No traceback available", "\n", file = log, append = TRUE)
+    } else {
+        for (i in 1L:n) {
+            xi <- x[[i]]
+            label <- paste0(n - i + 1L, ": ")
+            m <- length(xi)
+            srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
+                srcfile <- attr(srcref, "srcfile")
+                paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
+            }
+            if (isTRUE(attr(xi, "truncated"))) {
+                xi <- c(xi, " ...")
+                m <- length(xi)
+            }
+            if (!is.null(srcloc)) {
+                xi[m] <- paste0(xi[m], srcloc)
+            }
+            if (m > 1) {
+                label <- c(label, rep(substr("          ", 1L,
+                    nchar(label, type = "w")), m - 1L))
+            }
+            cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
+        }
+    }
+}
+globalCallingHandlers(list(
+    message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
+))
+
+
+### for every sample size
+start <- format(Sys.time(), "%Y%m%dT%H%M")
+for (n in sample.sizes) {
+    ### write new simulation result file
+    file <- paste0(paste("sim-ising", start, n, sep = "-"), ".csv")
+    # CSV header, used to ensure correct value/column mapping when writing to file
+    header <- outer(
+        c("dist.subspace", "dist.projection", "error.pred"),  # measures
+        c("gmlm", "pca", "hopca", "tsir"),                    # methods
+        paste, sep = ".")
+    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
+
+    ### repeated simulation
+    for (rep in seq_len(reps)) {
+        ### Repetition session state info
+        # Stores specific session variables before starting the current
+        # simulation replication. This allows logging state information which
+        # can be used to replicate a specific simulation repetition in case of
+        # errors/warnings from the logs
+        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
+
+        ### sample (training) data
+        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
+
+        ### Fit data using different methods
+        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
+            max.iter = max.iter, family = "ising")
+        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
+        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
+        fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
+
+        ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
+        B.true <- Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas)))
+        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas))))
+        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
+        B.pca <- fit.pca$rotation
+        B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
+
+        # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
+        # `P_A = A (A' A)^-1 A'` and the normalization means that with
+        # respect to the dimensions of `A, B` the subspace distance is in the
+        # range `[0, 1]`.
+        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
+        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
+        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
+
+        # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
+        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
+        dist.projection.hopca <- dist.projection(B.true, B.hopca)
+        dist.projection.pca <- dist.projection(B.true, B.pca)
+        dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
+
+        ### Prediction Errors: (using new independent sample of size `N`)
+        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
+        # centered model matrix of vectorized `X`s
+        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
+        P.true <- proj(B.true)
+        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
+        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
+        error.pred.pca <- norm(P.true - proj(B.pca), "2")
+        error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
+
+        # format estimation/prediction errors and write to file and console
+        line <- paste0(Map(get, header), collapse = ",")
+        cat(line, "\n", sep = "", file = file, append = TRUE)
+        # report progress
+        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
+            which(n == sample.sizes), length(sample.sizes), rep, reps))
+    }
+}
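Aside (not part of the patch): the `*.csv.log` files written by `exceptionLogger` record `file`, `n`, `rep` and `.Random.seed`, which is everything needed to replay a failed repetition. A hedged sketch; the `replay` helper below is hypothetical and merely restates the repetition body:

    # hypothetical helper: restore the logged RNG state, then redo one repetition
    replay <- function(n, seed) {
        assign(".Random.seed", seed, envir = .GlobalEnv)
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
        GMLM.default(X, Fy, sample.axis = sample.axis,
            max.iter = max.iter, family = "ising")
    }
    # paste `n` and the `.Random.seed <- c(...)` vector from the log, then:
    # fit <- replay(n, .Random.seed)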
diff --git a/sim/ising_2.R b/sim/ising_2.R
new file mode 100644
index 0000000..58be1e0
--- /dev/null
+++ b/sim/ising_2.R
@@ -0,0 +1,134 @@
+library(tensorPredictors)
+library(mvbernoulli)
+
+set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
+
+### simulation configuration
+reps <- 100       # number of simulation replications
+max.iter <- 1000  # maximum number of iterations for GMLM
+n <- 100          # sample size `n`
+N <- 2000         # validation set size
+p <- c(4, 4)      # predictor dimensions (ONLY 4 by 4 allowed!)
+q <- c(2, 2)      # response dimensions (ONLY 2 by 2 allowed!)
+r <- length(p)
+# parameter configuration
+rho <- -0.55
+c1 <- 1
+c2 <- 1
+
+# initial consistency checks
+stopifnot(exprs = {
+    r == 2
+    all.equal(p, c(4, 4))
+    all.equal(q, c(2, 2))
+})
+
+### small helpers
+# 270 deg matrix layout rotation (90 deg clockwise)
+rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
+# Auto-Regression Covariance Matrix
+AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
+# Inverse of the AR matrix
+AR.inv <- function(rho, dim) {
+    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
+    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
+    A / (1 - rho^2)
+}
+# projection matrix `P_A` as a projection onto the span of `A`
+proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
+
+### setup Ising parameters (to get reasonable parameters)
+eta1 <- 0
+# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
+#     linspace <- seq(-1, 1, length.out = pj)
+#     matrix(c(linspace, rev(linspace)), pj, 2)
+# }, p, q)
+alphas <- Map(function(pj, qj) { # qj ignored, it's 2
+    linspace <- seq(-1, 1, length.out = pj)
+    matrix(c(linspace, linspace^2), pj, 2)
+}, p, q)
+# alphas <- Map(function(pj, qj) {
+#     qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
+# }, p, q)
+Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
+
+# data sampling routine
+sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
+    # generate response (sample axis is last axis)
+    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
+    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
+    dim(Fy) <- c(2, 2, n)
+
+    # natural exponential family parameters
+    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
+    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
+
+    # conditional Ising model parameters
+    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
+    ltri <- which(lower.tri(eta_y2, diag = TRUE))
+    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
+    theta_y[diagonal, ] <- eta_y1
+
+    # sample X from the conditional distribution
+    X <- apply(theta_y, 2, ising_sample, n = 1)
+    # convert (from compressed integer vector) to array data
+    attr(X, "p") <- prod(p)
+    X <- t(as.mvbmatrix(X))
+    dim(X) <- c(p, n)
+    storage.mode(X) <- "double"
+
+    # permute axes to get the requested sample axis
+    if (sample.axis != r + 1L) {
+        perm <- integer(r + 1L)
+        perm[sample.axis] <- r + 1L
+        perm[-sample.axis] <- seq_len(r)
+        X <- aperm(X, perm)
+        Fy <- aperm(Fy, perm)
+    }
+
+    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
+}
+
+### sample (training) data
+c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
+
+### Fit data using GMLM with logging
+
+# logger to log iterative change in the estimation process of GMLM
+# log <- data.frame()
+log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
+B.true <- Reduce(`%x%`, rev(alphas))
+logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
+    B.est <- Reduce(`%x%`, rev(alphas.est))
+
+    err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
+    err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
+
+    if (iter > 0) { cat("\033[9A") }
+    cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
+            iter,
+            log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
+            dist.subspace(B.true, B.est, normalize = TRUE)
+        ),
+        "\033[2mMSE eta1\033[0m",
+        mean((eta1 - eta1.est)^2),
+        "\033[2msubspace distances of alphas\033[0m",
+        do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
+        "\033[2mFrob. norm of Omega differences\033[0m",
+        do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
+        sep = "\n    "
+    )
+}
+
+# now call the GMLM fitting routine with performance profiling
+tryCatch({
+    system.time( # profvis::profvis(
+        fit.gmlm <- GMLM.default(
+            X, Fy, sample.axis = sample.axis, max.iter = max.iter,
+            family = "ising", logger = logger
+        )
+    )
+}, error = function(ex) {
+    print(ex)
+    traceback()
+})
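Aside (not part of the patch): the commented-out `log <- data.frame()` above hints at collecting the per-iteration diagnostics instead of printing them with ANSI escapes. A minimal sketch under that assumption; `log.df` and `logger.collect` are illustrative names:

    log.df <- list()
    logger.collect <- function(iter, eta1.est, alphas.est, Omegas.est) {
        B.est <- Reduce(`%x%`, rev(alphas.est))
        log.df[[length(log.df) + 1L]] <<- data.frame(
            iter = iter,
            loss = log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
            dist = dist.subspace(B.true, B.est, normalize = TRUE)
        )
    }
    # pass `logger = logger.collect` to GMLM.default, then inspect the
    # collected history via do.call(rbind, log.df)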
diff --git a/sim/normal.R b/sim/normal.R
index 077ebd2..529851f 100644
--- a/sim/normal.R
+++ b/sim/normal.R
@@ -4,26 +4,30 @@
 set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
 
 ### simulation configuration
 reps <- 100 # number of simulation replications
+max.iter <- 10000 # maximum number of iterations for GMLM
 sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
 N <- 2000 # validation set size
 p <- c(2, 3, 5) # preditor dimensions
 q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
+r <- length(p)
 
 # initial consistency checks
 stopifnot(exprs = {
-    length(p) == length(q)
+    r == length(p)
+    r == length(q)
     all(outer(p, sample.sizes, `<`))
 })
 
+# projection matrix `P_A` as a projection onto the span of `A`
+proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
+
 # setup model parameters
 alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
 Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
 eta1 <- 0 # intercept
 
 # data sampling routine
-sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
-    r <- length(alphas) # tensor order
-
+sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
     # generate response (sample axis is last axis)
     y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
     Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
@@ -43,12 +47,9 @@ sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) +
         Fy <- aperm(Fy, perm)
     }
 
-    list(X = X, Fy = Fy, sample.axis = sample.axis)
+    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
 }
 
-# projection matrix `P_A` as a projection onto the span of `A`
-proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
-
 ### Logging Errors and Warnings
 # Register a global warning and error handler for logging warnings/errors with
 # current simulation repetition session informatin allowing to reproduce problems
@@ -102,12 +103,11 @@ start <- format(Sys.time(), "%Y%m%dT%H%M")
 for (n in sample.sizes) {
     ### write new simulation result file
     file <- paste0(paste("sim-normal", start, n, sep = "-"), ".csv")
-    # CSV header, used to ensure correct value collumn mapping when writing to file
-    header <- c(
-        "dist.subspace.gmlm", "dist.subspace.hopca", "dist.subspace.pca",
-        "dist.projection.gmlm", "dist.projection.hopca", "dist.projection.pca",
-        "error.pred.gmlm", "error.pred.hopca", "error.pred.pca"
-    )
+    # CSV header, used to ensure correct value/column mapping when writing to file
+    header <- outer(
+        c("dist.subspace", "dist.projection", "error.pred"),  # measures
+        c("gmlm", "pca", "hopca", "tsir"),                    # methods
+        paste, sep = ".")
     cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
 
     ### repeated simulation
@@ -120,18 +120,20 @@ for (n in sample.sizes) {
         rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
 
         ### sample (training) data
-        c(X, Fy, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
+        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
 
         ### Fit data using different methods
-        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis)
+        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
         fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
         fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
+        fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
 
         ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
         B.true <- Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas)))
         B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(Map(`%*%`, Omegas, alphas))))
         B.hopca <- Reduce(`%x%`, rev(fit.hopca))
         B.pca <- fit.pca$rotation
+        B.tsir <- Reduce(`%x%`, rev(fit.tsir))
 
         # Subspace Distances: Normalized `|| P_A - P_B ||_F` where
         # `P_A = A (A' A)^-1/2 A'` and the normalization means that with
@@ -140,20 +142,23 @@ for (n in sample.sizes) {
         dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
         dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
         dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
+        dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
 
         # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
         dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
         dist.projection.hopca <- dist.projection(B.true, B.hopca)
         dist.projection.pca <- dist.projection(B.true, B.pca)
+        dist.projection.tsir <- dist.projection(B.true, B.tsir)
 
         ### Prediction Errors: (using new independend sample of size `N`)
-        c(X, Fy, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
+        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
         # centered model matrix of vectorized `X`s
         vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
         P.true <- proj(B.true)
         error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
         error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
         error.pred.pca <- norm(P.true - proj(B.pca), "2")
+        error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
 
         # format estimation/prediction errors and write to file and console
         line <- paste0(Map(get, header), collapse = ",")
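Aside (not part of the patch): the `line <- paste0(Map(get, header), collapse = ",")` idiom above fetches each value from the local environment by the variable whose name matches the CSV column, which is also why the `*.tsir` variables are still assigned (as `NA`) in sim/ising.R even though TSIR is skipped there. A toy illustration with invented values:

    dist.subspace.gmlm <- 0.12
    dist.subspace.pca <- 0.45
    header <- c("dist.subspace.gmlm", "dist.subspace.pca")
    paste0(Map(get, header), collapse = ",")   # "0.12,0.45"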
diff --git a/sim/normal_2.R b/sim/normal_2.R
new file mode 100644
index 0000000..36c7c1d
--- /dev/null
+++ b/sim/normal_2.R
@@ -0,0 +1,96 @@
+library(tensorPredictors)
+
+set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
+
+### simulation configuration
+reps <- 100  # number of simulation replications
+n <- 100     # sample size `n`
+N <- 2000    # validation set size
+p <- c(2, 3, 5)  # predictor dimensions
+q <- c(1, 2, 3)  # functions of y dimensions (response dimensions)
+
+# initial consistency checks
+stopifnot(exprs = {
+    length(p) == length(q)
+})
+
+# setup model parameters
+alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
+Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
+eta1 <- 0 # intercept
+
+# data sampling routine
+sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
+    r <- length(alphas) # tensor order
+
+    # generate response (sample axis is last axis)
+    y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
+    Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
+    Fy <- Fy - c(rowMeans(Fy, dims = r))
+
+    # sample predictors as X | Y = y (sample axis is last axis)
+    Deltas <- Map(solve, Omegas) # normal covariances
+    mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
+    X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
+
+    # permute axes to get the requested sample axis
+    if (sample.axis != r + 1L) {
+        perm <- integer(r + 1L)
+        perm[sample.axis] <- r + 1L
+        perm[-sample.axis] <- seq_len(r)
+        X <- aperm(X, perm)
+        Fy <- aperm(Fy, perm)
+    }
+
+    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
+}
+
+### sample (training) data
+c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
+
+### Fit data using GMLM with logging
+
+# logger to log iterative change in the estimation process of GMLM
+# log <- data.frame()
+log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
+B.true <- Reduce(`%x%`, rev(alphas))
+logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
+    B.est <- Reduce(`%x%`, rev(alphas.est))
+
+    err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
+    err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
+
+    if (iter > 1) { cat("\033[9A") }
+    cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
+            iter,
+            log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
+            dist.subspace(B.true, B.est, normalize = TRUE)
+        ),
+        "\033[2mMSE eta1\033[0m",
+        mean((eta1 - eta1.est)^2),
+        "\033[2msubspace distances of alphas\033[0m",
+        do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
+        "\033[2mFrob. norm of Omega differences\033[0m",
+        do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
+        sep = "\n    "
+    )
+}
+
+# now call the GMLM fitting routine with performance profiling
+system.time( # profvis::profvis(
+    fit.gmlm <- GMLM.default(
+        X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
+    )
+)
+
+
+# Iter: loss - dist
+# 7190:   50.583 -    0.057
+#     MSE eta1
+#     0.02694658
+#     subspace distances of alphas
+#        0.043    0.035    0.034
+#     Frob. norm of Omega differences
+#        0.815    1.777   12.756
+#    time    user  system elapsed
+#        342.279 555.630 183.653
diff --git a/sim/plots.R b/sim/plots.R
new file mode 100644
index 0000000..8cc7206
--- /dev/null
+++ b/sim/plots.R
@@ -0,0 +1,61 @@
+
+if (!endsWith(getwd(), "/sim")) {
+    setwd("sim")
+}
+
+date <- "20221007" # yyyymmdd, to match all use "[0-9]{8}"
+time <- "[0-9]{4}" # HHMM, to match all use "[0-9]{4}"
+sim <- Reduce(rbind, Map(function(path) {
+    df <- read.csv(path)
+    df$n <- as.integer(strsplit(path, "[-.]")[[1]][[4]])
+    df
+}, list.files(".", pattern = paste0(
+    "^sim-normal-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
+))))
+
+stats <- aggregate(. ~ n, sim, mean)
+q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
+q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
+
+colors <- c(gmlm = "#247407", hopca = "#2a62b6", pca = "#a11414", tsir = "#9313b9")
+line.width <- 1.75
+margins <- c(5.1, 4.1, 4.1, 0.1)
+
+with(stats, {
+    par(mar = margins)
+    plot(range(n), c(0, 1.05),
+        type = "n", bty = "n", main = "Estimation Error",
+        xlab = "Sample Size", ylab = "Error")
+    lines(n, dist.projection.gmlm, col = colors["gmlm"], lwd = line.width)
+    lines(n, dist.projection.hopca, col = colors["hopca"], lwd = line.width)
+    lines(n, dist.projection.pca, col = colors["pca"], lwd = line.width)
+    lines(n, dist.projection.tsir, col = colors["tsir"], lwd = line.width)
+
+    par(mar = rep(0, 4))
+    legend("topright", legend = names(colors), col = colors, lwd = line.width,
+        lty = 1, bty = "n")
+    par(mar = margins)
+})
+
+with(stats, {
+    par(mar = margins)
+    plot(range(n), c(0, 1.05),
+        type = "n", bty = "n", main = "Root Mean Squared Prediction Error",
+        xlab = "Sample Size", ylab = "Error")
+    xn <- c(q75$n, rev(q25$n))
+    polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
+        col = adjustcolor(colors["gmlm"], alpha.f = 0.3), border = NA)
+    polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
+        col = adjustcolor(colors["hopca"], alpha.f = 0.3), border = NA)
+    polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
+        col = adjustcolor(colors["pca"], alpha.f = 0.3), border = NA)
+    lines(n, error.pred.gmlm, col = colors["gmlm"], lwd = line.width)
+    lines(n, error.pred.hopca, col = colors["hopca"], lwd = line.width)
+    lines(n, error.pred.pca, col = colors["pca"], lwd = line.width)
+    lines(n, error.pred.tsir, col = colors["tsir"], lwd = line.width)
+
+    par(mar = rep(0, 4))
+    legend("topright", legend = names(colors), col = colors, lwd = line.width,
+        lty = 1, bty = "n")
+    par(mar = margins)
+})
diff --git a/tensorPredictors/NAMESPACE b/tensorPredictors/NAMESPACE
index 6c7a884..751dcd0 100644
--- a/tensorPredictors/NAMESPACE
+++ b/tensorPredictors/NAMESPACE
@@ -24,6 +24,7 @@ export(POI)
 export(RMReg)
 export(RMap)
 export(S)
+export(TSIR)
 export(approx.kronecker)
 export(colKronecker)
 export(dist.kron.norm)
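Aside (not part of the patch): the three aggregate() passes over `sim` in sim/plots.R above could also be collapsed into a single pass; a sketch, noting that aggregate then stores mean/q25/q75 as matrix columns, accessed like `stats.all$error.pred.gmlm[, "q25"]`:

    stats.all <- aggregate(. ~ n, sim, function(x) c(
        mean = mean(x),
        q25 = unname(quantile(x, 0.25)),
        q75 = unname(quantile(x, 0.75))
    ))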
diff --git a/tensorPredictors/R/GMLM.R b/tensorPredictors/R/GMLM.R
index 7a06a19..f057c89 100644
--- a/tensorPredictors/R/GMLM.R
+++ b/tensorPredictors/R/GMLM.R
@@ -22,37 +22,32 @@ make.gmlm.family <- function(name) {
     switch(name,
         normal = {
             initialize <- function(X, Fy) {
-                # observation/predictor tensor order
                 p <- head(dim(X), -1)
-                q <- head(dim(Fy), -1)
-                r <- length(dim(X)) - 1L
+                r <- length(p)
 
-                # mu = E[X] = E[E[X | Y]]
-                mu <- rowMeans(X, dims = r)
-                # covariance of X (non conditional estimate)
-                Deltas <- mcov(X, sample.axis = r + 1L)
-                Omegas <- Map(solve, Deltas)
+                # Mode-Covariances
+                XSigmas <- mcov(X, sample.axis = r + 1L)
+                YSigmas <- mcov(Fy, sample.axis = r + 1L)
 
-                # GLM intercept
-                eta1 <- mlm(mu, Omegas)
+                # Extract main mode covariance directions
+                # Note: (the directions are transposed!)
+                XDirs <- Map(function(Sigma) {
+                    with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+                }, XSigmas)
+                YDirs <- Map(function(Sigma) {
+                    with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+                }, YSigmas)
 
-                # initialize GLM reduction matrices
-                Sigmas <- mcov(Fy, sample.axis = r + 1L)
-                alphas <- Map(function(j) {
-                    s <- min(p[j], q[j])
-                    L <- with(La.svd(Deltas[[j]]), {
-                        u[, 1:s] %*% diag(d[1:s]^-0.5, s, s)
-                    })
-                    R <- with(La.svd(Sigmas[[j]]), {
-                        diag(d[1:s]^-0.5, s, s) %*% vt[1:s, ]
-                    })
-                    L %*% R
-                }, seq_len(r))
+                alphas <- Map(function(xdir, ydir) {
+                    s <- min(ncol(xdir), nrow(ydir))
+                    crossprod(xdir[seq_len(s), , drop = FALSE],
+                        ydir[seq_len(s), , drop = FALSE])
+                }, XDirs, YDirs)
 
                 list(
-                    eta1 = eta1,
+                    eta1 = rowMeans(X, dims = r),
                     alphas = alphas,
-                    Omegas = Omegas
+                    Omegas = Map(diag, p)
                 )
             }
@@ -134,35 +129,71 @@ make.gmlm.family <- function(name) {
             require(mvbernoulli)
 
             initialize <- function(X, Fy) {
-                # retrieve dimensions
-                n <- tail(dim(X), 1)     # sample size
-                p <- head(dim(X), -1)    # predictor dimensions
-                q <- head(dim(Fy), -1)   # response dimensions
-                r <- length(p)           # single predictor/response tensor order
+                p <- head(dim(X), -1)
+                r <- length(p)
 
-                # Half vectorized two-way interaction stats E[vech(vec(X) vec(X)')]
-                dim(X) <- c(prod(p), n)
-                T2 <- rowMeans(colKronecker(X, X)[vech.index(prod(p)), ])
-
-                # If there are any 0 or 1 entries in T2, then theta contains
-                # +-infinity corresponding to certain/impossible events.
-                # Make this robust by squishing the domain a bit!
-                T2 <- 0.01 + 0.98 * T2
-
-                # take the expected two-way marginal probability estimate and
-                # equat them with the expected contitional probs from which
-                # we compute a joint (over all observations) estimate of theta.
-                theta0 <- ising_theta_from_cond_prob(T2)
+                # Mode-Covariances
+                XSigmas <- mcov(X, sample.axis = r + 1L)
+                YSigmas <- mcov(Fy, sample.axis = r + 1L)
 
+                # Extract main mode covariance directions
+                # Note: (the directions are transposed!)
+                XDirs <- Map(function(Sigma) {
+                    with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+                }, XSigmas)
+                YDirs <- Map(function(Sigma) {
+                    with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+                }, YSigmas)
+
+                alphas <- Map(function(xdir, ydir) {
+                    s <- min(ncol(xdir), nrow(ydir))
+                    crossprod(xdir[seq_len(s), , drop = FALSE],
+                        ydir[seq_len(s), , drop = FALSE])
+                }, XDirs, YDirs)
 
                 list(
-                    eta1 = eta1,
+                    eta1 = array(0, dim = p),
                     alphas = alphas,
-                    Omegas = Omegas
+                    Omegas = Map(diag, p)
                 )
             }
 
+            # initialize <- function(X, Fy) {
+            #     r <- length(dim(X)) - 1L
+
+            #     # Mode-Covariances
+            #     XSigmas <- mcov(X, sample.axis = r + 1L)
+            #     YSigmas <- mcov(Fy, sample.axis = r + 1L)
+
+            #     # Extract main mode covariance directions
+            #     # Note: (the directions are transposed!)
+            #     XDirs <- Map(function(Sigma) {
+            #         with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+            #     }, XSigmas)
+            #     YDirs <- Map(function(Sigma) {
+            #         with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
+            #     }, YSigmas)
+
+            #     alphas <- Map(function(xdir, ydir) {
+            #         s <- min(ncol(xdir), nrow(ydir))
+            #         crossprod(xdir[seq_len(s), , drop = FALSE],
+            #             ydir[seq_len(s), , drop = FALSE])
+            #     }, XDirs, YDirs)
+
+            #     # Scatter matrices from Residuals (intercept not considered)
+            #     Deltas <- mcov(X - mlm(Fy, alphas), sample.axis = r + 1L)
+            #     Omegas <- Map(solve, Deltas)
+
+            #     # and the intercept
+            #     eta1 <- mlm(rowMeans(X, dims = r), Deltas)
+
+            #     list(
+            #         eta1 = eta1,
+            #         alphas = alphas,
+            #         Omegas = Omegas
+            #     )
+            # }
+
             params <- function(Fy, eta1, alphas, Omegas, c1 = 1, c2 = 1) {
                 # number of observations
                 n <- tail(dim(Fy), 1)
@@ -171,14 +202,20 @@ make.gmlm.family <- function(name) {
                 eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
                 eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
 
-                # next the conditional Ising model parameters `theta_y`
-                theta_y <- rep(eta_y2[lower.tri(eta_y2, diag = TRUE)], n)
-                dim(theta_y) <- c(nrow(eta_y2) * (nrow(eta_y2) + 1) / 2, n)
+                # # next the conditional Ising model parameters `theta_y`
+                # theta_y <- rep(eta_y2[lower.tri(eta_y2, diag = TRUE)], n)
+                # dim(theta_y) <- c(nrow(eta_y2) * (nrow(eta_y2) + 1) / 2, n)
+                # ltri <- which(lower.tri(eta_y2, diag = TRUE))
+                # diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
+                # theta_y[diagonal, ] <- theta_y[diagonal, ] + c(eta_y1)
+                # theta_y[-diagonal, ] <- 2 * theta_y[-diagonal, ]
+
+                # conditional Ising model parameters
+                theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
                 ltri <- which(lower.tri(eta_y2, diag = TRUE))
                 diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
-                theta_y[diagonal, ] <- theta_y[diagonal, ] + c(eta_y1)
-                theta_y[-diagonal, ] <- 2 * theta_y[-diagonal, ]
+                theta_y[diagonal, ] <- eta_y1
 
                 theta_y
             }
@@ -192,6 +229,7 @@ make.gmlm.family <- function(name) {
                 theta_y <- params(Fy, eta1, alphas, Omegas, c1, c2)
 
                 # convert to binary data set
+                storage.mode(X) <- "integer"
                 X.mvb <- as.mvbinary(mat(X, length(dim(X))))
 
                 # log-likelihood of the data set
@@ -268,6 +306,8 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
     logger = NULL
 ) {
     stopifnot(exprs = {
+        length(sample.axis) == 1L
+        (1L <= sample.axis) && (sample.axis <= length(dim(X)))
         (dim(X) == dim(Fy))[sample.axis]
     })
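Aside (not part of the patch): the new initialization above pairs sqrt-eigenvalue-scaled directions of the mode covariances of X and Fy. A standalone restatement of the per-mode computation, assuming `mcov`-style mode covariance matrices as input:

    # rows of the result are directions scaled by the sqrt of their eigenvalues
    mode.dirs <- function(Sigma) with(La.svd(Sigma, nu = 0), sqrt(d) * vt)
    # alpha_k pairs the leading s directions of the X and Fy mode covariances
    init.alpha <- function(xdir, ydir) {
        s <- min(ncol(xdir), nrow(ydir))
        crossprod(xdir[seq_len(s), , drop = FALSE], ydir[seq_len(s), , drop = FALSE])
    }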
diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R
new file mode 100644
index 0000000..a34b04d
--- /dev/null
+++ b/tensorPredictors/R/TSIR.R
@@ -0,0 +1,102 @@
+#' Tensor Sliced Inverse Regression
+#'
+#' @export
+TSIR <- function(X, y, d, sample.axis = 1L,
+    max.iter = 50L,
+    eps = sqrt(.Machine$double.eps)
+) {
+
+    if (!(is.factor(y) || is.integer(y))) { # TODO: Implement continuous case!
+        stop("Only factor and integer response implemented!")
+    }
+
+    stopifnot(exprs = {
+        dim(X)[sample.axis] == length(y)
+        length(d) + 1L == length(dim(X))
+    })
+
+    # rearrange `X`, `Fy` such that the last axis enumerates observations
+    axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
+    X <- aperm(X, axis.perm)
+
+    # get dimensions
+    n <- tail(dim(X), 1)
+    p <- head(dim(X), -1)
+    modes <- seq_along(p)
+
+    # reinterpret `y` as a factor index and get the number of slices
+    y <- as.factor(y)
+    nr.slices <- length(levels(y))
+    slice.sizes <- table(y)
+    y <- as.integer(y)
+
+    # center `X`
+    X <- X - c(rowMeans(X, dim = length(modes)))
+
+    # Slice `X` into slices governed by `y`
+    slices <- Map(function(s) {
+        X_s <- X[rep(s == y, each = prod(p))]
+        dim(X_s) <- c(p, slice.sizes[s])
+        X_s
+    }, seq_len(nr.slices))
+
+    # For each slice we get the slice means
+    slice.means <- Map(rowMeans, slices, MoreArgs = list(dim = length(modes)))
+
+    # Initial `Gamma_k` estimates from dominant eigenvectors of `Cov_c(E[X_(k) | Y])`
+    Sigmas <- Map(function(k) {
+        Reduce(`+`, Map(function(mu_s, n_s) {
+            n_s * mcrossprod(mu_s, mode = k)
+        }, slice.means, slice.sizes)) / n
+    }, modes)
+    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
+
+    # setup projections as `Proj_k = Gamma_k Gamma_k'`
+    projections <- Map(tcrossprod, Gammas)
+
+    # compute initial loss for the break condition
+    loss.last <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
+        n_s * sum((X_s - mlm(X_s, projections))^2)
+    }, slice.means, slice.sizes))
+
+    # Iterate till convergence
+    for (iter in seq_len(max.iter)) {
+        # For each mode assume mode reduction `Gamma`s fixed and update
+        # current mode reduction `Gamma_k`
+        for (k in modes) {
+            # Other mode projection of slice means
+            slice.proj <- Map(mlm, slice.means, MoreArgs = list(
+                Gammas[-k], modes[-k], transposed = TRUE
+            ))
+
+            # Update mode `k` slice mean covariances
+            Sigmas[[k]] <- n^-1 * Reduce(`+`, Map(function(PX_s, n_s) {
+                n_s * mcrossprod(PX_s, mode = k)
+            }, slice.proj, slice.sizes))
+
+            # Recompute mode `k` basis `Gamma_k`
+            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
+
+            # update mode `k` projection matrix onto the new span of `Gamma_k`
+            projections[[k]] <- tcrossprod(Gammas[[k]])
+
+            # compute loss for break condition
+            loss <- n^-1 * Reduce(`+`, Map(function(X_s, n_s) {
+                n_s * sum((X_s - mlm(X_s, projections))^2)
+            }, slice.means, slice.sizes))
+        }
+
+        # check break condition
+        if (abs(loss - loss.last) < eps * loss) {
+            break
+        }
+        loss.last <- loss
+    }
+
+    # mode sample covariance matrices
+    Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
+
+    # reduction matrices `Omega_k^-1 Gamma_k`, whose (reverse) Kronecker
+    # product spans the central tensor subspace (CTS) estimate
+    Map(solve, Omegas, Gammas)
+}
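A quick smoke test for the new TSIR (not part of the patch; toy data, names follow the signature above):

    library(tensorPredictors)
    X <- array(rnorm(4 * 4 * 100), dim = c(4, 4, 100))
    y <- factor(sample(1:5, 100, replace = TRUE))
    reductions <- TSIR(X, y, d = c(2, 2), sample.axis = 3L)
    sapply(reductions, dim)              # a 4 x 2 reduction matrix per mode
    B <- Reduce(`%x%`, rev(reductions))  # 16 x 4, spans the CTS estimate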
diff --git a/vis/ising/app.R b/vis/ising/app.R
new file mode 100644
index 0000000..7801412
--- /dev/null
+++ b/vis/ising/app.R
@@ -0,0 +1,347 @@
+# usage: R -e "shiny::runApp(port = 8080)"
+# usage: R -e "shiny::runApp(host = '127.0.0.1', port = 8080)"
+
+library(shiny)
+library(mvbernoulli)
+library(tensorPredictors)
+
+# configuration
+color.palet <- hcl.colors(64, "YlOrRd", rev = TRUE)
+
+# GMLM parameters
+n <- 250
+p <- c(4, 4)
+q <- c(2, 2)
+r <- 2
+
+
+eta1 <- 0 # intercept
+linspace <- seq(-1, 1, length.out = 4)
+
+# 270 deg (90 deg clockwise) rotation of matrix layout
+#
+# Used to get properly plotted matrices because `image` interprets the `z` matrix
+# as a table of `f(x[i], y[j])` values, so that the `x` axis corresponds to row
+# number and the `y` axis to column number, with column 1 at the bottom,
+# i.e. a 90 degree counter-clockwise rotation of the conventional printed layout
+# of a matrix. By first calling `rot270` on a matrix before passing it to
+# `image` the plotted matrix layout now matches the conventional printed layout.
+rot270 <- function(A) {
+    t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
+}
+plot.mat <- function(mat, add.values = FALSE, zlim = range(mat)) {
+    par(oma = rep(0, 4), mar = rep(0, 4))
+    img <- rot270(mat)
+    image(x = seq_len(nrow(img)), y = seq_len(ncol(img)), z = img,
+        zlim = zlim, col = color.palet, xaxt = "n", yaxt = "n", bty = "n")
+    if (add.values) {
+        text(x = rep(seq_len(nrow(img)), ncol(img)),
+            y = rep(seq_len(ncol(img)), each = nrow(img)),
+            round(img, 2), adj = 0.5, col = "black")
+    }
+}
+
+AR <- function(rho, dim) {
+    rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
+}
+AR.inv <- function(rho, dim) {
+    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
+    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
+    A / (1 - rho^2)
+}
+
+# User Interface (page layout)
+ui <- fluidPage(
+    tags$head(HTML("
+
+    ")),
+    withMathJax(),
+
+    titlePanel("Ising Model Simulation Data Generation"),
+    sidebarLayout(
+        sidebarPanel(
+            h2("Settings"),
+            h4("c1 (influence of $\\eta_1$)"),
+            sliderInput("c1", "", min = 0, max = 1, value = 1, step = 0.01),
+            h4("c2 (influence of $\\eta_2$)"),
+            sliderInput("c2", "", min = 0, max = 1, value = 1, step = 0.01),
+            sliderInput("y", "y", min = -1, max = 1, value = 0, step = 0.01),
+            fluidRow(
+                column(6,
+                    radioButtons("alphaType", "Type: $\\boldsymbol{\\alpha}_k$",
+                        choices = list(
+                            "linspace" = "linspace", "squared" = "squared", "QR" = "QR"
+                        ),
+                        selected = "linspace"
+                    )
+                ),
+                column(6,
+                    radioButtons("OmegaType", "Type: $\\boldsymbol{\\Omega}_k$",
+                        choices = list(
+                            "Identity" = "identity", "AR$(\\rho)$" = "AR", "AR$(\\rho)^{-1}$" = "AR.inv"
+                        ),
+                        selected = "AR"
+                    )
+                )
+            ),
+            sliderInput("rho", "rho", min = -1, max = 1, value = -0.55, step = 0.01),
+            actionButton("reset", "Reset")
+        ),
+        mainPanel(
+            fluidRow(
+                column(4,
+                    h3("$\\eta_{y,1}$"),
+                    plotOutput("eta_y1"),
+                ),
+                column(4,
+                    h3("$\\eta_{y,2}$"),
+                    plotOutput("eta_y2"),
+                ),
+                column(4,
+                    h3("$\\boldsymbol{\\Theta}_y$"),
+                    plotOutput("Theta_y")
+                )
+            ),
+            fluidRow(
+                column(4, offset = 2,
+                    h3("Expectation $\\mathbb{E}[\\mathcal{X}\\mid Y = y]$"),
+                    plotOutput("expectationPlot"),
+                ),
+                column(4,
+                    h3("Covariance $\\operatorname{Cov}(\\text{vec}(\\mathcal{X})\\mid Y = y)$"),
+                    plotOutput("covariancePlot"),
+                    textOutput("covRange"),
+                )
+            ),
+            fluidRow(
+                column(8, offset = 4,
+                    h3("iid samples $(X_i, y_i)$ with $y_i \\sim U[-1, 1]$ sorted")
+                ),
+                column(4,
+                    "Conditional Expectations",
+                    plotOutput("cond_expectations")
+                ),
+                column(4,
+                    "observations sorted by $y_i$",
+                    plotOutput("sample_sorted_y")
+                ),
+                column(4,
+                    "observations sorted (lexicographic order) by $\\mathcal{X}_i$",
+                    plotOutput("sample_sorted_X")
+                ),
+            ),
+            fluidRow(
+                column(6,
+                    h3("Sample Mean"),
+                    plotOutput("sampleMean")
+                ),
+                column(6,
+                    h3("Sample Cov"),
+                    plotOutput("sampleCov")
+                )
+            ),
+            h2("Explanation"),
+            "
+            The response $y$ follows a continuous uniform distribution,
+            $y\\sim U[-1, 1]$, from which $\\mathcal{F}_y$ is computed as
+
+            $$\\mathcal{F}_y = \\begin{pmatrix}
+                \\cos(\\pi y) & -\\sin(\\pi y) \\\\
+                \\sin(\\pi y) & \\cos(\\pi y)
+            \\end{pmatrix}.$$
+
+            Next are the GMLM parameters (for 'linspace' or 'squared' type
+            $\\boldsymbol{\\alpha}_k$, with the 'QR' type being random
+            semi-orthogonal matrices), which are set to be
+            $$\\overline{\\eta}_1 = 0$$
+            $$\\boldsymbol{\\alpha}_k^{\\text{linspace}} = \\begin{pmatrix}
+                -1 & 1 \\\\
+                -1/3 & 1/3 \\\\
+                1/3 & -1/3 \\\\
+                1 & -1
+            \\end{pmatrix},\\qquad\\boldsymbol{\\alpha}_k^{\\text{squared}} = \\begin{pmatrix}
+                -1 & 1 \\\\
+                -1/3 & 1/9 \\\\
+                1/3 & 1/9 \\\\
+                1 & 1
+            \\end{pmatrix}$$
+            for $k = 1,2$. The two-way interactions are modeled via the $\\boldsymbol{\\Omega}_k$ which
+            are the identity $\\boldsymbol{I}_4$ or one of
+            $$\\operatorname{AR}(\\rho) = \\begin{pmatrix}
+                {\\color{gray}1} & \\rho^1 & \\rho^2 & \\rho^3 \\\\
+                \\rho^1 & {\\color{gray}1} & \\rho^1 & \\rho^2 \\\\
+                \\rho^2 & \\rho^1 & {\\color{gray}1} & \\rho^1 \\\\
+                \\rho^3 & \\rho^2 & \\rho^1 & {\\color{gray}1}
+            \\end{pmatrix},\\qquad
+            \\operatorname{AR}(\\rho)^{-1} = \\frac{1}{1 - \\rho^2}\\begin{pmatrix}
+                {\\color{gray}1} & -\\rho & 0 & 0 \\\\
+                -\\rho & {\\color{gray}1+\\rho^2} & -\\rho & 0 \\\\
+                0 & -\\rho & {\\color{gray}1+\\rho^2} & -\\rho \\\\
+                0 & 0 & -\\rho & {\\color{gray}1}
+            \\end{pmatrix}.$$
+
+            The natural parameters given $y$ are then
+            $$\\boldsymbol{\\eta}_{y,1} \\equiv \\overline{\\boldsymbol{\\eta}}_1 +
+                \\mathcal{F}_y\\times_{k\\in[2]}\\boldsymbol{\\alpha}_k,$$
+            $$\\boldsymbol{\\eta}_{y,2} \\equiv \\bigotimes_{k = 2}^{1}\\boldsymbol{\\Omega}_k.$$
+            With that the conditional Ising model parameters are
+            $$\\boldsymbol{\\theta}_y = \\operatorname{vech}(
+                \\operatorname{diag}(\\boldsymbol{\\eta}_{y,1}) +
+                (\\boldsymbol{1}_p \\boldsymbol{1}_p' - \\mathbf{I}_p) \\odot \\boldsymbol{\\eta}_{y,2}
+            )$$
+            which is used to sample the predictors via the conditional distribution
+            $$\\operatorname{vec}(\\mathcal{X})\\mid Y = y \\sim \\text{Ising}(\\boldsymbol{\\theta}_y).$$
+            "
+        )
+    )
+)
+
+# Server logic
+server <- function(input, output, session) {
+
+    Fy <- reactive({
+        phi <- pi * input$y
+        matrix(c(
+            cos(phi), -sin(phi),
+            sin(phi),  cos(phi)
+        ), 2, 2, byrow = TRUE)
+    })
+    alphas <- reactive({
+        switch(input$alphaType,
+            "linspace" = list(
+                matrix(c(linspace, rev(linspace)), length(linspace), 2),
+                matrix(c(linspace, rev(linspace)), length(linspace), 2)
+            ),
+            "squared" = list(
+                matrix(c(linspace, linspace^2), length(linspace), 2),
+                matrix(c(linspace, linspace^2), length(linspace), 2)
+            ),
+            "QR" = Map(function(pj, qj) {
+                qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
+            }, p, q)
+        )
+    })
+    Omegas <- reactive({
+        switch(input$OmegaType,
+            "identity" = Map(diag, p),
+            "AR" = Map(AR, list(input$rho), dim = p),
+            "AR.inv" = Map(AR.inv, list(input$rho), dim = p)
+        )
+    })
+
+    eta_y1 <- reactive({
+        input$c1 * (mlm(Fy(), alphas()) + c(eta1))
+    })
+    eta_y2 <- reactive({
+        input$c2 * Reduce(`%x%`, rev(Omegas()))
+    })
+
+    # compute Ising model parameters from GMLM parameters given single `Fy`
+    theta_y <- reactive({
+        vech(diag(c(eta_y1())) + (1 - diag(nrow(eta_y2()))) * eta_y2())
+    })
+
+    E_y <- reactive({
+        mvbernoulli::ising_expectation(theta_y())
+    })
+    Cov_y <- reactive({
+        mvbernoulli::ising_cov(theta_y())
+    })
+
+    random_sample <- reactive({
+        c1 <- input$c1
+        c2 <- input$c2
+        eta_y_i2 <- eta_y2()
+
+        y <- sort(runif(n, -1, 1))
+        X <- sapply(y, function(y_i) {
+            phi <- pi * y_i
+            Fy_i <- matrix(c(
+                cos(phi), -sin(phi),
+                sin(phi),  cos(phi)
+            ), 2, 2)
+
+            eta_y_i1 <- c1 * (mlm(Fy_i, alphas()) + c(eta1))
+
+            theta_y_i <- vech(diag(c(eta_y_i1)) + (1 - diag(nrow(eta_y_i2))) * eta_y_i2)
+
+            ising_sample(1, theta_y_i)
+        })
+        attr(X, "p") <- prod(p)
+
+        as.mvbmatrix(X)
+    })
+
+    cond_expectations <- reactive({
+        c1 <- input$c1
+        c2 <- input$c2
+        eta_y_i2 <- eta_y2()
+
+        y <- seq(-1, 1, length.out = 50)
+        t(sapply(y, function(y_i) {
+            phi <- pi * y_i
+            Fy_i <- matrix(c(
+                cos(phi), -sin(phi),
+                sin(phi),  cos(phi)
+            ), 2, 2)
+
+            eta_y_i1 <- c1 * (mlm(Fy_i, alphas()) + c(eta1))
+
+            theta_y_i <- vech(diag(c(eta_y_i1)) + (1 - diag(nrow(eta_y_i2))) * eta_y_i2)
+
+            ising_expectation(theta_y_i)
+        }))
+    })
+
+    output$eta_y1 <- renderPlot({
+        plot.mat(eta_y1(), add.values = TRUE, zlim = c(-2, 2))
+    }, res = 108)
+    output$eta_y2 <- renderPlot({
+        plot.mat(eta_y2())
+    })
+    output$Theta_y <- renderPlot({
+        plot.mat(vech.pinv(theta_y()))
+    })
+
+    output$expectationPlot <- renderPlot({
+        plot.mat(matrix(E_y(), p[1], p[2]), add.values = TRUE, zlim = c(0, 1))
+    }, res = 108)
+    output$covariancePlot <- renderPlot({
+        plot.mat(Cov_y())
+    })
+    output$covRange <- renderText({
+        paste(round(range(Cov_y()), 3), collapse = " - ")
+    })
+    output$cond_expectations <- renderPlot({
+        plot.mat(cond_expectations(), zlim = 0:1)
+    })
+    output$sample_sorted_y <- renderPlot({
+        plot.mat(random_sample())
+    })
+    output$sample_sorted_X <- renderPlot({
+        X <- random_sample()
+        plot.mat(X[do.call(order, as.data.frame(X)), ])
+    })
+    output$sampleMean <- renderPlot({
+        Xmean <- matrix(colMeans(random_sample()), p[1], p[2])
+        plot.mat(Xmean, add.values = TRUE, zlim = c(0, 1))
+    }, res = 108)
+    output$sampleCov <- renderPlot({
+        plot.mat(cov(random_sample()))
+    })
+
+    observeEvent(input$reset, {
+        updateNumericInput(session, "c1", value = 1)
+        updateNumericInput(session, "c2", value = 1)
+        updateNumericInput(session, "y", value = 0)
+        updateNumericInput(session, "rho", value = -0.55)
+        updateRadioButtons(session, "OmegaType", selected = "AR")
+        updateRadioButtons(session, "alphaType", selected = "linspace")
+    })
+}
+
+# launch Shiny Application (start server)
+shinyApp(ui = ui, server = server)
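Aside (not part of the patch): the app assembles theta_y as vech(diag(eta_y1) + (1 - I) * eta_y2), while the simulation scripts overwrite the diagonal positions of vech(eta_y2); both constructions agree. A small consistency sketch, assuming `vech` from tensorPredictors and the `AR` helper defined above:

    library(tensorPredictors)
    eta_y1 <- rnorm(4)        # toy parameters with prod(p) = 4
    eta_y2 <- AR(-0.55, 4)
    theta.app <- vech(diag(eta_y1) + (1 - diag(4)) * eta_y2)
    theta.sim <- local({
        theta <- vech(eta_y2)
        ltri <- which(lower.tri(eta_y2, diag = TRUE))
        theta[which(diag(TRUE, 4)[ltri])] <- eta_y1
        theta
    })
    all.equal(theta.app, theta.sim)   # TRUE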