diff --git a/LaTeX/images/reduction.tex b/LaTeX/images/reduction.tex
index 292f52a..4d0be77 100644
--- a/LaTeX/images/reduction.tex
+++ b/LaTeX/images/reduction.tex
@@ -1,4 +1,27 @@
-\begin{tikzpicture}[scale = \tikzscale], line width = 1pt]
+\documentclass{standalone}
+
+\usepackage[T1]{fontenc}
+\usepackage{tikz}
+\usepackage{amsmath,amssymb}
+
+% matrices
+\newcommand*{\mat}[1]{\boldsymbol{#1}}
+% tensors (special case for lower case calligraphic letters)
+\newcommand*{\ten}[1]{
+    \ifnum\pdfstrcmp{#1}{`}=1 % lowercase argument
+        \mathfrak{#1}
+    \else % uppercase argument
+        \mathcal{#1}
+    \fi
+}
+% matrix transpose
+\renewcommand*{\t}[1]{{#1^{T}}}
+% Expected Value
+\newcommand{\E}{\operatorname{\mathbb{E}}}
+
+
+\begin{document}
+\begin{tikzpicture}[scale = 1.2, line width = 1pt]
 
 \def\rect#1#2#3{
     \draw (0, 0, 0) -- (#1, 0, 0) -- (#1, #2, 0) -- (0, #2, 0) -- cycle;
@@ -28,9 +51,10 @@
 \draw[fill = lightgray, fill opacity = 0.7]
     (0, 2.1, 0) -- (0, 2.1, -2) -- (0, 6.1, -2) -- (0, 6.1, 0) -- cycle;
 \node[opacity = 0.7, cm={0.66, 0.66, 0, 1, (0, 0)}]
-    at (0, 4.1, -1.1) {$\t{\mat{\beta}_3}$};
+    at (0, 5.3, -1.1) {$\t{\mat{\beta}_3}$};
 
 \draw[lightgray, line width = 0.7pt] (0, 5.1) arc (90:0:3);
 \draw[fill = lightgray, fill opacity = 0.7] (0, 2.1) rectangle +(1.5, 3)
     node [pos = 0.5] {$\t{\mat{\beta}_2}$};
 \end{tikzpicture}
+\end{document}
diff --git a/LaTeX/plots/aggr-2d-ising.csv b/LaTeX/plots/aggr-2d-ising.csv
index 005d256..1bc3ae9 100644
--- a/LaTeX/plots/aggr-2d-ising.csv
+++ b/LaTeX/plots/aggr-2d-ising.csv
@@ -1,6 +1,6 @@
 sample.size rep time.gmlm dist.subspace.gmlm dist.projection.gmlm time.tnormal dist.subspace.tnormal dist.projection.tnormal time.pca dist.subspace.pca dist.projection.pca time.hopca dist.subspace.hopca dist.projection.hopca time.lpca dist.subspace.lpca dist.projection.lpca time.clpca dist.subspace.clpca dist.projection.clpca time.tsir dist.subspace.tsir dist.projection.tsir time.mgcca dist.subspace.mgcca dist.projection.mgcca
-100 50.5 3.03076 0.3036349361 0.3036349361 -1 0.2868643362 0.2868643362 0.00073 0.818177689 0.984435713 0.00063 0.755946643 0.755946643 0.05648 0.819181741 0.953681387 0.0867 0.822312566 0.986781953 0.00613 0.4019671947 0.4019671947 0.04573 0.588313518 0.75377487
-200 50.5 3.10805 0.1992945381 0.1992945381 -1 0.2020230182 0.2020230182 0.00074 0.814338816 0.987890451 0.00093 0.690811228 0.690811228 0.05801 0.81457008 0.961377759 0.08991 0.825394861 0.991647771 0.00599 0.2511707246 0.2511707246 0.04656 0.476761868 0.629632653
-300 50.5 3.3052 0.1546113425 0.1546113425 -1 0.1537117318 0.1537117318 0.00075 0.798497064 0.993693989 0.00099 0.729634976 0.729634976 0.05696 0.820224046 0.982252313 0.09375 0.806834524 0.992228985 0.00636 0.1790215654 0.1790215654 0.04835 0.37049092 0.4917621054
-500 50.5 3.88258 0.1130383355 0.1130383355 -1 0.11118393534 0.11118393534 0.00095 0.817445534 0.996960022 0.00148 0.651972301 0.651972301 0.07627 0.816560225 0.989645322 0.11561 0.837453121 0.996154736 0.00715 0.1217933129 0.1217933129 0.06199 0.2348135641 0.3010364601
-750 50.5 3.97107 0.09585699718 0.09585699718 -1 0.09463671116 0.09463671116 0.00106 0.829576522 0.996509465 0.00179 0.6203066621 0.6203066621 0.09122 0.820908322 0.988956119 0.13158 0.849250091 0.997066922 0.00765 0.10393120662 0.10393120662 0.06512 0.1950222554 0.2482127769
+100 50.5 4.78067 0.2687172315 0.2687172315 -1 0.2740385439 0.2740385439 0.00142 0.879931169 0.955995509 0.00105 0.927846756 0.927846756 0.08114 0.823009087 0.924925441 0.12154 0.856587662 0.952690667 0.00723 0.4439691501 0.4439691501 0.05715 0.686201489 0.888446221
+200 50.5 4.89815 0.2088401765 0.2088401765 -1 0.2130251361 0.2130251361 0.00154 0.892216118 0.956475415 0.00147 0.955911213 0.955911213 0.08683 0.821679399 0.918999512 0.12544 0.867839167 0.956083632 0.00747 0.2895647172 0.2895647172 0.05528 0.618966058 0.818845212
+300 50.5 4.61123 0.1607295666 0.1607295666 -1 0.1669095997 0.1669095997 0.0015 0.895647105 0.951015895 0.00166 0.96455755 0.96455755 0.09651 0.823301202 0.926266165 0.12515 0.866042789 0.954992902 0.00736 0.1983123012 0.1983123012 0.04981 0.584343635 0.780637806
+500 50.5 4.02391 0.12353912092 0.12353912092 -1 0.12367372452 0.12367372452 0.00136 0.901367785 0.949912861 0.00178 0.976238781 0.976238781 0.10512 0.821751266 0.917064858 0.11746 0.882995254 0.962274667 0.00687 0.1500581675 0.1500581675 0.04545 0.586647965 0.797306219
+750 50.5 4.01303 0.1039492242 0.1039492242 -1 0.10122631012 0.10122631012 0.00144 0.903545694 0.947406206 0.00235 0.982184029 0.982184029 0.12965 0.821035531 0.916146581 0.13098 0.878463364 0.957372071 0.00792 0.11926051548 0.11926051548 0.05085 0.560734835 0.763031534
diff --git a/tensorPredictors/R/LSIR.R b/tensorPredictors/R/LSIR.R
index ccfcdc7..8c4e1b9 100644
--- a/tensorPredictors/R/LSIR.R
+++ b/tensorPredictors/R/LSIR.R
@@ -3,66 +3,69 @@
 #' @param X matrix of dim \eqn{n \times p t} with each row representing a
 #'  vectorized \eqn{p \times t} observation.
 #' @param y vector of \eqn{n} elements as factors. (can be coersed to factors)
-#' @param p,t,k,r dimensions.
-#'
-#' @returns a list with components
-#'  alpha: matrix of \eqn{t \times r}
-#'  beta: matrix of \eqn{p \times k}
 #'
 #' TODO: finish
 #'
 #' @export
-LSIR <- function(X, y, p, t, k = 1L, r = 1L) {
-    # the code assumes:
-    # alpha: T x r, beta: p x k, X_i: p x T, for ith observation
+LSIR <- function(X, y,
+    reduction.dims = rep(1L, length(dim(X)) - 1L),
+    sample.axis = 1L,
+    nr.slices = 10L,    # default slices, ignored if y is a factor or integer
+    eps = sqrt(.Machine$double.eps),
+    slice.method = c("cut", "ecdf")     # ignored if y is a factor or integer
+) {
+    # In case `y` is not discrete, group `y` into slices
+    if (!(is.factor(y) || is.integer(y))) {
+        slice.method <- match.arg(slice.method)
+        if (slice.method == "ecdf") {
+            y <- cut(ecdf(y)(y), nr.slices)
+        } else {
+            y <- cut(y, nr.slices)
+            # ensure there are no empty slices
+            if (any(table(y) == 0)) {
+                y <- as.factor(as.integer(y))
+            }
+        }
+    }
+    if (!is.factor(y)) {
+        y <- factor(y)
+    }
 
-    # Check and transform parameters.
-    if (!is.matrix(X)) X <- as.matrix(X)
-    n <- nrow(X)
-    stopifnot(
-        ncol(X) == p * t,
-        n == length(y)
+    stopifnot(dim(X)[sample.axis] == length(y))
+
+    # rearrange `X` such that the first axis enumerates observations
+    axis.perm <- c(sample.axis, seq_along(dim(X))[-sample.axis])
+    X <- aperm(X, axis.perm)
+
+    modes <- seq_along(dim(X))[-1L]
+    n <- dim(X)[1L]
+
+    sigmas <- lapply(seq_along(modes), function(i) {
+        matrix(rowMeans(apply(X, modes[-i], cov)), dim(X)[modes[i]])
+    })
+    # Omega_i = Sigma_i^{-1 / 2}
+    isqrt_sigmas <- Map(matpow, sigmas, -1 / 2)
+
+    # Normalize observations
+    Z <- mlm(X - rep(colMeans(X), each = dim(X)[1L]), isqrt_sigmas, modes = modes)
+
+    # Estimate conditional covariances Omega = Cov(E[Z | Y])
+    slice.args <- c(
+        list(Z), rep(alist(, )[1], length(dim(X))), list(drop = FALSE)
     )
-    if (!is.factor(y)) y <- factor(y)
+    omegas <- lapply(seq_along(modes), function(i) {
+        matrix(Reduce(`+`, lapply(levels(y), function(l) {
+            slice.args[[2]] <- y == l
+            rowMeans(apply(do.call(`[`, slice.args), modes[-i], function(z) {
+                (nrow(z) / n) * tcrossprod(colMeans(z))
+            }))
+        })), dim(X)[modes[i]])
+    })
 
-    # Restructure X into a 3D tensor with axis (observations, predictors, time).
-    dim(X) <- c(n, p, t)
+    # Compute central subspace basis estimate
+    betas <- mapply(function(isqrt_sigma, omega, reduction_dim) {
+        isqrt_sigma %*% La.svd(omega, reduction_dim, 0L)$u
+    }, isqrt_sigmas, omegas, reduction.dims, SIMPLIFY = FALSE)
 
-    # Estimate predictor/time covariance matrices \hat{Sigma}_1, \hat{Sigma}_2.
-    sigma_p <- matrix(rowMeans(apply(X, 3, cov)), p, p)
-    sigma_t <- matrix(rowMeans(apply(X, 2, cov)), t, t)
-
-    # Normalize X as vec(Z) = Sigma^-1/2 (vec(X) - E(vec(X)))
-    dim(X) <- c(n, p * t)
-    sigma_p_isqrt <- matpow(sigma_p, -0.5)
-    sigma_t_isqrt <- matpow(sigma_t, -0.5)
-    Z <- scale(X, scale = FALSE) %*% kronecker(sigma_t_isqrt, sigma_p_isqrt)
-    # Both as 3D tensors.
-    dim(X) <- dim(Z) <- c(n, p, t)
-
-    # Estimate the conditional predictor/time covariance matrix Omega = cov(E(Z|Y)).
-    omega_p <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
-        rowMeans(apply(Z[y == l, , ], 3, function(z) {
-            (nrow(z) / n) * tcrossprod(colMeans(z))
-        }))
-    })), p, p)
-    omega_t <- matrix(Reduce(`+`, lapply(levels(y), function(l) {
-        rowMeans(apply(Z[y == l, , ], 2, function(z) {
-            (nrow(z) / n) * tcrossprod(colMeans(z))
-        }))
-    })), t, t)
-    omega <- kronecker(omega_t, omega_p)
-
-    # Compute seperate SVD of estimated omega's and use that for an estimate of
-    # a central subspace basis.
-    svd_p <- La.svd(omega_p)
-    svd_t <- La.svd(omega_t)
-    beta <- sigma_p_isqrt %*% svd_p$u[, k]
-    alpha <- sigma_t_isqrt %*% svd_t$u[, r]
-
-    return(list(sigma_p = sigma_p, sigma_t = sigma_t,
-        sigma = kronecker(sigma_t, sigma_p),
-        alpha = alpha, beta = beta,
-        Delta = omega,
-        B = kronecker(alpha, beta)))
+    list(betas = betas)
 }
diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R
index cae0d07..2ce7dee 100644
--- a/tensorPredictors/R/TSIR.R
+++ b/tensorPredictors/R/TSIR.R
@@ -1,11 +1,14 @@
 #' Tensor Sliced Inverse Regression
 #'
 #' @export
-TSIR <- function(X, y, d, sample.axis = 1L,
+TSIR <- function(X, y,
+    reduction.dims = rep(1L, length(dim(X)) - 1L),
+    sample.axis = 1L,
     nr.slices = 10L,    # default slices, ignored if y is a factor or integer
     max.iter = 50L,
-    eps = sqrt(.Machine$double.eps),
-    slice.method = c("cut", "ecdf")     # ignored if y is a factor or integer
+    cond.threshold = Inf, eps = sqrt(.Machine$double.eps),
+    slice.method = c("cut", "ecdf"),    # ignored if y is a factor or integer
+    logger = NULL
 ) {
 
     if (!(is.factor(y) || is.integer(y))) {
@@ -23,7 +26,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
 
     stopifnot(exprs = {
         dim(X)[sample.axis] == length(y)
-        length(d) + 1L == length(dim(X))
+        length(reduction.dims) + 1L == length(dim(X))
     })
 
     # rearrange `X`, `Fy` such that the last axis enumerates observations
@@ -60,7 +63,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
             mcrossprod(rowMeans(X_s - c(mu_s), dims = length(modes)), mode = k)
         }, slices, slice.means, slice.sizes)) / nr.slices
     }, modes)
-    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = d, nv = 0), list("u"))
+    Gammas <- Map(`[[`, Map(La.svd, Sigmas, nu = reduction.dims, nv = 0), list("u"))
 
     # setup projections as `Proj_k = Gamma_k Gamma_k'`
     projections <- Map(tcrossprod, Gammas)
@@ -70,6 +73,12 @@ TSIR <- function(X, y, d, sample.axis = 1L,
         n_s * sum((X_s - mlm(X_s, projections))^2)
     }, slice.means, slice.sizes))
 
+    # invoke the logger
+    if (is.function(logger)) do.call(logger, list(
+        iter = 0L, Gammas = Gammas, Sigmas = Sigmas, loss = loss.last
+    ))
+
+    # Iterate till convergence
     for (iter in seq_len(max.iter)) {
         # For each mode assume mode reduction `Gamma`s fixed and update
@@ -86,7 +95,7 @@ TSIR <- function(X, y, d, sample.axis = 1L,
             }, slice.proj, slice.sizes))
 
             # Recompute mode `k` basis `Gamma_k`
-            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = d[k], nv = 0)$u
+            Gammas[[k]] <- La.svd(Sigmas[[k]], nu = reduction.dims[k], nv = 0)$u
 
             # update mode `k` projection matrix onto the new span of `Gamma_k`
             projections[[k]] <- tcrossprod(Gammas[[k]])
@@ -97,6 +106,11 @@ TSIR <- function(X, y, d, sample.axis = 1L,
             }, slice.means, slice.sizes))
         }
 
+        # invoke the logger
+        if (is.function(logger)) do.call(logger, list(
+            iter = iter, Gammas = Gammas, Sigmas = Sigmas, loss = loss
+        ))
+
         # check break condition
         if (abs(loss - loss.last) < eps * loss) {
             break
@@ -107,6 +121,18 @@ TSIR <- function(X, y, d, sample.axis = 1L,
     # mode sample covariance matrices
     Omegas <- Map(function(k) n^-1 * mcrossprod(X, mode = k), modes)
 
+    # Check condition of sample covariances and perform regularization if needed
+    if (is.finite(cond.threshold)) {
+        for (j in seq_along(Omegas)) {
+            # Compute min and max eigenvalues
+            min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values)
+            # The condition is approximately `kappa(Omegas[[j]]) > cond.threshold`
+            if (min_max[2] > cond.threshold * min_max[1]) {
+                Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]]))
+            }
+        }
+    }
+
     # reductions matrices `Omega_k^-1 Gamma_k` where there (reverse) kronecker
     # product spans the central tensor subspace (CTS) estimate
     structure(Map(solve, Omegas, Gammas), mcov = Omegas, Gammas = Gammas)
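Likewise, a hypothetical TSIR call exercising the new parameters (reusing X and y from the sketch above; cond.threshold = 25 is an arbitrary illustrative value, and the logger signature mirrors the do.call invocations in the patch):

    # Trace the iterations and regularize badly conditioned mode covariances.
    fit <- TSIR(X, y, reduction.dims = c(1L, 1L), sample.axis = 1L,
        cond.threshold = 25,
        logger = function(iter, Gammas, Sigmas, loss) {
            cat(sprintf("iter %3d: loss = %.6f\n", iter, loss))
        })
    str(attr(fit, "Gammas"))    # per-mode bases; `fit` itself holds `Omega_k^-1 Gamma_k`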
diff --git a/tensorPredictors/R/gmlm_tensor_normal.R b/tensorPredictors/R/gmlm_tensor_normal.R
index dffe072..7e7032d 100644
--- a/tensorPredictors/R/gmlm_tensor_normal.R
+++ b/tensorPredictors/R/gmlm_tensor_normal.R
@@ -47,7 +47,7 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
     Omegas <- Map(diag, dimX)
 
     # Per mode covariance directions
-    # Note: (the directions are transposed!)
+    # Note: directions are transposed!
     dirsX <- Map(function(Sigma) {
         SVD <- La.svd(Sigma, nu = 0)
         sqrt(SVD$d) * SVD$vt
@@ -85,8 +85,8 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
 
     ### Phase 2: (Block) Coordinate Descent
     for (iter in seq_len(max.iter)) {
-        # update every beta (in random order)
-        for (j in sample.int(length(betas))) {
+        # update every beta
+        for (j in seq_along(betas)) {
             FxB_j <- mlm(F, betas[-j], modes[-j])
             FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
             betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
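As a sanity check on the dirsX factor kept in context above (illustration only, not part of the patch): for a symmetric positive definite Sigma, the transposed factor sqrt(SVD$d) * SVD$vt reproduces Sigma via crossprod:

    Sigma <- crossprod(matrix(rnorm(16), 4))    # random symmetric positive definite matrix
    SVD <- La.svd(Sigma, nu = 0)                # for SPD input: Sigma = V diag(d) V'
    dirs <- sqrt(SVD$d) * SVD$vt                # rows of `vt` scaled by sqrt of singular values
    all.equal(crossprod(dirs), Sigma)           # TRUE: t(dirs) %*% dirs recovers Sigma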