Compare commits
3 commits: 203028e255...36dd08c7c9

Author | SHA1 | Date
---|---|---
Daniel Kapla | 36dd08c7c9 |
Daniel Kapla | b8064c90ef |
Daniel Kapla | d95500c56e |
@@ -66,6 +66,15 @@
 \newcommand{\pinv}[1]{{{#1}^{\dagger}}} % `Moore-Penrose pseudoinverse`
 \newcommand{\todo}[1]{{\color{red}TODO: #1}}

+% \DeclareFontFamily{U}{mathx}{\hyphenchar\font45}
+% \DeclareFontShape{U}{mathx}{m}{n}{
+%   <5> <6> <7> <8> <9> <10>
+%   <10.95> <12> <14.4> <17.28> <20.74> <24.88>
+%   mathx10
+% }{}
+% \DeclareSymbolFont{mathx}{U}{mathx}{m}{n}
+% \DeclareMathSymbol{\bigtimes}{1}{mathx}{"91}
+
 \begin{document}

 \maketitle
@@ -73,6 +82,35 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%% Introduction %%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Notation}
+We begin with a brief summary of the notation used.
+
+\todo{write this}
+
+Let $\ten{A}$ be an order (rank) $r$ tensor of dimensions $p_1\times ... \times p_r$ and let $\mat{B}_i$, $i = 1, ..., r$, be matrices of dimensions $q_i\times p_i$. Then
+\begin{displaymath}
+    \ten{A} \ttm[1] \mat{B}_1 \ttm[2] \ldots \ttm[r] \mat{B}_r
+    = \ten{A}\times\{ \mat{B}_1, ..., \mat{B}_r \}
+    = \ten{A}\times_{i\in[r]} \mat{B}_i
+    = (\ten{A}\times_{i\in[r]\backslash j} \mat{B}_i)\ttm[j]\mat{B}_j.
+\end{displaymath}
+As an alternative example, consider
+\begin{displaymath}
+    \ten{A}\times_2\mat{B}_2\times_3\mat{B}_3 = \ten{A}\times\{ \mat{I}, \mat{B}_2, \mat{B}_3 \} = \ten{A}\times_{i\in\{2, 3\}}\mat{B}_i.
+\end{displaymath}
+Another example is
+\begin{displaymath}
+    \mat{B}\mat{A}\t{\mat{C}} = \mat{A}\times_1\mat{B}\times_2\mat{C}
+    = \mat{A}\times\{\mat{B}, \mat{C}\}.
+\end{displaymath}
+The mode product interacts with matricization via
+\begin{displaymath}
+    (\ten{A}\ttm[i]\mat{B})_{(i)} = \mat{B}\ten{A}_{(i)}.
+\end{displaymath}
+
+\todo{continue}
+
+
 \section{Introduction}
 We assume the model
 \begin{displaymath}
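Reviewer note on the notation hunk above: the mode product and the matricization identity (A x_j B)_(j) = B A_(j) can be checked in a few lines of base R. This sketch is an illustration added here, not part of the commits; `matricize` and `ttm` are hypothetical helper names (the package itself implements the mode product in C, see the last hunk of this comparison).

```r
# Mode-j matricization A_(j): rows index mode j, columns run over the
# remaining modes in column-major order.
matricize <- function(A, j) t(apply(A, j, identity))

# Mode-j product A x_j B via the identity (A x_j B)_(j) = B A_(j).
ttm <- function(A, B, j) {
    dims <- dim(A)
    stopifnot(ncol(B) == dims[j])
    C <- array(t(B %*% matricize(A, j)), dim = c(dims[-j], nrow(B)))
    # move the (currently last) mode back into position j
    aperm(C, append(seq_len(length(dims) - 1L), length(dims), after = j - 1L))
}

# Check the identity on random data.
A <- array(rnorm(2 * 3 * 4), dim = c(2, 3, 4))
B <- matrix(rnorm(5 * 3), 5, 3)
all.equal(matricize(ttm(A, B, 2), 2), B %*% matricize(A, 2))  # TRUE
```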
@@ -415,15 +453,45 @@ This leads to the following form of the differential of $\tilde{l}$ given by
 and therefore the gradients
 \begin{align*}
     \nabla_{\mat{\alpha}}\tilde{l}(\mat{\alpha}, \mat{\beta}) &= \sum_{i = 1}^n \t{\mat{G}_i}\mat{\beta}\mat{f}_{y_i}
-        = \ten{G}_{(3)}\t{(\ten{F}\ttm[2]\beta)_{(3)}}, \\
+        = \ten{G}_{(3)}\t{(\ten{F}\ttm[2]\mat{\beta})_{(3)}}, \\
     \nabla_{\mat{\beta}} \tilde{l}(\mat{\alpha}, \mat{\beta}) &= \sum_{i = 1}^n \mat{G}_i\mat{\alpha}\t{\mat{f}_{y_i}}
-        = \ten{G}_{(2)}\t{(\ten{F}\ttm[3]\beta)_{(2)}}.
+        = \ten{G}_{(2)}\t{(\ten{F}\ttm[3]\mat{\alpha})_{(2)}}.
 \end{align*}

 \todo{check the tensor version of the gradient!!!}

 \newpage

+\section{Thoughts on initial value estimation}
+\todo{This section uses an alternative notation as it already tries to generalize to general multi-dimensional arrays. Furthermore, one of the main differences is that the observations are indexed in the \emph{last} mode. The benefit of this is that the mode product and parameter matrix indices match not only in the population model but also in sample versions.}
+
+Let $\ten{X}, \ten{F}$ be order (rank) $r$ tensors of dimensions $p_1\times ... \times p_r$ and $q_1\times ... \times q_r$, respectively, and denote by $\ten{\epsilon}$ an error tensor of the same order and dimensions as $\ten{X}$. The model considered for the $i$-th observation is
+\begin{displaymath}
+    \ten{X}_i = \ten{\mu} + \ten{F}_i\times\{ \mat{\alpha}_1, ..., \mat{\alpha}_r \} + \ten{\epsilon}_i
+\end{displaymath}
+where we assume the $\ten{\epsilon}_i$ to be i.i.d. mean zero tensor normal distributed, $\ten{\epsilon}\sim\mathcal{TN}(0, \mat{\Delta}_1, ..., \mat{\Delta}_r)$ with $\mat{\Delta}_j\in\mathcal{S}^{p_j}_{++}$ for $j = 1, ..., r$. Given $n$ observations, the collected model containing all observations is
+\begin{displaymath}
+    \ten{X} = \ten{\mu} + \ten{F}\times\{ \mat{\alpha}_1, ..., \mat{\alpha}_r, \mat{I}_n \} + \ten{\epsilon}
+\end{displaymath}
+which is almost identical: the observations $\ten{X}_i, \ten{F}_i$ are simply stacked on an additional $(r + 1)$-st mode, leading to response, predictor and error tensors $\ten{X}, \ten{F}$ of order (rank) $r + 1$ and dimensions $p_1\times...\times p_r\times n$ for $\ten{X}, \ten{\epsilon}$ and $q_1\times...\times q_r\times n$ for $\ten{F}$.
+
+In the following we assume w.l.o.g. that $\ten{\mu} = 0$; if this is not the case, we simply replace $\ten{X}_i$ with $\ten{X}_i - \ten{\mu}$ for $i = 1, ..., n$ before collecting all the observations in the response tensor $\ten{X}$.
+
+The goal here is to find reasonable estimates of $\mat{\alpha}_j$, $j = 1, ..., r$, for the mean model
+\begin{displaymath}
+    \E[\ten{X}|\ten{F}, \mat{\alpha}_1, ..., \mat{\alpha}_r] = \ten{F}\times\{\mat{\alpha}_1, ..., \mat{\alpha}_r, \mat{I}_n\}
+    = \ten{F}\times_{i\in[r]}\mat{\alpha}_i.
+\end{displaymath}
+Under the mean model, using the general mode product relation $(\ten{A}\times_j\mat{B})_{(j)} = \mat{B}\ten{A}_{(j)}$, we get for each mode $j = 1, ..., r$
+\begin{align*}
+    \ten{X}_{(j)}\t{\ten{X}_{(j)}} \overset{\text{SVD}}{=} \mat{U}_j\mat{D}_j\t{\mat{U}_j}
+    = \mat{\alpha}_j(\ten{F}\times_{i\in[r]\backslash j}\mat{\alpha}_i)_{(j)}
+      \t{(\ten{F}\times_{i\in[r]\backslash j}\mat{\alpha}_i)_{(j)}}\t{\mat{\alpha}_j}.
+\end{align*}
+Using this relation we construct an iterative estimation procedure, with initial estimates $\hat{\mat{\alpha}}_j^{(0)} = \mat{U}_j[, 1:q_j]$, the first $q_j$ columns of $\mat{U}_j$.
+
+\todo{continue}
+
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%% Numerical Examples %%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
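A hedged sketch of the initialization proposed above (illustration only, not from the commits; it reuses the hypothetical `matricize` helper from the earlier note, with observations indexed on the last mode). The first q_j left singular vectors of the mode-j matricization of X equal the top eigenvectors of X_(j) t(X_(j)), so a plain SVD suffices:

```r
# Initial estimates: for each mode j, the first q_j left singular vectors
# of the mode-j matricization of X (the U_j[, 1:q_j] from the text above).
init.alphas <- function(X, q) {
    r <- length(dim(X)) - 1L  # the last mode indexes the n observations
    lapply(seq_len(r), function(j) {
        svd(matricize(X, j), nu = q[j], nv = 0L)$u
    })
}

# Example with hypothetical sizes: p = (4, 5, 6), q = (2, 3, 3), n = 100.
X <- array(rnorm(4 * 5 * 6 * 100), dim = c(4, 5, 6, 100))
alpha0 <- init.alphas(X, q = c(2, 3, 3))
sapply(alpha0, dim)  # columns: 4x2, 5x3, 6x3
```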
@@ -596,6 +664,31 @@ if and only if $\vec\mat{X}\sim\mathcal{N}_{p q}(\vec\mat\mu, \mat\Delta_1\otime
     f(\mat{X}) = \frac{1}{(2\pi)^{p q / 2}|\mat\Delta_1|^{p / 2}|\mat\Delta_2|^{q / 2}}\exp\left(-\frac{1}{2}\tr(\mat\Delta_1^{-1}\t{(\mat X - \mat \mu)}\mat\Delta_2^{-1}(\mat X - \mat \mu))\right).
 \end{displaymath}

+\section{Sampling from a Multi-Array Normal Distribution}
+Let $\ten{X}$ be an order (rank) $r$ multi-array random variable of dimensions $p_1\times...\times p_r$ following a multi-array (or tensor) normal distribution
+\begin{displaymath}
+    \ten{X}\sim\mathcal{TN}(\mu, \mat{\Delta}_1, ..., \mat{\Delta}_r).
+\end{displaymath}
+Its density is given by
+\begin{displaymath}
+    f(\ten{X}) = (2\pi)^{-p / 2}\Big( \prod_{i = 1}^r |\mat{\Delta}_i|^{q_i} \Big)^{-1/2}
+    \exp\!\left( -\frac{1}{2}\langle \ten{X} - \mu, (\ten{X} - \mu)\times\{\mat{\Delta}_1^{-1}, ..., \mat{\Delta}_r^{-1}\} \rangle \right)
+\end{displaymath}
+with $p = \prod_{i = 1}^r p_i$ and $q_i = \prod_{j \neq i}p_j$. This is equivalent to the vectorized $\vec\ten{X}$ following a multivariate normal distribution
+\begin{displaymath}
+    \vec{\ten{X}}\sim\mathcal{N}_{p}(\vec{\mu}, \mat{\Delta}_r\otimes...\otimes\mat{\Delta}_1).
+\end{displaymath}
+
+One way to sample from the multi-array normal is to sample from this multivariate normal and reshape the result, but this is usually very inefficient because it requires storing the $p\times p$ covariance matrix. Instead, it is more efficient to sample $\ten{Z}$ as a tensor of the same shape as $\ten{X}$ with standard normal entries and transform it:
+\begin{displaymath}
+    \ten{Z}\sim\mathcal{TN}(0, \mat{I}_{p_1}, ..., \mat{I}_{p_r})
+    \quad\Rightarrow\quad
+    \ten{X} = \ten{Z}\times\{\mat{\Delta}_1^{1/2}, ..., \mat{\Delta}_r^{1/2}\} + \mu\sim\mathcal{TN}(\mu, \mat{\Delta}_1, ..., \mat{\Delta}_r),
+\end{displaymath}
+where sampling from the standard multi-array normal simply means sampling every element of $\ten{Z}$ from a standard normal.
+
+\todo{Check this!!!}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%% Reference Summaries %%%
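This sampling scheme is what the new `rtensornorm` function at the end of this comparison implements via the package's `ttm` and `matpow`. A dependency-free sketch (illustration only; `ttm` is the hypothetical helper from the earlier note):

```r
# Sample n draws of a 3 x 4 tensor normal with AR(0.5) factor covariances.
n <- 100
Delta <- lapply(c(3, 4), function(d) 0.5^abs(outer(seq_len(d), seq_len(d), `-`)))
Z <- array(rnorm(3 * 4 * n), dim = c(3, 4, n))  # standard normal entries
X <- Z
for (i in seq_along(Delta)) {
    e <- eigen(Delta[[i]], symmetric = TRUE)
    root <- e$vectors %*% (sqrt(e$values) * t(e$vectors))  # Delta_i^{1/2}
    X <- ttm(X, root, i)  # multiply mode i by the covariance square root
}
# Each slice X[, , k] is one draw from TN(0, Delta_1, Delta_2).
```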
@@ -2,6 +2,16 @@ library(tensorPredictors)
 library(dplyr)
 library(ggplot2)

+## Logger callbacks
+log.prog <- function(max.iter) {
+    function(iter, loss, alpha, beta, ...) {
+        select <- as.integer(seq(1, max.iter, len = 30) <= iter)
+        cat("\r[", paste(c(" ", "=")[1 + select], collapse = ""),
+            "] ", iter, "/", max.iter, sep = "")
+    }
+}
+
+
 ### Exec all methods for a given data set and collect logs ###
 sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {

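Illustrative use of the `log.prog` logger added above (the exact bar fill depends on the iteration; output is overwritten in place via `"\r"`):

```r
logger <- log.prog(500L)
logger(iter = 125L, loss = NA, alpha = NULL, beta = NULL)
# prints a bar like: [========                      ] 125/500
```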
@@ -20,9 +30,8 @@ sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {
         )

         cat(sprintf(
-            "%3d | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
-            iter, loss,
-            dist,
+            "%s(%3d) | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
+            name, iter, loss, dist,
             nrow(alpha), ncol(alpha), dist.alpha,
             nrow(beta), ncol(beta), dist.beta
         ))
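For reference, the updated format string (now prefixed with the method name) produces log lines of this shape, shown here with illustrative values rather than actual output from the commits:

```r
cat(sprintf(
    "%s(%3d) | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
    "approx", 42, -1234.5678, 1.2e-3, 3, 5, 4.5e-4, 11, 7, 6.7e-4))
# approx( 42) | l = -1234.5678   - dist = 1.2000e-03 - alpha(3, 5) = 4.5000e-04 - beta(11, 7) = 6.7000e-04
```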
@@ -30,12 +39,11 @@ sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {
     }

     # Initialize logger history targets
-    hist.base <- hist.new <- hist.momentum <- # hist.kron <-
+    hist.base <- hist.new <- hist.momentum <- hist.approx <- # hist.kron <-
         data.frame(iter = seq(0L, max.iter),
             loss = NA, dist = NA, dist.alpha = NA, dist.beta = NA,
             norm.alpha = NA, norm.beta = NA
         )
-    hist.kron <- NULL # TODO: fit kron version

     # Base (old)
     kpir.base(X, Fy, shape, max.iter = max.iter, logger = logger("base"))
@@ -49,16 +57,54 @@ sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {
     # # Residual Covariance Kronecker product assumption version
     # kpir.kron(X, Fy, shape, max.iter = max.iter, logger = logger("kron"))

+    # Approximated MLE with Nesterov Momentum
+    kpir.approx(X, Fy, shape, max.iter = max.iter, logger = logger("approx"))
+
     # Add method tags
-    hist.base$type <- factor("base")
-    hist.new$type <- factor("new")
-    hist.momentum$type <- factor("momentum")
-    # hist.kron$type <- factor("kron")
+    hist.base$method <- factor("base")
+    hist.new$method <- factor("new")
+    hist.momentum$method <- factor("momentum")
+    # hist.kron$method <- factor("kron")
+    hist.approx$method <- factor("approx")

     # Combine results and return
-    rbind(hist.base, hist.new, hist.momentum, hist.kron)
+    rbind(hist.base, hist.new, hist.momentum, hist.approx) #, hist.kron
 }

+## Plot helper function
+plot.hist2 <- function(hist, response, type = "all", ...) {
+    # Extract final results from history
+    sub <- na.omit(hist[c("iter", response, "method", "repetition")])
+    sub <- aggregate(sub, list(sub$method, sub$repetition), tail, 1)
+
+    # Setup ggplot
+    p <- ggplot(hist, aes_(x = quote(iter),
+                           y = as.name(response),
+                           color = quote(method),
+                           group = quote(interaction(method, repetition))))
+    # Add requested layers
+    if (type == "all") {
+        p <- p + geom_line(na.rm = TRUE)
+        p <- p + geom_point(data = sub)
+    } else if (type == "mean") {
+        p <- p + geom_line(alpha = 0.5, na.rm = TRUE, linetype = "dotted")
+        p <- p + geom_point(data = sub, alpha = 0.5)
+        p <- p + geom_line(aes(group = method),
+                           stat = "summary", fun = "mean", na.rm = TRUE)
+    } else if (type == "median") {
+        p <- p + geom_line(alpha = 0.5, na.rm = TRUE, linetype = "dotted")
+        p <- p + geom_point(data = sub, alpha = 0.5)
+        p <- p + geom_line(aes(group = method),
+                           stat = "summary", fun = "median", na.rm = TRUE)
+    }
+    # return with theme and annotations
+    p + labs(...) + theme(legend.position = "bottom")
+}
+
+
+################################################################################
+### Sim 1 / vec(X) has AR(0.5) Covariance ###
+################################################################################
+
 ## Generate some test data / DEBUG
 n <- 200 # Sample Size
 p <- sample(1:15, 1) # 11
@@ -68,7 +114,10 @@ r <- sample(1:15, 1) # 5
 print(c(n, p, q, k, r))

 hist <- NULL
-for (rep in 1:20) {
+reps <- 20
+for (rep in 1:reps) {
+    cat(sprintf("%4d / %d simulation rep. started\n", rep, reps))
+
     alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
     beta.true <- beta <- matrix(rnorm(p * k), p, k)
     y <- rnorm(n)
@ -87,122 +136,479 @@ for (rep in 1:20) {
|
||||||
hist <- rbind(hist, hist.sim)
|
hist <- rbind(hist, hist.sim)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Save simulation results
|
||||||
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
|
saveRDS(hist, file = sprintf("AR_%s.rds", datetime))
|
||||||
|
|
||||||
saveRDS(hist, file = "AR.rds")
|
# for GGPlot2, as factors for grouping
|
||||||
hist$repetition <- factor(hist$repetition)
|
hist$repetition <- factor(hist$repetition)
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
|
||||||
geom_line(aes(y = loss)) +
|
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(loss)),
|
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
|
||||||
), aes(y = loss)) +
|
|
||||||
labs(
|
|
||||||
title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
|
||||||
l(hat(alpha), hat(beta)))),
|
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
||||||
x = "nr. of iterations",
|
|
||||||
y = expression(l(hat(alpha), hat(beta))),
|
|
||||||
color = "method"
|
|
||||||
) +
|
|
||||||
theme(legend.position = "bottom")
|
|
||||||
|
|
||||||
dev.print(png, file = "sim01_loss.png", width = 768, height = 768, res = 125)
|
# Save simulation results
|
||||||
|
sim.name <- "sim01"
|
||||||
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
|
saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime))
|
||||||
|
|
||||||
|
# for GGPlot2, as factors for grouping
|
||||||
|
hist$repetition <- factor(hist$repetition)
|
||||||
|
|
||||||
|
for (response in c("loss", "dist", "dist.alpha", "dist.beta")) {
|
||||||
|
for (fun in c("all", "mean", "median")) {
|
||||||
|
print(plot.hist2(hist, response, fun, title = fun) + coord_trans(x = "log1p"))
|
||||||
|
dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun),
|
||||||
|
width = 768, height = 768, res = 125)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
################################################################################
|
||||||
geom_line(aes(y = dist)) +
|
### Sim 2 / X has AR(0.707) %x% AR(0.707) Covariance ###
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(dist)),
|
################################################################################
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
|
||||||
), aes(y = dist)) +
|
|
||||||
labs(
|
|
||||||
title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
||||||
x = "nr. of iterations",
|
|
||||||
y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
|
||||||
color = "method"
|
|
||||||
) +
|
|
||||||
theme(legend.position = "bottom")
|
|
||||||
|
|
||||||
dev.print(png, file = "sim01_dist.png", width = 768, height = 768, res = 125)
|
n <- 200 # Sample Size
|
||||||
|
p <- 11 # sample(1:15, 1)
|
||||||
|
q <- 3 # sample(1:15, 1)
|
||||||
|
k <- 7 # sample(1:15, 1)
|
||||||
|
r <- 5 # sample(1:15, 1)
|
||||||
|
print(c(n, p, q, k, r))
|
||||||
|
|
||||||
|
hist <- NULL
|
||||||
|
reps <- 20
|
||||||
|
|
||||||
|
Delta.1 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`))
|
||||||
|
Delta.2 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`))
|
||||||
|
Delta <- kronecker(Delta.1, Delta.2)
|
||||||
|
for (rep in 1:reps) {
|
||||||
|
cat(sprintf("%4d / %d simulation rep. started\n", rep, reps))
|
||||||
|
|
||||||
|
alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
|
||||||
|
beta.true <- beta <- matrix(rnorm(p * k), p, k)
|
||||||
|
y <- rnorm(n)
|
||||||
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
||||||
|
sin(slope * y + offset)
|
||||||
|
},
|
||||||
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
||||||
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
||||||
|
))
|
||||||
|
X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)
|
||||||
|
|
||||||
|
hist.sim <- sim(X, Fy, shape = c(p, q, k, r), alpha.true, beta.true)
|
||||||
|
hist.sim$repetition <- rep
|
||||||
|
|
||||||
|
hist <- rbind(hist, hist.sim)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
# Save simulation results
|
||||||
geom_line(aes(y = dist.alpha)) +
|
sim.name <- "sim02"
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)),
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime))
|
||||||
), aes(y = dist.alpha)) +
|
|
||||||
labs(
|
# for GGPlot2, as factors for grouping
|
||||||
title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)),
|
hist$repetition <- factor(hist$repetition)
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
for (response in c("loss", "dist", "dist.alpha", "dist.beta")) {
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
for (fun in c("all", "mean", "median")) {
|
||||||
x = "nr. of iterations",
|
print(plot.hist2(hist, response, fun, title = fun) + coord_trans(x = "log1p"))
|
||||||
y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)),
|
dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun),
|
||||||
color = "method"
|
width = 768, height = 768, res = 125)
|
||||||
) +
|
}
|
||||||
theme(legend.position = "bottom")
|
}
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### WIP ###
|
||||||
|
################################################################################
|
||||||
|
n <- 200 # Sample Size
|
||||||
|
p <- 11 # sample(1:15, 1)
|
||||||
|
q <- 3 # sample(1:15, 1)
|
||||||
|
k <- 7 # sample(1:15, 1)
|
||||||
|
r <- 5 # sample(1:15, 1)
|
||||||
|
print(c(n, p, q, k, r))
|
||||||
|
|
||||||
|
alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
|
||||||
|
beta.true <- beta <- matrix(rnorm(p * k), p, k)
|
||||||
|
y <- rnorm(n)
|
||||||
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
||||||
|
sin(slope * y + offset)
|
||||||
|
},
|
||||||
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
||||||
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
||||||
|
))
|
||||||
|
X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)
|
||||||
|
|
||||||
|
Delta.1 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`))
|
||||||
|
Delta.2 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`))
|
||||||
|
Delta <- kronecker(Delta.1, Delta.2)
|
||||||
|
|
||||||
|
shape <- c(p, q, k, r)
|
||||||
|
|
||||||
|
# Base (old)
|
||||||
|
Rprof(fit.base <- kpir.base(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
||||||
|
|
||||||
|
# New (simple Gradient Descent)
|
||||||
|
Rprof(fit.new <- kpir.new(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
||||||
|
|
||||||
|
# Gradient Descent with Nesterov Momentum
|
||||||
|
Rprof(fit.momentum <- kpir.momentum(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
||||||
|
|
||||||
|
# # Residual Covariance Kronecker product assumpton version
|
||||||
|
# Rprof(fit.kron <- kpir.kron(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
||||||
|
|
||||||
|
# Approximated MLE with Nesterov Momentum
|
||||||
|
Rprof("kpir.approx.Rprof")
|
||||||
|
fit.approx <- kpir.approx(X, Fy, shape, max.iter = 500, logger = prog(500))
|
||||||
|
summaryRprof("kpir.approx.Rprof")
|
||||||
|
|
||||||
|
par(mfrow = c(2, 2))
|
||||||
|
matrixImage(Delta, main = expression(Delta))
|
||||||
|
matrixImage(fit.base$Delta, main = expression(hat(Delta)), sub = "base")
|
||||||
|
matrixImage(fit.momentum$Delta, main = expression(hat(Delta)), sub = "momentum")
|
||||||
|
matrixImage(kronecker(fit.approx$Delta.1, fit.approx$Delta.2), main = expression(hat(Delta)), sub = "approx")
|
||||||
|
|
||||||
|
par(mfrow = c(2, 2))
|
||||||
|
matrixImage(Delta.1, main = expression(Delta[1]))
|
||||||
|
matrixImage(fit.approx$Delta.1, main = expression(hat(Delta)[1]), sub = "approx")
|
||||||
|
matrixImage(Delta.2, main = expression(Delta[2]))
|
||||||
|
matrixImage(fit.approx$Delta.2, main = expression(hat(Delta)[2]), sub = "approx")
|
||||||
|
|
||||||
|
par(mfrow = c(2, 2))
|
||||||
|
matrixImage(alpha.true, main = expression(alpha))
|
||||||
|
matrixImage(fit.base$alpha, main = expression(hat(alpha)), sub = "base")
|
||||||
|
matrixImage(fit.momentum$alpha, main = expression(hat(alpha)), sub = "momentum")
|
||||||
|
matrixImage(fit.approx$alpha, main = expression(hat(alpha)), sub = "approx")
|
||||||
|
|
||||||
|
par(mfrow = c(2, 2))
|
||||||
|
matrixImage(beta.true, main = expression(beta))
|
||||||
|
matrixImage(fit.base$beta, main = expression(hat(beta)), sub = "base")
|
||||||
|
matrixImage(fit.momentum$beta, main = expression(hat(beta)), sub = "momentum")
|
||||||
|
matrixImage(fit.approx$beta, main = expression(hat(beta)), sub = "approx")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
### EEG ###
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
suppressPackageStartupMessages({
|
||||||
|
library(pROC)
|
||||||
|
})
|
||||||
|
|
||||||
|
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
||||||
|
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
|
||||||
|
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
||||||
|
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
|
||||||
|
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
|
||||||
|
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
|
||||||
|
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
|
||||||
|
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
|
||||||
|
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
|
||||||
|
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
|
||||||
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
||||||
|
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])
|
||||||
|
|
||||||
|
# Load EEG dataset
|
||||||
|
dataset <- readRDS('eeg_analysis/eeg_data.rds')
|
||||||
|
|
||||||
|
eeg_cross_validation <- function(nrFolds = 10L) {
|
||||||
|
# Set dimenional parameters.
|
||||||
|
n <- nrow(dataset) # sample size (nr. of people)
|
||||||
|
p <- 64L # nr. of predictors (count of sensorce)
|
||||||
|
t <- 256L # nr. of time points (measurements)
|
||||||
|
|
||||||
|
# Extract dimension names from X.
|
||||||
|
nNames <- dataset$PersonID
|
||||||
|
tNames <- as.character(seq(t))
|
||||||
|
pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]
|
||||||
|
|
||||||
|
# Split into X-y.
|
||||||
|
X <- as.matrix(dataset[, -(1:2)])
|
||||||
|
y <- dataset$Case_Control
|
||||||
|
# Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
|
||||||
|
# (Each of the n rows in X iterate over the time bevore switching sensorce.)
|
||||||
|
dim(X) <- c(n, t, p)
|
||||||
|
dimnames(X) <- list(nNames, tNames, pNames)
|
||||||
|
|
||||||
|
# Setup Cross-Validation result
|
||||||
|
CV <- data.frame(
|
||||||
|
fold = (seq_len(n) %% nrFolds) + 1L,
|
||||||
|
y_true = y,
|
||||||
|
y_pred = NA
|
||||||
|
)
|
||||||
|
|
||||||
|
#
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
|
||||||
|
#' @param tpc Number of "t"ime "p"rincipal "c"omponents.
|
||||||
|
egg_analysis_reduced <- function(methods, ppc, tpc) {
|
||||||
|
# Set dimenional parameters.
|
||||||
|
n <- nrow(dataset) # sample size (nr. of people)
|
||||||
|
p <- 64L # nr. of predictors (count of sensorce)
|
||||||
|
t <- 256L # nr. of time points (measurements)
|
||||||
|
|
||||||
|
# Extract dimension names from X.
|
||||||
|
nNames <- dataset$PersonID
|
||||||
|
tNames <- as.character(seq(t))
|
||||||
|
pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]
|
||||||
|
|
||||||
|
# Split into X-y.
|
||||||
|
X <- as.matrix(dataset[, -(1:2)])
|
||||||
|
y <- dataset$Case_Control
|
||||||
|
# Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
|
||||||
|
# (Each of the n rows in X iterate over the time bevore switching sensorce.)
|
||||||
|
X <- array(X, dim = c(n, t, p),
|
||||||
|
dimnames = list(nNames, tNames, pNames))
|
||||||
|
# Reorder axis to (p, t, n) = (predictors, timesteps, samples).
|
||||||
|
X <- aperm(X, c(3, 2, 1))
|
||||||
|
|
||||||
|
# Compute Mean of X.
|
||||||
|
X_mean <- apply(X, c(1, 2), mean)
|
||||||
|
X_center <- X - as.vector(X_mean)
|
||||||
|
|
||||||
|
# Compute "left" and "right" cov-matrices.
|
||||||
|
Sigma_t <- matrix(apply(apply(X_center, 3, crossprod), 1, mean), t, t)
|
||||||
|
Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p)
|
||||||
|
# Get "left", "right" principal components.
|
||||||
|
V_p <- svd(Sigma_p, ppc, 0L)$u
|
||||||
|
V_t <- svd(Sigma_t, tpc, 0L)$u
|
||||||
|
|
||||||
|
# Reduce dimension.
|
||||||
|
X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t))
|
||||||
|
dim(X_reduced) <- c(ppc, tpc, n)
|
||||||
|
|
||||||
|
# Vectorize to shape of (predictors * timesteps, samples) and transpose to
|
||||||
|
# (samples, predictors * timesteps).
|
||||||
|
X_vec <- t(matrix(X_reduced, ppc * tpc, n))
|
||||||
|
|
||||||
|
loo.cv <- expand.grid(method = names(methods), fold = 1:n)
|
||||||
|
loo.cv$y_true <- y[loo.cv$fold]
|
||||||
|
loo.cv$y_pred <- NA
|
||||||
|
|
||||||
|
# Performe LOO cross-validation for each method.
|
||||||
|
for (i in 1L:n) {
|
||||||
|
# Print progress.
|
||||||
|
cat(sprintf("\rCross-Validation (p-PC: %d, t-PC: %d): %4d/%d",
|
||||||
|
ppc, tpc, i, n))
|
||||||
|
# Leave Out the i-th element.
|
||||||
|
X_train <- X_vec[-i, ]
|
||||||
|
X_test <- X_vec[i, ]
|
||||||
|
y_train <- y[-i]
|
||||||
|
# Center y.
|
||||||
|
y_train <- scale(y_train, center = TRUE, scale = FALSE)
|
||||||
|
|
||||||
|
# For each method.
|
||||||
|
for (method.name in names(methods)) {
|
||||||
|
method <- methods[[method.name]]
|
||||||
|
# Compute reduction using current method under common API.
|
||||||
|
sdr <- method(X_train, y_train, ppc, tpc)
|
||||||
|
B <- kronecker(sdr$alpha, sdr$beta)
|
||||||
|
# Fit a linear model (which ensures a common sdr direction if possible).
|
||||||
|
model <- glm(y ~ x, family = binomial(link = "logit"),
|
||||||
|
data = data.frame(y = y[-i], x = X_train %*% B))
|
||||||
|
# Predict out of sample and store in LOO CV data.frame.
|
||||||
|
y_pred <- predict(model, data.frame(x = X_test %*% B), type = "response")
|
||||||
|
loo.cv[loo.cv$method == method.name & loo.cv$fold == i, 'y_pred'] <- y_pred
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (method.name in names(methods)) {
|
||||||
|
labels <- loo.cv[loo.cv$method == method.name, 'y_true']
|
||||||
|
predictions <- loo.cv[loo.cv$method == method.name, 'y_pred']
|
||||||
|
ROC <- roc(unlist(labels), unlist(predictions), quiet = TRUE)
|
||||||
|
# Combined accuracy, error, ...
|
||||||
|
cat("\nMethod: ", method.name, "\n",
|
||||||
|
"acc: ", acc(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"err: ", err(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
|
||||||
|
"auc: ", ROC$auc, "\n",
|
||||||
|
"auc sd: ", sqrt(var(ROC)), "\n",
|
||||||
|
sep = '')
|
||||||
|
}
|
||||||
|
|
||||||
|
loo.cv
|
||||||
|
}
|
||||||
|
|
||||||
|
methods <- list(
|
||||||
|
KPIR_LS = function(...) tensor_predictor(..., method = "KPIR_LS"),
|
||||||
|
KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
|
||||||
|
KPFC1 = function(...) tensor_predictor(..., method = "KPFC1"),
|
||||||
|
KPFC2 = function(...) tensor_predictor(..., method = "KPFC2"),
|
||||||
|
LSIR = LSIR
|
||||||
|
)
|
||||||
|
|
||||||
|
# ppc, tpc
|
||||||
|
# ------------
|
||||||
|
params <- list( c( 4, 3)
|
||||||
|
, c( 15, 15)
|
||||||
|
, c( 30, 20)
|
||||||
|
)
|
||||||
|
|
||||||
|
for (param in params) {
|
||||||
|
c(ppc, tpc) %<-% param
|
||||||
|
sim <- egg_analysis_reduced(methods, ppc, tpc)
|
||||||
|
|
||||||
|
attr(sim, 'param') <- c(ppc = ppc, tpc = tpc)
|
||||||
|
|
||||||
|
saveRDS(sim, file = sprintf('eeg_analysis_reduced_%d_%d.rds', ppc, tpc))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# plot.hist(hist, "loss",
|
||||||
|
# title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
||||||
|
# l(hat(alpha), hat(beta)))),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(l(hat(alpha), hat(beta)))
|
||||||
|
# )
|
||||||
|
# plot.stats(hist, "loss",
|
||||||
|
# title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
||||||
|
# l(hat(alpha), hat(beta)))),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(l(hat(alpha), hat(beta)))
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_loss_stat_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
||||||
|
# geom_line(aes(y = dist)) +
|
||||||
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist)),
|
||||||
|
# aggregate(sub, list(method, repetition), tail, 1)
|
||||||
|
# ), aes(y = dist)) +
|
||||||
|
# labs(
|
||||||
|
# title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_dist_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, y = dist, color = method, group = method)) +
|
||||||
|
# geom_ribbon(aes(color = NULL, fill = method), alpha = 0.2,
|
||||||
|
# stat = "summary", fun.min = "min", fun.max = "max", na.rm = TRUE) +
|
||||||
|
# geom_ribbon(aes(color = NULL, fill = method), alpha = 0.4,
|
||||||
|
# stat = "summary", fun.min = function(y) quantile(y, 0.25),
|
||||||
|
# fun.max = function(y) quantile(y, 0.75), na.rm = TRUE) +
|
||||||
|
# geom_line(stat = "summary", fun = "mean", na.rm = TRUE) +
|
||||||
|
# labs(
|
||||||
|
# title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_dist_stat_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
||||||
|
# geom_line(aes(y = dist.alpha)) +
|
||||||
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)),
|
||||||
|
# aggregate(sub, list(method, repetition), tail, 1)
|
||||||
|
# ), aes(y = dist.alpha)) +
|
||||||
|
# labs(
|
||||||
|
# title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_dist_alpha_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
||||||
|
# geom_line(aes(y = dist.beta)) +
|
||||||
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)),
|
||||||
|
# aggregate(sub, list(method, repetition), tail, 1)
|
||||||
|
# ), aes(y = dist.beta)) +
|
||||||
|
# labs(
|
||||||
|
# title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_dist_beta_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
||||||
|
# geom_line(aes(y = norm.alpha)) +
|
||||||
|
# geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)),
|
||||||
|
# aggregate(sub, list(method, repetition), tail, 1)
|
||||||
|
# ), aes(y = norm.alpha)) +
|
||||||
|
# labs(
|
||||||
|
# title = expression(paste("Norm of ", hat(alpha))),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(hat(alpha))[F]),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_norm_alpha_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
||||||
|
# geom_line(aes(y = norm.beta)) +
|
||||||
|
# geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)),
|
||||||
|
# aggregate(sub, list(method, repetition), tail, 1)
|
||||||
|
# ), aes(y = norm.beta)) +
|
||||||
|
# labs(
|
||||||
|
# title = expression(paste("Norm of ", hat(beta))),
|
||||||
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
# "20 repetitions, ", n == .(n), ", ",
|
||||||
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
# x = "nr. of iterations",
|
||||||
|
# y = expression(abs(hat(beta))[F]),
|
||||||
|
# color = "method"
|
||||||
|
# ) +
|
||||||
|
# theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
# dev.print(png, file = sprintf("sim01_norm_beta_%s.png", datetime),
|
||||||
|
# width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
dev.print(png, file = "sim01_dist_alpha.png", width = 768, height = 768, res = 125)
|
|
||||||
|
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
|
||||||
geom_line(aes(y = dist.beta)) +
|
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)),
|
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
|
||||||
), aes(y = dist.beta)) +
|
|
||||||
labs(
|
|
||||||
title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)),
|
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
||||||
x = "nr. of iterations",
|
|
||||||
y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)),
|
|
||||||
color = "method"
|
|
||||||
) +
|
|
||||||
theme(legend.position = "bottom")
|
|
||||||
|
|
||||||
dev.print(png, file = "sim01_dist_beta.png", width = 768, height = 768, res = 125)
|
|
||||||
|
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
|
||||||
geom_line(aes(y = norm.alpha)) +
|
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)),
|
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
|
||||||
), aes(y = norm.alpha)) +
|
|
||||||
labs(
|
|
||||||
title = expression(paste("Norm of ", hat(alpha))),
|
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
||||||
x = "nr. of iterations",
|
|
||||||
y = expression(abs(hat(alpha))[F]),
|
|
||||||
color = "method"
|
|
||||||
) +
|
|
||||||
theme(legend.position = "bottom")
|
|
||||||
|
|
||||||
dev.print(png, file = "sim01_norm_alpha.png", width = 768, height = 768, res = 125)
|
|
||||||
|
|
||||||
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
|
||||||
geom_line(aes(y = norm.beta)) +
|
|
||||||
geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)),
|
|
||||||
aggregate(sub, list(type, repetition), tail, 1)
|
|
||||||
), aes(y = norm.beta)) +
|
|
||||||
labs(
|
|
||||||
title = expression(paste("Norm of ", hat(beta))),
|
|
||||||
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
||||||
"20 repetitions, ", n == .(n), ", ",
|
|
||||||
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
||||||
x = "nr. of iterations",
|
|
||||||
y = expression(abs(hat(beta))[F]),
|
|
||||||
color = "method"
|
|
||||||
) +
|
|
||||||
theme(legend.position = "bottom")
|
|
||||||
|
|
||||||
dev.print(png, file = "sim01_norm_beta.png", width = 768, height = 768, res = 125)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
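Reviewer note on `egg_analysis_reduced` in the hunk above: the mode-wise PCA reduction can be shape-checked on random data. A self-contained sketch repeating the relevant lines with hypothetical small sizes:

```r
# Shape check of the two-sided PCA reduction (hypothetical sizes).
p <- 6L; t <- 8L; n <- 5L; ppc <- 2L; tpc <- 3L
X <- array(rnorm(p * t * n), dim = c(p, t, n))      # (predictors, timesteps, samples)
X_center <- X - as.vector(apply(X, c(1, 2), mean))  # center over samples
# Averaged "time" and "predictor" covariance estimates.
Sigma_t <- matrix(apply(apply(X_center, 3, crossprod), 1, mean), t, t)
Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p)
V_p <- svd(Sigma_p, ppc, 0L)$u  # p x ppc
V_t <- svd(Sigma_t, tpc, 0L)$u  # t x tpc
# Project each sample onto the leading components of both modes.
X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t))
dim(X_reduced) <- c(ppc, tpc, n)  # reduced predictors x timesteps per sample
```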
@@ -983,3 +1389,82 @@ dev.print(png, file = "sim01_norm_beta.png", width = 768, height = 768, res = 12
 # log(det(kronecker(D.1, D.2)))
 # p * log(det(D.1)) + q * log(det(D.2))

+
+d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
+d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)
+
+# Orientation follows the discrete axis
+ggplot(mtcars, aes(mpg, factor(cyl))) +
+    geom_point() +
+    stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)
+
+# You can supply individual functions to summarise the value at
+# each x:
+d + stat_summary(fun = "median", colour = "red", size = 2, geom = "point")
+d + stat_summary(fun = "mean", colour = "red", size = 2, geom = "point")
+d + aes(colour = factor(vs)) + stat_summary(fun = mean, geom = "line")
+
+d + stat_summary(fun = mean, fun.min = min, fun.max = max, colour = "red")
+
+d <- ggplot(diamonds, aes(cut))
+d + geom_bar()
+d + stat_summary(aes(y = price), fun = "mean", geom = "bar")
+
+# Orientation of stat_summary_bin is ambiguous and must be specified directly
+ggplot(diamonds, aes(carat, price)) +
+    stat_summary_bin(fun = "mean", geom = "bar", orientation = 'y')
+
+# Don't use ylim to zoom into a summary plot - this throws the
+# data away
+p <- ggplot(mtcars, aes(cyl, mpg)) +
+    stat_summary(fun = "mean", geom = "point")
+p
+p + ylim(15, 30)
+# Instead use coord_cartesian
+p + coord_cartesian(ylim = c(15, 30))
+
+# A set of useful summary functions is provided from the Hmisc package:
+stat_sum_df <- function(fun, geom = "crossbar", ...) {
+    stat_summary(fun.data = fun, colour = "red", geom = geom, width = 0.2, ...)
+}
+d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
+# The crossbar geom needs grouping to be specified when used with
+# a continuous x axis.
+d + stat_sum_df("mean_cl_boot", mapping = aes(group = cyl))
+d + stat_sum_df("mean_sdl", mapping = aes(group = cyl))
+d + stat_sum_df("mean_sdl", fun.args = list(mult = 1), mapping = aes(group = cyl))
+d + stat_sum_df("median_hilow", mapping = aes(group = cyl))
+
+# An example with highly skewed distributions:
+if (require("ggplot2movies")) {
+    set.seed(596)
+    mov <- movies[sample(nrow(movies), 1000), ]
+    m2 <-
+        ggplot(mov, aes(x = factor(round(rating)), y = votes)) +
+        geom_point()
+    m2 <-
+        m2 +
+        stat_summary(
+            fun.data = "mean_cl_boot",
+            geom = "crossbar",
+            colour = "red", width = 0.3
+        ) +
+        xlab("rating")
+    m2
+    # Notice how the overplotting skews off visual perception of the mean
+    # supplementing the raw data with summary statistics is _very_ important
+
+    # Next, we'll look at votes on a log scale.
+
+    # Transforming the scale means the data are transformed
+    # first, after which statistics are computed:
+    m2 + scale_y_log10()
+    # Transforming the coordinate system occurs after the
+    # statistic has been computed. This means we're calculating the summary on the raw data
+    # and stretching the geoms onto the log scale. Compare the widths of the
+    # standard errors.
+    m2 + coord_trans(y = "log10")
+}
@@ -13,6 +13,7 @@ export(approx.kronecker)
 export(colKronecker)
 export(dist.projection)
 export(dist.subspace)
+export(kpir.approx)
 export(kpir.base)
 export(kpir.kron)
 export(kpir.momentum)
@@ -20,6 +21,7 @@ export(kpir.new)
 export(mat)
 export(matpow)
 export(matrixImage)
+export(mcrossprod)
 export(reduce)
 export(rowKronecker)
 export(tensor_predictor)
@@ -4,7 +4,7 @@
 #'
 #' Delta_1 = n^-1 sum_i R_i' R_i,
 #' Delta_2 = n^-1 sum_i R_i R_i'.
 #'
 #' @export
 kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
     max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
@@ -78,27 +78,21 @@ kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
     R <- X - (Fy %x_3% alpha0 %x_2% beta0)

     # Covariance estimates and scaling factor
-    Delta.1 <- tcrossprod(mat(R, 3))
-    Delta.2 <- tcrossprod(mat(R, 2))
-    s <- sum(diag(Delta.1))
+    Delta.1 <- tcrossprod(mat(R, 3)) / n
+    Delta.2 <- tcrossprod(mat(R, 2)) / n
+    s <- mean(diag(Delta.1))

     # Inverse Covariances
     Delta.1.inv <- solve(Delta.1)
     Delta.2.inv <- solve(Delta.2)

     # cross dependent covariance estimates
-    S.1 <- n^-1 * tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3))
-    S.2 <- n^-1 * tcrossprod(mat(R, 2), mat(R %x_3% Delta.1.inv, 2))
+    S.1 <- tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3)) / n
+    S.2 <- tcrossprod(mat(R, 2), mat(R %x_3% Delta.1.inv, 2)) / n

     # Evaluate negative log-likelihood (2 pi term dropped)
-    loss <- -0.5 * n * (p * q * log(s) - p * log(det(Delta.1)) -
-        q * log(det(Delta.2)) - s * sum(S.1 * Delta.1.inv))
+    loss <- -0.5 * (n * (p * q * log(s) - p * log(det(Delta.1)) -
+        q * log(det(Delta.2))) - s * sum(S.1 * Delta.1.inv))

-    # Gradient "generating" tensor
-    G <- (sum(S.1 * Delta.1.inv) - p * q / s) * R
-    G <- G + R %x_2% ((diag(q, p, p) - s * (Delta.2.inv %*% S.2)) %*% Delta.2.inv)
-    G <- G + R %x_3% ((diag(p, q, q) - s * (Delta.1.inv %*% S.1)) %*% Delta.1.inv)
-    G <- G + s * (R %x_2% Delta.2.inv %x_3% Delta.1.inv)
-
     # Call history callback (logger) before the first iteration
     if (is.function(logger)) {
@@ -125,12 +119,116 @@ kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
             alpha.moment <- alpha1 + ((a0 - 1) / a1) * (alpha1 - alpha0)
             beta.moment  <- beta1  + ((a0 - 1) / a1) * ( beta1 -  beta0)
         }

+        # Extrapolated residuals
+        R <- X - (Fy %x_3% alpha.moment %x_2% beta.moment)
+
+        # Recompute Covariance Estimates and scaling factor
+        Delta.1 <- tcrossprod(mat(R, 3)) / n
+        Delta.2 <- tcrossprod(mat(R, 2)) / n
+        s <- mean(diag(Delta.1))
+
+        # Inverse Covariances
+        Delta.1.inv <- solve(Delta.1)
+        Delta.2.inv <- solve(Delta.2)
+
+        # cross dependent covariance estimates
+        S.1 <- tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3)) / n
+        S.2 <- tcrossprod(mat(R, 2), mat(R %x_3% Delta.1.inv, 2)) / n
+
+        # Gradient "generating" tensor
+        G <- (sum(S.1 * Delta.1.inv) - p * q / s) * R
+        G <- G + R %x_2% ((diag(q, p) - s * (Delta.2.inv %*% S.2)) %*% Delta.2.inv)
+        G <- G + R %x_3% ((diag(p, q) - s * (Delta.1.inv %*% S.1)) %*% Delta.1.inv)
+        G <- G + s * (R %x_2% Delta.2.inv %x_3% Delta.1.inv)
+
+        # Calculate Gradients
+        grad.alpha <- tcrossprod(mat(G, 3), mat(Fy %x_2% beta.moment, 3))
+        grad.beta  <- tcrossprod(mat(G, 2), mat(Fy %x_3% alpha.moment, 2))
+
+        # Backtracking line search (Armijo type)
+        # The `inner.prod` is used in the Armijo break condition but does not
+        # depend on the step size.
+        inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)
+
+        # backtracking loop
+        for (delta in step.size * 0.618034^seq.int(0L, len = max.line.iter)) {
+            # Update `alpha` and `beta` (note: add(+), the gradients are already
+            # pointing into the negative slope direction of the loss because they
+            # are the gradients of the log-likelihood [NOT the negative log-likelihood])
+            alpha.temp <- alpha.moment + delta * grad.alpha
+            beta.temp  <- beta.moment  + delta * grad.beta
+
+            # Update Residuals, Covariances, ...
+            R <- X - (Fy %x_3% alpha.temp %x_2% beta.temp)
+            Delta.1 <- tcrossprod(mat(R, 3)) / n
+            Delta.2 <- tcrossprod(mat(R, 2)) / n
+            s <- mean(diag(Delta.1))
+            Delta.1.inv <- solve(Delta.1)
+            Delta.2.inv <- solve(Delta.2)
+            S.1 <- tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3)) / n
+            # S.2 not needed
+
+            # Re-evaluate negative log-likelihood
+            loss.temp <- -0.5 * (n * (p * q * log(s) - p * log(det(Delta.1)) -
+                q * log(det(Delta.2))) - s * sum(S.1 * Delta.1.inv))
+
+            # Armijo line search break condition
+            if (loss.temp <= loss - 0.1 * delta * inner.prod) {
+                break
+            }
+        }
+
+        # Call logger (invoke history callback)
+        if (is.function(logger)) {
+            logger(iter, loss.temp, alpha.temp, beta.temp, Delta.1, Delta.2, delta)
+        }
+
+        # Enforce descent
+        if (loss.temp < loss) {
+            alpha0 <- alpha1
+            alpha1 <- alpha.temp
+            beta0 <- beta1
+            beta1 <- beta.temp
+
+            # check break conditions
+            if (mean(abs(alpha1)) + mean(abs(beta1)) < eps) {
+                break.reason <- "alpha, beta numerically zero"
+                break # estimates are basically zero -> stop
+            }
+            if (inner.prod < eps * (p * q + r * k)) {
+                break.reason <- "mean squared gradient is smaller than epsilon"
+                break # mean squared gradient is smaller than epsilon -> stop
+            }
+            if (abs(loss.temp - loss) < eps) {
+                break.reason <- "decrease is too small (slow)"
+                break # decrease is too small (slow) -> stop
+            }
+
+            loss <- loss.temp
+            no.nesterov <- FALSE # always reset
+        } else if (!no.nesterov) {
+            no.nesterov <- TRUE # retry without momentum
+            next
+        } else {
+            break.reason <- "failed even without momentum"
+            break # failed even without momentum -> stop
+        }
+
+        # update momentum scaling
+        a0 <- a1
+        a1 <- nesterov.scaling(a1, iter)
+
+        # Set next iter starting step.size to line searched step size
+        # (while allowing it to increase)
+        step.size <- 1.618034 * delta
+
     }

-    # Extrapolated residuals
-    R <- X - (Fy %x_3% alpha.moment %x_2% beta.moment)
-    list(loss = loss, alpha = alpha1, beta = beta1, Delta = Delta)
+    list(
+        loss = loss,
+        alpha = alpha1, beta = beta1,
+        Delta.1 = Delta.1, Delta.2 = Delta.2, tr.Delta = s,
+        break.reason = break.reason
+    )
 }
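The backtracking loop added above shrinks the step by the golden-ratio conjugate 0.618034 on each failed Armijo test, and the next iteration restarts at 1.618034 times the accepted step, so a search that succeeds immediately leaves the step size roughly stationary. A standalone sketch of that policy (hypothetical helper distilled from the loop, not part of the commits; `eval.loss` evaluates the objective at a trial step):

```r
armijo.search <- function(loss, eval.loss, step.size, grad.sq,
                          max.line.iter = 50L) {
    for (delta in step.size * 0.618034^seq.int(0L, length.out = max.line.iter)) {
        loss.temp <- eval.loss(delta)
        # Armijo condition: sufficient decrease relative to the gradient norm.
        if (loss.temp <= loss - 0.1 * delta * grad.sq) break
    }
    list(step = delta, loss = loss.temp)  # caller regrows: 1.618034 * delta
}
```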
@ -126,8 +126,8 @@ kpir.kron <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
||||||
resid.trans <- resid %x_3% solve(Delta.1) %x_2% solve(Delta.2)
|
resid.trans <- resid %x_3% solve(Delta.1) %x_2% solve(Delta.2)
|
||||||
|
|
||||||
# Calculate Gradients
|
# Calculate Gradients
|
||||||
grad.alpha <- tcrossprod(mat(resid.trans, 3), mat(Fy %x_2% beta, 3))
|
grad.alpha <- tcrossprod(mat(resid.trans, 3), mat(Fy %x_2% S.beta, 3))
|
||||||
grad.beta <- tcrossprod(mat(resid.trans, 2), mat(Fy %x_3% alpha, 2))
|
grad.beta <- tcrossprod(mat(resid.trans, 2), mat(Fy %x_3% S.alpha, 2))
|
||||||
|
|
||||||
# Backtracking line search (Armijo type)
|
# Backtracking line search (Armijo type)
|
||||||
# The `inner.prod` is used in the Armijo break condition but does not
|
# The `inner.prod` is used in the Armijo break condition but does not
|
||||||
|
@@ -161,7 +161,7 @@ kpir.kron <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
         }
     }

-    # Call logger (invoce history callback)
+    # Call logger (invoke history callback)
     if (is.function(logger)) {
         logger(iter, loss.temp, alpha.temp, beta.temp, Delta.1, Delta.2, delta)
     }

@@ -207,5 +207,10 @@ kpir.kron <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
     }

-    list(loss = loss, alpha = alpha1, beta = beta1, Delta = Delta, break.reason = break.reason)
+    list(
+        loss = loss,
+        alpha = alpha1, beta = beta1,
+        Delta.1 = Delta.1, Delta.2 = Delta.2,
+        break.reason = break.reason
+    )
 }

@@ -0,0 +1,21 @@
#' Tensor Mode Crossproduct
#'
#'      C = A_(m) t(A_(m))
#'
#' For a matrix `A`, mode one `mcrossprod(A, 1)` is equivalent to
#' `A %*% t(A)` (`tcrossprod`), while for mode two `mcrossprod(A, 2)` the
#' equivalence is `t(A) %*% A` (`crossprod`).
#'
#' @param A multi-dimensional array
#' @param mode index (1-indexed)
#'
#' @returns matrix of dimensions \code{dim(A)[mode] by dim(A)[mode]}.
#'
#' @note equivalent to \code{tcrossprod(mat(A, mode))} with around the same
#'  performance but only allocates the result matrix.
#'
#' @export
mcrossprod <- function(A, mode = length(dim(A))) {
    storage.mode(A) <- "double"
    .Call("C_mcrossprod", A, as.integer(mode))
}

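A small cross-check of the wrapper against a base-R matricization; the unfolding below is only one convention, but the crossproduct is invariant to the column order of `A_(m)` (a sketch; assumes the compiled package is loaded):

    A <- array(rnorm(2 * 3 * 4), dim = c(2, 3, 4))
    m <- 2
    # base-R mode-m unfolding A_(m)
    A.m <- matrix(aperm(A, c(m, seq_along(dim(A))[-m])), dim(A)[m])
    all.equal(mcrossprod(A, m), tcrossprod(A.m))  # expected TRUE
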
@@ -0,0 +1,49 @@
#' Random sampling from a tensor (multi-array) normal distribution
#'
#' @examples
#' n <- 1000
#' Sigma.1 <- 0.5^abs(outer(1:5, 1:5, "-"))
#' Sigma.2 <- diag(1:4)
#' X <- rtensornorm(n, 0, Sigma.1, Sigma.2)
#'
#' @export
rtensornorm <- function(n, mean, ..., sample.mode) {
    # get covariance matrices
    cov <- list(...)

    # get sample shape (dimensions of an observation)
    shape <- unlist(Map(nrow, cov))

    # result tensor dimensions
    dims <- c(shape, n)

    # validate mean dimensions (scalar or array matching the sample shape)
    if (!missing(mean)) {
        stopifnot(is.numeric(mean))
        stopifnot(is.array(mean) || length(mean) == 1)
        if (is.array(mean)) {
            stopifnot(length(dim(mean)) == length(cov))
            stopifnot(all(shape == dim(mean)))
        }
    }

    # sample i.i.d. standard normal entries
    X <- array(rnorm(prod(dims)), dim = dims)

    # transform from standard normal to tensor normal with given covariances
    for (i in seq_along(cov)) {
        X <- ttm(X, matpow(cov[[i]], 1 / 2), i)
    }

    # add mean (using recycling, observations on the last mode)
    if (!missing(mean)) {
        X <- X + mean
    }

    # permute axes such that observations are indexed by the sample mode
    # (moves the last axis, holding the observations, to `sample.mode`)
    if (!missing(sample.mode)) {
        axis <- seq_len(length(dims) - 1)
        start <- seq_len(sample.mode - 1)
        end <- seq_len(length(dims) - sample.mode) + sample.mode - 1
        X <- aperm(X, c(axis[start], length(dims), axis[end]))
    }

    X
}

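An empirical sanity check, a sketch relying on `mcrossprod` from this change set: for `X_i = Sigma.1^{1/2} Z_i Sigma.2^{1/2}` with i.i.d. standard normal entries of `Z_i`, one has `E[X_i t(X_i)] = tr(Sigma.2) * Sigma.1`, so the scaled mode-1 crossproduct of the sample should approximate `Sigma.1`:

    n <- 10000
    Sigma.1 <- 0.5^abs(outer(1:5, 1:5, "-"))
    Sigma.2 <- diag(1:4)
    X <- rtensornorm(n, 0, Sigma.1, Sigma.2)    # dim(X) == c(5, 4, n)
    S.1 <- mcrossprod(X, 1) / (n * sum(diag(Sigma.2)))
    round(S.1 - Sigma.1, 2)                     # approximately the zero matrix
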
@@ -9,10 +9,14 @@
 /* Tensor Times Matrix a.k.a. Mode Product */
 extern SEXP ttm(SEXP A, SEXP X, SEXP mode);

+/* Tensor Mode Covariance e.g. `(1 / n) * A_(m) A_(m)^T` */
+extern SEXP mcrossprod(SEXP A, SEXP mode);
+
 /* List of registered routines (e.g. C entry points) */
 static const R_CallMethodDef CallEntries[] = {
     // {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
     {"C_ttm", (DL_FUNC) &ttm, 3},
+    {"C_mcrossprod", (DL_FUNC) &mcrossprod, 2},
     {NULL, NULL, 0}
 };

@@ -0,0 +1,88 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#ifndef FCONE
#define FCONE
#endif

/**
 * Tensor Mode Crossproduct
 *
 *      C = A_(m) t(A_(m))
 *
 * For a matrix `A`, mode one `mcrossprod(A, 1)` is equivalent to
 * `A %*% t(A)` (`tcrossprod`), while for mode two `mcrossprod(A, 2)` the
 * equivalence is `t(A) %*% A` (`crossprod`).
 *
 * @param A multi-dimensional array
 * @param m mode index (1-indexed)
 */
extern SEXP mcrossprod(SEXP A, SEXP m) {
    // get zero indexed mode
    int mode = asInteger(m) - 1;

    // get dimension attribute of A
    SEXP dim = getAttrib(A, R_DimSymbol);

    // validate mode (0-indexed, must be smaller than the tensor order)
    if (mode < 0 || length(dim) <= mode) {
        error("Illegal mode");
    }

    // the strides
    //   `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
    //   `stride[1] <- dim(A)[mode]`
    //   `stride[2] <- prod(dim(A)[-seq_len(mode)])`
    int stride[3] = {1, INTEGER(dim)[mode], 1};
    for (int i = 0; i < length(dim); ++i) {
        int size = INTEGER(dim)[i];
        stride[0] *= (i < mode) ? size : 1;
        stride[2] *= (i > mode) ? size : 1;
    }

    // create response matrix C
    SEXP C = PROTECT(allocMatrix(REALSXP, stride[1], stride[1]));

    // raw data access pointers
    double* a = REAL(A);
    double* c = REAL(C);

    // employ the BLAS dsyrk (Double SYmmetric Rank-K update) operation
    // (C = alpha A A^T + beta C or C = alpha A^T A + beta C)
    const double zero = 0.0;
    const double one = 1.0;
    if (mode == 0) {
        // mode 1: special case C = A_(1) A_(1)^T in a single call
        // C = 1 A A^T + 0 C
        F77_CALL(dsyrk)("U", "N", &stride[1], &stride[2],
            &one, a, &stride[1], &zero, c, &stride[1] FCONE FCONE);
    } else {
        // other modes written as an accumulated sum of matrix products,
        // initialize C to zero
        memset(c, 0, stride[1] * stride[1] * sizeof(double));

        // sum over all modes > mode
        for (int i2 = 0; i2 < stride[2]; ++i2) {
            // C = 1 A^T A + 1 C
            F77_CALL(dsyrk)("U", "T", &stride[1], &stride[0],
                &one, &a[i2 * stride[0] * stride[1]], &stride[0],
                &one, c, &stride[1] FCONE FCONE);
        }
    }

    // the symmetric result is stored in the upper triangular part only,
    // copy the upper triangular part to the lower
    for (int j = 0; j + 1 < stride[1]; j++) {
        for (int i = j + 1; i < stride[1]; ++i) {
            c[i + j * stride[1]] = c[j + i * stride[1]];
        }
    }

    // release C to the garbage collector
    UNPROTECT(1);

    return C;
}

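The stride decomposition above views the column-major array as `stride[2]` contiguous blocks of size `stride[0] x stride[1]`, each contributing one rank-`stride[0]` dsyrk update. Mirrored in R (`strides` is a hypothetical helper, for illustration only):

    strides <- function(dims, mode) c(
        prod(dims[seq_len(mode - 1)]),  # stride[0]: modes before `mode`
        dims[mode],                     # stride[1]: the mode itself
        prod(dims[-seq_len(mode)])      # stride[2]: modes after `mode`
    )
    strides(c(2, 3, 4), 2)  # c(2, 3, 4): four 2 x 3 blocks, one update each
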
@@ -80,15 +80,3 @@ extern SEXP FastPOI_C_sub(SEXP in_B, SEXP in_Delta,
     UNPROTECT(1);
     return out_Z;
 }
-
-/* List of registered routines (e.g. C entry points) */
-static const R_CallMethodDef CallEntries[] = {
-    {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5},
-    {NULL, NULL, 0}
-};
-
-/* Restrict C entry points to registered routines. */
-void R_init_tensorPredictors(DllInfo *dll) {
-    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
-    R_useDynamicSymbols(dll, FALSE);
-}

@@ -11,7 +11,7 @@
 /**
  * Tensor Times Matrix a.k.a. Mode Product
  *
- * @param A multi-dimensionl array
+ * @param A multi-dimensional array
  * @param B matrix
  * @param m mode index (1-indexed)
  */

@@ -41,12 +41,12 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m) {
         error("Dimension mismatch (mode dim not equal to ncol)");
     }

-    // calc nr of response elements `prod(dim(X)[-mode]) * ncol(X)`,
+    // calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C),
-    int leny = 1;
+    int sizeC = 1;
     // and the strides
-    //   `stride[0] <- prod(dim(X)[seq_len(mode - 1)])`
+    //   `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
-    //   `stride[1] <- dim(X)[mode]`
+    //   `stride[1] <- dim(A)[mode]`
-    //   `stride[2] <- prod(dim(X)[-seq_len(mode)])`
+    //   `stride[2] <- prod(dim(A)[-seq_len(mode)])`
     int stride[3] = {1, INTEGER(dim)[mode], 1};
     for (int i = 0; i < length(dim); ++i) {
         int size = INTEGER(dim)[i];

@@ -54,7 +54,7 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m) {
         if (!size) {
             error("Zero dimension detected");
         }
-        leny *= (i == mode) ? nrows(B) : size;
+        sizeC *= (i == mode) ? nrows(B) : size;
         stride[0] *= (i < mode) ? size : 1;
         stride[2] *= (i > mode) ? size : 1;
     }

@@ -63,7 +63,7 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m) {
     int nrow = nrows(B);

     // create response object C
-    SEXP C = PROTECT(allocVector(REALSXP, leny));
+    SEXP C = PROTECT(allocVector(REALSXP, sizeC));

     // raw data access pointers
     double* a = REAL(A);

@@ -88,7 +88,7 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m) {
     }
     /*
     // Tensor Times Matrix / Mode Product (reference implementation)
-    memset(c, 0, leny * sizeof(double));
+    memset(c, 0, sizeC * sizeof(double));
     for (int i2 = 0; i2 < stride[2]; ++i2) {
         for (int i1 = 0; i1 < stride[1]; ++i1) { // stride[1] == ncols(B)
             for (int j = 0; j < nrow; ++j) {

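A plain base-R counterpart of the commented-out reference implementation, handy for cross-checking the BLAS path (`ttm.ref` is a hypothetical name used only in this sketch; `B` must satisfy `ncol(B) == dim(A)[mode]`):

    ttm.ref <- function(A, B, mode) {
        # move `mode` to the front, multiply the unfolding, fold back
        perm <- c(mode, seq_along(dim(A))[-mode])
        C <- B %*% matrix(aperm(A, perm), dim(A)[mode])
        aperm(array(C, c(nrow(B), dim(A)[-mode])), order(perm))
    }
    # e.g. all.equal(ttm(A, B, 2), ttm.ref(A, B, 2))  # expected TRUE
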