This commit is contained in:
Daniel Kapla 2023-11-14 14:35:43 +01:00
parent 40132c2565
commit 90cd46e209
48 changed files with 2852 additions and 1353 deletions

View File

@@ -90,10 +90,11 @@
%%% Custom operators with either one or two arguments (limits)
\makeatletter
%%% Multi-Linear Multiplication
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
% Save first argument as \arg@one
\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i}
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
% Check for second argument
\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi}
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
% specialization for one or two arguments, both versions use saved first argument
\def\mlm@one{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
@@ -101,14 +102,31 @@
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
}
% this command's single argument is the second argument of \mlm
\def\mlm@two#1{\mathchoice%
% this command's single argument is the second argument of \mlm; it gobbles the `^`
\def\mlm@two^#1{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Circle (Iterated Outer Product)
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
\def\outer@one{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
}
\def\outer@two#1{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Kronecker Product (with overflowing limits)
% Save first argument as \arg@one
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
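% Usage sketch (illustrative examples, not from the original source): the
% operators above act like big operators with limits, e.g.
%   $\ten{F}_y \mlm_{k \in [r]} \mat{\alpha}_k$   (\mlm, lower limit only)
%   $\ten{X} \mlm_{k = 1}^{r} \mat{B}_k$          (\mlm, lower and upper limit)
%   $\outer{k = 1}{r} \vec{\mat{A}_k}$            (iterated outer product)
%   $\bigkron{k = r}{1} \mat{\Omega}_k$           (big Kronecker product)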
@@ -134,17 +152,15 @@
\newcommand{\algorithmicbreak}{\textbf{break}}
\newcommand{\Break}{\State \algorithmicbreak}
\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons.
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -256,23 +272,23 @@ A straightforward idea for parameter estimation is to use Gradient Descent. For
\begin{algorithm}[ht]
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
\begin{algorithmic}[1]
\State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
\State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\\
\State $t \leftarrow 1$
\Comment{step counter}
\State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
\Comment{artificial first step}
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
\Comment{momentum extrapolation weights}
\\
\Repeat \Comment{repeat until convergence}
\State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
\State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$
\If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
\State $\delta^{(t + 1)} \leftarrow \delta$
\Break
\EndIf
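For concreteness, a minimal R sketch of the scheme in Algorithm~\ref{alg:NAGD} (an illustrative rendering, not the repository's GMLM implementation; `objective`, `gradient` and `theta0` are generic placeholders, the Armijo test is written in its sufficient-increase form for ascent, and the momentum weight update, which the excerpt does not show, uses the standard Nesterov sequence):

nagd <- function(objective, gradient, theta0,
                 c = 1e-4, delta = 1, gamma = 0.5, max.iter = 100L) {
    theta.prev <- theta0
    theta <- theta0                         # artificial first step
    m.prev <- 0; m <- 1                     # momentum extrapolation weights
    for (iter in seq_len(max.iter)) {       # repeat until convergence
        # momentum extrapolation
        M <- theta + ((m.prev - 1) / m) * (theta - theta.prev)
        grad <- gradient(M)
        obj.ref <- objective(theta)         # reference value for the Armijo test
        # line search over gamma^-1 delta, delta, gamma delta, gamma^2 delta, ...
        for (d in delta * gamma^(-1:20)) {
            theta.temp <- M + d * grad
            # Armijo condition, written as sufficient increase for ascent
            if (objective(theta.temp) >= obj.ref + c * d * sum(grad^2)) break
        }
        delta <- d                          # keep the accepted step size
        theta.prev <- theta
        theta <- theta.temp
        m.prev <- m                         # standard Nesterov momentum sequence;
        m <- (1 + sqrt(1 + 4 * m^2)) / 2    # this update is not shown in the excerpt
    }
    theta
}

A caller would supply, for instance, the GMLM log-likelihood as `objective` and its score as `gradient`.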
@@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{sim-normal-20221012.png}
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
\caption{\label{fig:sim-normal}Simulation Normal}
\end{figure}
@@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{sim-ising-small-20221012.png}
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
\end{figure}
@@ -433,7 +449,7 @@ where each individual block is given by
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ with $\ten{F}_y$ as before, namely
\begin{displaymath}
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
\end{displaymath}
with
\begin{align*}
@@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a time
%
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
\end{align*}}%
The next step is to identify the Hessians from the second differentials in a similar manner as before.
{\allowdisplaybreaks\begin{align*}
@@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
\qquad{\color{gray} (p \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
\qquad{\color{gray} (p_j q_j \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
\qquad{\color{gray} (p_j^2 \times p)}
\\
&\d^2 l(\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\qquad \begin{aligned}
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
\end{aligned}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
\\
&\d^2 l(\mat{\Omega}_j) \\
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
\\
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
\end{aligned}
\end{align*}}%
@@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
\begin{align*}
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
&= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
\mathcal{I}_{1,j+r+1}
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
\mathcal{I}_{j+1,l+1}
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
\mathcal{I}_{j+1,l+r+1}
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
\mathcal{I}_{j+r+1,l+r+1}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
\end{align*}
@@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operation
\begin{theorem}\label{thm:mlm_mat}
Let $\ten{A}$ be a tensor of order $r$ with dimensions $q_1\times ... \times q_r$. Furthermore, for $k = 1, ..., r$ let $\mat{B}_k$ be matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$, it holds that
\begin{displaymath}
\Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
\end{displaymath}
\end{theorem}
A well-known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
\begin{displaymath}
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}.
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
\end{displaymath}
Here we have a matrix, i.e., an order 2 tensor, and the vectorization is a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
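This special case is easy to verify numerically; a small R check with arbitrary illustrative dimensions (`c(A)` plays the role of $\vec{\mat{A}}$, `%x%` is R's Kronecker product):

set.seed(1)
A  <- matrix(rnorm(3 * 4), 3, 4)
B1 <- matrix(rnorm(2 * 3), 2, 3)
B2 <- matrix(rnorm(5 * 4), 5, 4)
stopifnot(all.equal(
    c(B1 %*% A %*% t(B2)),      # vec of the matrix product
    c((B2 %x% B1) %*% c(A))     # Kronecker-vectorization form
))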
@@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
Let $\ten{A}$ be a tensor of order $2 r + s$, where $r > 0$ and $s \geq 0$, with dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ be such that $\len{\mat{i}} = 2 r$, and for $k = 1, ..., r$ let $\mat{B}_k$ denote matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$. Then
\begin{displaymath}
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
\equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}.
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
\end{displaymath}
\end{theorem}
A special case of the above Theorem arises for tensors represented as a Kronecker product. To this end, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
\begin{displaymath}
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
=
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
=
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
\end{displaymath}
In the case $r = 2$ this means
\begin{align*}
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
\end{align*}
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
\begin{displaymath}
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
\end{displaymath}
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$.
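The $r = 2$ identity above can be checked numerically in the same way (illustrative sizes only):

set.seed(2)
A1 <- matrix(rnorm(6), 2, 3); B1 <- matrix(rnorm(6), 2, 3)
A2 <- matrix(rnorm(8), 4, 2); B2 <- matrix(rnorm(8), 4, 2)
stopifnot(all.equal(
    sum((A1 %x% A2) * (B1 %x% B2)),   # <vec(A1 (x) A2), vec(B1 (x) B2)>
    sum(A1 * B1) * sum(A2 * B2)       # tr(A1' B1) tr(A2' B2)
))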
\todo{continue}
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
% \begin{displaymath}
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
@@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
\end{tikzpicture}
\end{center}
\newcommand{\somedrawing} {
\coordinate (a) at (-2,-2,-2);
\coordinate (b) at (-2,-2,2);
\coordinate (c) at (-2,2,-2);
\coordinate (d) at (-2,2,2);
\coordinate (e) at (2,-2,-2);
\coordinate (f) at (2,-2,2);
\coordinate (g) at (2,2,-2);
\coordinate (h) at (2,2,2);
\draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
\fill (a) circle (0.1cm);
\fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
}
% \newcommand{\somedrawing}{
% \coordinate (a) at (-2,-2,-2);
% \coordinate (b) at (-2,-2,2);
% \coordinate (c) at (-2,2,-2);
% \coordinate (d) at (-2,2,2);
% \coordinate (e) at (2,-2,-2);
% \coordinate (f) at (2,-2,2);
% \coordinate (g) at (2,2,-2);
% \coordinate (h) at (2,2,2);
% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
% \fill (a) circle (0.1cm);
% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
% }
% \begin{figure}[ht!]
% \centering
@@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example consider two matrices $\mat{A},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $f$ be an $r$-times differentiable function, then
\begin{displaymath}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
\qquad\Leftrightarrow\qquad
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
\end{displaymath}
@@ -1372,12 +1412,12 @@ The differentials up to the 4th are
\begin{align*}
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma]\mlm{k = 1}{4} \d{t}
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma]\mlm_{k = 1}^{4} \d{t}
\end{align*}
Using the differential-to-derivative identification identity
\begin{displaymath}
\d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t}
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
\qquad\Leftrightarrow\qquad
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
\end{displaymath}
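As an illustrative numerical check of this identification for $m = 1$ (assuming the closed-form normal moment generating function $M(t) = \exp(\t{t}\mu + \frac{1}{2}\t{t}\Sigma t)$ from which the differentials above derive), a central finite difference at $t = 0$ recovers $\D M(0) = \t{\mu}$:

mu <- c(1, -0.5)
Sigma <- matrix(c(2, 0.3, 0.3, 1), 2, 2)
M <- function(t) exp(sum(t * mu) + 0.5 * c(t %*% Sigma %*% t))
h <- 1e-6
grad0 <- sapply(1:2, function(i) {
    e <- replace(numeric(2), i, h)   # step h in the i-th coordinate
    (M(e) - M(-e)) / (2 * h)         # central difference at t = 0
})
stopifnot(all.equal(grad0, mu, tolerance = 1e-6))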
@@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluating
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
\end{align*}
which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$)
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
\begin{align*}
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
&= \Sigma \\

View File

@@ -9,6 +9,17 @@
publisher = {[Royal Statistical Society, Wiley]}
}
@inproceedings{Nesterov1983,
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
author = {Nesterov, Yurii Evgen'evich},
booktitle = {Doklady Akademii Nauk},
volume = {269},
number = {3},
pages = {543--547},
year = {1983},
organization= {Russian Academy of Sciences}
}
@book{StatInf-CasellaBerger2002,
title = {{Statistical Inference}},
author = {Casella, George and Berger, Roger L.},
@@ -27,6 +38,20 @@
isbn = {0-471-98632-1}
}
@article{SymMatandJacobians-MagnusNeudecker1986,
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
author = {Magnus, Jan R. and Neudecker, Heinz},
issn = {02664666, 14694360},
url = {http://www.jstor.org/stable/3532421},
journal = {Econometric Theory},
number = {2},
pages = {157--190},
publisher = {Cambridge University Press},
urldate = {2023-10-03},
volume = {2},
year = {1986}
}
@book{MatrixAlgebra-AbadirMagnus2005,
title = {Matrix Algebra},
author = {Abadir, Karim M. and Magnus, Jan R.},
@@ -83,6 +108,31 @@
doi = {10.1080/01621459.2015.1093944}
}
@article{FisherLectures-Cook2007,
author = {Cook, R. Dennis},
journal = {Statistical Science},
month = {02},
number = {1},
pages = {1--26},
publisher = {The Institute of Mathematical Statistics},
title = {{Fisher Lecture: Dimension Reduction in Regression}},
volume = {22},
year = {2007},
doi = {10.1214/088342306000000682}
}
@article{asymptoticMLE-BuraEtAl2018,
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and Smucler, E. and Sued, M.},
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
journal = {Statistics},
volume = {52},
number = {5},
pages = {1005--1024},
year = {2018},
publisher = {Taylor \& Francis},
doi = {10.1080/02331888.2018.1467420},
}
@article{tsir-DingCook2015,
author = {Shanshan Ding and R. Dennis Cook},
title = {Tensor sliced inverse regression},
@@ -117,3 +167,106 @@
isbn = {978-94-015-8196-7},
doi = {10.1007/978-94-015-8196-7_17}
}
@book{asymStats-van_der_Vaart1998,
title = {Asymptotic Statistics},
author = {{van der Vaart}, A.W.},
year = {1998},
publisher = {Cambridge University Press},
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
isbn = {0-521-49603-9}
}
@book{measureTheory-Kusolitsch2011,
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
subtitle = {{E}ine {E}inf{\"u}hrung},
author = {Kusolitsch, Norbert},
series = {Springer-Lehrbuch},
year = {2011},
publisher = {Springer Vienna},
isbn = {978-3-7091-0684-6},
doi = {10.1007/978-3-7091-0685-3}
}
@book{optimMatrixMani-AbsilEtAl2007,
title = {{Optimization Algorithms on Matrix Manifolds}},
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
year = {2007},
publisher = {Princeton University Press},
isbn = {9780691132983},
note = {Full Online Text \url{https://press.princeton.edu/absil}}
}
@inbook{geomMethodsOnLowRankMat-Uschmajew2020,
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
booktitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
year = {2020},
publisher = {Springer International Publishing},
address = {Cham},
pages = {261--313},
isbn = {978-3-030-31351-7},
doi = {10.1007/978-3-030-31351-7_9}
}
@book{introToSmoothMani-Lee2012,
title = {Introduction to Smooth Manifolds},
author = {Lee, John M.},
year = {2012},
series = {Graduate Texts in Mathematics},
publisher = {Springer New York},
doi = {10.1007/978-1-4419-9982-5}
}
@book{introToRiemannianMani-Lee2018,
title = {Introduction to Riemannian Manifolds},
author = {Lee, John M.},
year = {2018},
series = {Graduate Texts in Mathematics},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-319-91755-9}
}
@misc{MLEonManifolds-HajriEtAl2017,
title = {Maximum Likelihood Estimators on Manifolds},
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
year = {2017},
series = {Lecture Notes in Computer Science},
publisher = {Springer International Publishing},
pages = {692--700},
doi = {10.1007/978-3-319-68445-1_80}
}
@article{relativity-Einstain1916,
author = {Einstein, Albert},
title = {Die Grundlage der allgemeinen Relativitätstheorie},
year = {1916},
journal = {Annalen der Physik},
volume = {354},
number = {7},
pages = {769--822},
doi = {10.1002/andp.19163540702}
}
@article{MultilinearOperators-Kolda2006,
title = {Multilinear operators for higher-order decompositions},
author = {Kolda, Tamara Gibson},
doi = {10.2172/923081},
url = {https://www.osti.gov/biblio/923081},
place = {United States},
year = {2006},
month = {4},
type = {Technical Report}
}
@book{aufbauAnalysis-kaltenbaeck2021,
title = {Aufbau Analysis},
author = {Kaltenb\"ack, Michael},
isbn = {978-3-88538-127-3},
series = {Berliner Studienreihe zur Mathematik},
edition = {27},
year = {2021},
publisher = {Heldermann Verlag}
}

View File

@@ -23,7 +23,7 @@
*
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
* `T` has the same dimensions as a binary vector given by
* `T` has the same dimensions as the parameter vector given by
*
* T(y) = vech(y y').
*
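As a small numerical illustration (an R sketch added for clarity, independent of the C code): for a binary vector `y` of length `p = 3`, the statistic `T(y) = vech(y y')` indeed has the dimension `p (p + 1) / 2 = 6` of the parameter vector `theta`.

vech <- function(M) M[lower.tri(M, diag = TRUE)]   # half-vectorization
y <- c(1, 0, 1)                                    # binary vector, p = 3
T.y <- vech(tcrossprod(y))                         # tcrossprod(y) is y %*% t(y)
stopifnot(length(T.y) == 3 * (3 + 1) / 2)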

View File

@@ -1,204 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
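# sanity check (illustrative addition, not part of the original script):
# `AR.inv` agrees with the numeric inverse of `AR`
stopifnot(all.equal(AR.inv(-0.55, 5), solve(AR(-0.55, 5))))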
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing one to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
### Prediction Errors: (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
# linspace <- seq(-1, 1, length.out = pj)
# matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
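# "\033[9A" is the ANSI escape sequence moving the cursor up 9 lines,
# so each call overwrites the previous progress block in place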
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = first 8 digits of Euler's constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to place the sample axis at the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing problems to be reproduced
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add traceback (see `traceback()`, from which the following is adapted)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization ensures that, regardless
# of the dimensions of `A, B`, the subspace distance lies in the
# range `[0, 1]`.
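# (For intuition: with `A`, `B` spanning subspaces of equal dimension
# `d`, one such normalized distance is
# `norm(proj(A) - proj(B), "F") / sqrt(2 * d)`, since
# `|| P_A - P_B ||_F^2 <= 2 d`; the exact `dist.subspace` normalization
# may differ.)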
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,131 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = leaf node count of a full chess search tree of depth 6 from the start pos
# > position startpos
# > go perft 6
set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to place the sample axis at the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,171 +0,0 @@
library(tensorPredictors)
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-normal"
reps <- 100 # number of simulation replications
max.iter <- 10000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # predictor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
r <- length(p)
# initial consistency checks
stopifnot(exprs = {
r == length(p)
r == length(q)
all(outer(p, sample.sizes, `<`))
})
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
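# i.e. X | Y = y is tensor normal with mean `mu_y` and mode-wise
# covariances `Deltas`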
# permute axes to place the sample axis at the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing problems to be reproduced
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add traceback (see `traceback()`, from which the following is adapted)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization ensures that, regardless
# of the dimensions of `A, B`, the subspace distance lies in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,96 +0,0 @@
library(tensorPredictors)
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # predictor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
# initial consistency checks
stopifnot(exprs = {
length(p) == length(q)
})
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
r <- length(alphas) # tensor order
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axes to place the sample axis at the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 1) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
)
)
# Iter: loss - dist
# 7190: 50.583 - 0.057
# MSE eta1
# 0.02694658
# subspace distances of alphas
# 0.043 0.035 0.034
# Frob. norm of Omega differences
# 0.815 1.777 12.756
# time user system elapsed
# 342.279 555.630 183.653

View File

@ -1,100 +0,0 @@
if (!endsWith(getwd(), "/sim")) {
setwd("sim")
}
sim.plot <- function(file.prefix, date, to.file = FALSE) {
# file.prefix <- "sim-ising-small"
# # file.prefix <- "sim-normal"
# date <- "20221012" # yyyymmdd, to match all "[0-9]{6}"
time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}"
colors <- c(
PCA = "#a11414",
HOPCA = "#2a62b6",
TSIR = "#9313b9",
GMLM = "#247407"
)
line.width <- 1.75
margins <- c(5.1, 4.1, 4.1, 0.1)
sim <- Reduce(rbind, Map(function(path) {
df <- read.csv(path)
df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1))
df
}, list.files(".", pattern = paste0(
"^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
))))
stats <- aggregate(. ~ n, sim, mean)
q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
if (to.file) {
width <- 720
png(paste(file.prefix, "-", date, ".png", sep = ""),
width = width, height = round((6 / 11) * width, -1),
pointsize = 12)
}
layout(mat = matrix(c(
1, 2,
3, 3
), 2, 2, byrow = TRUE),
widths = c(1, 1),
heights = c(12, 1), respect = FALSE)
# layout.show(3)
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "Subspace Distance",
xlab = "Sample Size", ylab = "Error")
lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width)
lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width)
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
})
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "RMSE (Prediction Error)",
xlab = "Sample Size", ylab = "Error")
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width)
lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width)
})
par(mar = rep(0, 4))
plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
legend("center", legend = names(colors), col = colors, lwd = line.width,
lty = 1, bty = "n", horiz = TRUE)
if (to.file) {
dev.off()
}
}
sim.plot("sim-ising-small", "20221012", TRUE)
sim.plot("sim-normal", "20221012", TRUE)

View File

@ -5,6 +5,11 @@ export("%x_1%")
export("%x_2%")
export("%x_3%")
export("%x_4%")
export(.projBand)
export(.projPSD)
export(.projRank)
export(.projSymBand)
export(.projSymRank)
export(CISE)
export(D)
export(D.pinv)
@ -17,6 +22,8 @@ export(ICU)
export(K)
export(K.perm)
export(LSIR)
export(La.det)
export(La.solve)
export(N)
export(NAGD)
export(PCA2d)
@ -27,18 +34,26 @@ export(S)
export(TSIR)
export(approx.kronecker)
export(colKronecker)
export(decompose.kronecker)
export(dist.kron.norm)
export(dist.kron.tr)
export(dist.projection)
export(dist.subspace)
export(exprs.all.equal)
export(gmlm_ising)
export(gmlm_tensor_normal)
export(icu_tensor_normal)
export(ising_m2)
export(ising_sample)
export(kpir.approx)
export(kpir.base)
export(kpir.ls)
export(kpir.mle)
export(kpir.momentum)
export(kpir.new)
export(kronperm)
export(mat)
export(matProj)
export(matpow)
export(matrixImage)
export(mcov)
@ -49,10 +64,19 @@ export(mtvk)
export(num.deriv)
export(num.deriv.function)
export(num.deriv2)
export(pinv)
export(projDiag)
export(projStiefel)
export(projSym)
export(reduce)
export(riccati)
export(rowKronecker)
export(rtensornorm)
export(slice.expr)
export(slice.select)
export(sylvester)
export(tensor_predictor)
export(tsym)
export(ttm)
export(ttt)
export(vech)

View File

@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
# optimize likelihood using Nesterov Accelerated Gradient Descent
params.fit <- NAGD(
fun.loss = function(params) {
# scaled negative lig-likelihood
# scaled negative log-likelihood
# eta1 alphas Omegas
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
},
fun.grad = function(params) {
# gradient of the scaled negative lig-likelihood
# gradient of the scaled negative log-likelihood
# eta1 alphas Omegas
family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
},

View File

@ -10,7 +10,7 @@
#' `i`th axis excluding the sample axis.
#'
#' @export
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) {
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) {
# observation index numbers (all axis except the sample axis)
modes <- seq_along(dim(X))[-sample.axis]

View File

@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
list(alphas = alphas, Deltas = fun.Deltas(alphas))
}
#' HPOIR subroutine for the MLE estimation given proprocessed data and initial
#' alphas, Deltas paramters
#' HOPIR subroutine for the MLE estimation given preprocessed data and initial
#' alpha, Delta parameters
#'
#' @keywords internal
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {

View File

@ -7,7 +7,7 @@
#'
#' @export
HOSVD <- function(X, nu = NULL, eps = 1e-07) {
if (!missing(nu)) {
if (!is.null(nu)) {
stopifnot(all(nu <= dim(X)))
}
@ -21,3 +21,5 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) {
list(C = C, Us = Us)
}
SVD <- function(A) .Call("C_svd", A)

View File

@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
stop("Initial loss is non-finite (", loss, ")")
}
# initialize "previous" iterate parameters
params.last <- params
prev.params <- params
# Gradient Descent Loop
line.search.tag <- FALSE # init line search state as "failure"
@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
}
# Extrapolation from previous position (momentum)
# `params.moment <- (1 + moment) * params - moment * param.last`
# `params.moment <- (1 + moment) * params - moment * prev.params`
moment <- (m[1] - 1) / m[2]
params.moment <- fun.lincomb(1 + moment, params, -moment, params.last)
params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params)
# Compute gradient at extrapolated position
if (missing(more.params)) {
@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
}
# keep track of previous parameters
params.last <- params
prev.params <- params
# check line search outcome
if (is.na(line.search.tag)) {

View File

@ -4,11 +4,21 @@
TSIR <- function(X, y, d, sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L,
eps = sqrt(.Machine$double.eps)
eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) {
if (!(is.factor(y) || is.integer(y))) {
y <- cut(y, nr.slices)
slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
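# (`ecdf(y)(y)` maps `y` to its empirical ranks in [0, 1], so cutting
# yields approximately equally filled slices)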
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
}
stopifnot(exprs = {

View File

@ -1,4 +1,4 @@
#' Approximates kronecker product decomposition.
#' Approximates Kronecker Product decomposition.
#'
#' Approximates the matrices `A` and `B` such that
#' C = A %x% B
@ -21,7 +21,7 @@
#' 123 (2000) 85-100 (pp. 93-95)
#'
#' @export
approx.kronecker <- function(C, dimA, dimB) {
approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
R <- aperm(C, c(2L, 4L, 1L, 3L))
@ -33,8 +33,115 @@ approx.kronecker <- function(C, dimA, dimB) {
svdR <- svd(R, 1L, 1L)
}
return(list(
list(
A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
)
}
#' Kronecker Product Decomposition.
#'
#' Computes the summation components `A_i`, `B_i` of a sum of
#' Kronecker products
#' C = sum_i A_i %x% B_i
#' with the minimal estimated number of summands.
#'
#' @param C desired kronecker product result.
#' @param dimA length 2 vector of dimensions of \code{A}.
#' @param dimB length 2 vector of dimensions of \code{B}.
#' @param tol tolerance of singular values of \code{C} to determine the number of
#' needed summands.
#'
#' @return list whose length is the estimated number of summation components; each
#' entry consists of a list with named entries \code{"A"} and \code{"B"} of
#' dimensions \code{dimA} and \code{dimB}.
#'
#' @examples
#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE)
#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE)
#' C <- Reduce(`+`, Map(kronecker, As, Bs))
#'
#' decomposition <- decompose.kronecker(C, c(2, 7))
#'
#' reconstruction <- Reduce(`+`, Map(function(summand) {
#' kronecker(summand[[1]], summand[[2]])
#' }, decomposition), array(0, dim(C)))
#'
#' stopifnot(all.equal(C, reconstruction))
#'
#' @export
decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) {
# rearrange into the equivalent outer product form
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE)
dim(C) <- c(prod(dimA), prod(dimB))
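# (Van Loan-Pitsianis rearrangement: each Kronecker summand
# `A_i %x% B_i` of the original `C` becomes the rank-1 term
# `vec(A_i) vec(B_i)'` of the rearranged matrix, so the number of
# summands equals its rank)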
# Singular Valued Decomposition
svdC <- La.svd(C)
# Sum of Kronecker Components
lapply(seq_len(sum(svdC$d > tol)), function(i) list(
A = matrix(svdC$d[i] * svdC$u[, i], dimA),
B = matrix(svdC$vt[i, ], dimB)
))
}
### Given that C is a Kronecker product this is a fast method but a bit
### unreliable in full generality.
# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
# R <- aperm(C, c(2L, 4L, 1L, 3L))
# dim(R) <- c(prod(dimA), prod(dimB))
# max.index <- which.max(abs(R))
# row.index <- ((max.index - 1L) %% nrow(R)) + 1L
# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L
# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1
# list(
# A = array(R[, col.index], dimA),
# B = array(R[row.index, ] / max.elem, dimB)
# )
# }
# kron <- function(A, B) {
# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1]))
# K <- aperm(outer(A, B), perm)
# dim(K) <- dim(A) * dim(B)
# K
# }
# kronperm <- function(A) {
# # force A to have even number of dimensions
# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L))
# # compute axis permutation
# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1]))
# # permute elements of A
# K <- aperm(A, perm, resize = FALSE)
# # collapse/set dimensions
# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2)
# K
# }
# p <- c(2, 3, 5)
# q <- c(3, 4, 7)
# A <- array(rnorm(prod(p)), p)
# B <- array(rnorm(prod(q)), q)
# all.equal(kronperm(outer(A, B)), kronecker(A, B))
# all.equal(kron(A, B), kronecker(A, B))
# dA <- c(2, 3, 5)
# dB <- c(3, 4, 7)
# A <- array(rnorm(prod(dA)), dA)
# B <- array(rnorm(prod(dB)), dB)
# X <- outer(A, B)
# r <- length(dim(X)) / 2
# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r))))
# K <- apply(rbind(
# (I - 1) %/% tail(dim(X), r) + 1,
# (I - 1) %% tail(dim(X), r) + 1
# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1])
# dim(K) <- head(dim(X), r) * tail(dim(X), r)
# all.equal(kronecker(A, B), K)

View File

@ -3,7 +3,7 @@
#' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
#'
#' This is equivalent to the expression
#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster.
#' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster.
#'
#' @examples
#' A1 <- matrix(rnorm(5^2), 5)
@ -16,21 +16,21 @@
#' ))
#'
#' p <- c(3, 7, 5, 2)
#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' stopifnot(all.equal(
#' dist.kron.norm(A, B),
#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")
#' dist.kron.norm(As, Bs),
#' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")
#' ))
#'
#' @export
dist.kron.norm <- function(A, B, eps = .Machine$double.eps) {
if (is.list(A) && is.list(B)) {
norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) -
2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) +
prod(unlist(Map(function(x) sum(x^2), B)))
} else if (is.matrix(A) && is.matrix(B)) {
norm2 <- sum((A - B)^2)
dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) {
if (is.list(As) && is.list(Bs)) {
norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) -
2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) +
prod(unlist(Map(function(x) sum(x^2), Bs)))
} else if (is.matrix(As) && is.matrix(Bs)) {
norm2 <- sum((As - Bs)^2)
} else {
stop("Unexpected input")
}

View File

@ -1,4 +1,4 @@
#' Porjection Distance of two matrices
#' Projection Distance of two matrices
#'
#' Defined as sine of the maximum principal angle between the column spaces
#' of the matrices

View File

@ -0,0 +1,138 @@
#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
#'
#' @export
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
max.iter = 1000L,
eps = sqrt(.Machine$double.eps),
step.size = function(iter) 1e-2 * (1000 / (iter + 1000)),
zig.zag.threashold = 5L,
patience = 5L,
nr.slices = 10L, # only for univariate `F(y) = y`
slice.method = c("cut", "ecdf"), # only for univariate `F(y) = y` and `y` is a factor or integer
logger = function(...) { }
) {
# # Special case for univariate response `vec F(y) = y`
# # Due to high computational costs we use slicing
# if (length(F) == prod(dim(F))) {
# y <- as.vector(F)
# if (!(is.factor(y) || is.integer(y))) {
# slice.method <- match.arg(slice.method)
# if (slice.method == "ecdf") {
# y <- cut(ecdf(y)(y), nr.slices)
# } else {
# y <- cut(y, nr.slices)
# }
# }
# slices <- split(seq_len(sample.size), y, drop = TRUE)
# } else {
# slices <- seq_len(sample.size)
# }
dimX <- head(dim(X), -1)
dimF <- head(dim(F), -1)
sample.axis <- length(dim(X))
modes <- seq_len(length(dim(X)) - 1)
sample.size <- tail(dim(X), 1)
betas <- Map(matrix, Map(rnorm, dimX * dimF), dimX)
Omegas <- Map(diag, dimX)
grad2_betas <- Map(array, 0, Map(dim, betas))
grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
# Keep track of the last loss to accumulate loss difference sign changes
# indicating optimization instabilities as a sign to stop
last_loss <- Inf
accum_sign <- 1
# non improving iteration counter
non_improving <- 0L
# technical access points to dynamically access a multi-dimensional array
# by its last index
`X[..., i]` <- slice.expr(X, sample.axis, index = i)
`F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
# the next expression is accessing the precomputed `mlm(F, betas)`
`BF[..., i]` <- slice.expr(BF, sample.axis, nr.axis = sample.axis, drop = FALSE) # BF[..., i]
# Iterate till a break condition triggers or till max. nr. of iterations
for (iter in seq_len(max.iter)) {
grad_betas <- Map(matrix, 0, dimX, dimF)
Omega <- Reduce(kronecker, rev(Omegas))
R2 <- array(0, dim = c(dimX, dimX))
# negative log-likelihood
loss <- 0
BF <- mlm(F, betas)
for (i in seq_len(sample.size)) {
params_i <- Omega + diag(as.vector(eval(`BF[..., i]`)))
m2_i <- ising_m2(params_i)
# accumulate loss
x_i <- as.vector(eval(`X[..., i]`))
loss <- loss - (sum(x_i * (params_i %*% x_i)) + log(attr(m2_i, "prob_0")))
R2_i <- tcrossprod(x_i) - m2_i
R1_i <- diag(R2_i)
dim(R1_i) <- dimX
for (j in modes) {
grad_betas[[j]] <- grad_betas[[j]] +
mcrossprod(
R1_i, mlm(eval(`F[..., i]`), betas[-j], modes[-j]), j,
dimB = ifelse(j != modes, dimX, dimF)
)
}
R2 <- R2 + as.vector(R2_i)
}
grad_Omegas <- Map(function(j) {
grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
dim(grad) <- dim(Omegas[[j]])
grad
}, modes)
# update and accumulate alternating loss
accum_sign <- sign(last_loss - loss) - accum_sign
# check if accumulated alternating signs exceed stopping threshold
if (abs(accum_sign) > zig.zag.threashold) { break }
# increment the non-improving counter if the loss did not decrease
if (!(loss < last_loss)) {
non_improving <- non_improving + 1L
} else {
non_improving <- 0L
}
if (non_improving > patience) { break }
# store current loss for the next iteration
last_loss <- loss
# Accumulate exponential moving average of squared gradients (RMSprop-style)
grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_betas, grad_betas)
grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_Omegas, grad_Omegas)
# logging (before parameter update)
logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)
# gradually decrease the step size
step <- if (is.function(step.size)) step.size(iter) else step.size
# Update Parameters
betas <- Map(function(beta, grad, m2) {
beta + (step / (sqrt(m2) + eps)) * grad
}, betas, grad_betas, grad2_betas)
Omegas <- Map(function(Omega, grad, m2) {
Omega + (step / (sqrt(m2) + eps)) * grad
}, Omegas, grad_Omegas, grad2_Omegas)
}
list(betas = betas, Omegas = Omegas)
}
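# A hedged usage sketch (shapes assumed, not from package documentation):
# for a binary predictor array `X` of dimension `p1 x p2 x n` and a
# response array `F` of dimension `q1 x q2 x n`,
#
# fit <- gmlm_ising(X, F, max.iter = 200L)
# B.est <- Reduce(`%x%`, rev(fit$betas))
#
# yields an estimate of the Kronecker structured reduction matrix.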

View File

@ -0,0 +1,357 @@
# p <- 5
# A <- matrix(rnorm(p^2), p)
# mat.proj("TriDiag", p)(A)
# mat.proj("SymTriDiag", p)(A)
# A
# (AA <- mat.proj("PSD", p)(A))
# mat.proj("PSD", p)(AA)
# p <- 5
# A <- matrix(rnorm(p^2), p)
# projection <- function(T2) {
# P <- pinv(T2) %*% T2
# function(A) {
# matrix(P %*% as.vector(A), nrow(A))
# }
# }
# # All equal diagonal matrix, A = a * I_p
# T2 <- t(as.vector(diag(p)))
# proj <- projection(T2)
# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Diagonal matrix, A = diag(a_1, ..., a_p)
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(diag(T2), as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( diag(diag(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Tri-Diagonal Matrix
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# triDiag.mask <- (abs(row(A) - col(A)) <= 1)
# storage.mode(triDiag.mask) <- "double"
# print.table( T2 , zero.print = ".")
# print.table( triDiag.mask , zero.print = ".")
# print.table( A * triDiag.mask , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # All equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
# as.double(row(A) == i + col(A))
# }, (1 - p):(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric all equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
# as.double(abs(row(A) - col(A)) == i)
# }, 0:(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symetric Matrix
# index <- matrix(seq_len(p^2), p)
# T2 <- Reduce(rbind, Map(function(i) {
# e_i <- index == i
# as.double(e_i | t(e_i))
# }, index[lower.tri(index, diag = TRUE)]))
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( 0.5 * (A + t(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# proj(solve(A))
# solve(proj(A))
#' Specialized version of GMLM for the tensor normal model
#'
#' The underlying algorithm is an "iterative (block) coordinate descent" method
#'
#' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
cond.threshold = 25, eps = 1e-6
) {
# rearrange `X`, `F` such that the last axis enumerates observations
if (!missing(sample.axis)) {
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
X <- aperm(X, axis.perm)
F <- aperm(F, axis.perm)
sample.axis <- length(dim(X))
}
# Get problem dimensions (observation dimensions)
dimX <- head(dim(X), -1)
dimF <- head(dim(F), -1)
modes <- seq_along(dimX)
# Ensure the Omega and beta projections are lists of the right length
# (note: `rep(NULL, n)` collapses to `NULL`, hence `vector("list", n)`)
if (!is.list(proj.Omegas)) {
proj.Omegas <- vector("list", length(modes))
}
if (!is.list(proj.betas)) {
proj.betas <- vector("list", length(modes))
}
### Phase 1: Computing initial values
meanX <- rowMeans(X, dims = length(dimX))
meanF <- rowMeans(F, dims = length(dimF))
# center X and F
X <- X - as.vector(meanX)
F <- F - as.vector(meanF)
# initialize Omega estimates as mode-wise, unconditional covariance estimates
Sigmas <- Map(diag, dimX)
Omegas <- Map(diag, dimX)
# Per mode covariance directions
# Note: (the directions are transposed!)
dirsX <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(X, sample.axis, center = FALSE))
dirsF <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(F, sample.axis, center = FALSE))
# initialization of betas as "covariance direction mappings"
betas <- Map(function(dX, dF) {
s <- min(ncol(dX), nrow(dF))
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
}, dirsX, dirsF)
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Initial value of the log-likelihood (scaled and constants dropped)
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = 0L, betas = betas, Omegas = Omegas,
resid = R, loss = loss
))
### Phase 2: (Block) Coordinate Descent
for (iter in seq_len(max.iter)) {
# update every beta (in random order)
for (j in sample.int(length(betas))) {
FxB_j <- mlm(F, betas[-j], modes[-j])
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
# Project `betas` onto their manifold
if (is.function(proj_j <- proj.betas[[j]])) {
betas[[j]] <- proj_j(betas[[j]])
}
}
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
Sigmas <- mcov(R, sample.axis, center = FALSE)
# Computing `Omega_j`s, the j'th mode precision matrices, in conjunction
# with regularization of the j'th mode covariance estimate `Sigma_j`
for (j in seq_along(Sigmas)) {
# Compute min and max eigen values
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
if (min_max[2] > cond.threshold * min_max[1]) {
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
}
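# (the diagonal shift bounds the regularized condition number:
# (1.2 * max) / (0.2 * max + min) < 6)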
# Compute (unconstrained but regularized) Omega_j as covariance inverse
Omegas[[j]] <- solve(Sigmas[[j]])
# Project Omega_j to the Omega_j's manifold
if (is.function(proj_j <- proj.Omegas[[j]])) {
Omegas[[j]] <- proj_j(Omegas[[j]])
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
# The order (project Omega_j first, then recompute Sigma_j) is important
Sigmas[[j]] <- solve(Omegas[[j]])
}
}
# store last loss and compute new value
loss.last <- loss
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
))
# check the break condition
if (abs(loss.last - loss) < eps * loss.last) {
break
}
}
list(eta1 = meanX, betas = betas, Omegas = Omegas)
}
# #### DEBUGGING
# library(tensorPredictors)
# # setup dimensions
# n <- 1e3
# p <- c(5, 7, 3)
# q <- c(2, 5, 3)
# max.iter <- 10L
# # create "true" GLM parameters
# eta1 <- array(rnorm(prod(p)), dim = p)
# betas <- Map(matrix, Map(rnorm, p * q), p)
# Omegas <- Map(function(p_j) {
# solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`)))
# }, p)
# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas)
# # compute tensor normal parameters from GMLM parameters
# Sigmas <- Map(solve, Omegas)
# mu <- mlm(eta1, Sigmas)
# # sample some test data
# sample.axis <- length(p) + 1L
# F <- array(rnorm(n * prod(q)), dim = c(q, n))
# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis)
# # setup a logging callback
# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n",
# paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ")
# )
# hist <- data.frame(
# iter = seq(0, max.iter),
# dist.B = numeric(max.iter + 1),
# dist.Omega = numeric(max.iter + 1),
# resid = numeric(max.iter + 1)
# )
# logger <- function(iter, betas, Omegas, resid) {
# dist.B <- dist.kron.norm(true.params$betas, betas)
# dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas)
# resid <- mean(resid^2)
# hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid)
# cat(do.call(sprintf, c(
# list(format, iter),
# Map(dist.subspace, betas, true.params$betas),
# dist.Omega, resid
# )))
# }
# # run the model
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger)
# # run model with Omegas in sub-manifolds of SPD matrices
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
# proj.Omegas = list(
# NULL,
# (function(nrow, ncol) {
# triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1)
# storage.mode(triDiag.mask) <- "double"
# function(A) { A * triDiag.mask }
# })(p[2], p[2]),
# function(A) diag(diag(A))
# ))
# # # Profile the fitting routine without logging
# # profvis::profvis({
# # est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter)
# # })
# with(hist, {
# plot(range(iter), range(hist[, -1]), type = "n", log = "y")
# lines(iter, resid, col = "red")
# lines(iter, dist.B, col = "green")
# lines(iter, dist.Omega, col = "blue")
# legend("topright", legend = c("resid", "dist.B", "dist.Omega"), col = c("red", "green", "blue"), lty = 1)
# })
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(est.params$Omegas)))
# # matrixImage(Reduce(kronecker, rev(true.params$Omegas)))
# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas)))
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3"))
# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3"))
# # # unlist(Map(kappa, mcov(X)))
# # # unlist(Map(kappa, Omegas))
# # # unlist(Map(kappa, Sigmas))
# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X))))
# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X))))
# # # Map(function(C) eigen(C)$values, mcov(X))
# # # kappa(Reduce(kronecker, mcov(X)))
# # kappa1.fun <- function(Omegas) Map(kappa, Omegas)
# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE)
# # eigen1.fun <- function(Omegas) {
# # Map(function(Omega) {
# # min_max <- range(eigen(Omega)$values)
# # min_max[2] / min_max[1]
# # }, Omegas)
# # }
# # eigen2.fun <- function(Omegas) {
# # Map(function(Omega) {
# # min_max <- range(eigen(Omega, TRUE, TRUE)$values)
# # min_max[2] / min_max[1]
# # }, Omegas)
# # }
# # microbenchmark::microbenchmark(
# # kappa1.fun(Omegas),
# # kappa2.fun(Omegas),
# # eigen1.fun(Omegas),
# # eigen2.fun(Omegas)
# # )

View File

@ -0,0 +1,18 @@
#' @export
ising_m2 <- function(
params, use_MC = NULL, nr_samples = 10000L,
warmup = 15L, nr_threads = 1L
) {
if (missing(use_MC)) {
use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params)
}
m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
PACKAGE = "tensorPredictors"
)
M2 <- vech.pinv(m2)
attr(M2, "prob_0") <- attr(m2, "prob_0")
M2
}
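# Hedged usage sketch (parameter layout assumed to match `ising_sample`,
# a `vech`-packed vector of length `p * (p + 1) / 2`):
#
# theta <- rnorm(3 * (3 + 1) / 2) # p = 3 dimensional Ising model
# M2 <- ising_m2(theta) # de-vectorized second moment matrix
# attr(M2, "prob_0") # attached probability of the zero event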

View File

@ -0,0 +1,11 @@
#' Sample from the Ising model
#'
#' @param nr number of samples
#' @param params Ising model parameters (numeric vector of size `p (p + 1) / 2`
#' for `p` dimensional random binary vectors)
#' @param warmup Monte-Carlo chain length before retrieving a sample
#'
#' @export
ising_sample <- function(nr, params, warmup = 15L) {
.Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors")
}
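# Hedged example; judging from its use in the simulation scripts, the
# samples come in the compressed integer representation of `mvbernoulli`
# (cf. `as.mvbmatrix`):
#
# theta <- rnorm(4 * (4 + 1) / 2) # p = 4 dimensional Ising model
# x <- ising_sample(10, theta) # 10 samples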

View File

@ -124,6 +124,30 @@ make.gmlm.family <- function(name) {
)
}
# conditional covariance of the sufficient statistic
# Cov(t(X) | Y = y)
# Note: fy is a single observation!
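# (the sufficient statistic here is t(X) = (vec X, vec X %x% vec X);
# the `H` blocks below are the corresponding moment identities, assuming
# the (tensor) normal model as suggested by `Deltas = Map(solve, Omegas)`)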
cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) {
Deltas <- Map(solve, Omegas)
E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas))
# H11 = Cov(vec X | Y = y)
H11 <- Reduce(`%x%`, rev(Deltas))
# H21 = Cov(vec X %x% vec X, vec X | Y = y)
H21 <- kronecker(E1, H11) + kronecker(H11, E1)
# H22 = Cov(vec X %x% vec X | Y = y)
H22 <- local({
e1e1 <- tcrossprod(E1, E1)
h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1)
aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2))
})
# Combine into single covariance matrix
cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2)))
}
# mean conditional Fisher Information
fisher.info <- function(Fy, eta1, alphas, Omegas) {
# retrieve dimensions

View File

@ -9,6 +9,15 @@
#' or tensor of dimensions \code{dims} iff \code{inv} is true.
#'
#' @examples
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
#' stopifnot(exprs = {
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
@ -22,15 +31,6 @@
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
#' })
#'
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' @export
mat <- function(T, modes, dims = dim(T), inv = FALSE) {
modes <- as.integer(modes)

View File

@ -7,6 +7,8 @@
#' @param sub sub-title of the plot
#' @param interpolate a logical vector (or scalar) indicating whether to apply
#' linear interpolation to the image when drawing.
#' @param new.plot logical, whether to recreate the plot area (clearing the
#' plot device). Set to \code{FALSE} to update an existing plot without
#' recreating it, which leads to smoother updating.
#' @param ... further arguments passed to \code{\link{rasterImage}}
#'
#' @examples
@ -16,13 +18,15 @@
#' @export
matrixImage <- function(A, add.values = FALSE,
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE),
digits = getOption("digits")
axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE),
digits = getOption("digits"), new.plot = TRUE
) {
# plot raster image
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
asp = asp)
if (new.plot) {
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
asp = asp)
}
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
if (missing(zlim)) {
@ -41,7 +45,7 @@ matrixImage <- function(A, add.values = FALSE,
# X/Y axes index (matches coordinates to matrix indices)
x <- seq(1, ncol(A), by = 1)
y <- seq(1, nrow(A))
if (axes) {
if (axes && new.plot) {
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
}

View File

@ -12,9 +12,13 @@
#'
#' @param X multi-dimensional array
#' @param sample.axis observation axis index
#' @param center logical, if `TRUE` (the default) compute the centered second
#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment
#' of each mode is computed. This is specifically useful in case `X` is
#' already centered.
#'
#' @export
mcov <- function(X, sample.axis = 1L) {
mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
# observation modes (axis indices)
modes <- seq_along(dim(X))[-sample.axis]
# observation dimensions
@ -26,16 +30,18 @@ mcov <- function(X, sample.axis = 1L) {
if (sample.axis != r + 1L) {
X <- aperm(X, c(modes, sample.axis))
}
# centering: Z = X - E[X]
Z <- X - c(rowMeans(X, dims = r))
# centering: X <- X - E[X]
if (center) {
X <- X - c(rowMeans(X, dims = r))
}
# estimes (unscaled) covariances for each mode
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z))
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X))
# scale by per mode "sample" size
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
# estimate trace of Kronecker product of covariances
tr.est <- prod(p) * mean(Z^2)
tr.est <- prod(p) * mean(X^2)
# as well as the current trace of the unscaled covariances
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))

View File

@ -44,16 +44,20 @@
#' ))
#'
#' @export
mcrossprod <- function(A, B, mode) {
mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) {
storage.mode(A) <- "double"
if (is.null(dim(A))) {
if (!missing(dimA)) {
dim(A) <- dimA
} else if (is.null(dim(A))) {
dim(A) <- length(A)
}
if (missing(B)) {
.Call("C_mcrossprod_sym", A, as.integer(mode))
} else {
storage.mode(B) <- "double"
if (is.null(dim(B))) {
if (!missing(dimB)) {
dim(B) <- dimB
} else if (is.null(dim(B))) {
dim(B) <- length(B)
}
.Call("C_mcrossprod", A, B, as.integer(mode))

View File

@ -75,23 +75,63 @@
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
#'
#' # Equivalent to
#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
#' # Collect all matrices in `Bs`
#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs
#'
#' # replicate transposition if of length one only
#' transposed <- if (length(transposed) == 1) {
#' rep(as.logical(transposed), length(Bs))
#' } else {
#' as.logical(transposed)
#' }
#'
#' # iteratively apply Tensor Times Matrix multiplication over modes
#' for (i in seq_along(modes)) {
#' A <- ttm(A, Bs[[i]], modes[i], transposed[i])
#' }
#'
#' # return result tensor
#' A
#' }
#'
#' @export
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
    # Collect all matrices in `Bs`
Bs <- if (is.matrix(Bs)) list(Bs) else Bs
Bs <- if (!is.list(Bs)) list(Bs) else Bs
# ensure all `B`s are matrices
Bs <- Map(as.matrix, Bs)
# replicate transposition if of length one only
transposed <- if (length(transposed) == 1) {
rep(as.logical(transposed), length(Bs))
} else {
} else if (length(transposed) == length(modes)) {
as.logical(transposed)
} else {
stop("Dim missmatch of param. `transposed`")
}
# iteratively apply Tensor Times Matrix multiplication over modes
for (i in seq_along(modes)) {
A <- ttm(A, Bs[[i]], modes[i], transposed[i])
}
# return result tensor
A
.Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors")
}
# # general usage
# dimA <- c(3, 17, 19, 2)
# dimC <- c(7, 11, 13, 5)
# A <- array(rnorm(prod(dimA)), dim = dimA)
# trans <- c(TRUE, FALSE, TRUE, FALSE)
# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA))
# C <- mlm(A, Bs, transposed = trans)
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# microbenchmark::microbenchmark(
# mlm(A, Bs, transposed = trans),
# mlm_reference(A, Bs, transposed = trans)
# )
# microbenchmark::microbenchmark(
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]),
# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# )

View File

@ -1,14 +1,14 @@
#' Duplication Matrix
#'
#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric
#' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric
#' @examples
#' p <- 8
#' A <- matrix(rnorm(p^2), p, p)
#' A <- A + t(A)
#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A)))
#' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A)))
#'
#' @export
D <- function(p) {
Dup <- function(p) {
# setup `vec` and `vech` element indices (zero indexed)
vec <- matrix(NA_integer_, p, p)
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
@ -27,10 +27,10 @@ D <- function(p) {
#'
#' @examples
#' p <- 5
#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p)))
#' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p)))
#'
#' @export
D.pinv <- function(p) {
Dup.pinv <- function(p) {
    Dp <- Dup(p)
solve(crossprod(Dp), t(Dp))
}
@ -119,7 +119,7 @@ K <- function(dim, mode) {
#'
#' @examples
#' p <- 7
#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p)))
#' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p)))
#'
#' @export
N <- function(p) {

View File

@ -11,6 +11,34 @@
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
#' \code{transposed} is true.
#'
#' @examples
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' nrowB <- sample.int(10, 1)
#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB)
#'
#' C <- ttm(A, B, mode)
#'
#'     dimC <- ifelse(seq_along(dimA) != mode, dimA, nrowB)
#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' ncolB <- sample.int(10, 1)
#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode])
#'
#' C <- ttm(A, B, mode, transposed = TRUE)
#'
#' C.ref <- ttm(A, t(B), mode)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' @export
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
storage.mode(T) <- storage.mode(M) <- "double"

View File

@ -31,3 +31,52 @@ vech.pinv <- function(a) {
# de-vectorized matrix
matrix(a[vech.pinv.index(p)], p, p)
}
# vech <- function(A) {
# stopifnot(all(nrow(A) == dim(A)))
# axis <- seq_len(nrow(A))
# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# A[rowSums(grid - 1) < nrow(A)]
# }
# p <- 4
# X <- matrix(rnorm(p^2), p)
# vech(outer(X, X))
# vech(outer(c(X), c(X)))
# sort(unique(c(sym(outer(X, X)))))
# sort(vech(sym(outer(X, X))))
# # (I <- matrix(c(
# # 1, 1, 1,
# # 2, 1, 1,
# # 3, 1, 1,
# # 2, 2, 1,
# # 3, 2, 1,
# # 3, 3, 1,
# # 2, 1, 2,
# # 3, 1, 2,
# # 3, 2, 2,
# # 3, 1, 3
# # ), ncol = 3, byrow = TRUE))
# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1)
# # p <- 3
# # ord <- 4
# # A <- array(seq_len(p^ord), rep(p, ord))
# # axis <- seq_len(nrow(A))
# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# # array(rowSums(grid - 1), dim(A))
# # A[rowSums(grid - 1) < nrow(A)]
# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i))))

View File

@ -1,2 +1,3 @@
PKG_LIBS = $(BLAS_LIBS) $(FLIBS)
PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS)
# PKG_CFLAGS = -pg

View File

@ -0,0 +1,138 @@
#ifndef INCLUDE_GUARD_R_API_H
#define INCLUDE_GUARD_R_API_H
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions, i.e. only the `Rf_<name>` and
// `R_<name>` prefixed versions are available
#define R_NO_REMAP
#include <stdint.h> // uint32_t, uint64_t, ...
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// NULL pointer (not defined memory, array, vector, ...)
#ifndef NULL
#define NULL ((void*)0)
#endif
// Truth values
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
// Convenience conversion function similar to `asInteger`, `asReal`, ...
#define NA_UNSIGNED (-((unsigned long)1))
static inline unsigned long asUnsigned(SEXP _val) {
int val = Rf_asInteger(_val);
if (val == NA_INTEGER || val < 0) {
return NA_UNSIGNED;
}
return (unsigned long)val;
}
// Wrap BLAS and LAPACK bindings (to be consistent with my own interface, and
// to avoid passing scalars by reference (memory address))
/** y <- a x + y */
static inline void axpy(
const int dim,
const double a,
const double* x, const int incx,
double* y, const int incy
) {
F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
}
/** Scale a 1d array `x <- a x` */
static inline void scale(
const int dim,
const double a,
double *x, const int incx
) {
F77_CALL(dscal)(&dim, &a, x, &incx);
}
/** Dot product */
static inline double dot(
const int dim,
const double* x, const int incx,
const double* y, const int incy
) {
return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
}
/** 1d array linear combination `z <- a x + b y` */
// TODO: optimize! (if needed at all?)
static inline void lincomb(
const int dim,
const double a, const double* x, const int incx,
const double b, const double* y, const int incy,
double* z, const int incz
) {
for (int i = 0; i < dim; ++i) {
z[i * incz] = a * x[i * incx] + b * y[i * incy];
}
}
/**
 * A sufficient Pseudo-Random-Number-Generator (PRNG) of the Xorshift family
 *
 * With a parameterized seed/state this custom PRNG can be used in a
 * thread-safe manner!
 *
 * For single threaded operations the PRNGs provided by `R` are preferred. But
 * they are _not_ thread-safe. The following is a simple PRNG usable in a
 * multi-threaded application.
 *
 * See: https://en.wikipedia.org/wiki/Xorshift
 * SchachHoernchen
 */
*/
static inline uint64_t rot64(uint64_t val, int shift) {
return (val << shift) | (val >> (64 - shift));
}
// (internal) PRNG state/seed type
typedef uint64_t rng_seed_t[4];
// Hookup the PRNG via its seed to R's random number generation utilities
static inline void init_seed(rng_seed_t seed) {
GetRNGstate();
for (size_t i = 0; i < 4; ++i) {
seed[i] = 0;
for (size_t j = 0; j < 64; ++j) {
seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
}
}
PutRNGstate();
}
// PRNG of the Xorshift family
// The least significant 32 bits are not reliable, use most significant 32 bits
static inline uint64_t rand_u64(rng_seed_t seed) {
uint64_t e = seed[0] - rot64(seed[1], 7);
seed[0] = seed[1] ^ rot64(seed[1], 13);
seed[1] = seed[2] + rot64(seed[3], 37);
seed[2] = seed[3] + e;
seed[3] = e + seed[0];
return seed[3];
}
// With an externally supplied seed, every thread can have its own seed and as
// such we can use this as a thread-safe alternative to R's `unif_rand()`.
static inline double unif_rand_thrd(rng_seed_t seed) {
return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
}
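// Usage sketch (illustrative): per-thread uniform draws
//
//     rng_seed_t seed;
//     init_seed(seed);                  // once per thread, called from the
//                                       // main thread (touches R's PRNG state)
//     double u = unif_rand_thrd(seed);  // uniform draw, thread-safe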
#endif /* INCLUDE_GUARD_R_API_H */

View File

@ -0,0 +1,185 @@
#ifndef INCLUDE_GUARD_BIT_UTILS_H
#define INCLUDE_GUARD_BIT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
#include <stdbool.h> // bool (used by the fallback implementations below)
#if (defined(__GNUC__) && defined(__BMI2__))
#include <x86intrin.h>
#include <bmi2intrin.h> // _pdep_u32
#endif
#ifdef _MSC_VER
#include <intrin.h> // _BitScanForward
#endif
/**
* Computes the parity of a word (0 for even bit count and 1 otherwise)
*/
#ifdef __GNUC__
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
#else
static inline int bitParity32(uint32_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
static inline int bitParity64(uint64_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
#endif
/**
* Counts the number of set bits (`1`s in binary) in the number `x`
*/
#ifdef __GNUC__ /* POPulation COUNT */
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
#else
static inline int bitCount32(uint32_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
static inline int bitCount64(uint64_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
#endif
/**
* Gets the index of the LSB (least significant bit)
*
* @condition `x != 0`, for `x == 0` undefined behaviour
*/
#ifdef __GNUC__
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
#elif _MSC_VER
// find the index of the least significant set bit (cf. `__builtin_ctz`)
static inline int bitScanLS32(uint32_t x) {
    unsigned long index;
    _BitScanForward(&index, x);
    return (int)index;
}
// note: `_BitScanForward64` requires a 64-bit target
static inline int bitScanLS64(uint64_t x) {
    unsigned long index;
    _BitScanForward64(&index, x);
    return (int)index;
}
#else
static inline int bitScanLS32(uint32_t x) {
int ctz = 0; // result storing the Count of Trailing Zeros
    bool empty; // stores if a bit has not been found (search area is empty)
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint32_t)( 1)));
return ctz;
}
static inline int bitScanLS64(uint64_t x) {
int ctz = 0;
bool empty;
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
x >>= 32 * empty;
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint64_t)( 1)));
return ctz;
}
#endif
/**
* Parallel DEPosit (aka PDEP)
*
* Writes the `val` bits into the positions of the set bits in `mask`.
*
* Example:
* val: **** **** **** 1.1.
* mask: 1... 1... 1... 1...
* res: 1... .... 1... ....
*/
#if (defined(__GNUC__) && defined(__BMI2__))
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
return _pdep_u32(val, mask);
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
return _pdep_u64(val, mask);
}
#else
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
uint32_t res = 0;
for (uint32_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
uint64_t res = 0;
for (uint64_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
#endif
/**
* Gets the next lexicographically ordered permutation of an n-bit word.
*
 * Let `val` be a bit-word with `n` bits set, then this procedure computes an
 * `n`-bit word which is the next element in the lexicographically ordered
 * sequence of `n`-bit words. For example
*
* val -> bitNextPerm(val)
* 00010011 -> 00010101
* 00010101 -> 00010110
* 00010110 -> 00011001
* 00011001 -> 00011010
* 00011010 -> 00011100
* 00011100 -> 00100011
*
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
*
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
*/
static inline uint32_t bitNextPerm32(uint32_t val) {
// Sets all least significant 0-bits of val to 1
uint32_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
}
static inline uint64_t bitNextPerm64(uint64_t val) {
// Sets all least significant 0-bits of val to 1
uint64_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
}
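// Usage sketch (illustrative): iterate all `k`-bit subsets of `n < 32`
// positions in lexicographic order (requires `0 < k <= n`)
//
//     for (uint32_t val = ((uint32_t)1 << k) - 1; val < ((uint32_t)1 << n);
//          val = bitNextPerm32(val)) {
//         // ... use the bit mask `val` ...
//     }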
#endif /* INCLUDE_GUARD_BIT_UTILS_H */

View File

@ -7,27 +7,96 @@
// );
/* Tensor Times Matrix a.k.a. Mode Product */
extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
/* Multi Linear Multiplication (iterated mode products) */
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops);
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
extern SEXP mtvk(SEXP A, SEXP Bs);
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode);
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode);
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
extern SEXP mcrossprod_sym(SEXP A, SEXP mode);
extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode);
// /* Higher Order PCA */
// extern SEXP hopca(SEXP X);
/* Singular Value Decomposition */
extern SEXP R_svd(SEXP A);
// /* Iterative Cyclic Updating for the Tensor Normal Distribution */
// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter);
// /* Generalized tensor normal using NAGD */
// extern SEXP R_gmlm_tensor_normal(
// SEXP X, SEXP Fy,
// SEXP eta_bar, SEXP alphas, SEXP Omegas,
// SEXP max_iter, SEXP max_line_iter
// );
/* Solve linear equation system A X = B */
extern SEXP R_solve(SEXP A, SEXP B);
/* Determinant of a matrix */
extern SEXP R_det(SEXP A);
// /* Unscaled PMF of the Ising model with natural parameters `_params` */
// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_partition_func_exact(SEXP _params);
// /* Estimated partition function of the Ising model with natural parameters `_params` */
// extern SEXP R_ising_partition_func_MC(SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_m2_exact(SEXP _params);
// // extern SEXP R_ising_m2_MC64(SEXP _params);
// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup);
// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads);
/* Sample from the Ising model */
extern SEXP R_ising_sample(SEXP, SEXP, SEXP);
// Interface to the Ising second moment function
extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP);
/* List of registered routines (a.k.a. C entry points) */
static const R_CallMethodDef CallEntries[] = {
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
{"C_ttm", (DL_FUNC) &ttm, 4},
{"C_mtvk", (DL_FUNC) &mtvk, 2},
{"C_mcrossprod", (DL_FUNC) &mcrossprod, 3},
{"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2},
{"C_ttm", (DL_FUNC) &R_ttm, 4},
{"C_mlm", (DL_FUNC) &R_mlm, 4},
{"C_mtvk", (DL_FUNC) &mtvk, 2},
{"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3},
{"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2},
{"C_svd", (DL_FUNC) &R_svd, 1},
{"C_solve", (DL_FUNC) &R_solve, 2},
{"C_det", (DL_FUNC) &R_det, 1},
{"C_ising_sample", (DL_FUNC) &R_ising_sample, 3},
{"C_ising_m2", (DL_FUNC) &R_ising_m2, 5},
// {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2},
// {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1},
// {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1},
// {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1},
// {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3},
// {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2},
// {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1},
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
// {"C_hopca", (DL_FUNC) &hopca, 1},
// {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT
// {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT
{NULL, NULL, 0}
};
/* Restrict C entry points to registered routines. */
/**
* Restrict C entry points to registered routines.
*
* NOTE: Naming convention: `R_init_<PACKAGE-NAME>`
*/
void R_init_tensorPredictors(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE);

View File

@ -0,0 +1,77 @@
#ifndef INCLUDE_GUARD_INT_UTILS_H
#define INCLUDE_GUARD_INT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
// /**
// * Integer logarithm, the biggest power `p` such that `2^p <= x`.
// */
// static int ilog2(uint64_t x) {
// int log = 0;
// while (x >>= 1) {
// log++;
// }
// return log;
// }
/**
 * Integer square root of `x`, that is `floor(sqrt(x))`
*/
static uint64_t isqrt(uint64_t x) {
// implements a binary search
uint64_t root = 0;
uint64_t left = 0; // left boundary
uint64_t right = x + 1; // right boundary
while(left + 1UL < right) {
root = (left + right) / 2UL;
if (root * root <= x) {
left = root;
} else {
right = root;
}
}
return left;
}
/**
* Inverse to the triangular numbers
*
* Given a positive number `x = p (p + 1) / 2` it computes `p` if possible.
* In case there is no positive integer solution `0` is returned.
*
 * Note: this follows immediately from the quadratic formula applied to
 * `p^2 + p - 2 x = 0`, that is `p = (sqrt(8 x + 1) - 1) / 2`.
*/
static uint32_t invTriag(uint32_t x) {
uint64_t root = isqrt(8UL * (uint64_t)x + 1UL);
if (root * root != 8UL * x + 1UL) {
return 0;
}
return (root - 1) / 2;
}
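// Example: an Ising parameter vector of length `15 = 5 * 6 / 2` corresponds
// to dimension `invTriag(15) == 5`; non-triangular lengths yield `0`.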
// /**
// * Number of sub-sets (including empty set) of max-size
// *
// * It computes, with `binom` being the binomial coefficient, the following sum
// *
// * sum_{i = 0}^k binom(n, i)
// */
// static uint64_t nrSubSets(uint64_t n, uint64_t k) {
// uint64_t sum = 1, binom = 1;
// for (uint64_t i = 1; i <= k; ++i) {
// binom *= n--;
// binom /= i;
// sum += binom;
// }
// return sum;
// }
#endif /* INCLUDE_GUARD_INT_UTILS_H */

View File

@ -0,0 +1,181 @@
#ifndef INCLUDE_GUARD_ISING_MCMC_H
#define INCLUDE_GUARD_ISING_MCMC_H
#include "R_api.h"
/** Sample a single value from the Ising model via a Markov-Chain Monte-Carlo
 * method given the Ising model parameters in compact half-vectorized form.
*
* f(x) = p0(params) exp(vech(x x')' params)
*
 * @important must be wrapped between `GetRNGstate()` and `PutRNGstate()`
 * calls. For example
*
* GetRNGstate()
* for (size_t sample = 0; sample < nr_samples; ++sample) {
* ising_mcmc_vech(warmup, dim, dim, params, &X[sample * dim]);
* }
 *     PutRNGstate()
*/
static inline void ising_mcmc_vech(
const size_t warmup,
const size_t dim, const double* params,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand() * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
            // Always add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// Thread-safe version of `ising_mcmc_vech()` (using the thread-safe PRNG)
static inline void ising_mcmc_vech_thrd(
const size_t warmup,
const size_t dim, const double* params,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand_thrd(seed) * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
            // Always add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
            X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
/** Sample a single value from the Ising model via a Markov-Chain Monte-Carlo
 * method given the Ising model parameters in symmetric matrix form `A`.
*
* f(x) = prob_0(A) exp(x' A x)
*
* The relation to the natural parameters `params` as used in the half-vectorized
* version `ising_mcmc_vech()`
*
* f(x) = prob_0(params) exp(vech(x x')' params)
*
* is illustrated via an example with `dim = 5`;
*
* p0 [ p0 p1/2 p2/2 p3/2 p4/2 ]
* p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ]
* params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ]
* p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ]
* p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ]
*
 * or, the other way around, given the symmetric matrix form `A` converted to
* the natural parameters
*
* [ a00 a10 a20 a30 a40 ] a00
* [ a10 a11 a21 a31 a41 ] 2*a10 a11
* A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22
* [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33
* [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44
*
*/
static inline void ising_mcmc_mat(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// Thread-safe version of `ising_mcmc_mat()` (using the thread-safe PRNG)
static inline void ising_mcmc_mat_thrd(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
#endif /* INCLUDE_GUARD_ISING_MCMC_H */

View File

@ -0,0 +1,401 @@
/** Methods computing or estimating the second moment of the Ising model given
* the natural parameters of the p.m.f. in exponential family form. That is
* f(x) = p0(params) exp(vech(x x')' params)
* where `params` are the natural parameters. The scaling constant `p0(params)`
* is also the zero event `x = (0, ..., 0)` probability.
*
 * Three different versions are provided.
* - exact: Exact method computing the true second moment `E[X X']` given the
* parameters `params`. This method has a runtime of `O(2^dim)` where
* `dim` is the dimension of the random variable `X`. This means it is
 *          infeasible for bigger (like 25 and above) values of `dim`.
* - MC: Monte-Carlo method used in case the exact method is not feasible.
* - MC Thrd: Multi-Threaded Monte-Carlo method.
*
 * The natural parameters `params` of the Ising model form a `dim (dim + 1) / 2`
 * dimensional numeric vector containing the log-odds. The indexing scheme into
 * the parameters, corresponding to the log-odds of single or two way
 * interactions with indices `i, j`, follows the half-vectorization scheme
*
* i = 4'th row => Symmetry => Half-Vectorized Storage
*
* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]
* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]
* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]
* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]
* [ * * * * * * ] [ * * * 16 * * ] [ * * ]
* [ * * * * * * ] [ * * * 17 * * ] [ * ]
*
*/
#include "R_api.h"
#include "bit_utils.h"
#include "int_utils.h"
#include "ising_MCMC.h"
#ifndef __STDC_NO_THREADS__
#include <threads.h>
#endif
////////////////////////////////////////////////////////////////////////////////
// TODO: read //
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
// and
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
// as well as do the following PROPERLY
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
// void ising_m2_exact_mat(
// const size_t dim, const size_t ldA, const double* A,
// double* M2
// ) {
// double sum_0 = 1.0;
// const uint32_t max_event = (uint32_t)1 << dim;
// (void)memset(M2, 0, dim * dim * sizeof(double));
// for (uint32_t X = 1; X < max_event; ++X) {
// // Evaluate quadratic form `X' A X` using symmetry of `A`
// double XtAX = 0.0;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// XtAX += A[i * (ldA + 1)];
// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
// }
// }
// const double prob_X = exp(XtAX);
// sum_0 += prob_X;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// for (uint32_t Z = Y; Z; Z &= Z - 1) {
// M2[i * dim + bitScanLS32(Z)] += prob_X;
// }
// }
// }
// const double prob_0 = 1.0 / sum_0;
// for (size_t j = 0; j < dim; ++j) {
// for (size_t i = 0; i < j; ++i) {
// M2[j * dim + i] = M2[i * dim + j];
// }
// for (size_t i = j; i < dim; ++i) {
// M2[i * dim + j] *= prob_0;
// }
// }
// }
double ising_m2_exact(const size_t dim, const double* params, double* M2) {
// Accumulator of sum of all (unscaled) probabilities
double sum_0 = 1.0;
// max event (actually upper bound)
const uint32_t max_event = (uint32_t)1 << dim;
// Length of parameters
const size_t len = dim * (dim + 1) / 2;
// Initialize M2 to zero
(void)memset(M2, 0, len * sizeof(double));
// Iterate all `2^dim` binary vectors `X`
for (uint32_t X = 1; X < max_event; ++X) {
// Dot product `<vech(X X'), params>`
double dot_X = 0.0;
// all bits in `X`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
// add single and two way interaction log odds
for (uint32_t Z = Y; Z; Z &= Z - 1) {
dot_X += params[I + bitScanLS32(Z)];
}
}
// (unscaled) probability of current event `X`
const double prob_X = exp(dot_X);
sum_0 += prob_X;
        // Accumulate set bits probability for the first and second moment `E[X X']`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
for (uint32_t Z = Y; Z; Z &= Z - 1) {
M2[I + bitScanLS32(Z)] += prob_X;
}
}
}
// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
const double prob_0 = 1.0 / sum_0;
for (size_t i = 0; i < len; ++i) {
M2[i] *= prob_0;
}
return prob_0;
}
// Monte-Carlo method using Gibbs sampling for the second moment
double ising_m2_MC(
/* options */ const size_t nr_samples, const size_t warmup,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Dirty trick (reuse output memory for precise counting)
_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
uint64_t* counts = (uint64_t*)M2;
// Initialize counts to zero
(void)memset(counts, 0, len * sizeof(uint64_t));
// Allocate memory to store Markov-Chain value
int* X = (int*)R_alloc(dim, sizeof(int));
// Create/Update R's internal PRGN state
GetRNGstate();
// Spawn chains
for (size_t sample = 0; sample < nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech(warmup, dim, params, X);
// Accumulate component counts
uint64_t* count = counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
// Write R's internal PRNG state back (if needed)
PutRNGstate();
// Compute means from counts
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)(nr_samples);
}
return -1.0; // TODO: also compute prob_0
}
// In case the compiler supports standard C threads
#ifndef __STDC_NO_THREADS__
// Worker thread to calling thread communication struct
typedef struct thrd_data {
rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value
size_t nr_samples; // Nr. of samples this worker handles
    size_t warmup;          // Monte-Carlo Chain burn-in length
size_t dim; // Random variable dimension
const double* params; // Ising model parameters
int* X; // Working memory to store current binary sample
uint32_t* counts; // (output) count of single and two way interactions
double prob_0; // (output) zero event probability estimate
} thrd_data_t;
// Worker thread function
int thrd_worker(thrd_data_t* data) {
// Extract data as thread local variables (for convenience)
const size_t dim = data->dim;
int* X = data->X;
// Initialize counts to zero
(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));
    // Spawn Monte-Carlo Chains (one for every sample)
for (size_t sample = 0; sample < data->nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);
// Accumulate component counts
uint32_t* count = data->counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
return 0;
}
double ising_m2_MC_thrd(
/* options */ const size_t nr_samples, const size_t warmup,
const size_t nr_threads,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Allocate working and self memory for worker threads
int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
    thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_t));
thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
    // Provide instructions for the workers and dispatch them
for (size_t tid = 0; tid < nr_threads; ++tid) {
// Every thread needs its own PRNG seed!
init_seed(threads_data[tid].seed);
        // divide work among workers (more or less) equally, with the first
        // worker (tid = 0) also getting the remainder `nr_samples % nr_threads`
threads_data[tid].nr_samples = nr_samples / nr_threads + (
tid ? 0 : nr_samples % nr_threads
);
threads_data[tid].warmup = warmup;
threads_data[tid].dim = dim;
threads_data[tid].params = params;
        // Every worker gets its disjoint working memory
threads_data[tid].X = &Xs[dim * tid];
threads_data[tid].counts = &counts[len * tid];
// dispatch the worker
thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
}
    // Join (wait for) all workers
for (size_t tid = 0; tid < nr_threads; ++tid) {
thrd_join(threads[tid], NULL);
}
// Accumulate worker results into first (tid = 0) worker counts result
for (size_t tid = 1; tid < nr_threads; ++tid) {
for (size_t i = 0; i < len; ++i) {
counts[i] += counts[tid * len + i];
}
}
    // convert discrete counts into means
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)nr_samples;
}
return -2.0; // TODO: also compute prob_0
}
#endif /* !__STDC_NO_THREADS__ */
/* Implementation of `.Call` entry point */
////////////////////////////////////////////////////////////////////////////////
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
extern SEXP R_ising_m2(
SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
) {
// Extract and validate arguments
size_t protect_count = 0;
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
protect_count++;
}
    // Parameters are given either in symmetric matrix form or as natural
    // parameters in the half-vectorized memory layout
size_t dim;
double* params;
if (Rf_isMatrix(_params)) {
if (Rf_nrows(_params) != Rf_ncols(_params)) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Convert to natural parameters
dim = Rf_nrows(_params);
params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));
double* A = REAL(_params);
for (size_t j = 0, I = 0; j < dim; ++j) {
for (size_t i = j; i < dim; ++i, ++I) {
params[I] = (1 + (i != j)) * A[i + j * dim];
}
}
} else {
// Get Ising random variable dimension `dim` from "half-vectorized" parameters
// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
dim = invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
" `p` is the dimension of the random variable");
}
params = REAL(_params);
}
    // Determine the method to use and validate if possible
const int use_MC = Rf_asLogical(_use_MC);
if (use_MC == NA_LOGICAL) {
Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
}
// Allocate result vector
SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
++protect_count;
    // Aside from the second moment we also report the zero event probability
    // (inverse partition function), the scaling factor for the Ising model p.m.f.
double prob_0 = -1.0;
if (use_MC) {
// Convert and validate arguments for the Monte-Carlo methods
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
const size_t nr_threads = asUnsigned(_nr_threads);
if (nr_threads == 0 || nr_threads > 256) {
Rf_error("Invalid 'nr_thread' value");
}
if (nr_threads == 1) {
// Single threaded Monte-Carlo method
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
} else {
// Multi-Threaded Monte-Carlo method if provided, otherwise use
// the single threaded version with a warning
#ifdef __STDC_NO_THREADS__
Rf_warning("Multi-Threading NOT supported, using fallback.");
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
#else
prob_0 = ising_m2_MC_thrd(
nr_samples, warmup, nr_threads,
dim, params, REAL(_M2)
);
#endif
}
} else {
// Exact method (ignore other arguments), only validate dimension
        if (24 < dim) {
            Rf_error("Dimension '%d' too big for exact method (max 24)", (int)dim);
}
// and call the exact method
prob_0 = ising_m2_exact(dim, params, REAL(_M2));
}
    // Attach the zero event probability as an attribute to the computed second moment
SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
++protect_count;
Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);
    // release SEXPs to the garbage collector
UNPROTECT(protect_count);
return _M2;
}
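// Usage sketch from R (illustrative; name and argument order as registered in
// init.c and validated above):
//
//     m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
//                 PACKAGE = "tensorPredictors")
//     attr(m2, "prob_0")   # zero event probability (p.m.f. scaling factor)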

View File

@ -0,0 +1,72 @@
#ifndef INCLUDE_GUARD_ISING_SAMPLE_H
#define INCLUDE_GUARD_ISING_SAMPLE_H
#include "R_api.h"
#include "int_utils.h"
#include "ising_MCMC.h"
// .Call interface to draw samples from the Ising model
extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) {
// Counts number of protected SEXP's to give them back to the garbage collector
size_t protect_count = 0;
// Parse and validate arguments
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
    // Determine parameter mode (natural parameter vector or symmetric matrix),
    // either `m` for "Matrix" or `v` for "Vector"
const char param_type = Rf_isMatrix(_params) ? 'm' : 'v';
// In case of matrix parameters check for square matrix
if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Get problem dimension from parameter size
const size_t dim = (param_type == 'm')
? Rf_nrows(_params)
: invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Error determining dimension.");
}
// Ensure parameters are numeric
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
++protect_count;
}
double* params = REAL(_params);
// Allocate result sample
SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples));
++protect_count;
int* X = INTEGER(_X);
// Call appropriate sampling routine for every sample to generate
GetRNGstate();
if (param_type == 'm') {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]);
}
} else {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
}
}
PutRNGstate();
// Release protected SEXPs to the garbage collector
UNPROTECT(protect_count);
return _X;
}
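// Usage sketch from R (illustrative; name and argument order as registered in
// init.c):
//
//     X <- .Call("C_ising_sample", nr_samples, params, warmup,
//                PACKAGE = "tensorPredictors")
//     dim(X)   # `dim x nr_samples` integer matrix of 0/1 samples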
#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */

View File

@ -8,6 +8,51 @@
#define FCONE
#endif
void mcrossprod(
const int rank,
const double* A, const int* dimA,
const double* B, const int* dimB,
const int mode,
double* C
) {
// the strides
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
// Note: Middle stride is ignored (to be consistent with sym version)
int stride[3] = {1, 0, 1};
for (int i = 0; i < rank; ++i) {
int size = dimA[i];
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
}
    // employ BLAS dgemm (Double GEneral Matrix-Matrix) operation
    // (C = alpha op(A) op(B) + beta C, op is the identity or transposition)
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: special case C = A_(1) B_(1)^T
// C = 1 A B^T + 0 C
F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2],
&one, A, &dimA[mode], B, &dimB[mode],
&zero, C, &dimA[mode] FCONE FCONE);
} else {
        // Other modes written as accumulated sum of matrix products
// initialize C to zero
memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double));
// Sum over all modes > mode
for (int i2 = 0; i2 < stride[2]; ++i2) {
// C = 1 A^T B + 1 C
F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0],
&one, &A[i2 * stride[0] * dimA[mode]], &stride[0],
&B[i2 * stride[0] * dimB[mode]], &stride[0],
&one, C, &dimA[mode] FCONE FCONE);
}
}
}
/**
* Tensor Mode Crossproduct
*
@ -21,72 +66,58 @@
* @param B multi-dimensional array
* @param m mode index (1-indexed)
*/
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
// get zero indexed mode
int mode = asInteger(m) - 1;
// get dimension attributes
SEXP dimA = getAttrib(A, R_DimSymbol);
SEXP dimB = getAttrib(B, R_DimSymbol);
// Check if both `A` and `B` are real-valued
if (!isReal(A) || !isReal(B)) {
error("Type missmatch, both `A` and `B` must be real-valued");
}
// get dimension attributes
SEXP dimA_sexp = getAttrib(A, R_DimSymbol);
SEXP dimB_sexp = getAttrib(B, R_DimSymbol);
// dimension type validation
if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) {
error("Dimensions must be integer vectors");
}
// validate dimensions
int rank = length(dimA_sexp);
if (rank != length(dimB_sexp)) {
error("Dimension mismatch");
}
// validate mode (0-indexed, must be smaller than the tensor order)
if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) {
if (mode < 0 || rank <= mode) {
error("Illegal mode");
}
// the strides
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
// Note: Middle stride is ignored (to be consistent with sym version)
int stride[3] = {1, 0, 1};
for (int i = 0; i < length(dimA); ++i) {
int size = INTEGER(dimA)[i];
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
// check margin matching of `A` and `B`
if (i != mode && size != INTEGER(dimB)[i]) {
error("Dimension missmatch");
}
// get raw pointers to dimensions
int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP));
int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP));
// finaly, check for `A` and `B` dimensions to match
for (int i = 0; i < rank; ++i) {
if (i != mode && dimA[i] != dimB[i]) {
error("Dimension mismatch");
}
}
// Result dimensions
int nrowC = INTEGER(dimA)[mode];
int ncolC = INTEGER(dimB)[mode];
// create response matrix C
SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC));
SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode]));
// raw data access pointers
double* a = REAL(A);
double* b = REAL(B);
double* c = REAL(C);
// Call C mode crossprod subroutine
mcrossprod(
rank, // tensor rank of both `A` and `B`
REAL(A), dimA, // mem. addr. of A, dim(A)
REAL(B), dimB, // mem. addr. of B, dim(B)
mode, // the crossproduct mode to compute
REAL(C) // return value memory addr.
);
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: special case C = A_(1) B_(1)^T
// C = 1 A B^T + 0 C
F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2],
&one, a, &nrowC, b, &ncolC,
&zero, c, &nrowC FCONE FCONE);
} else {
// Other modes writen as accumulated sum of matrix products
// initialize C to zero
memset(c, 0, nrowC * ncolC * sizeof(double));
// Sum over all modes > mode
for (int i2 = 0; i2 < stride[2]; ++i2) {
// C = 1 A^T B + 1 C
F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0],
&one, &a[i2 * stride[0] * nrowC], &stride[0],
&b[i2 * stride[0] * ncolC], &stride[0],
&one, c, &nrowC FCONE FCONE);
}
}
// release C to grabage collector
// release C to garbage collector
UNPROTECT(1);
return C;
@ -104,7 +135,7 @@ extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
* @param A multi-dimensional array
* @param m mode index (1-indexed)
*/
extern SEXP mcrossprod_sym(SEXP A, SEXP m) {
extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) {
// get zero indexed mode
int mode = asInteger(m) - 1;

tensorPredictors/src/mlm.c Normal file
View File

@ -0,0 +1,160 @@
#include "R_api.h"
#include "ttm.h"
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
) {
// Compute total size of `A`, `C` and required working memory
int sizeA = 1; // `prod(dim(A))`
int sizeC = 1; // `prod(dim(C))`
int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included)
for (int i = 0; i < ord; ++i) {
sizeA *= dimA[i];
sizeC *= dimC[i];
work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i];
}
    // If requesting the working memory size, stop here and return the size
if (work_mem == NULL) {
return work_size + ord;
}
// Copy dimensions of A to intermediate temp. dim
int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer
memcpy(dimT, dimA, ord * sizeof(int));
    // Setup work memory hooks (two swappable blocks)
double* tmp1 = work_mem;
double* tmp2 = work_mem + (work_size >> 1);
// Multi Linear Multiplication is an iterated application of TTM
for (int i = 0; i < nrhs; ++i) {
// Get current `B` dimensions (only implicitly given)
int nrowB = dimC[modes[i]];
int ncolB = dimA[modes[i]];
if (trans && trans[i]) {
nrowB = dimA[modes[i]];
ncolB = dimC[modes[i]];
}
// Tensor Times Matrix (`modes[i]` mode products)
ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB,
1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0,
tmp1);
// Update intermediate temp dim
dimT[modes[i]] = dimC[modes[i]];
// Swap tmp1 <-> tmp2
double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
}
// dAXPY (a x + y) with `x = tmp2` and `y = beta C`
if (beta != 0.0) {
for (int i = 0; i < sizeC; ++i) {
C[i] *= beta;
}
} else {
memset(C, 0, sizeC * sizeof(double));
}
axpy(sizeC, alpha, tmp2, 1, C, 1);
return 0;
}
/**
* Multi Linear Multiplication (`R` binding)
*/
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) {
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
int ord = Rf_length(dimA);
// Check if `A` is a real valued tensor
if (!Rf_isReal(A) || Rf_isNull(dimA)) {
Rf_error("Param. `A` need to be a real valued array");
}
// Validate that `Bs` is a list
if (!Rf_isNewList(Bs)) {
Rf_error("Param. `Bs` need to be a list of matrices");
}
if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) {
Rf_error("Param. type missmatch, expected modes as ints and ops logical");
}
// Number of `B` matrices (Nr of Right Hand Side objects)
int nrhs = Rf_length(Bs);
// further parameter validations
if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) {
Rf_error("Dimension missmatch (Params length differ)");
}
// Get modes and operations for Bs (transposed or not)
int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms`
int* trans = INTEGER(ops);
// Compute result dimensions `dim(C)` while extracting `B` data pointers
SEXP dimC = PROTECT(Rf_duplicate(dimA));
const double** bs = (const double**)R_alloc(nrhs, sizeof(double*));
for (int i = 0; i < nrhs; ++i) {
// Extract right hand side matrices (`B` matrices from list)
SEXP B = VECTOR_ELT(Bs, i);
if (!(Rf_isMatrix(B) && Rf_isReal(B))) {
UNPROTECT(1);
Rf_error("Param. `Bs` need to be a list of real matrices");
}
int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol));
bs[i] = REAL(B);
// Convert 1-indexed modes to be 0-indexed
modes[i] = INTEGER(ms)[i] - 1;
        // Check if mode is out of range (indexing a non-existing mode of `A`)
if (!((0 <= modes[i]) && (modes[i] < ord))) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1);
}
// Check if `i`th mode of `A` matches corresponding `B` dimension
if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1);
}
INTEGER(dimC)[modes[i]] = dimB[!!trans[i]];
}
// Now, compute `C`s size `prod(dim(C))`
int sizeC = 1;
for (int i = 0; i < ord; ++i) {
sizeC *= INTEGER(dimC)[i];
}
// Allocate response `C`
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
Rf_setAttrib(C, R_DimSymbol, dimC);
    // query the required working memory size and allocate it
int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL);
double* work_memory = (double*)R_alloc(work_size, sizeof(double));
// Compute Multi-Linear Multiplication (call subroutine)
(void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory);
// release C to the hands of the garbage collector
UNPROTECT(2);
return C;
}

View File

@ -0,0 +1,35 @@
#ifndef INCLUDE_GUARD_MLM_H
#define INCLUDE_GUARD_MLM_H
/**
* Multi Linear Multiplication
*
 * C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs - 1] op(Bs[nrhs - 1]) + beta C
*
* @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix
* is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff
* `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is
* always the identity.
* @param modes integer vector of length `nrhs` specifying the product modes.
 * @param nrhs number of right hand side matrices `Bs[i]`
 * @param dimA integer vector of length `ord`, the dimensions `dim(A)`
 * @param dimC integer vector of length `ord`, the dimensions `dim(C)`
 * @param ord order (number of modes) of the tensors `A` and `C`
 * @param A input tensor memory addr.
 * @param Bs array of length `nrhs` of right hand side matrix memory addrs.
 * @param ldBs leading dimensions of the `Bs[i]`, may be `NULL`
 *  (note: currently ignored by `ttm`)
 *
* @param alpha scaling factor
* @param beta scaling factor
* @param C output memory addr.
*
* @param work_mem NULL or temporary working memory of size
* `2 * prod(pmax(dim(A), dim(C))) + ord`
*/
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
);
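// Usage sketch (two-call pattern, cf. `R_mlm()` in mlm.c): query the required
// working memory size with `work_mem = NULL`, allocate, then compute
//
//     int size = mlm(trans, modes, nrhs, dimA, dimC, ord,
//                    1.0, A, Bs, NULL, 0.0, C, NULL);
//     double* work = (double*)R_alloc(size, sizeof(double));
//     (void)mlm(trans, modes, nrhs, dimA, dimC, ord,
//               1.0, A, Bs, NULL, 0.0, C, work);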
#endif /* INCLUDE_GUARD_MLM_H */

View File

@ -0,0 +1,37 @@
// /**
// * A sufficient Pseudo-Random-Number-Generator (PRNG) of the Xorshift family
// *
// * For single threaded operations the PRNGs provided by `R` are preferred. But
// * they are _not_ thread-safe. The following is a simple PRNG usable in a
// * multi-threaded application.
// *
// * See: https://en.wikipedia.org/wiki/Xorshift
// * SchachHoernchen
// */
// #ifndef INCLUDE_GUARD_RANDOM_H
// #define INCLUDE_GUARD_RANDOM_H
// #include <stdint.h> // uint32_t, uint64_t
// static inline uint64_t rot64(uint64_t val, int shift) {
// return (val << shift) | (val >> (64 - shift));
// }
// // PRNG of the Xorshift family
// // @note the least significant 32 bits are not reliable, use most significant 32 bits
// static inline uint64_t rand_u64(uint64_t seed[4]) {
// uint64_t e = seed[0] - rot64(seed[1], 7);
// seed[0] = seed[1] ^ rot64(seed[1], 13);
// seed[1] = seed[2] + rot64(seed[3], 37);
// seed[2] = seed[3] + e;
// seed[3] = e + seed[0];
// return seed[3];
// }
// static inline double unif_rand_u64(uint64_t seed[4]) {
// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
// }
// #endif

tensorPredictors/src/svd.c Normal file
View File

@ -0,0 +1,112 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// Singular Value Decomposition
// @note assumes passed arguments to be "clean", "properly checked"
int svd(
const int p, const int q,
double* A, const int ldA,
double* d,
double* U, const int ldU,
double* Vt, const int ldVt,
double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0
) {
// LAPACK information return variable (in case of an error, LAPACK sets this
// to a non-zero value)
int info = 0;
// check if work memory is supplied, if _not_ query for appropriate size and
// allocate the memory
if (!work_mem || work_size < 1 || !i_work) {
// create (declare) work memory
i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int));
double tmp; // variable to store work memory size query result
work_mem = &tmp; // point to `tmp` as it will contain the result of
// the work memory size query
// request appropriate work memory size
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
// allocate work memory
work_size = (int)tmp; // "read" work memory size query result
work_mem = (double*)R_alloc(work_size, sizeof(double));
// in case of an error, return error code stored in `info`
if (info) { return info; }
}
// actual SVD computation
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
return info;
}
// Singular Valued Decomposition (R binding)
// @note educational purpose, same as `La.svd`
extern SEXP R_svd(SEXP A) {
// Check if we got a real-valued matrix
if (!isMatrix(A) || !isReal(A)) {
error("Require a real-valued matrix");
}
// extract matrix dimensions
int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP));
int p = dims[0]; // = nrow(A)
int q = dims[1]; // = ncol(A)
int m = p < q ? p : q; // = min(p, q) = min(dim(A))
// Check if dimensions are not degenerate
if (p < 1 || q < 1) {
error("Expected positive matrix dimensions");
}
// create R objects to store the SVD result `A = U diag(d) V^T`
SEXP U = PROTECT(allocMatrix(REALSXP, p, m));
SEXP d = PROTECT(allocVector(REALSXP, m));
SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q));
// Call C SVD routine
int info = svd(
p, q, // nrow(A), ncol(A)
REAL(A), p, // mem. addr. of A, ldA (leading dimension of A)
REAL(d), // mem. addr. of d
REAL(U), p, // mem. addr. of U, ldU (leading dimension of U)
REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T)
0, -1, 0 // work mem. pointer, work mem. size and int work mem
); // set to `0, -1, 0` to indicate "unknown"
// Check LAPACK info
if (info) {
error("error code %d from LAPACK routine 'dgesdd'", info);
}
    // Create R list containing SVD components
SEXP result = PROTECT(allocVector(VECSXP, 3));
SEXP names = PROTECT(allocVector(STRSXP, 3));
SET_VECTOR_ELT(result, 0, d);
SET_VECTOR_ELT(result, 1, U);
SET_VECTOR_ELT(result, 2, Vt);
SET_STRING_ELT(names, 0, mkChar("d"));
SET_STRING_ELT(names, 1, mkChar("u"));
SET_STRING_ELT(names, 2, mkChar("vt"));
setAttrib(result, R_NamesSymbol, names);
// Release created objects to the garbage collector
UNPROTECT(5);
return result;
}
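// Usage sketch from R (illustrative; name as registered in init.c):
//
//     .Call("C_svd", A, PACKAGE = "tensorPredictors")  # list(d, u, vt),
//                                                      # like `La.svd(A)`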

View File

@ -1,104 +1,48 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions, i.e. only the `Rf_<name>` and
// `R_<name>` prefixed versions are available
#define R_NO_REMAP
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#ifndef FCONE
#define FCONE
#endif
#include "ttm.h"
/**
* Tensor Times Matrix a.k.a. Mode Product
*
* @param A multi-dimensional array
* @param B matrix
* @param m mode index (1-indexed)
* @param op boolean if `B` is transposed
*/
extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
void ttm(
const int transB, const int mode,
const int* dimA, const int ordA, const int nrowB, const int ncolB,
const double alpha,
const double* A,
const double* B, const int ldB, // TODO: ldB is IGNORED!!!
const double beta,
double* C
) {
// get zero indexed mode
const int mode = Rf_asInteger(m) - 1;
// get dimension attribute of A
SEXP dim = Rf_getAttrib(A, R_DimSymbol);
// operation on `B` (transposed or not)
const int trans = Rf_asLogical(op);
// as well as `B`s dimensions
const int nrow = Rf_nrows(B);
const int ncol = Rf_ncols(B);
// validate mode (mode must be smaller than the nr of dimensions)
if (mode < 0 || Rf_length(dim) <= mode) {
Rf_error("Illegal mode");
}
// and check if B is a matrix of non degenetate size
if (!Rf_isMatrix(B)) {
Rf_error("Expected a matrix as second argument");
}
if (!Rf_nrows(B) || !Rf_ncols(B)) {
Rf_error("Zero dimension detected");
}
// check matching of dimensions
if (INTEGER(dim)[mode] != (trans ? nrow : ncol)) {
Rf_error("Dimension missmatch");
}
// calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C),
int sizeC = 1;
// and the strides
// Strides are the "leading" and "trailing" dimensions of the matricized
// tensor `A` in the following matrix-matrix multiplications
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
int stride[3] = {1, INTEGER(dim)[mode], 1};
for (int i = 0; i < Rf_length(dim); ++i) {
int size = INTEGER(dim)[i];
// check for non-degenetate dimensions
if (!size) {
Rf_error("Zero dimension detected");
}
sizeC *= (i == mode) ? (trans ? ncol : nrow) : size;
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
int stride[3] = {1, dimA[mode], 1};
for (int i = 0; i < ordA; ++i) {
stride[0] *= (i < mode) ? dimA[i] : 1;
stride[2] *= (i > mode) ? dimA[i] : 1;
}
// create response object C
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
// raw data access pointers
double* a = REAL(A);
double* b = REAL(B);
double* c = REAL(C);
// Tensor Times Matrix / Mode Product
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: (A x_1 op(B))_(1) = op(B) A_(1) as a single Matrix-Matrix
// multiplication
F77_CALL(dgemm)(trans ? "T" : "N", "N",
(trans ? &ncol : &nrow), &stride[2], &stride[1], &one,
b, &nrow, a, &stride[1],
&zero, c, (trans ? &ncol : &nrow)
// mode 1: C = alpha (A x_1 op(B))_(1) + beta C
// = alpha op(B) A_(1) + beta C
// as a single Matrix-Matrix multiplication
F77_CALL(dgemm)(transB ? "T" : "N", "N",
(transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha,
B, &nrowB, A, &stride[1],
&beta, C, (transB ? &ncolB : &nrowB)
FCONE FCONE);
} else {
// Other modes can be written as blocks of matrix multiplications
// (A x_m op(B))_(m)' = A_(m)' op(B)'
// C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2
// = alpha A_(m)' op(B)' + beta C_:,:,i2
for (int i2 = 0; i2 < stride[2]; ++i2) {
F77_CALL(dgemm)("N", trans ? "N" : "T",
&stride[0], (trans ? &ncol : &nrow), &stride[1], &one,
&a[i2 * stride[0] * stride[1]], &stride[0], b, &nrow,
&zero, &c[i2 * stride[0] * (trans ? ncol : nrow)], &stride[0]
F77_CALL(dgemm)("N", transB ? "N" : "T",
&stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha,
&A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB,
&beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0]
FCONE FCONE);
}
}
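// Shape check for the loop above (hypothetical values, transB = 0):
// for dimA = {2, 3, 4}, mode = 1 and a 5 x 3 matrix B, dgemm("N", "T")
// multiplies each 2 x 3 block of A with the 3 x 5 transpose of B,
// filling the 4 slices of the 2 x 5 x 4 result C, one per index i2.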
/*
// (reference implementation)
// Tensor Times Matrix / Mode Product for `op(B) == B`
@@ -114,13 +58,76 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
}
}
*/
}
/**
* Tensor Times Matrix a.k.a. Mode Product
*
* @param A multi-dimensional array
* @param B matrix
* @param m mode index (1-indexed)
* @param op boolean if `B` is transposed
*/
extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
// get zero indexed mode
const int mode = Rf_asInteger(m) - 1;
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
// operation on `B` (transposed or not)
const int transB = Rf_asLogical(op);
// as well as `B`'s dimensions
const int nrowB = Rf_nrows(B);
const int ncolB = Rf_ncols(B);
// validate mode (mode must be smaller than the nr of dimensions)
if (mode < 0 || Rf_length(dimA) <= mode) {
Rf_error("Illegal mode");
}
// and check if B is a matrix of non-degenerate size
if (!Rf_isMatrix(B)) {
Rf_error("Expected a matrix as second argument");
}
if (!Rf_nrows(B) || !Rf_ncols(B)) {
Rf_error("Zero dimension detected");
}
// check matching of dimensions
if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) {
Rf_error("Dimension missmatch");
}
// calc nr of response elements (size of C)
// `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)`
int sizeC = 1;
for (int i = 0; i < Rf_length(dimA); ++i) {
int size = INTEGER(dimA)[i];
// check for non-degenerate dimensions
if (!size) {
Rf_error("Zero dimension detected");
}
sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size;
}
// create response object C
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
// Tensor Times Matrix / Mode Product
ttm(transB, mode,
INTEGER(dimA), Rf_length(dimA), nrowB, ncolB,
1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C));
// finally, set result dimensions
SEXP newdim = PROTECT(Rf_allocVector(INTSXP, Rf_length(dim)));
for (int i = 0; i < Rf_length(dim); ++i) {
INTEGER(newdim)[i] = (i == mode) ? (trans ? ncol : nrow) : INTEGER(dim)[i];
SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA)));
for (int i = 0; i < Rf_length(dimA); ++i) {
INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i];
}
Rf_setAttrib(C, R_DimSymbol, newdim);
Rf_setAttrib(C, R_DimSymbol, dimC);
// unprotect C (and dimC); the caller takes ownership of the result
UNPROTECT(2);
return C;
}
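
To be callable from R via `.Call`, `R_ttm` still has to be registered with the dynamic loader. A minimal registration sketch follows; the file name `init.c`, the entry name "C_ttm", and the package name in the init-function suffix are assumptions, not part of this commit.

// Hypothetical init.c: registers R_ttm so it can be invoked from R as
// .Call("C_ttm", A, B, mode, transposed).
#include <Rinternals.h>
#include <R_ext/Rdynload.h>

extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op);

static const R_CallMethodDef CallEntries[] = {
    {"C_ttm", (DL_FUNC) &R_ttm, 4},
    {NULL, NULL, 0}
};

// the suffix must match the shared library name ("tensorPredictors" is
// assumed here)
void R_init_tensorPredictors(DllInfo* dll) {
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    R_useDynamicSymbols(dll, FALSE);
}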

View File

@@ -0,0 +1,21 @@
#ifndef INCLUDE_GUARD_TTM_H
#define INCLUDE_GUARD_TTM_H
#include "R_api.h"
/**
* Tensor Times Matrix Subroutine
*
* @attention Assumes all parameters are valid!
*/
void ttm(
/* options */ const int transB, const int mode,
/* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB,
/* scalar */ const double alpha,
/* tensor */ const double* A,
/* matrix */ const double* B, const int ldB,
/* scalar */ const double beta,
/* tensor */ double* C
);
#endif /* INCLUDE_GUARD_TTM_H */
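
A small usage sketch of the bare subroutine, assuming it is compiled and linked against R's headers and BLAS as in the package build; the test values and the standalone `main` are illustrations only. It computes C = A x_2 B for a 2 x 3 x 4 tensor A and a 5 x 3 matrix B, so C has dimensions (2, 5, 4).

// Hypothetical standalone check of ttm(); not part of the package sources.
#include <stdio.h>
#include "ttm.h"

int main(void) {
    const int dimA[3] = {2, 3, 4};
    double A[24], B[15], C[40];
    for (int i = 0; i < 24; ++i) A[i] = (double) (i + 1);
    for (int i = 0; i < 15; ++i) B[i] = (i % 4 == 0) ? 1.0 : 0.0;
    // A x_2 B with B of size 5 x 3 (transB = 0, mode = 1 zero-indexed):
    // ncol(B) must match dimA[1]; beta = 0, so C needs no initialization
    ttm(0, 1, dimA, 3, 5, 3, 1.0, A, B, 5, 0.0, C);
    printf("C[0] = %g\n", C[0]);    // first element of the 2 x 5 x 4 result
    return 0;
}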