This commit is contained in:
Daniel Kapla 2023-11-14 14:35:43 +01:00
parent 40132c2565
commit 90cd46e209
48 changed files with 2852 additions and 1353 deletions

View File

@@ -90,10 +90,11 @@
%%% Custom operators with either one or two arguments (limits)
\makeatletter
%%% Multi-Linear Multiplication
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
% Save first argument as \arg@one
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
% Check for second argument
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
% specialization for one or two arguments, both versions use saved first argument
\def\mlm@one{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
@@ -101,14 +102,31 @@
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
}
% this command's single argument is the second argument of \mlm; it gobbles the `^`
\def\mlm@two^#1{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Circle (Iterated Outer Product)
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
\def\outer@one{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
}
\def\outer@two#1{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
}
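% Usage sketch (illustrative): \mlm takes the lower limit after `_` and an
% optional upper limit after `^`, while \outer takes one or two brace groups,
% e.g. $\ten{F}_y\mlm_{k\in[r]}\mat{\alpha}_k$, $\ten{X}\mlm_{k = 1}^{r}\mat{\Omega}_k$,
% $\outer{k\in[r]}\vec{\mat{A}_k}$ and $\outer{k = 1}{r}\vec{\mat{A}_k}$.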
%%% Big Kronecker Product (with overflowing limits)
% Save first argument as \arg@one
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
@@ -134,17 +152,15 @@
\newcommand{\algorithmicbreak}{\textbf{break}}
\newcommand{\Break}{\State \algorithmicbreak}
\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi-dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
This allows per-axis reductions that drastically reduce the total number of parameters for higher-order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real-world examples are provided.
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -256,23 +272,23 @@ A straightforward idea for parameter estimation is to use Gradient Descent. For
\begin{algorithm}[ht]
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
\begin{algorithmic}[1]
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\\
\State $t \leftarrow 1$
\Comment{step counter}
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
\Comment{artificial first step}
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
\Comment{momentum extrapolation weights}
\\
\Repeat \Comment{repeat until convergence}
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
\State $\delta^{(t + 1)} \leftarrow \delta$
\Break
\EndIf
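As a complementary illustration, the following is a minimal R sketch of the scheme in Algorithm~\ref{alg:NAGD}. All names are placeholders and several details are assumptions: the objective `l` and its gradient `grad.l` are supplied by the caller, the momentum weight update is the classical Nesterov rule, the Armijo test is written as a sufficient-increase condition (the gradient step ascends $l$), and the convergence test is replaced by a fixed iteration budget.

# minimal sketch of Nesterov accelerated gradient with Armijo-type
# backtracking line search (illustrative only)
nagd <- function(l, grad.l, theta0, c.armijo = 1e-4, delta = 0.1, gamma = 0.5, max.iter = 100L) {
    theta.old <- theta.new <- theta0             # theta^(t-1), theta^(t)
    m <- c(0, 1)                                 # momentum extrapolation weights
    for (t in seq_len(max.iter)) {
        # momentum extrapolation
        M <- theta.new + ((m[1] - 1) / m[2]) * (theta.new - theta.old)
        G <- grad.l(M)
        # line search over delta = gamma^-1 delta^(t), delta^(t), gamma delta^(t), ...
        for (d in delta * gamma^seq(-1, 20)) {
            theta.tmp <- M + d * G               # gradient step
            # Armijo-type sufficient increase condition
            if (l(theta.tmp) >= l(theta.new) + c.armijo * d * sum(G^2)) {
                theta.old <- theta.new
                theta.new <- theta.tmp
                delta <- d
                break
            }
        }
        m <- c(m[2], (1 + sqrt(1 + 4 * m[2]^2)) / 2)  # classical Nesterov weights
    }
    theta.new
}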
@@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
\caption{\label{fig:sim-normal}Simulation Normal}
\end{figure}
@@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
\end{figure}
@@ -433,7 +449,7 @@ where each individual block is given by
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$, which, with $\ten{F}_y$ as before, is given by
\begin{displaymath}
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
\end{displaymath}
with
\begin{align*}
@@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t
%
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
\end{align*}}%
The next step is to identify the Hessians from the second differentials in a similar manner as before.
{\allowdisplaybreaks\begin{align*}
@@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a sim
\qquad{\color{gray} (p \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
\qquad{\color{gray} (p_j q_j \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
\qquad{\color{gray} (p_j^2 \times p)}
\\
&\d^2 l(\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\qquad \begin{aligned}
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
\end{aligned}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
\\
&\d^2 l(\mat{\Omega}_j) \\
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
\\
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a sim
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
\end{aligned}
\end{align*}}%
@@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
\begin{align*}
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
\mathcal{I}_{1,j+r+1}
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
\mathcal{I}_{j+1,l+1}
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
\mathcal{I}_{j+1,l+r+1}
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
\mathcal{I}_{j+r+1,l+r+1}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
\end{align*}
@@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati
\begin{theorem}\label{thm:mlm_mat}
Let $\ten{A}$ be a tensor of order $r$ with dimensions $q_1\times ... \times q_r$ and, for $k = 1, ..., r$, let $\mat{B}_k$ be matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$,
\begin{displaymath}
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
\end{displaymath}
\end{theorem}
A well-known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
\begin{displaymath}
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
\end{displaymath}
Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
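This special case is easy to verify numerically; the following is a small R sketch (the dimensions are arbitrary, `%x%` is R's Kronecker product):

# check vec(B1 A B2') = (B2 kron B1) vec(A) on random matrices
B1 <- matrix(rnorm(3 * 2), 3, 2)
B2 <- matrix(rnorm(4 * 2), 4, 2)
A  <- matrix(rnorm(2 * 2), 2, 2)
# c(.) is the column-major vectorization of a matrix in R
stopifnot(all.equal(c(B1 %*% A %*% t(B2)), c((B2 %x% B1) %*% c(A))))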
@@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
Let $\ten{A}$ be a tensor of order $2 r + s$, where $r > 0$ and $s \geq 0$, with dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and, for $k = 1, ..., r$, let $\mat{B}_k$ be matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$. Then
\begin{displaymath}
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
\end{displaymath}
\end{theorem}
A special case of the theorem above is given for tensors represented as a Kronecker product. To this end, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
\begin{displaymath}
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
=
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
=
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
\end{displaymath}
In case of $r = 2$ this means
\begin{align*}
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
\end{align*}
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
\begin{displaymath}
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
\end{displaymath}
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$.
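The product-of-traces form above can likewise be checked numerically; a short R sketch with random matrices of arbitrary sizes:

# <vec(A1 kron A2), vec(B1 kron B2)> = tr(A1' B1) tr(A2' B2)
A1 <- matrix(rnorm(6), 2, 3); B1 <- matrix(rnorm(6), 2, 3)
A2 <- matrix(rnorm(4), 2, 2); B2 <- matrix(rnorm(4), 2, 2)
lhs <- sum((A1 %x% A2) * (B1 %x% B2))  # inner product of the vectorizations
rhs <- sum(A1 * B1) * sum(A2 * B2)     # tr(A1' B1) tr(A2' B2), via tr(A' B) = sum(A * B)
stopifnot(all.equal(lhs, rhs))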
\todo{continue} \todo{continue}
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as % Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
% \begin{displaymath} % \begin{displaymath}
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))}) % \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
@@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
\end{tikzpicture}
\end{center}
% \newcommand{\somedrawing}{
%     \coordinate (a) at (-2,-2,-2);
%     \coordinate (b) at (-2,-2,2);
%     \coordinate (c) at (-2,2,-2);
%     \coordinate (d) at (-2,2,2);
%     \coordinate (e) at (2,-2,-2);
%     \coordinate (f) at (2,-2,2);
%     \coordinate (g) at (2,2,-2);
%     \coordinate (h) at (2,2,2);
%     \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
%     \fill (a) circle (0.1cm);
%     \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
% }
% \begin{figure}[ht!]
%     \centering
@@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example consider two matrices $\mat{A},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $f$ be an $r$ times differentiable function, then
\begin{displaymath}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
\qquad\Leftrightarrow\qquad
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
\end{displaymath}
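To make the identification concrete for $r = 2$, consider the quadratic form $f(\mat{x}) = \t{\mat{x}}\mat{A}\mat{x}$ with $\d^2 f(\mat{x}) = \t{(\d\mat{x})}(\mat{A} + \t{\mat{A}})\d\mat{x}$, so the symmetrized second derivative is $\mat{A} + \t{\mat{A}}$. A small R sketch checks this against a central finite-difference Hessian:

# finite-difference Hessian of f(x) = x' A x equals A + A'
A  <- matrix(rnorm(9), 3, 3)
f  <- function(x) sum(x * (A %*% x))
x0 <- rnorm(3); h <- 1e-4
H  <- matrix(0, 3, 3)  # central finite-difference approximation
for (i in 1:3) for (j in 1:3) {
    ei <- replace(numeric(3), i, h)
    ej <- replace(numeric(3), j, h)
    H[i, j] <- (f(x0 + ei + ej) - f(x0 + ei - ej) -
                f(x0 - ei + ej) + f(x0 - ei - ej)) / (4 * h^2)
}
stopifnot(all.equal(H, A + t(A), tolerance = 1e-6))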
@@ -1372,12 +1412,12 @@ The differentials up to the 4th are
\begin{align*}
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma]\mlm_{k = 1}^{4} \d{t}
\end{align*}
Using the differential-to-derivative identification identity
\begin{displaymath}
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
\qquad\Leftrightarrow\qquad
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
\end{displaymath}
@@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
\end{align*}
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
\begin{align*}
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
&= \Sigma \\

View File

@@ -9,6 +9,17 @@
publisher = {[Royal Statistical Society, Wiley]}
}
@inproceedings{Nesterov1983,
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
author = {Nesterov, Yurii Evgen'evich},
booktitle = {Doklady Akademii Nauk},
volume = {269},
number = {3},
pages = {543--547},
year = {1983},
organization= {Russian Academy of Sciences}
}
@book{StatInf-CasellaBerger2002,
title = {{Statistical Inference}},
author = {Casella, George and Berger, Roger L.},
@@ -27,6 +38,20 @@
isbn = {0-471-98632-1}
}
@article{SymMatandJacobians-MagnusNeudecker1986,
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
author = {Magnus, Jan R. and Neudecker, Heinz},
issn = {02664666, 14694360},
url = {http://www.jstor.org/stable/3532421},
journal = {Econometric Theory},
number = {2},
pages = {157--190},
publisher = {Cambridge University Press},
urldate = {2023-10-03},
volume = {2},
year = {1986}
}
@book{MatrixAlgebra-AbadirMagnus2005,
title = {Matrix Algebra},
author = {Abadir, Karim M. and Magnus, Jan R.},
@@ -83,6 +108,31 @@
doi = {10.1080/01621459.2015.1093944}
}
@article{FisherLectures-Cook2007,
author = {Cook, R. Dennis},
journal = {Statistical Science},
month = {02},
number = {1},
pages = {1--26},
publisher = {The Institute of Mathematical Statistics},
title = {{Fisher Lecture: Dimension Reduction in Regression}},
volume = {22},
year = {2007},
doi = {10.1214/088342306000000682}
}
@article{asymptoticMLE-BuraEtAl2018,
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and Smucler, Ezequiel and Sued, Marcela},
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
journal = {Statistics},
volume = {52},
number = {5},
pages = {1005--1024},
year = {2018},
publisher = {Taylor \& Francis},
doi = {10.1080/02331888.2018.1467420},
}
@article{tsir-DingCook2015,
author = {Shanshan Ding and R. Dennis Cook},
title = {Tensor sliced inverse regression},
@@ -117,3 +167,106 @@
isbn = {978-94-015-8196-7},
doi = {10.1007/978-94-015-8196-7_17}
}
@book{asymStats-van_der_Vaart1998,
title = {Asymptotic Statistics},
author = {{van der Vaart}, A.W.},
year = {1998},
publisher = {Cambridge University Press},
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
isbn = {0-521-49603-9}
}
@book{measureTheory-Kusolitsch2011,
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
subtitle = {{E}ine {E}inf{\"u}hrung},
author = {Kusolitsch, Norbert},
series = {Springer-Lehrbuch},
year = {2011},
publisher = {Springer Vienna},
isbn = {978-3-7091-0684-6},
doi = {10.1007/978-3-7091-0685-3}
}
@book{optimMatrixMani-AbsilEtAl2007,
title = {{Optimization Algorithms on Matrix Manifolds}},
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
year = {2007},
publisher = {Princeton University Press},
isbn = {9780691132983},
note = {Full Online Text \url{https://press.princeton.edu/absil}}
}
@inbook{geomMethodsOnLowRankMat-Uschmajew2020,
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
booktitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
year = {2020},
publisher = {Springer International Publishing},
address = {Cham},
pages = {261--313},
isbn = {978-3-030-31351-7},
doi = {10.1007/978-3-030-31351-7_9}
}
@book{introToSmoothMani-Lee2012,
title = {Introduction to Smooth Manifolds},
author = {Lee, John M.},
year = {2012},
series = {Graduate Texts in Mathematics},
publisher = {Springer New York},
doi = {10.1007/978-1-4419-9982-5}
}
@book{introToRiemannianMani-Lee2018,
title = {Introduction to Riemannian Manifolds},
author = {Lee, John M.},
year = {2018},
series = {Graduate Texts in Mathematics},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-319-91755-9}
}
@misc{MLEonManifolds-HajriEtAl2017,
title = {Maximum Likelihood Estimators on Manifolds},
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
year = {2017},
journal = {Lecture Notes in Computer Science},
publisher = {Springer International Publishing},
pages = {692--700},
doi = {10.1007/978-3-319-68445-1_80}
}
@article{relativity-Einstain1916,
author = {Einstein, Albert},
title = {Die Grundlage der allgemeinen Relativitätstheorie},
year = {1916},
journal = {Annalen der Physik},
volume = {354},
number = {7},
pages = {769--822},
doi = {10.1002/andp.19163540702}
}
@article{MultilinearOperators-Kolda2006,
title = {Multilinear operators for higher-order decompositions},
author = {Kolda, Tamara Gibson},
doi = {10.2172/923081},
url = {https://www.osti.gov/biblio/923081},
place = {United States},
year = {2006},
month = {4},
type = {Technical Report}
}
@book{aufbauAnalysis-kaltenbaeck2021,
title = {Aufbau Analysis},
author = {Kaltenb\"ack, Michael},
isbn = {978-3-88538-127-3},
series = {Berliner Studienreihe zur Mathematik},
edition = {27},
year = {2021},
publisher = {Heldermann Verlag}
}

View File

@@ -23,7 +23,7 @@
*
* with the parameter vector `theta` and a statistic `T` of `y`. The real-valued
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
* `T` has the same dimensions as the parameter vector given by
*
*      T(y) = vech(y y').
*
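* For example, with `p = 2` and `y = (1, 0)'` the outer product is
* `y y' = [[1, 0], [0, 0]]`, so `T(y) = vech(y y') = (1, 0, 0)'` has
* length `p (p + 1) / 2 = 3`.
*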

View File

@@ -1,204 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
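# Quick sanity checks for the helpers above (deterministic, so the seeded
# RNG stream below is untouched): the closed-form AR inverse must match the
# numeric inverse, and `proj` must produce idempotent projection matrices.
local({
P <- proj(cbind(1:4, (1:4)^2))
stopifnot(all.equal(AR.inv(-0.55, 4), solve(AR(-0.55, 4))), all.equal(P, P %*% P))
})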
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
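# Illustrative usage sketch (kept commented out so the seeded RNG stream of
# the simulation below stays untouched): the default layout appends the
# sample axis last, i.e. `dim(X) = c(p, n)` and `dim(Fy) = c(q, n)`.
# sim <- sample.data(10, eta1, alphas, Omegas)
# stopifnot(all(dim(sim$X) == c(p, 10)), all(dim(sim$Fy) == c(q, 10)))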
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing problems to be reproduced
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# set up an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
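# For example, once `rep.info` is set inside the simulation loop below, a
# `warning("test")` raised there appends a block with `file`, `n`, `rep`,
# `.Random.seed`, the condition message and a traceback to "<file>.log".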
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows logging state information that
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
### Prediction Errors (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
# linspace <- seq(-1, 1, length.out = pj)
# matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
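# "\033[9A" is the ANSI escape that moves the cursor up 9 lines, so the
# nine-line status block printed below is redrawn in place on each iteration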
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = first 8 digits of Euler's constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
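# e.g. for pj = 4 and qj = 2 this yields the pj x qj matrix
# cbind(seq(-1, 1, length.out = 4), seq(-1, 1, length.out = 4)^2),
# i.e. one polynomial column per response dimension (degrees 1, ..., qj)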
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing problems to be reproduced
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# set up an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows logging state information that
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,131 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = leaf node count of the full chess search tree of depth 6 from the starting position
# > position startpos
# > go perft 6
set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,171 +0,0 @@
library(tensorPredictors)
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-normal"
reps <- 100 # number of simulation replications
max.iter <- 10000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # predictor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
r <- length(p)
# initial consistency checks
stopifnot(exprs = {
r == length(p)
r == length(q)
all(outer(p, sample.sizes, `<`))
})
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
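# Illustrative shape check (kept commented out so the seeded RNG stream of
# the simulation below stays untouched):
# tmp <- sample.data(7, eta1, alphas, Omegas)
# stopifnot(all(dim(tmp$X) == c(p, 7)), all(dim(tmp$Fy) == c(q, 7)))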
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing problems to be reproduced
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# set up an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows logging state information that
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,96 +0,0 @@
library(tensorPredictors)
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # predictor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
# initial consistency checks
stopifnot(exprs = {
length(p) == length(q)
})
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
r <- length(alphas) # tensor order
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axes to get the requested sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 1) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
)
)
# Iter: loss - dist
# 7190: 50.583 - 0.057
# MSE eta1
# 0.02694658
# subspace distances of alphas
# 0.043 0.035 0.034
# Frob. norm of Omega differences
# 0.815 1.777 12.756
# time user system elapsed
# 342.279 555.630 183.653

View File

@ -1,100 +0,0 @@
if (!endsWith(getwd(), "/sim")) {
setwd("sim")
}
sim.plot <- function(file.prefix, date, to.file = FALSE) {
# file.prefix <- "sim-ising-small"
# # file.prefix <- "sim-normal"
# date <- "20221012" # yyyymmdd, to match all "[0-9]{6}"
time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}"
colors <- c(
PCA = "#a11414",
HOPCA = "#2a62b6",
TSIR = "#9313b9",
GMLM = "#247407"
)
line.width <- 1.75
margins <- c(5.1, 4.1, 4.1, 0.1)
sim <- Reduce(rbind, Map(function(path) {
df <- read.csv(path)
df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1))
df
}, list.files(".", pattern = paste0(
"^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
))))
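# e.g. "sim-normal-20221012T1359-500.csv" matches the pattern above and all
# of its rows are tagged with n = 500 (parsed from the file name)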
stats <- aggregate(. ~ n, sim, mean)
q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
if (to.file) {
width <- 720
png(paste(file.prefix, "-", date, ".png", sep = ""),
width = width, height = round((6 / 11) * width, -1),
pointsize = 12)
}
layout(mat = matrix(c(
1, 2,
3, 3
), 2, 2, byrow = TRUE),
widths = c(1, 1),
heights = c(12, 1), respect = FALSE)
# layout.show(3)
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "Subspace Distance",
xlab = "Sample Size", ylab = "Error")
lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width)
lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width)
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
})
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "RMSE (Prediction Error)",
xlab = "Sample Size", ylab = "Error")
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width)
lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width)
})
par(mar = rep(0, 4))
plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
legend("center", legend = names(colors), col = colors, lwd = line.width,
lty = 1, bty = "n", horiz = TRUE)
if (to.file) {
dev.off()
}
}
sim.plot("sim-ising-small", "20221012", TRUE)
sim.plot("sim-normal", "20221012", TRUE)

View File

@ -5,6 +5,11 @@ export("%x_1%")
export("%x_2%") export("%x_2%")
export("%x_3%") export("%x_3%")
export("%x_4%") export("%x_4%")
export(.projBand)
export(.projPSD)
export(.projRank)
export(.projSymBand)
export(.projSymRank)
export(CISE) export(CISE)
export(D) export(D)
export(D.pinv) export(D.pinv)
@ -17,6 +22,8 @@ export(ICU)
export(K) export(K)
export(K.perm) export(K.perm)
export(LSIR) export(LSIR)
export(La.det)
export(La.solve)
export(N) export(N)
export(NAGD) export(NAGD)
export(PCA2d) export(PCA2d)
@ -27,18 +34,26 @@ export(S)
export(TSIR) export(TSIR)
export(approx.kronecker) export(approx.kronecker)
export(colKronecker) export(colKronecker)
export(decompose.kronecker)
export(dist.kron.norm) export(dist.kron.norm)
export(dist.kron.tr) export(dist.kron.tr)
export(dist.projection) export(dist.projection)
export(dist.subspace) export(dist.subspace)
export(exprs.all.equal) export(exprs.all.equal)
export(gmlm_ising)
export(gmlm_tensor_normal)
export(icu_tensor_normal)
export(ising_m2)
export(ising_sample)
export(kpir.approx) export(kpir.approx)
export(kpir.base) export(kpir.base)
export(kpir.ls) export(kpir.ls)
export(kpir.mle) export(kpir.mle)
export(kpir.momentum) export(kpir.momentum)
export(kpir.new) export(kpir.new)
export(kronperm)
export(mat) export(mat)
export(matProj)
export(matpow) export(matpow)
export(matrixImage) export(matrixImage)
export(mcov) export(mcov)
@ -49,10 +64,19 @@ export(mtvk)
export(num.deriv) export(num.deriv)
export(num.deriv.function) export(num.deriv.function)
export(num.deriv2) export(num.deriv2)
export(pinv)
export(projDiag)
export(projStiefel)
export(projSym)
export(reduce) export(reduce)
export(riccati)
export(rowKronecker) export(rowKronecker)
export(rtensornorm) export(rtensornorm)
export(slice.expr)
export(slice.select)
export(sylvester)
export(tensor_predictor) export(tensor_predictor)
export(tsym)
export(ttm) export(ttm)
export(ttt) export(ttt)
export(vech) export(vech)

View File

@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
# optimize likelihood using Nesterov Accelerated Gradient Descent # optimize likelihood using Nesterov Accelerated Gradient Descent
params.fit <- NAGD( params.fit <- NAGD(
fun.loss = function(params) { fun.loss = function(params) {
# scaled negative lig-likelihood # scaled negative log-likelihood
# eta1 alphas Omegas # eta1 alphas Omegas
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]]) family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
}, },
fun.grad = function(params) { fun.grad = function(params) {
# gradient of the scaled negative lig-likelihood # gradient of the scaled negative log-likelihood
# eta1 alphas Omegas # eta1 alphas Omegas
family$grad(X, Fy, params[[1]], params[[2]], params[[3]]) family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
}, },

View File

@ -10,7 +10,7 @@
#' `i`th axis excluding the sample axis. #' `i`th axis excluding the sample axis.
#' #'
#' @export #' @export
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) { HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) {
# observation index numbers (all axis except the sample axis) # observation index numbers (all axis except the sample axis)
modes <- seq_along(dim(X))[-sample.axis] modes <- seq_along(dim(X))[-sample.axis]

View File

@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
list(alphas = alphas, Deltas = fun.Deltas(alphas)) list(alphas = alphas, Deltas = fun.Deltas(alphas))
} }
#' HOPIR subroutine for the MLE estimation given proprocessed data and initial #' HOPIR subroutine for the MLE estimation given preprocessed data and initial
#' alphas, Deltas paramters #' alpha, Delta parameters
#' #'
#' @keywords internal #' @keywords internal
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) { HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {

View File

@ -7,7 +7,7 @@
#' #'
#' @export #' @export
HOSVD <- function(X, nu = NULL, eps = 1e-07) { HOSVD <- function(X, nu = NULL, eps = 1e-07) {
if (!missing(nu)) { if (!is.null(nu)) {
stopifnot(all(nu <= dim(X))) stopifnot(all(nu <= dim(X)))
} }
@ -21,3 +21,5 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) {
list(C = C, Us = Us) list(C = C, Us = Us)
} }
SVD <- function(A) .Call("C_svd", A)

View File

@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
stop("Initial loss is non-finite (", loss, ")") stop("Initial loss is non-finite (", loss, ")")
} }
# initialize "previous" iterate parameters # initialize "previous" iterate parameters
params.last <- params prev.params <- params
# Gradient Descent Loop # Gradient Descent Loop
line.search.tag <- FALSE # init line search state as "failure" line.search.tag <- FALSE # init line search state as "failure"
@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
} }
# Extrapolation from previous position (momentum) # Extrapolation from previous position (momentum)
# `params.moment <- (1 + moment) * params - moment * param.last` # `params.moment <- (1 + moment) * params - moment * prev.params`
moment <- (m[1] - 1) / m[2] moment <- (m[1] - 1) / m[2]
params.moment <- fun.lincomb(1 + moment, params, -moment, params.last) params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params)
# Compute gradient at extrapolated position # Compute gradient at extrapolated position
if (missing(more.params)) { if (missing(more.params)) {
@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
} }
# keep track of previous parameters # keep track of previous parameters
params.last <- params prev.params <- params
# check line search outcome # check line search outcome
if (is.na(line.search.tag)) { if (is.na(line.search.tag)) {

View File

@ -4,11 +4,21 @@
TSIR <- function(X, y, d, sample.axis = 1L, TSIR <- function(X, y, d, sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L, max.iter = 50L,
eps = sqrt(.Machine$double.eps) eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) { ) {
if (!(is.factor(y) || is.integer(y))) { if (!(is.factor(y) || is.integer(y))) {
y <- cut(y, nr.slices) slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
} }
stopifnot(exprs = { stopifnot(exprs = {
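A short sketch of what the new `slice.method = "ecdf"` option buys (base R only, independent of the package): slicing the probability transform `ecdf(y)(y)` gives nearly balanced slices even for a skewed response, whereas `cut` on the raw scale can leave slices almost or entirely empty.

y <- rexp(100)
table(cut(ecdf(y)(y), 10)) # roughly 10 observations per slice
table(cut(y, 10)) # unbalanced; some slices may be empty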

View File

@ -1,4 +1,4 @@
#' Approximates kronecker product decomposition. #' Approximates Kronecker Product decomposition.
#' #'
#' Approximates the matrices `A` and `B` such that #' Approximates the matrices `A` and `B` such that
#' C = A %x% B #' C = A %x% B
@ -21,7 +21,7 @@
#' 123 (2000) 85-100 (pp. 93-95) #' 123 (2000) 85-100 (pp. 93-95)
#' #'
#' @export #' @export
approx.kronecker <- function(C, dimA, dimB) { approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L]) dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
R <- aperm(C, c(2L, 4L, 1L, 3L)) R <- aperm(C, c(2L, 4L, 1L, 3L))
@ -33,8 +33,115 @@ approx.kronecker <- function(C, dimA, dimB) {
svdR <- svd(R, 1L, 1L) svdR <- svd(R, 1L, 1L)
} }
return(list( list(
A = array(sqrt(svdR$d[1]) * svdR$u, dimA), A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
B = array(sqrt(svdR$d[1]) * svdR$v, dimB) B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
)
}
#' Kronecker Product Decomposition.
#'
#' Computes the summation components `A_i`, `B_i` of a sum of
#' Kronecker products
#' C = sum_i A_i %x% B_i
#' with the minimal estimated number of summands.
#'
#' @param C desired Kronecker product result.
#' @param dimA length 2 vector of dimensions of \code{A}.
#' @param dimB length 2 vector of dimensions of \code{B}.
#' @param tol tolerance on the singular values of \code{C} used to determine
#' the number of summands needed.
#'
#' @return list of length equal to the estimated number of summation
#' components; each entry consists of a list with named entries \code{"A"}
#' and \code{"B"} of dimensions \code{dimA} and \code{dimB}.
#'
#' @examples
#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE)
#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE)
#' C <- Reduce(`+`, Map(kronecker, As, Bs))
#'
#' decomposition <- decompose.kronecker(C, c(2, 7))
#'
#' reconstruction <- Reduce(`+`, Map(function(summand) {
#' kronecker(summand[[1]], summand[[2]])
#' }, decomposition), array(0, dim(C)))
#'
#' stopifnot(all.equal(C, reconstruction))
#'
#' @export
decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) {
# convert to the equivalent outer product
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE)
dim(C) <- c(prod(dimA), prod(dimB))
# Singular Value Decomposition
svdC <- La.svd(C)
# Sum of Kronecker Components
lapply(seq_len(sum(svdC$d > tol)), function(i) list(
A = matrix(svdC$d[i] * svdC$u[, i], dimA),
B = matrix(svdC$vt[i, ], dimB)
)) ))
} }
### Given that C is a Kronecker product this is a fast method but a bit
### unreliable in full generality.
# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
# R <- aperm(C, c(2L, 4L, 1L, 3L))
# dim(R) <- c(prod(dimA), prod(dimB))
# max.index <- which.max(abs(R))
# row.index <- ((max.index - 1L) %% nrow(R)) + 1L
# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L
# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1
# list(
# A = array(R[, col.index], dimA),
# B = array(R[row.index, ] / max.elem, dimB)
# )
# }
# kron <- function(A, B) {
# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1]))
# K <- aperm(outer(A, B), perm)
# dim(K) <- dim(A) * dim(B)
# K
# }
# kronperm <- function(A) {
# # force A to have even number of dimensions
# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L))
# # compute axis permutation
# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1]))
# # permute elements of A
# K <- aperm(A, perm, resize = FALSE)
# # collapse/set dimensions
# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2)
# K
# }
# p <- c(2, 3, 5)
# q <- c(3, 4, 7)
# A <- array(rnorm(prod(p)), p)
# B <- array(rnorm(prod(q)), q)
# all.equal(kronperm(outer(A, B)), kronecker(A, B))
# all.equal(kron(A, B), kronecker(A, B))
# dA <- c(2, 3, 5)
# dB <- c(3, 4, 7)
# A <- array(rnorm(prod(dA)), dA)
# B <- array(rnorm(prod(dB)), dB)
# X <- outer(A, B)
# r <- length(dim(X)) / 2
# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r))))
# K <- apply(rbind(
# (I - 1) %/% tail(dim(X), r) + 1,
# (I - 1) %% tail(dim(X), r) + 1
# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1])
# dim(K) <- head(dim(X), r) * tail(dim(X), r)
# all.equal(kronecker(A, B), K)

View File

@ -3,7 +3,7 @@
#' \|A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F #' \|A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
#' #'
#' This is equivalent to the expression #' This is equivalent to the expression
#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster. #' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster.
#' #'
#' @examples #' @examples
#' A1 <- matrix(rnorm(5^2), 5) #' A1 <- matrix(rnorm(5^2), 5)
@ -16,21 +16,21 @@
#' )) #' ))
#' #'
#' p <- c(3, 7, 5, 2) #' p <- c(3, 7, 5, 2)
#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p) #' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p) #' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' stopifnot(all.equal( #' stopifnot(all.equal(
#' dist.kron.norm(A, B), #' dist.kron.norm(As, Bs),
#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F") #' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")
#' )) #' ))
#' #'
#' @export #' @export
dist.kron.norm <- function(A, B, eps = .Machine$double.eps) { dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) {
if (is.list(A) && is.list(B)) { if (is.list(As) && is.list(Bs)) {
norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) - norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) -
2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) + 2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) +
prod(unlist(Map(function(x) sum(x^2), B))) prod(unlist(Map(function(x) sum(x^2), Bs)))
} else if (is.matrix(A) && is.matrix(B)) { } else if (is.matrix(As) && is.matrix(Bs)) {
norm2 <- sum((A - B)^2) norm2 <- sum((As - Bs)^2)
} else { } else {
stop("Unexpected input") stop("Unexpected input")
} }

View File

@ -1,4 +1,4 @@
#' Porjection Distance of two matrices #' Projection Distance of two matrices
#' #'
#' Defined as the sine of the maximum principal angle between the column spaces #' Defined as the sine of the maximum principal angle between the column spaces
#' of the matrices #' of the matrices

View File

@ -0,0 +1,138 @@
#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
#'
#' @export
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
max.iter = 1000L,
eps = sqrt(.Machine$double.eps),
step.size = function(iter) 1e-2 * (1000 / (iter + 1000)),
zig.zag.threshold = 5L,
patience = 5L,
nr.slices = 10L, # only for univariate `F(y) = y`
slice.method = c("cut", "ecdf"), # only for univariate `F(y) = y` and `y` is a factor or integer
logger = function(...) { }
) {
# # Special case for univariate response `vec F(y) = y`
# # Due to high computational costs we use slicing
# if (length(F) == prod(dim(F))) {
# y <- as.vector(F)
# if (!(is.factor(y) || is.integer(y))) {
# slice.method <- match.arg(slice.method)
# if (slice.method == "ecdf") {
# y <- cut(ecdf(y)(y), nr.slices)
# } else {
# y <- cut(y, nr.slices)
# }
# }
# slices <- split(seq_len(sample.size), y, drop = TRUE)
# } else {
# slices <- seq_len(sample.size)
# }
dimX <- head(dim(X), -1)
dimF <- head(dim(F), -1)
sample.axis <- length(dim(X))
modes <- seq_len(length(dim(X)) - 1)
sample.size <- tail(dim(X), 1)
betas <- Map(matrix, Map(rnorm, dimX * dimF), dimX)
Omegas <- Map(diag, dimX)
grad2_betas <- Map(array, 0, Map(dim, betas))
grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
# Keep track of the last loss to accumulate sign changes of the loss
# differences; alternating signs indicate instability and signal to stop
last_loss <- Inf
accum_sign <- 1
# non-improving iteration counter
non_improving <- 0L
# technical access points to dynamically access a multi-dimensional array
# via its last index
`X[..., i]` <- slice.expr(X, sample.axis, index = i)
`F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
# the next expression accesses the precomputed `mlm(F, betas)`
`BF[..., i]` <- slice.expr(BF, sample.axis, nr.axis = sample.axis, drop = FALSE) # BF[..., i]
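# e.g. for an order-2 predictor plus sample axis, `slice.expr(X, 3, index = i)`
# presumably builds the unevaluated call `X[, , i]`, which the `eval(...)`
# calls in the loop below resolve against the current value of `i`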
# Iterate till a break condition triggers or till max. nr. of iterations
for (iter in seq_len(max.iter)) {
grad_betas <- Map(matrix, 0, dimX, dimF)
Omega <- Reduce(kronecker, rev(Omegas))
R2 <- array(0, dim = c(dimX, dimX))
# negative log-likelihood
loss <- 0
BF <- mlm(F, betas)
for (i in seq_len(sample.size)) {
params_i <- Omega + diag(as.vector(eval(`BF[..., i]`)))
m2_i <- ising_m2(params_i)
# accumulate loss
x_i <- as.vector(eval(`X[..., i]`))
loss <- loss - (sum(x_i * (params_i %*% x_i)) + log(attr(m2_i, "prob_0")))
R2_i <- tcrossprod(x_i) - m2_i
R1_i <- diag(R2_i)
dim(R1_i) <- dimX
for (j in modes) {
grad_betas[[j]] <- grad_betas[[j]] +
mcrossprod(
R1_i, mlm(eval(`F[..., i]`), betas[-j], modes[-j]), j,
dimB = ifelse(j != modes, dimX, dimF)
)
}
R2 <- R2 + as.vector(R2_i)
}
grad_Omegas <- Map(function(j) {
grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
dim(grad) <- dim(Omegas[[j]])
grad
}, modes)
# update the accumulated sign of alternating loss differences
accum_sign <- sign(last_loss - loss) - accum_sign
# check if accumulated alternating signs exceed stopping threshold
if (abs(accum_sign) > zig.zag.threshold) { break }
# increment the non-improving counter if the loss did not improve
if (!(loss < last_loss)) {
non_improving <- non_improving + 1L
} else {
non_improving <- 0L
}
if (non_improving > patience) { break }
# store current loss for the next iteration
last_loss <- loss
# Accumulate root mean squared gradients
grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_betas, grad_betas)
grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_Omegas, grad_Omegas)
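# together with the parameter update below this is an RMSprop-style scheme,
# theta <- theta + step * grad / (sqrt(E[grad^2]) + eps),
# where E[grad^2] is an exponential moving average (factor 0.9) of the
# squared gradients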
# logging (before parameter update)
logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)
# gradually decrease the step size
step <- if (is.function(step.size)) step.size(iter) else step.size
# Update Parameters
betas <- Map(function(beta, grad, m2) {
beta + (step / (sqrt(m2) + eps)) * grad
}, betas, grad_betas, grad2_betas)
Omegas <- Map(function(Omega, grad, m2) {
Omega + (step / (sqrt(m2) + eps)) * grad
}, Omegas, grad_Omegas, grad2_Omegas)
}
list(betas = betas, Omegas = Omegas)
}

View File

@ -0,0 +1,357 @@
# p <- 5
# A <- matrix(rnorm(p^2), p)
# mat.proj("TriDiag", p)(A)
# mat.proj("SymTriDiag", p)(A)
# A
# (AA <- mat.proj("PSD", p)(A))
# mat.proj("PSD", p)(AA)
# p <- 5
# A <- matrix(rnorm(p^2), p)
# projection <- function(T2) {
# P <- pinv(T2) %*% T2
# function(A) {
# matrix(P %*% as.vector(A), nrow(A))
# }
# }
# # All equal diagonal matrix, A = a * I_p
# T2 <- t(as.vector(diag(p)))
# proj <- projection(T2)
# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Diagonal matrix, A = diag(a_1, ..., a_p)
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(diag(T2), as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( diag(diag(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# # Tri-Diagonal Matrix
# T2 <- matrix(seq_len(p^2), p)
# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`)
# storage.mode(T2) <- "double"
# triDiag.mask <- (abs(row(A) - col(A)) <= 1)
# storage.mode(triDiag.mask) <- "double"
# print.table( T2 , zero.print = ".")
# print.table( triDiag.mask , zero.print = ".")
# print.table( A * triDiag.mask , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # All equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
# as.double(row(A) == i + col(A))
# }, (1 - p):(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric all equal main and off diagonals
# T2 <- Reduce(rbind, Map(function(i) {
# as.double(abs(row(A) - col(A)) == i)
# }, 0:(p - 1)))
# print.table( T2 , zero.print = ".")
# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".")
# # Symmetric Matrix
# index <- matrix(seq_len(p^2), p)
# T2 <- Reduce(rbind, Map(function(i) {
# e_i <- index == i
# as.double(e_i | t(e_i))
# }, index[lower.tri(index, diag = TRUE)]))
# proj <- projection(T2)
# print.table( T2 , zero.print = ".")
# print.table( 0.5 * (A + t(A)) , zero.print = ".")
# print.table( proj(A) , zero.print = ".")
# proj(solve(A))
# solve(proj(A))
#' Specialized version of GMLM for the tensor normal model
#'
#' The underlying algorithm is an ``iterative (block) coordinate descent'' method
#'
#' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
cond.threshold = 25, eps = 1e-6
) {
# rearrange `X`, `F` such that the last axis enumerates observations
if (!missing(sample.axis)) {
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
X <- aperm(X, axis.perm)
F <- aperm(F, axis.perm)
sample.axis <- length(dim(X))
}
# Get problem dimensions (observation dimensions)
dimX <- head(dim(X), -1)
dimF <- head(dim(F), -1)
modes <- seq_along(dimX)
# Ensure the Omega and beta projection lists are lists
if (!is.list(proj.Omegas)) {
proj.Omegas <- rep(NULL, length(modes))
}
if (!is.list(proj.betas)) {
proj.betas <- rep(NULL, length(modes))
}
### Phase 1: Computing initial values
meanX <- rowMeans(X, dims = length(dimX))
meanF <- rowMeans(F, dims = length(dimF))
# center X and F
X <- X - as.vector(meanX)
F <- F - as.vector(meanF)
# initialize Omega estimates as mode-wise, unconditional covariance estimates
Sigmas <- Map(diag, dimX)
Omegas <- Map(diag, dimX)
# Per mode covariance directions
# Note: (the directions are transposed!)
dirsX <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(X, sample.axis, center = FALSE))
dirsF <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(F, sample.axis, center = FALSE))
# initialize the betas as ``covariance direction mappings``
betas <- Map(function(dX, dF) {
s <- min(ncol(dX), nrow(dF))
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
}, dirsX, dirsF)
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Initial value of the log-likelihood (scaled and constants dropped)
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = 0L, betas = betas, Omegas = Omegas,
resid = R, loss = loss
))
### Phase 2: (Block) Coordinate Descent
for (iter in seq_len(max.iter)) {
# update every beta (in random order)
for (j in sample.int(length(betas))) {
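# Sketch of the update: with all other parameters fixed, `beta_j` solves
# the mode-`j` generalized least squares normal equations, built from the
# two mode-`j` cross products below and premultiplied by the precision `Omega_j`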
FxB_j <- mlm(F, betas[-j], modes[-j])
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
# Project `betas` onto their manifold
if (is.function(proj_j <- proj.betas[[j]])) {
betas[[j]] <- proj_j(betas[[j]])
}
}
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
Sigmas <- mcov(R, sample.axis, center = FALSE)
# Compute the `Omega_j`s, the j'th mode precision matrices, in conjunction
# with regularization of the j'th mode covariance estimate `Sigma_j`
for (j in seq_along(Sigmas)) {
# Compute min and max eigen values
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
if (min_max[2] > cond.threshold * min_max[1]) {
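# Note (sketch): shifting the diagonal by `0.2 * max` bounds the new
# condition number by `1.2 * max / (0.2 * max + min) < 6`, well below
# the default `cond.threshold` of 25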
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
}
# Compute (unconstrained but regularized) Omega_j as covariance inverse
Omegas[[j]] <- solve(Sigmas[[j]])
# Project Omega_j to the Omega_j's manifold
if (is.function(proj_j <- proj.Omegas[[j]])) {
Omegas[[j]] <- proj_j(Omegas[[j]])
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
# The order (project `Omega_j` first, then recompute `Sigma_j`) is important
Sigmas[[j]] <- solve(Omegas[[j]])
}
}
# store last loss and compute new value
loss.last <- loss
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
))
# check the break condition
if (abs(loss.last - loss) < eps * loss.last) {
break
}
}
list(eta1 = meanX, betas = betas, Omegas = Omegas)
}
# #### DEBUGGING
# library(tensorPredictors)
# # setup dimensions
# n <- 1e3
# p <- c(5, 7, 3)
# q <- c(2, 5, 3)
# max.iter <- 10L
# # create "true" GLM parameters
# eta1 <- array(rnorm(prod(p)), dim = p)
# betas <- Map(matrix, Map(rnorm, p * q), p)
# Omegas <- Map(function(p_j) {
# solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`)))
# }, p)
# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas)
# # compute tensor normal parameters from GMLM parameters
# Sigmas <- Map(solve, Omegas)
# mu <- mlm(eta1, Sigmas)
# # sample some test data
# sample.axis <- length(p) + 1L
# F <- array(rnorm(n * prod(q)), dim = c(q, n))
# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis)
# # setup a logging callback
# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n",
# paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ")
# )
# hist <- data.frame(
# iter = seq(0, max.iter),
# dist.B = numeric(max.iter + 1),
# dist.Omega = numeric(max.iter + 1),
# resid = numeric(max.iter + 1)
# )
# logger <- function(iter, betas, Omegas, resid, ...) {
# dist.B <- dist.kron.norm(true.params$betas, betas)
# dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas)
# resid <- mean(resid^2)
# hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid)
# cat(do.call(sprintf, c(
# list(format, iter),
# Map(dist.subspace, betas, true.params$betas),
# dist.Omega, resid
# )))
# }
# # run the model
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger)
# # run model with Omegas in sub-manifolds of SPD matrices
# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger,
# proj.Omegas = list(
# NULL,
# (function(nrow, ncol) {
# triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1)
# storage.mode(triDiag.mask) <- "double"
# function(A) { A * triDiag.mask }
# })(p[2], p[2]),
# function(A) diag(diag(A))
# ))
# # # Profile the fitting routine without logging
# # profvis::profvis({
# # est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter)
# # })
# with(hist, {
# plot(range(iter), range(hist[, -1]), type = "n", log = "y")
# lines(iter, resid, col = "red")
# lines(iter, dist.B, col = "green")
# lines(iter, dist.Omega, col = "blue")
# legend("topright", legend = c("resid", "dist.B", "dist.Omega"), col = c("red", "green", "blue"), lty = 1)
# })
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(est.params$Omegas)))
# # matrixImage(Reduce(kronecker, rev(true.params$Omegas)))
# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas)))
# # par(mfrow = c(1, 2))
# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3"))
# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3"))
# # # unlist(Map(kappa, mcov(X)))
# # # unlist(Map(kappa, Omegas))
# # # unlist(Map(kappa, Sigmas))
# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X))))
# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X))))
# # # Map(function(C) eigen(C)$values, mcov(X))
# # # kappa(Reduce(kronecker, mcov(X)))
# # kappa1.fun <- function(Omegas) Map(kappa, Omegas)
# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE)
# # eigen1.fun <- function(Omegas) {
# # Map(function(Omega) {
# # min_max <- range(eigen(Omega)$values)
# # min_max[2] / min_max[1]
# # }, Omegas)
# # }
# # eigen2.fun <- function(Omegas) {
# # Map(function(Omega) {
# # min_max <- range(eigen(Omega, TRUE, TRUE)$values)
# # min_max[2] / min_max[1]
# # }, Omegas)
# # }
# # microbenchmark::microbenchmark(
# # kappa1.fun(Omegas),
# # kappa2.fun(Omegas),
# # eigen1.fun(Omegas),
# # eigen2.fun(Omegas)
# # )

View File

@ -0,0 +1,18 @@
#' Second moment of the Ising model
#'
#' Computes the second moment `E[X X']` of the Ising model, either exactly or
#' by Monte-Carlo estimation, given the natural parameters `params`.
#'
#' @export
ising_m2 <- function(
params, use_MC = NULL, nr_samples = 10000L,
warmup = 15L, nr_threads = 1L
) {
if (missing(use_MC)) {
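# Heuristic: the exact method enumerates all `2^p` binary events, so
# default to Monte-Carlo beyond `p = 19` (vech length `190 = 19 * 20 / 2`)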
use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params)
}
m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
PACKAGE = "tensorPredictors"
)
M2 <- vech.pinv(m2)
attr(M2, "prob_0") <- attr(m2, "prob_0")
M2
}
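# Minimal usage sketch (hypothetical values; `p` kept small enough for the
# exact method, see the `use_MC` heuristic above):
# p <- 5
# params <- rnorm(p * (p + 1) / 2) # natural parameters (vech layout)
# M2 <- ising_m2(params) # second moment E[X X'] as p x p matrix
# attr(M2, "prob_0") # zero event probability P(X = 0)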

View File

@ -0,0 +1,11 @@
#' Sample from the Ising model
#'
#' @param nr number of samples
#' @param params Ising model parameters, either a numeric vector of size
#' `p (p + 1) / 2` (natural parameters) or a symmetric `p x p` parameter
#' matrix, for `p` dimensional random binary vectors
#' @param warmup Monte-Carlo chain length before retrieving a sample
#'
#' @export
ising_sample <- function(nr, params, warmup = 15L) {
.Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors")
}
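# Usage sketch (hypothetical values): draw samples and compare the empirical
# second moment against `ising_m2`:
# p <- 5
# params <- rnorm(p * (p + 1) / 2)
# X <- ising_sample(1000, params) # p x 1000 integer matrix
# M2.emp <- tcrossprod(X) / ncol(X) # empirical E[X X']
# M2 <- ising_m2(params) # exact counterpart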

View File

@ -124,6 +124,30 @@ make.gmlm.family <- function(name) {
) )
} }
# conditional covariance of the sufficient statistic
# Cov(t(X) | Y = y)
# Note: fy is a single observation!
cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) {
Deltas <- Map(solve, Omegas)
E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas))
# H11 = Cov(vec X | Y = y)
H11 <- Reduce(`%x%`, rev(Deltas))
# H21 = Cov(vec X %x% vec X, vec X | Y = y)
H21 <- kronecker(E1, H11) + kronecker(H11, E1)
# H22 = Cov(vec X %x% vec X | Y = y)
H22 <- local({
e1e1 <- tcrossprod(E1, E1)
h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1)
aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2))
})
# Combine into single covariance matrix
cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2)))
}
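# Note (sketch): with `p <- prod(sapply(alphas, nrow))` the blocks are
# `H11: p x p`, `H21: p^2 x p` and `H22` (matricized): `p^2 x p^2`, so the
# result is the `(p + p^2) x (p + p^2)` covariance of `(vec X, vec X %x% vec X)`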
# mean conditional Fisher Information # mean conditional Fisher Information
fisher.info <- function(Fy, eta1, alphas, Omegas) { fisher.info <- function(Fy, eta1, alphas, Omegas) {
# retrieve dimensions # retrieve dimensions

View File

@ -9,6 +9,15 @@
#' or tensor of dimensions \code{dims} iff \code{inv} is true. #' or tensor of dimensions \code{dims} iff \code{inv} is true.
#' #'
#' @examples #' @examples
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5)) #' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
#' stopifnot(exprs = { #' stopifnot(exprs = {
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE)) #' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
@ -22,15 +31,6 @@
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2))) #' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
#' }) #' })
#' #'
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' @export #' @export
mat <- function(T, modes, dims = dim(T), inv = FALSE) { mat <- function(T, modes, dims = dim(T), inv = FALSE) {
modes <- as.integer(modes) modes <- as.integer(modes)

View File

@ -7,6 +7,8 @@
#' @param sub sub-title of the plot #' @param sub sub-title of the plot
#' @param interpolate a logical vector (or scalar) indicating whether to apply #' @param interpolate a logical vector (or scalar) indicating whether to apply
#' linear interpolation to the image when drawing. #' linear interpolation to the image when drawing.
#' @param new.plot logical, if `TRUE` (the default) a new plot area is created
#' (clearing the plot device); if `FALSE` the current plot is updated in
#' place, which leads to smoother updating.
#' @param ... further arguments passed to \code{\link{rasterImage}} #' @param ... further arguments passed to \code{\link{rasterImage}}
#' #'
#' @examples #' @examples
@ -16,13 +18,15 @@
#' @export #' @export
matrixImage <- function(A, add.values = FALSE, matrixImage <- function(A, add.values = FALSE,
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA, main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE), axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE),
digits = getOption("digits") digits = getOption("digits"), new.plot = TRUE
) { ) {
# plot raster image # plot raster image
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black", if (new.plot) {
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub, plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
asp = asp) xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
asp = asp)
}
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0. # Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
if (missing(zlim)) { if (missing(zlim)) {
@ -41,7 +45,7 @@ matrixImage <- function(A, add.values = FALSE,
# X/Y axes index (matches coordinates to matrix indices) # X/Y axes index (matches coordinates to matrix indices)
x <- seq(1, ncol(A), by = 1) x <- seq(1, ncol(A), by = 1)
y <- seq(1, nrow(A)) y <- seq(1, nrow(A))
if (axes) { if (axes && new.plot) {
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1) axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1) axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
} }

View File

@ -12,9 +12,13 @@
#' #'
#' @param X multi-dimensional array #' @param X multi-dimensional array
#' @param sample.axis observation axis index #' @param sample.axis observation axis index
#' @param center logical, if `TRUE` (the default) compute the centered second
#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment
#' of each mode is computed, which is specifically useful in case `X` is
#' already centered.
#' #'
#' @export #' @export
mcov <- function(X, sample.axis = 1L) { mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
# observation modes (axis indices) # observation modes (axis indices)
modes <- seq_along(dim(X))[-sample.axis] modes <- seq_along(dim(X))[-sample.axis]
# observation dimensions # observation dimensions
@ -26,16 +30,18 @@ mcov <- function(X, sample.axis = 1L) {
if (sample.axis != r + 1L) { if (sample.axis != r + 1L) {
X <- aperm(X, c(modes, sample.axis)) X <- aperm(X, c(modes, sample.axis))
} }
# centering: Z = X - E[X] # centering: X <- X - E[X]
Z <- X - c(rowMeans(X, dims = r)) if (center) {
X <- X - c(rowMeans(X, dims = r))
}
# estimate (unscaled) covariances for each mode # estimate (unscaled) covariances for each mode
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z)) Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X))
# scale by per mode "sample" size # scale by per mode "sample" size
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL) Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
# estimate trace of Kronecker product of covariances # estimate trace of Kronecker product of covariances
tr.est <- prod(p) * mean(Z^2) tr.est <- prod(p) * mean(X^2)
# as well as the current trace of the unscaled covariances # as well as the current trace of the unscaled covariances
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL))) tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))

View File

@ -44,16 +44,20 @@
#' )) #' ))
#' #'
#' @export #' @export
mcrossprod <- function(A, B, mode) { mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) {
storage.mode(A) <- "double" storage.mode(A) <- "double"
if (is.null(dim(A))) { if (!missing(dimA)) {
dim(A) <- dimA
} else if (is.null(dim(A))) {
dim(A) <- length(A) dim(A) <- length(A)
} }
if (missing(B)) { if (missing(B)) {
.Call("C_mcrossprod_sym", A, as.integer(mode)) .Call("C_mcrossprod_sym", A, as.integer(mode))
} else { } else {
storage.mode(B) <- "double" storage.mode(B) <- "double"
if (is.null(dim(B))) { if (!missing(dimB)) {
dim(B) <- dimB
} else if (is.null(dim(B))) {
dim(B) <- length(B) dim(B) <- length(B)
} }
.Call("C_mcrossprod", A, B, as.integer(mode)) .Call("C_mcrossprod", A, B, as.integer(mode))

View File

@ -75,23 +75,63 @@
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4} #' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As))) #' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
#' #'
#' # Equivalent to
#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
#' # Collect all matrices in `B`
#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs
#'
#' # replicate transposition if of length one only
#' transposed <- if (length(transposed) == 1) {
#' rep(as.logical(transposed), length(Bs))
#' } else {
#' as.logical(transposed)
#' }
#'
#' # iteratively apply Tensor Times Matrix multiplication over modes
#' for (i in seq_along(modes)) {
#' A <- ttm(A, Bs[[i]], modes[i], transposed[i])
#' }
#'
#' # return result tensor
#' A
#' }
#'
#' @export #' @export
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) { mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
# Collect all matrices in `B` # Collect all matrices in `B`
Bs <- if (is.matrix(Bs)) list(Bs) else Bs Bs <- if (!is.list(Bs)) list(Bs) else Bs
# ensure all `B`s are matrices
Bs <- Map(as.matrix, Bs)
# replicate transposition if of length one only # replicate transposition if of length one only
transposed <- if (length(transposed) == 1) { transposed <- if (length(transposed) == 1) {
rep(as.logical(transposed), length(Bs)) rep(as.logical(transposed), length(Bs))
} else { } else if (length(transposed) == length(modes)) {
as.logical(transposed) as.logical(transposed)
} else {
stop("Dim missmatch of param. `transposed`")
} }
# iteratively apply Tensor Times Matrix multiplication over modes .Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors")
for (i in seq_along(modes)) {
A <- ttm(A, Bs[[i]], modes[i], transposed[i])
}
# return result tensor
A
} }
# # general usage
# dimA <- c(3, 17, 19, 2)
# dimC <- c(7, 11, 13, 5)
# A <- array(rnorm(prod(dimA)), dim = dimA)
# trans <- c(TRUE, FALSE, TRUE, FALSE)
# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA))
# C <- mlm(A, Bs, transposed = trans)
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# microbenchmark::microbenchmark(
# mlm(A, Bs, transposed = trans),
# mlm_reference(A, Bs, transposed = trans)
# )
# microbenchmark::microbenchmark(
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]),
# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# )

View File

@ -1,14 +1,14 @@
#' Duplication Matrix #' Duplication Matrix
#' #'
#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric #' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric
#' @examples #' @examples
#' p <- 8 #' p <- 8
#' A <- matrix(rnorm(p^2), p, p) #' A <- matrix(rnorm(p^2), p, p)
#' A <- A + t(A) #' A <- A + t(A)
#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A))) #' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A)))
#' #'
#' @export #' @export
D <- function(p) { Dup <- function(p) {
# setup `vec` and `vech` element indices (zero indexed) # setup `vec` and `vech` element indices (zero indexed)
vec <- matrix(NA_integer_, p, p) vec <- matrix(NA_integer_, p, p)
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2) vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
@ -27,10 +27,10 @@ D <- function(p) {
#' #'
#' @examples #' @examples
#' p <- 5 #' p <- 5
#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p))) #' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p)))
#' #'
#' @export #' @export
D.pinv <- function(p) { Dup.pinv <- function(p) {
Dp <- D(p) Dp <- Dup(p)
solve(crossprod(Dp), t(Dp)) solve(crossprod(Dp), t(Dp))
} }
@ -119,7 +119,7 @@ K <- function(dim, mode) {
#' #'
#' @examples #' @examples
#' p <- 7 #' p <- 7
#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p))) #' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p)))
#' #'
#' @export #' @export
N <- function(p) { N <- function(p) {

View File

@ -11,6 +11,34 @@
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if #' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
#' \code{transposed} is true. #' \code{transposed} is true.
#' #'
#' @examples
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' nrowB <- sample.int(10, 1)
#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB)
#'
#' C <- ttm(A, B, mode)
#'
#' dimC <- ifelse(seq_along(dimA) != mode, dimA, nrowB)
#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' ncolB <- sample.int(10, 1)
#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode])
#'
#' C <- ttm(A, B, mode, transposed = TRUE)
#'
#' C.ref <- ttm(A, t(B), mode)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' @export #' @export
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) { ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
storage.mode(T) <- storage.mode(M) <- "double" storage.mode(T) <- storage.mode(M) <- "double"

View File

@ -31,3 +31,52 @@ vech.pinv <- function(a) {
# de-vectorized matrix # de-vectorized matrix
matrix(a[vech.pinv.index(p)], p, p) matrix(a[vech.pinv.index(p)], p, p)
} }
# vech <- function(A) {
# stopifnot(all(nrow(A) == dim(A)))
# axis <- seq_len(nrow(A))
# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# A[rowSums(grid - 1) < nrow(A)]
# }
# p <- 4
# X <- matrix(rnorm(p^2), p)
# vech(outer(X, X))
# vech(outer(c(X), c(X)))
# sort(unique(c(sym(outer(X, X)))))
# sort(vech(sym(outer(X, X))))
# # (I <- matrix(c(
# # 1, 1, 1,
# # 2, 1, 1,
# # 3, 1, 1,
# # 2, 2, 1,
# # 3, 2, 1,
# # 3, 3, 1,
# # 2, 1, 2,
# # 3, 1, 2,
# # 3, 2, 2,
# # 3, 1, 3
# # ), ncol = 3, byrow = TRUE))
# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1)
# # p <- 3
# # ord <- 4
# # A <- array(seq_len(p^ord), rep(p, ord))
# # axis <- seq_len(nrow(A))
# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# # array(rowSums(grid - 1), dim(A))
# # A[rowSums(grid - 1) < nrow(A)]
# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i))))

View File

@ -1,2 +1,3 @@
PKG_LIBS = $(BLAS_LIBS) $(FLIBS) PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS)
# PKG_CFLAGS = -pg

View File

@ -0,0 +1,138 @@
#ifndef INCLUDE_GUARD_R_API_H
#define INCLUDE_GUARD_R_API_H
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
#define R_NO_REMAP
#include <stdint.h> // uint32_t, uint64_t, ...
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// NULL pointer (not defined memory, array, vector, ...)
#ifndef NULL
#define NULL ((void*)0)
#endif
// Truth values
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
// Convenience conversion function similar to `asInteger`, `asReal`, ...
#define NA_UNSIGNED (-((unsigned long)1))
static inline unsigned long asUnsigned(SEXP _val) {
int val = Rf_asInteger(_val);
if (val == NA_INTEGER || val < 0) {
return NA_UNSIGNED;
}
return (unsigned long)val;
}
// Remap BLAS and LAPACK bindings (consistent with the package's own interface,
// avoiding passing scalars by reference (memory address))
/** y <- a x + y */
static inline void axpy(
const int dim,
const double a,
const double* x, const int incx,
double* y, const int incy
) {
F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
}
/** Scale a 1d array `x <- a x` */
static inline void scale(
const int dim,
const double a,
double *x, const int incx
) {
F77_CALL(dscal)(&dim, &a, x, &incx);
}
/** Dot product */
static inline double dot(
const int dim,
const double* x, const int incx,
const double* y, const int incy
) {
return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
}
/** 1d array linear combination `z <- a x + b y` */
// TODO: optimize (if needed at all?)
static inline void lincomb(
const int dim,
const double a, const double* x, const int incx,
const double b, const double* y, const int incy,
double* z, const int incz
) {
for (int i = 0; i < dim; ++i) {
z[i * incz] = a * x[i * incx] + b * y[i * incy];
}
}
/**
* A sufficient Pseudo-Random-Number-Generator (PRNG) of the Xorshift family
*
* With a parameterized seed/state this custom PRNG can be used in a thread-safe
* manner!
*
* For single-threaded operations the PRNGs provided by `R` are preferred, but
* they are _not_ thread-safe. The following is a simple PRNG usable in a
* multi-threaded application.
*
* See TODO: https://en.wikipedia.org/wiki/Xorshift
* SchachHoernchen
*/
static inline uint64_t rot64(uint64_t val, int shift) {
return (val << shift) | (val >> (64 - shift));
}
// (internal) PRNG state/seed type
typedef uint64_t rng_seed_t[4];
// Hookup the PRNG via its seed to R's random number generation utilities
static inline void init_seed(rng_seed_t seed) {
GetRNGstate();
for (size_t i = 0; i < 4; ++i) {
seed[i] = 0;
for (size_t j = 0; j < 64; ++j) {
seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
}
}
PutRNGstate();
}
// PRNG of the Xorshift family
// The least significant 32 bits are not reliable, use most significant 32 bits
static inline uint64_t rand_u64(rng_seed_t seed) {
uint64_t e = seed[0] - rot64(seed[1], 7);
seed[0] = seed[1] ^ rot64(seed[1], 13);
seed[1] = seed[2] + rot64(seed[3], 37);
seed[2] = seed[3] + e;
seed[3] = e + seed[0];
return seed[3];
}
// With an externally supplied seed, every thread can have its own seed and as
// such we can use this as a thread-safe alternative to R's `unif_rand()`.
static inline double unif_rand_thrd(rng_seed_t seed) {
return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
}
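// Usage sketch (assumption: seeds are initialized on the main thread, since
// `init_seed` touches R's RNG state, then handed one per worker thread):
//
// rng_seed_t seed;
// init_seed(seed); // hook up to R's PRNG
// double u = unif_rand_thrd(seed); // uniform draw in [0, 1]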
#endif /* INCLUDE_GUARD_R_API_H */

View File

@ -0,0 +1,185 @@
#ifndef INCLUDE_GUARD_BIT_UTILS_H
#define INCLUDE_GUARD_BIT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
#include <stdbool.h> // bool (used by the portable fallbacks below)
#if (defined(__GNUC__) && defined(__BMI2__))
#include <x86intrin.h>
#include <bmi2intrin.h> // _pdep_u32
#endif
#ifdef _MSC_VER
#include <intrin.h> // _BitScanForward
#endif
/**
* Computes the parity of a word (0 for even bit count and 1 otherwise)
*/
#ifdef __GNUC__
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
#else
static inline int bitParity32(uint32_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
static inline int bitParity64(uint64_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
#endif
/**
* Counts the number of set bits (`1`s in binary) in the number `x`
*/
#ifdef __GNUC__ /* POPulation COUNT */
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
#else
static inline int bitCount32(uint32_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
static inline int bitCount64(uint64_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
#endif
/**
* Gets the index of the LSB (least significant bit)
*
* @condition `x != 0`, for `x == 0` undefined behaviour
*/
#ifdef __GNUC__
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
#elif _MSC_VER
static inline int bitScanLS32(uint32_t x) {
unsigned long index; // index of the least significant set bit
_BitScanForward(&index, x);
return (int)index;
}
static inline int bitScanLS64(uint64_t x) {
unsigned long index;
_BitScanForward64(&index, x);
return (int)index;
}
#else
static inline int bitScanLS32(uint32_t x) {
int ctz = 0; // result storing the Count of Trailing Zeros
bool empty; // whether no set bit was found in the current search area
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint32_t)( 1)));
return ctz;
}
static inline int bitScanLS64(uint64_t x) {
int ctz = 0;
bool empty;
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
x >>= 32 * empty;
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint64_t)( 1)));
return ctz;
}
#endif
/**
* Parallel DEPosit (aka PDEP)
*
* Writes the `val` bits into the positions of the set bits in `mask`.
*
* Example:
* val: **** **** **** 1.1.
* mask: 1... 1... 1... 1...
* res: 1... .... 1... ....
*/
#if (defined(__GNUC__) && defined(__BMI2__))
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
return _pdep_u32(val, mask);
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
return _pdep_u64(val, mask);
}
#else
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
uint32_t res = 0;
for (uint32_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
uint64_t res = 0;
for (uint64_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
#endif
/**
* Gets the next lexicographically ordered permutation of an n-bit word.
*
* Let `val` be a bit-word with `n` bits set, then this procedure computes the
* next word with `n` bits set, that is, the next element in the
* lexicographically ordered sequence of `n`-bit words. For example
*
* val -> bitNextPerm(val)
* 00010011 -> 00010101
* 00010101 -> 00010110
* 00010110 -> 00011001
* 00011001 -> 00011010
* 00011010 -> 00011100
* 00011100 -> 00100011
*
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
*
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
*/
static inline uint32_t bitNextPerm32(uint32_t val) {
// Sets all least significant 0-bits of val to 1
uint32_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
}
static inline uint64_t bitNextPerm64(uint64_t val) {
// Sets all least significant 0-bits of val to 1
uint64_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
}
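// Usage sketch (assumption: `0 < k <= n < 32`): iterate over all `k`-element
// subsets of `{0, ..., n - 1}` encoded as set bits:
//
// uint32_t val = ((uint32_t)1 << k) - 1; // smallest word with k bits set
// while (val < ((uint32_t)1 << n)) {
// /* ... process subset encoded in `val` ... */
// val = bitNextPerm32(val);
// }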
#endif /* INCLUDE_GUARD_BIT_UTILS_H */

View File

@ -7,27 +7,96 @@
// ); // );
/* Tensor Times Matrix a.k.a. Mode Product */ /* Tensor Times Matrix a.k.a. Mode Product */
extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op); extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
/* Multi Linear Multiplication (iterated mode products) */
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops);
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */ /* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
extern SEXP mtvk(SEXP A, SEXP Bs); extern SEXP mtvk(SEXP A, SEXP Bs);
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */ /* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode); extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode);
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */ /* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
extern SEXP mcrossprod_sym(SEXP A, SEXP mode); extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode);
// /* Higher Order PCA */
// extern SEXP hopca(SEXP X);
/* Singular Value Decomposition */
extern SEXP R_svd(SEXP A);
// /* Iterative Cyclic Updating for the Tensor Normal Distribution */
// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter);
// /* Generalized tensor normal using NAGD */
// extern SEXP R_gmlm_tensor_normal(
// SEXP X, SEXP Fy,
// SEXP eta_bar, SEXP alphas, SEXP Omegas,
// SEXP max_iter, SEXP max_line_iter
// );
/* Solve linear equation system A X = B */
extern SEXP R_solve(SEXP A, SEXP B);
/* Determinant of a matrix */
extern SEXP R_det(SEXP A);
// /* Unscaled PMF of the Ising model with natural parameters `_params` */
// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_partition_func_exact(SEXP _params);
// /* Estimated partition function of the Ising model with natural parameters `_params` */
// extern SEXP R_ising_partition_func_MC(SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_m2_exact(SEXP _params);
// // extern SEXP R_ising_m2_MC64(SEXP _params);
// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup);
// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads);
/* Sample from the Ising model */
extern SEXP R_ising_sample(SEXP, SEXP, SEXP);
// Interface to the Ising second moment function
extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP);
/* List of registered routines (a.k.a. C entry points) */ /* List of registered routines (a.k.a. C entry points) */
static const R_CallMethodDef CallEntries[] = { static const R_CallMethodDef CallEntries[] = {
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED {"C_ttm", (DL_FUNC) &R_ttm, 4},
{"C_ttm", (DL_FUNC) &ttm, 4}, {"C_mlm", (DL_FUNC) &R_mlm, 4},
{"C_mtvk", (DL_FUNC) &mtvk, 2}, {"C_mtvk", (DL_FUNC) &mtvk, 2},
{"C_mcrossprod", (DL_FUNC) &mcrossprod, 3}, {"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3},
{"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2}, {"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2},
{"C_svd", (DL_FUNC) &R_svd, 1},
{"C_solve", (DL_FUNC) &R_solve, 2},
{"C_det", (DL_FUNC) &R_det, 1},
{"C_ising_sample", (DL_FUNC) &R_ising_sample, 3},
{"C_ising_m2", (DL_FUNC) &R_ising_m2, 5},
// {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2},
// {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1},
// {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1},
// {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1},
// {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3},
// {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2},
// {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1},
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
// {"C_hopca", (DL_FUNC) &hopca, 1},
// {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT
// {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT
{NULL, NULL, 0} {NULL, NULL, 0}
}; };
/* Restrict C entry points to registered routines. */ /**
* Restrict C entry points to registered routines.
*
* NOTE: Naming convention: `R_init_<PACKAGE-NAME>`
*/
void R_init_tensorPredictors(DllInfo *dll) { void R_init_tensorPredictors(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE); R_useDynamicSymbols(dll, FALSE);

View File

@ -0,0 +1,77 @@
#ifndef INCLUDE_GUARD_INT_UTILS_H
#define INCLUDE_GUARD_INT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
// /**
// * Integer logarithm, the biggest power `p` such that `2^p <= x`.
// */
// static int ilog2(uint64_t x) {
// int log = 0;
// while (x >>= 1) {
// log++;
// }
// return log;
// }
/**
* Integer square root of `x`, that is `floor(sqrt(x))` (via binary search)
*/
static uint64_t isqrt(uint64_t x) {
// implements a binary search
uint64_t root = 0;
uint64_t left = 0; // left boundary
uint64_t right = x + 1; // right boundary
while(left + 1UL < right) {
root = (left + right) / 2UL;
if (root * root <= x) {
left = root;
} else {
right = root;
}
}
return left;
}
/**
* Inverse to the triangular numbers
*
* Given a positive number `x = p (p + 1) / 2` it computes `p` if possible.
* In case there is no positive integer solution `0` is returned.
*
* Note: `p = (isqrt(8 x + 1) - 1) / 2` follows immediately from the quadratic formula.
*/
static uint32_t invTriag(uint32_t x) {
uint64_t root = isqrt(8UL * (uint64_t)x + 1UL);
if (root * root != 8UL * (uint64_t)x + 1UL) {
return 0;
}
return (root - 1) / 2;
}
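// Worked example (sketch): `x = 15 = 5 * 6 / 2` gives
// `isqrt(8 * 15 + 1) = isqrt(121) = 11` and `p = (11 - 1) / 2 = 5`, while
// `x = 16` gives `isqrt(129) = 11` with `11 * 11 != 129`, hence `0`.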
// /**
// * Number of sub-sets (including empty set) of max-size
// *
// * It computes, with `binom` being the binomial coefficient, the following sum
// *
// * sum_{i = 0}^k binom(n, i)
// */
// static uint64_t nrSubSets(uint64_t n, uint64_t k) {
// uint64_t sum = 1, binom = 1;
// for (uint64_t i = 1; i <= k; ++i) {
// binom *= n--;
// binom /= i;
// sum += binom;
// }
// return sum;
// }
#endif /* INCLUDE_GUARD_INT_UTILS_H */

View File

@ -0,0 +1,181 @@
#ifndef INCLUDE_GUARD_ISING_MCMC_H
#define INCLUDE_GUARD_ISING_MCMC_H
#include "R_api.h"
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
* method given the Ising model parameters in compact half-vectorized form.
*
* f(x) = p0(params) exp(vech(x x')' params)
*
* @important requires to be wrapped between `GetRNGstate()` and `PutRNGstate()`
* calls. For example
*
* GetRNGstate();
* for (size_t sample = 0; sample < nr_samples; ++sample) {
* ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
* }
* PutRNGstate();
*/
static inline void ising_mcmc_vech(
const size_t warmup,
const size_t dim, const double* params,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand() * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
// Always add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// Thread-safe version of `ising_mcmc_vech()` (using a thread-safe PRNG)
static inline void ising_mcmc_vech_thrd(
const size_t warmup,
const size_t dim, const double* params,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand_thrd(seed) * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
// Always add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
* method given the Ising model parameters in symmetric matrix form `A`.
*
* f(x) = prob_0(A) exp(x' A x)
*
* The relation to the natural parameters `params` as used in the half-vectorized
* version `ising_mcmc_vech()`
*
* f(x) = prob_0(params) exp(vech(x x')' params)
*
* is illustrated via an example with `dim = 5`;
*
* p0 [ p0 p1/2 p2/2 p3/2 p4/2 ]
* p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ]
* params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ]
* p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ]
* p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ]
*
* or the other way around, given the symmetric matrix form `A`, converted to
* the natural parameters
*
* [ a00 a10 a20 a30 a40 ] a00
* [ a10 a11 a21 a31 a41 ] 2*a10 a11
* A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22
* [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33
* [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44
*
*/
static inline void ising_mcmc_mat(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// Thread-safe version of `ising_mcmc_mat()` (using a thread-safe PRNG)
static inline void ising_mcmc_mat_thrd(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
#endif /* INCLUDE_GUARD_ISING_MCMC_H */

View File

@ -0,0 +1,401 @@
/** Methods computing or estimating the second moment of the Ising model given
* the natural parameters of the p.m.f. in exponential family form. That is
* f(x) = p0(params) exp(vech(x x')' params)
* where `params` are the natural parameters. The scaling constant `p0(params)`
* is also the zero event `x = (0, ..., 0)` probability.
*
* Three different versions are provided.
* - exact: Exact method computing the true second moment `E[X X']` given the
* parameters `params`. This method has a runtime of `O(2^dim)` where
* `dim` is the dimension of the random variable `X`. This means it is
* infeasible for bigger (like 25 and above) values of `dim`.
* - MC: Monte-Carlo method used in case the exact method is not feasible.
* - MC Thrd: Multi-Threaded Monte-Carlo method.
*
* The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
* dimensional numeric vector containing the log-odds. The indexing scheme into
* the parameters, corresponding to the log-odds of single or two-way interactions
* with indices `i, j`, follows the half-vectorization scheme
*
* i = 4'th row => Symmetry => Half-Vectorized Storage
*
* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]
* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]
* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]
* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]
* [ * * * * * * ] [ * * * 16 * * ] [ * * ]
* [ * * * * * * ] [ * * * 17 * * ] [ * ]
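* (in general, the half-vectorized index of entry `(i, j)` with `j <= i` is
* `j * dim - j * (j + 1) / 2 + i`, e.g. entry `(3, 1)` above: `6 - 1 + 3 = 8`)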
*
*/
#include "R_api.h"
#include "bit_utils.h"
#include "int_utils.h"
#include "ising_MCMC.h"
#ifndef __STDC_NO_THREADS__
#include <threads.h>
#endif
////////////////////////////////////////////////////////////////////////////////
// TODO: read //
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
// and
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
// as well as do the following PROPERLY
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
// void ising_m2_exact_mat(
// const size_t dim, const size_t ldA, const double* A,
// double* M2
// ) {
// double sum_0 = 1.0;
// const uint32_t max_event = (uint32_t)1 << dim;
// (void)memset(M2, 0, dim * dim * sizeof(double));
// for (uint32_t X = 1; X < max_event; ++X) {
// // Evaluate quadratic form `X' A X` using symmetry of `A`
// double XtAX = 0.0;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// XtAX += A[i * (ldA + 1)];
// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
// }
// }
// const double prob_X = exp(XtAX);
// sum_0 += prob_X;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// for (uint32_t Z = Y; Z; Z &= Z - 1) {
// M2[i * dim + bitScanLS32(Z)] += prob_X;
// }
// }
// }
// const double prob_0 = 1.0 / sum_0;
// for (size_t j = 0; j < dim; ++j) {
// for (size_t i = 0; i < j; ++i) {
// M2[j * dim + i] = M2[i * dim + j];
// }
// for (size_t i = j; i < dim; ++i) {
// M2[i * dim + j] *= prob_0;
// }
// }
// }
double ising_m2_exact(const size_t dim, const double* params, double* M2) {
// Accumulator of sum of all (unscaled) probabilities
double sum_0 = 1.0;
// max event (actually upper bound)
const uint32_t max_event = (uint32_t)1 << dim;
// Length of parameters
const size_t len = dim * (dim + 1) / 2;
// Initialize M2 to zero
(void)memset(M2, 0, len * sizeof(double));
// Iterate all `2^dim` binary vectors `X`
for (uint32_t X = 1; X < max_event; ++X) {
// Dot product `<vech(X X'), params>`
double dot_X = 0.0;
// all bits in `X`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
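// `I` is the vech offset of column `i`: `params[I + j]` addresses the
// `(j, i)` log-odds for `j >= i` (cf. the indexing illustration above)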
// add single and two way interaction log odds
for (uint32_t Z = Y; Z; Z &= Z - 1) {
dot_X += params[I + bitScanLS32(Z)];
}
}
// (unscaled) probability of current event `X`
const double prob_X = exp(dot_X);
sum_0 += prob_X;
// Accumulate set bit probabilities for the first and second moment `E[X X']`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
for (uint32_t Z = Y; Z; Z &= Z - 1) {
M2[I + bitScanLS32(Z)] += prob_X;
}
}
}
// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
const double prob_0 = 1.0 / sum_0;
for (size_t i = 0; i < len; ++i) {
M2[i] *= prob_0;
}
return prob_0;
}
// Monte-Carlo method using Gibbs sampling for the second moment
double ising_m2_MC(
/* options */ const size_t nr_samples, const size_t warmup,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Dirty trick (reuse output memory for precise counting)
_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
uint64_t* counts = (uint64_t*)M2;
// Initialize counts to zero
(void)memset(counts, 0, len * sizeof(uint64_t));
// Allocate memory to store Markov-Chain value
int* X = (int*)R_alloc(dim, sizeof(int));
// Create/Update R's internal PRNG state
GetRNGstate();
// Spawn chains
for (size_t sample = 0; sample < nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech(warmup, dim, params, X);
// Accumulate component counts
uint64_t* count = counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
// Write R's internal PRNG state back (if needed)
PutRNGstate();
// Compute means from counts
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)(nr_samples);
}
return -1.0; // TODO: also compute prob_0
}
// in case the compiler supports standard C threads
#ifndef __STDC_NO_THREADS__
// Worker thread to calling thread communication struct
typedef struct thrd_data {
rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value
size_t nr_samples; // Nr. of samples this worker handles
size_t warmup; // Monte-Carlo chain burn-in length
size_t dim; // Random variable dimension
const double* params; // Ising model parameters
int* X; // Working memory to store current binary sample
uint32_t* counts; // (output) count of single and two way interactions
double prob_0; // (output) zero event probability estimate
} thrd_data_t;
// Worker thread function
int thrd_worker(thrd_data_t* data) {
// Extract data as thread local variables (for convenience)
const size_t dim = data->dim;
int* X = data->X;
// Initialize counts to zero
(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));
// Spawn Monte-Carlo chains (one for every sample)
for (size_t sample = 0; sample < data->nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);
// Accumulate component counts
uint32_t* count = data->counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
return 0;
}
double ising_m2_MC_thrd(
/* options */ const size_t nr_samples, const size_t warmup,
const size_t nr_threads,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Allocate working and self memory for worker threads
int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_t));
thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
// Provide instructions for the workers and dispatch them
for (size_t tid = 0; tid < nr_threads; ++tid) {
// Every thread needs its own PRNG seed!
init_seed(threads_data[tid].seed);
// divide work among workers (more or less) equally, with the first worker
// (tid = 0) taking the additional remainder of the divided work (`nr_samples`)
threads_data[tid].nr_samples = nr_samples / nr_threads + (
tid ? 0 : nr_samples % nr_threads
);
threads_data[tid].warmup = warmup;
threads_data[tid].dim = dim;
threads_data[tid].params = params;
// Every worker gets its own disjoint working memory
threads_data[tid].X = &Xs[dim * tid];
threads_data[tid].counts = &counts[len * tid];
// dispatch the worker
thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
}
// Join (wait for) all workers
for (size_t tid = 0; tid < nr_threads; ++tid) {
thrd_join(threads[tid], NULL);
}
// Accumulate worker results into first (tid = 0) worker counts result
for (size_t tid = 1; tid < nr_threads; ++tid) {
for (size_t i = 0; i < len; ++i) {
counts[i] += counts[tid * len + i];
}
}
// convert discrete counts into means
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)nr_samples;
}
return -2.0; // TODO: also compute prob_0
}
#endif /* !__STDC_NO_THREADS__ */
/* Implementation of `.Call` entry point */
////////////////////////////////////////////////////////////////////////////////
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
extern SEXP R_ising_m2(
SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
) {
// Extract and validate arguments
size_t protect_count = 0;
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
protect_count++;
}
// Parameters are given either in symmetric matrix form or as natural
// parameters in the half-vectorized memory layout
size_t dim;
double* params;
if (Rf_isMatrix(_params)) {
if (Rf_nrows(_params) != Rf_ncols(_params)) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Convert to natural parameters
dim = Rf_nrows(_params);
params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));
double* A = REAL(_params);
for (size_t j = 0, I = 0; j < dim; ++j) {
for (size_t i = j; i < dim; ++i, ++I) {
params[I] = (1 + (i != j)) * A[i + j * dim];
}
}
} else {
// Get Ising random variable dimension `dim` from "half-vectorized" parameters
// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
dim = invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
" `p` is the dimension of the random variable");
}
params = REAL(_params);
}
// Determine the method to use and validate if possible
const int use_MC = Rf_asLogical(_use_MC);
if (use_MC == NA_LOGICAL) {
Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
}
// Allocate result vector
SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
++protect_count;
// Aside from the second moment, compute the zero event probability (the
// inverse partition function), the scaling factor of the Ising model p.m.f.
double prob_0 = -1.0;
if (use_MC) {
// Convert and validate arguments for the Monte-Carlo methods
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
const size_t nr_threads = asUnsigned(_nr_threads);
if (nr_threads == 0 || nr_threads > 256) {
Rf_error("Invalid 'nr_thread' value");
}
if (nr_threads == 1) {
// Single threaded Monte-Carlo method
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
} else {
// Multi-Threaded Monte-Carlo method if provided, otherwise use
// the single threaded version with a warning
#ifdef __STDC_NO_THREADS__
Rf_warning("Multi-Threading NOT supported, using fallback.");
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
#else
prob_0 = ising_m2_MC_thrd(
nr_samples, warmup, nr_threads,
dim, params, REAL(_M2)
);
#endif
}
} else {
// Exact method (ignore other arguments), only validate dimension
if (24 < dim) {
Rf_error("Dimension '%d' too big for exact method (max 24)", (int)dim);
}
// and call the exact method
prob_0 = ising_m2_exact(dim, params, REAL(_M2));
}
// Attach the zero event probability as an attribute to the computed second moment
SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
++protect_count;
Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);
// release SEXPs to the garbage collector
UNPROTECT(protect_count);
return _M2;
}

View File

@ -0,0 +1,72 @@
#ifndef INCLUDE_GUARD_ISING_SAMPLE_H
#define INCLUDE_GUARD_ISING_SAMPLE_H
#include "R_api.h"
#include "int_utils.h"
#include "ising_MCMC.h"
// .Call interface to draw samples from the Ising model
extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) {
// Counts number of protected SEXP's to give them back to the garbage collector
size_t protect_count = 0;
// Parse and validate arguments
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
// Determine parameter mode (natural parameter vector or symmetric matrix)
// Either `m` for "Matrix" or `v` for "Vector"
const char param_type = Rf_isMatrix(_params) ? 'm' : 'v';
// In case of matrix parameters check for square matrix
if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Get problem dimension from parameter size
const size_t dim = (param_type == 'm')
? Rf_nrows(_params)
: invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Error determining dimension.");
}
// Ensure parameters are numeric
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
++protect_count;
}
double* params = REAL(_params);
// Allocate result sample
SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples));
++protect_count;
int* X = INTEGER(_X);
// Call appropriate sampling routine for every sample to generate
GetRNGstate();
if (param_type == 'm') {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]);
}
} else {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
}
}
PutRNGstate();
// Release protected SEXPs to the garbage collector
UNPROTECT(protect_count);
return _X;
}
#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */

View File

@ -8,6 +8,51 @@
#define FCONE #define FCONE
#endif #endif
void mcrossprod(
const int rank,
const double* A, const int* dimA,
const double* B, const int* dimB,
const int mode,
double* C
) {
// the strides
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
// Note: Middle stride is ignored (to be consistent with sym version)
int stride[3] = {1, 0, 1};
for (int i = 0; i < rank; ++i) {
int size = dimA[i];
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
}
// employ BLAS dgemm (Double-precision GEneral Matrix-Matrix multiplication)
// (C = alpha op(A) op(B) + beta C, where op is identity or transposition)
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: special case C = A_(1) B_(1)^T
// C = 1 A B^T + 0 C
F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2],
&one, A, &dimA[mode], B, &dimB[mode],
&zero, C, &dimA[mode] FCONE FCONE);
} else {
// Other modes written as accumulated sum of matrix products
// initialize C to zero
memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double));
// Sum over all modes > mode
for (int i2 = 0; i2 < stride[2]; ++i2) {
// C = 1 A^T B + 1 C
F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0],
&one, &A[i2 * stride[0] * dimA[mode]], &stride[0],
&B[i2 * stride[0] * dimB[mode]], &stride[0],
&one, C, &dimA[mode] FCONE FCONE);
}
}
}
/** /**
* Tensor Mode Crossproduct * Tensor Mode Crossproduct
* *
@ -21,72 +66,58 @@
 * @param B multi-dimensional array
 * @param m mode index (1-indexed)
 */
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
    // get zero indexed mode
    int mode = asInteger(m) - 1;

    // Check if both `A` and `B` are real-valued
    if (!isReal(A) || !isReal(B)) {
        error("Type mismatch, both `A` and `B` must be real-valued");
    }

    // get dimension attributes
    SEXP dimA_sexp = getAttrib(A, R_DimSymbol);
    SEXP dimB_sexp = getAttrib(B, R_DimSymbol);

    // dimension type validation
    if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) {
        error("Dimensions must be integer vectors");
    }

    // validate dimensions
    int rank = length(dimA_sexp);
    if (rank != length(dimB_sexp)) {
        error("Dimension mismatch");
    }

    // validate mode (0-indexed, must be smaller than the tensor order)
    if (mode < 0 || rank <= mode) {
        error("Illegal mode");
    }

    // get raw pointers to dimensions
    int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP));
    int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP));

    // finally, check for `A` and `B` dimensions to match
    for (int i = 0; i < rank; ++i) {
        if (i != mode && dimA[i] != dimB[i]) {
            error("Dimension mismatch");
        }
    }

    // create response matrix C
    SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode]));

    // Call C mode crossprod subroutine
    mcrossprod(
        rank,           // tensor rank of both `A` and `B`
        REAL(A), dimA,  // mem. addr. of A, dim(A)
        REAL(B), dimB,  // mem. addr. of B, dim(B)
        mode,           // the crossproduct mode to compute
        REAL(C)         // return value memory addr.
    );

    // release C to garbage collector
    UNPROTECT(1);
    return C;
@ -104,7 +135,7 @@ extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
 * @param A multi-dimensional array
 * @param m mode index (1-indexed)
 */
extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) {
    // get zero indexed mode
    int mode = asInteger(m) - 1;

tensorPredictors/src/mlm.c Normal file
View File

@ -0,0 +1,160 @@
#include "R_api.h"
#include "ttm.h"
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
) {
// Compute total size of `A`, `C` and required working memory
int sizeA = 1; // `prod(dim(A))`
int sizeC = 1; // `prod(dim(C))`
int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included)
for (int i = 0; i < ord; ++i) {
sizeA *= dimA[i];
sizeC *= dimC[i];
work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i];
}
    // If only the required working memory size is requested, stop here and return it
if (work_mem == NULL) {
return work_size + ord;
}
// Copy dimensions of A to intermediate temp. dim
int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer
memcpy(dimT, dimA, ord * sizeof(int));
    // Setup work memory hooks (two swappable blocks)
double* tmp1 = work_mem;
double* tmp2 = work_mem + (work_size >> 1);
// Multi Linear Multiplication is an iterated application of TTM
for (int i = 0; i < nrhs; ++i) {
// Get current `B` dimensions (only implicitly given)
int nrowB = dimC[modes[i]];
int ncolB = dimA[modes[i]];
if (trans && trans[i]) {
nrowB = dimA[modes[i]];
ncolB = dimC[modes[i]];
}
// Tensor Times Matrix (`modes[i]` mode products)
ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB,
1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0,
tmp1);
// Update intermediate temp dim
dimT[modes[i]] = dimC[modes[i]];
// Swap tmp1 <-> tmp2
double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
}
    // dAXPY (y <- a x + y) with `x = tmp2` and `y = beta C`
if (beta != 0.0) {
for (int i = 0; i < sizeC; ++i) {
C[i] *= beta;
}
} else {
memset(C, 0, sizeC * sizeof(double));
}
axpy(sizeC, alpha, tmp2, 1, C, 1);
return 0;
}
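/*
// Usage sketch of the two-phase work memory protocol: a `NULL` `work_mem`
// makes `mlm` only report the required buffer size (in doubles, already
// including room for the `ord` intermediate dimension integers). Assumes
// `dimA`, `dimC`, `ord`, `A`, `Bs` and `C` are set up as in `R_mlm` below,
// and `<stdlib.h>` for `malloc`/`free`:
int modes[2] = {0, 1};
int work_size = mlm(NULL, modes, 2, dimA, dimC, ord,
                    1.0, A, Bs, NULL, 0.0, C, NULL);
double* work_mem = (double*)malloc(work_size * sizeof(double));
(void)mlm(NULL, modes, 2, dimA, dimC, ord,
          1.0, A, Bs, NULL, 0.0, C, work_mem);
free(work_mem);
*/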
/**
* Multi Linear Multiplication (`R` binding)
*/
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) {
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
int ord = Rf_length(dimA);
// Check if `A` is a real valued tensor
if (!Rf_isReal(A) || Rf_isNull(dimA)) {
Rf_error("Param. `A` need to be a real valued array");
}
// Validate that `Bs` is a list
if (!Rf_isNewList(Bs)) {
Rf_error("Param. `Bs` need to be a list of matrices");
}
if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) {
Rf_error("Param. type missmatch, expected modes as ints and ops logical");
}
// Number of `B` matrices (Nr of Right Hand Side objects)
int nrhs = Rf_length(Bs);
// further parameter validations
if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) {
Rf_error("Dimension missmatch (Params length differ)");
}
// Get modes and operations for Bs (transposed or not)
int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms`
int* trans = INTEGER(ops);
// Compute result dimensions `dim(C)` while extracting `B` data pointers
SEXP dimC = PROTECT(Rf_duplicate(dimA));
const double** bs = (const double**)R_alloc(nrhs, sizeof(double*));
for (int i = 0; i < nrhs; ++i) {
// Extract right hand side matrices (`B` matrices from list)
SEXP B = VECTOR_ELT(Bs, i);
if (!(Rf_isMatrix(B) && Rf_isReal(B))) {
UNPROTECT(1);
Rf_error("Param. `Bs` need to be a list of real matrices");
}
int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol));
bs[i] = REAL(B);
// Convert 1-indexed modes to be 0-indexed
modes[i] = INTEGER(ms)[i] - 1;
        // Check if mode is out of range (indexing a non-existing mode of `A`)
if (!((0 <= modes[i]) && (modes[i] < ord))) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1);
}
// Check if `i`th mode of `A` matches corresponding `B` dimension
if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1);
}
INTEGER(dimC)[modes[i]] = dimB[!!trans[i]];
}
    // Now, compute `C`'s size `prod(dim(C))`
int sizeC = 1;
for (int i = 0; i < ord; ++i) {
sizeC *= INTEGER(dimC)[i];
}
// Allocate response `C`
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
Rf_setAttrib(C, R_DimSymbol, dimC);
    // determine required working memory size, then allocate it
int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL);
double* work_memory = (double*)R_alloc(work_size, sizeof(double));
// Compute Multi-Linear Multiplication (call subroutine)
(void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory);
// release C to the hands of the garbage collector
UNPROTECT(2);
return C;
}

View File

@ -0,0 +1,35 @@
#ifndef INCLUDE_GUARD_MLM_H
#define INCLUDE_GUARD_MLM_H
/**
* Multi Linear Multiplication
*
 * C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs - 1] op(Bs[nrhs - 1]) + beta C
*
* @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix
* is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff
* `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is
* always the identity.
* @param modes integer vector of length `nrhs` specifying the product modes.
 * @param nrhs number of right hand side matrices (the length of `Bs`)
 * @param dimA dimensions of the input tensor `A`
 * @param dimC dimensions of the output tensor `C`
 * @param ord tensor order (length of `dimA` and `dimC`)
 * @param alpha scaling factor
 * @param A input tensor memory addr.
 * @param Bs right hand side matrices memory addrs.
 * @param ldBs leading dimensions of the `Bs`; `NULL` defaults to
 *  `dimC[modes[i]]` for each `i`
 * @param beta scaling factor
 * @param C output memory addr.
 *
* @param work_mem NULL or temporary working memory of size
* `2 * prod(pmax(dim(A), dim(C))) + ord`
*/
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
);
#endif /* INCLUDE_GUARD_MLM_H */

View File

@ -0,0 +1,37 @@
// /**
// * A sufficient Pseudo-Random-Number-Generator (PRNG) of the Xorshift family
// *
// * For single threaded operations the PRNGs provided by `R` are preferred. But
// * they are _not_ thread safe. The following is a simple PRNG usable in a
// * multi-threaded application.
// *
// * See TODO: ...https://en.wikipedia.org/wiki/Xorshift
// * SchachHoernchen
// */
// #ifndef INCLUDE_GUARD_RANDOM_H
// #define INCLUDE_GUARD_RANDOM_H
// #include <stdint.h> // uint32_t, uint64_t
// static inline uint64_t rot64(uint64_t val, int shift) {
// return (val << shift) | (val >> (64 - shift));
// }
// // PRNG of the Xorshift family
// // @note the least significant 32 bits are not reliable, use most significant 32 bits
// static inline uint64_t rand_u64(uint64_t seed[4]) {
// uint64_t e = seed[0] - rot64(seed[1], 7);
// seed[0] = seed[1] ^ rot64(seed[1], 13);
// seed[1] = seed[2] + rot64(seed[3], 37);
// seed[2] = seed[3] + e;
// seed[3] = e + seed[0];
// return seed[3];
// }
// static inline double unif_rand_u64(uint64_t seed[4]) {
// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
// }
// #endif
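// // Usage sketch (were the code above enabled): each thread keeps its own
// // `seed` state, so no locking around the (thread-unsafe) R RNG is needed:
// //     uint64_t seed[4] = {1, 2, 3, 4};  // any nonzero initialization
// //     double u = unif_rand_u64(seed);   // uniform draw in [0, 1]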

tensorPredictors/src/svd.c Normal file
View File

@ -0,0 +1,112 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extensions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// Singular Value Decomposition
// @note assumes passed arguments to be "clean", "properly checked"
int svd(
const int p, const int q,
double* A, const int ldA,
double* d,
double* U, const int ldU,
double* Vt, const int ldVt,
double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0
) {
// LAPACK information return variable (in case of an error, LAPACK sets this
// to a non-zero value)
int info = 0;
// check if work memory is supplied, if _not_ query for appropriate size and
// allocate the memory
if (!work_mem || work_size < 1 || !i_work) {
// create (declare) work memory
i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int));
double tmp; // variable to store work memory size query result
work_mem = &tmp; // point to `tmp` as it will contain the result of
// the work memory size query
// request appropriate work memory size
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
// allocate work memory
work_size = (int)tmp; // "read" work memory size query result
work_mem = (double*)R_alloc(work_size, sizeof(double));
// in case of an error, return error code stored in `info`
if (info) { return info; }
}
// actual SVD computation
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
return info;
}
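/*
// Direct call sketch with the `0, -1, 0` "unknown" defaults, letting `svd`
// query LAPACK for the work size and allocate via `R_alloc` (hence this must
// run inside an R session; also note `dgesdd` overwrites `A`):
double A[6] = {1, 0, 0, 0, 2, 0};  // 3 x 2 column-major, singular values 2, 1
double d[2], U[3 * 2], Vt[2 * 2];
int info = svd(3, 2, A, 3, d, U, 3, Vt, 2, 0, -1, 0);
*/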
// Singular Value Decomposition (R binding)
// @note educational purpose, same as `La.svd`
extern SEXP R_svd(SEXP A) {
// Check if we got a real-valued matrix
if (!isMatrix(A) || !isReal(A)) {
error("Require a real-valued matrix");
}
// extract matrix dimensions
int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP));
int p = dims[0]; // = nrow(A)
int q = dims[1]; // = ncol(A)
int m = p < q ? p : q; // = min(p, q) = min(dim(A))
// Check if dimensions are not degenerate
if (p < 1 || q < 1) {
error("Expected positive matrix dimensions");
}
// create R objects to store the SVD result `A = U diag(d) V^T`
SEXP U = PROTECT(allocMatrix(REALSXP, p, m));
SEXP d = PROTECT(allocVector(REALSXP, m));
SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q));
// Call C SVD routine
int info = svd(
p, q, // nrow(A), ncol(A)
REAL(A), p, // mem. addr. of A, ldA (leading dimension of A)
REAL(d), // mem. addr. of d
REAL(U), p, // mem. addr. of U, ldU (leading dimension of U)
REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T)
0, -1, 0 // work mem. pointer, work mem. size and int work mem
); // set to `0, -1, 0` to indicate "unknown"
// Check LAPACK info
if (info) {
error("error code %d from LAPACK routine 'dgesdd'", info);
}
    // Create R list containing SVD components
SEXP result = PROTECT(allocVector(VECSXP, 3));
SEXP names = PROTECT(allocVector(STRSXP, 3));
SET_VECTOR_ELT(result, 0, d);
SET_VECTOR_ELT(result, 1, U);
SET_VECTOR_ELT(result, 2, Vt);
SET_STRING_ELT(names, 0, mkChar("d"));
SET_STRING_ELT(names, 1, mkChar("u"));
SET_STRING_ELT(names, 2, mkChar("vt"));
setAttrib(result, R_NamesSymbol, names);
// Release created objects to the garbage collector
UNPROTECT(5);
return result;
}

View File

@ -1,104 +1,48 @@
#include "ttm.h"

void ttm(
    const int transB, const int mode,
    const int* dimA, const int ordA, const int nrowB, const int ncolB,
    const double alpha,
    const double* A,
    const double* B, const int ldB, // TODO: ldB is IGNORED!!!
    const double beta,
    double* C
) {
    // Strides are the "leading" and "trailing" dimensions of the matricized
    // tensor `A` in the following matrix-matrix multiplications
    // `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
    // `stride[1] <- dim(A)[mode]`
    // `stride[2] <- prod(dim(A)[-seq_len(mode)])`
    int stride[3] = {1, dimA[mode], 1};
    for (int i = 0; i < ordA; ++i) {
        stride[0] *= (i < mode) ? dimA[i] : 1;
        stride[2] *= (i > mode) ? dimA[i] : 1;
    }

    if (mode == 0) {
        // mode 1: C = alpha (A x_1 op(B))_(1) + beta C
        //           = alpha op(B) A_(1) + beta C
        // as a single Matrix-Matrix multiplication
        F77_CALL(dgemm)(transB ? "T" : "N", "N",
            (transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha,
            B, &nrowB, A, &stride[1],
            &beta, C, (transB ? &ncolB : &nrowB)
            FCONE FCONE);
    } else {
        // Other modes can be written as blocks of matrix multiplications
        // C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2
        //          = alpha A_(m)' op(B)' + beta C_:,:,i2
        for (int i2 = 0; i2 < stride[2]; ++i2) {
            F77_CALL(dgemm)("N", transB ? "N" : "T",
                &stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha,
                &A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB,
                &beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0]
                FCONE FCONE);
        }
    }

    /*
    // (reference implementation)
    // Tensor Times Matrix / Mode Product for `op(B) == B`
@ -114,13 +58,76 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
        }
    }
    */
}
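/*
// Worked stride example: for `dimA = {2, 3, 4}` and `mode = 1` (the second
// mode) the loop above yields `stride = {2, 3, 4}`. The tensor `A` is then
// processed as `stride[2] = 4` consecutive 2 x 3 blocks `A[, , i2]`, each
// multiplied by `op(B)` with a single `dgemm` call.
*/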
/**
* Tensor Times Matrix a.k.a. Mode Product
*
* @param A multi-dimensional array
* @param B matrix
* @param m mode index (1-indexed)
* @param op boolean if `B` is transposed
*/
extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
// get zero indexed mode
const int mode = Rf_asInteger(m) - 1;
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
// operation on `B` (transposed or not)
const int transB = Rf_asLogical(op);
// as well as `B`s dimensions
const int nrowB = Rf_nrows(B);
const int ncolB = Rf_ncols(B);
// validate mode (mode must be smaller than the nr of dimensions)
if (mode < 0 || Rf_length(dimA) <= mode) {
Rf_error("Illegal mode");
}
    // and check if B is a matrix of non-degenerate size
if (!Rf_isMatrix(B)) {
Rf_error("Expected a matrix as second argument");
}
if (!Rf_nrows(B) || !Rf_ncols(B)) {
Rf_error("Zero dimension detected");
}
// check matching of dimensions
if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) {
Rf_error("Dimension missmatch");
}
// calc nr of response elements (size of C)
// `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)`
int sizeC = 1;
for (int i = 0; i < Rf_length(dimA); ++i) {
int size = INTEGER(dimA)[i];
        // check for non-degenerate dimensions
if (!size) {
Rf_error("Zero dimension detected");
}
sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size;
}
// create response object C
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
// Tensor Times Matrix / Mode Product
ttm(transB, mode,
INTEGER(dimA), Rf_length(dimA), nrowB, ncolB,
1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C));
    // finally, set result dimensions
    SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA)));
    for (int i = 0; i < Rf_length(dimA); ++i) {
        INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i];
    }
    Rf_setAttrib(C, R_DimSymbol, dimC);

    // release C to the hands of the garbage collector
    UNPROTECT(2);

View File

@ -0,0 +1,21 @@
#ifndef INCLUDE_GUARD_TTM_H
#define INCLUDE_GUARD_TTM_H
#include "R_api.h"
/**
* Tensor Times Matrix Subroutine
*
* @attention Assumes all parameters to be correct!
*/
void ttm(
/* options */ const int transB, const int mode,
/* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB,
/* scalar */ const double alpha,
/* tensor */ const double* A,
/* matrix */ const double* B, const int ldB,
/* scalar */ const double beta,
/* tensor */ double* C
);
#endif /* INCLUDE_GUARD_TTM_H */
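/*
// A minimal caller sketch against this interface (`ttm_example` is a
// hypothetical illustration; note `ldB` is currently ignored by the
// implementation, see `ttm.c`). It computes the mode-2 product
// `C = A x_2 B` of a (2, 3, 4) tensor `A` with a 5 x 3 matrix `B`,
// giving a (2, 5, 4) tensor `C`:
void ttm_example(const double* A, const double* B, double* C) {
    int dimA[3] = {2, 3, 4};
    ttm(/* transB */ 0, /* mode */ 1, dimA, /* ordA */ 3,
        /* nrowB */ 5, /* ncolB */ 3,
        /* alpha */ 1.0, A, B, /* ldB */ 5, /* beta */ 0.0, C);
}
*/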