This commit is contained in:
Daniel Kapla 2023-11-14 14:35:43 +01:00
parent 40132c2565
commit 90cd46e209
48 changed files with 2852 additions and 1353 deletions

View File

@ -90,10 +90,11 @@
%%% Custom operators with either one or two arguments (limits)
\makeatletter
%%% Multi-Linear Multiplication
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
% Save first argument as \arg@one
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
% Check for second argument
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
% specialization for one or two arguments, both versions use saved first argument
\def\mlm@one{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
@ -101,14 +102,31 @@
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
}
% this command's single argument is the second argument of \mlm; it gobbles the `^`
\def\mlm@two^#1{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Circle (Iterated Outer Product)
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
\def\outer@one{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
}
\def\outer@two#1{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
}
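% Usage sketch (illustrative only): both operators accept a single lower limit
% or a lower and an upper limit, e.g.
%   $\ten{F}_y \mlm_{k\in[r]} \mat{\alpha}_k$,  $\ten{X} \mlm_{k = 1}^{r} \mat{B}_k$,
%   $\outer{k\in[r]} \vec{\mat{A}_k}$,  $\outer{k = 1}{r} \vec{\mat{A}_k}$.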
%%% Big Kronecker Product (with overflowing limits)
% Save first argument as \arg@one
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
@ -134,17 +152,15 @@
\newcommand{\algorithmicbreak}{\textbf{break}}
\newcommand{\Break}{\State \algorithmicbreak}
\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi-dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
This allows per-axis reductions that drastically reduce the total number of parameters for higher-order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations, and real-world examples are provided.
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -256,23 +272,23 @@ A straightforward idea for parameter estimation is to use Gradient Descent. For
\begin{algorithm}[ht]
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
\begin{algorithmic}[1]
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\\
\State $t \leftarrow 1$
\Comment{step counter}
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
\Comment{artificial first step}
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
\Comment{momentum extrapolation weights}
\\
\Repeat \Comment{repeat until convergence}
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
\State $\delta^{(t + 1)} \leftarrow \delta$
\Break
\EndIf
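As a concrete reading of the NAGD algorithm above, the following is a minimal R sketch for a generic differentiable objective. All names are illustrative and not part of any package interface; the update step and Armijo condition are transcribed as displayed, and the momentum weight update is the standard Nesterov sequence, which is an assumption since the displayed hunk does not show that step.

# minimal sketch of Nesterov accelerated gradient with Armijo line search
nagd <- function(l, grad, theta0, c = 1e-4, delta = 1, gamma = 0.5, max.iter = 100L) {
    theta.prev <- theta0                  # artificial first step: theta^(1) = theta^(0)
    theta <- theta0
    m.prev <- 0; m.curr <- 1              # momentum extrapolation weights
    for (iter in seq_len(max.iter)) {
        # momentum extrapolation
        M <- theta + ((m.prev - 1) / m.curr) * (theta - theta.prev)
        G <- grad(M)
        # line search over delta / gamma, delta, gamma * delta, gamma^2 * delta, ...
        for (d in delta * gamma^(-1:20)) {
            theta.temp <- M + d * G       # step exactly as displayed in the pseudocode
            # Armijo condition; the squared Frobenius norm of the gradient is sum(G^2)
            if (l(theta.temp) <= l(theta.prev) - c * d * sum(G^2)) {
                delta <- d
                break
            }
        }
        theta.prev <- theta
        theta <- theta.temp
        # standard Nesterov momentum sequence (assumed; not shown in the hunk)
        m.prev <- m.curr
        m.curr <- (1 + sqrt(1 + 4 * m.curr^2)) / 2
    }
    theta
}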
@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
\caption{\label{fig:sim-normal}Simulation Normal}
\end{figure}
@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
\end{figure}
@ -433,7 +449,7 @@ where each individual block is given by
For example, $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$, where $\ten{F}_y$ is a known tensor-valued function of the response:
\begin{displaymath}
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
\end{displaymath}
with
\begin{align*}
@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a time
%
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
\end{align*}}%
The next step is to identify the Hessians from the second differentials in a similar manner as before.
{\allowdisplaybreaks\begin{align*}
@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
\qquad{\color{gray} (p \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
\qquad{\color{gray} (p_j q_j \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
\qquad{\color{gray} (p_j^2 \times p)}
\\
&\d^2 l(\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\qquad \begin{aligned}
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
\end{aligned}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
\\
&\d^2 l(\mat{\Omega}_j) \\
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
\\
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
\end{aligned}
\end{align*}}%
@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
\begin{align*}
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
\mathcal{I}_{1,j+r+1}
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
\mathcal{I}_{j+1,l+1}
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
\mathcal{I}_{j+1,l+r+1}
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
\mathcal{I}_{j+r+1,l+r+1}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
\end{align*}
@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operation
\begin{theorem}\label{thm:mlm_mat}
Let $\ten{A}$ be a tensor of order $r$ with dimensions $q_1\times ... \times q_r$. Furthermore, for $k = 1, ..., r$, let $\mat{B}_k$ be matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$, it holds that
\begin{displaymath}
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
\end{displaymath}
\end{theorem}
A well-known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
\begin{displaymath}
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
\end{displaymath}
Here $\mat{A}$ is a matrix, that is, a tensor of order 2, and the vectorization arises as the special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
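In base R this identity is easy to check numerically; `%x%` is the Kronecker product and `c()` performs column-major vectorization:

B1 <- matrix(rnorm(2 * 3), 2, 3)
B2 <- matrix(rnorm(4 * 5), 4, 5)
A  <- matrix(rnorm(3 * 5), 3, 5)
# vec(B1 A B2') equals (B2 kron B1) vec(A)
all.equal(c(B1 %*% A %*% t(B2)), c((B2 %x% B1) %*% c(A)))  # TRUE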
@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
Let $\ten{A}$ be a tensor of order $2 r + s$, where $r > 0$ and $s \geq 0$, of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$, and for $k = 1, ..., r$ let $\mat{B}_k$ denote matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$; then
\begin{displaymath}
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
\end{displaymath}
\end{theorem}
A special case of the above theorem arises for tensors represented as a Kronecker product. To this end, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
\begin{displaymath}
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
=
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
=
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
\end{displaymath}
For $r = 2$ this means
\begin{align*}
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
\end{align*}
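This $r = 2$ case can also be verified numerically in base R:

A1 <- matrix(rnorm(4), 2, 2); B1 <- matrix(rnorm(4), 2, 2)
A2 <- matrix(rnorm(9), 3, 3); B2 <- matrix(rnorm(9), 3, 3)
lhs <- sum(c(A1 %x% A2) * c(B1 %x% B2))  # t(vec(A1 kron A2)) vec(B1 kron B2)
rhs <- sum(diag(crossprod(A1, B1))) * sum(diag(crossprod(A2, B2)))  # tr(A1'B1) tr(A2'B2)
all.equal(lhs, rhs)  # TRUE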
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
\begin{displaymath}
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
\end{displaymath}
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$.
\todo{continue}
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
% \begin{displaymath}
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
\end{tikzpicture}
\end{center}
% \newcommand{\somedrawing}{
% \coordinate (a) at (-2,-2,-2);
% \coordinate (b) at (-2,-2,2);
% \coordinate (c) at (-2,2,-2);
% \coordinate (d) at (-2,2,2);
% \coordinate (e) at (2,-2,-2);
% \coordinate (f) at (2,-2,2);
% \coordinate (g) at (2,2,-2);
% \coordinate (h) at (2,2,2);
% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
% \fill (a) circle (0.1cm);
% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
% }
% \begin{figure}[ht!]
% \centering
@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example, consider two matrices $\mat{A},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $f$ be an $r$-times differentiable function; then
\begin{displaymath}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
\qquad\Leftrightarrow\qquad
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
\end{displaymath}
@ -1372,12 +1412,12 @@ The differentials up to the 4th are
\begin{align*}
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t}
\end{align*}
Using the differential-to-derivative identification identity
\begin{displaymath}
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
\qquad\Leftrightarrow\qquad
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
\end{displaymath}
@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluating
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
\end{align*}
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
\begin{align*}
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
&= \Sigma \\

View File

@ -9,6 +9,17 @@
publisher = {[Royal Statistical Society, Wiley]}
}
@inproceedings{Nesterov1983,
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
author = {Nesterov, Yurii Evgen'evich},
booktitle = {Doklady Akademii Nauk},
volume = {269},
number = {3},
pages = {543--547},
year = {1983},
organization= {Russian Academy of Sciences}
}
@book{StatInf-CasellaBerger2002,
title = {{Statistical Inference}},
author = {Casella, George and Berger, Roger L.},
@ -27,6 +38,20 @@
isbn = {0-471-98632-1}
}
@article{SymMatandJacobians-MagnusNeudecker1986,
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
author = {Magnus, Jan R. and Neudecker, Heinz},
ISSN = {02664666, 14694360},
URL = {http://www.jstor.org/stable/3532421},
journal = {Econometric Theory},
number = {2},
pages = {157--190},
publisher = {Cambridge University Press},
urldate = {2023-10-03},
volume = {2},
year = {1986}
}
@book{MatrixAlgebra-AbadirMagnus2005,
title = {Matrix Algebra},
author = {Abadir, Karim M. and Magnus, Jan R.},
@ -83,6 +108,31 @@
doi = {10.1080/01621459.2015.1093944}
}
@article{FisherLectures-Cook2007,
author = {Cook, R. Dennis},
journal = {Statistical Science},
month = {02},
number = {1},
pages = {1--26},
publisher = {The Institute of Mathematical Statistics},
title = {{Fisher Lecture: Dimension Reduction in Regression}},
volume = {22},
year = {2007},
doi = {10.1214/088342306000000682}
}
@article{asymptoticMLE-BuraEtAl2018,
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and Smucler, E. and Sued, M.},
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
journal = {Statistics},
volume = {52},
number = {5},
pages = {1005--1024},
year = {2018},
publisher = {Taylor \& Francis},
doi = {10.1080/02331888.2018.1467420},
}
@article{tsir-DingCook2015,
author = {Shanshan Ding and R. Dennis Cook},
title = {Tensor sliced inverse regression},
@ -117,3 +167,106 @@
isbn = {978-94-015-8196-7},
doi = {10.1007/978-94-015-8196-7_17}
}
@book{asymStats-van_der_Vaart1998,
title = {Asymptotic Statistics},
author = {{van der Vaart}, A.W.},
year = {1998},
publisher = {Cambridge University Press},
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
isbn = {0-521-49603-9}
}
@book{measureTheory-Kusolitsch2011,
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
subtitle = {{E}ine {E}inf{\"u}hrung},
author = {Kusolitsch, Norbert},
series = {Springer-Lehrbuch},
year = {2011},
publisher = {Springer Vienna},
isbn = {978-3-7091-0684-6},
doi = {10.1007/978-3-7091-0685-3}
}
@book{optimMatrixMani-AbsilEtAl2007,
title = {{Optimization Algorithms on Matrix Manifolds}},
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
year = {2007},
publisher = {Princeton University Press},
isbn = {9780691132983},
note = {Full Online Text \url{https://press.princeton.edu/absil}}
}
@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
year = {2020},
publisher = {Springer International Publishing},
address = {Cham},
pages = {261--313},
isbn = {978-3-030-31351-7},
doi = {10.1007/978-3-030-31351-7_9}
}
@book{introToSmoothMani-Lee2012,
title = {Introduction to Smooth Manifolds},
author = {Lee, John M.},
year = {2012},
series = {Graduate Texts in Mathematics},
publisher = {Springer New York},
doi = {10.1007/978-1-4419-9982-5}
}
@book{introToRiemannianMani-Lee2018,
title = {Introduction to Riemannian Manifolds},
author = {Lee, John M.},
year = {2018},
series = {Graduate Texts in Mathematics},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-319-91755-9}
}
@misc{MLEonManifolds-HajriEtAl2017,
title = {Maximum Likelihood Estimators on Manifolds},
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
year = {2017},
series = {Lecture Notes in Computer Science},
publisher = {Springer International Publishing},
pages = {692--700},
doi = {10.1007/978-3-319-68445-1_80}
}
@article{relativity-Einstain1916,
author = {Einstein, Albert},
title = {Die Grundlage der allgemeinen Relativitätstheorie},
year = {1916},
journal = {Annalen der Physik},
volume = {354},
number = {7},
pages = {769--822},
doi = {10.1002/andp.19163540702}
}
@article{MultilinearOperators-Kolda2006,
title = {Multilinear operators for higher-order decompositions.},
author = {Kolda, Tamara Gibson},
doi = {10.2172/923081},
url = {https://www.osti.gov/biblio/923081},
place = {United States},
year = {2006},
month = {4},
type = {Technical Report}
}
@book{aufbauAnalysis-kaltenbaeck2021,
title = {Aufbau Analysis},
author = {Kaltenb\"ack, Michael},
isbn = {978-3-88538-127-3},
series = {Berliner Studienreihe zur Mathematik},
edition = {27},
year = {2021},
publisher = {Heldermann Verlag}
}

View File

@ -23,7 +23,7 @@
*
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
* `T` has the same dimensions as the parameter vector given by
*
* T(y) = vech(y y').
*
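To make the statistic concrete, here is a small R sketch with a hand-rolled `vech` (an illustration, not the package's implementation): for a binary vector `y` of length `p`, the statistic `T(y)` has `p (p + 1) / 2` entries.

vech <- function(M) M[lower.tri(M, diag = TRUE)]  # half-vectorization, column-wise
y <- c(1, 0, 1)                                   # binary vector, p = 3
vech(tcrossprod(y))                               # T(y) = vech(y y'), length 6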

View File

@ -1,204 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
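# illustrative sanity checks (exposition only, not part of the original script):
# stopifnot(all.equal(AR(rho, 5) %*% AR.inv(rho, 5), diag(5)))  # AR.inv inverts AR
# A <- cbind(1, 1:4); stopifnot(all.equal(proj(A) %*% A, A))    # P_A A = A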
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
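# (exposition) every column packs the quadratic parameters vech(eta_y2); the
# vech positions of diagonal elements are then overwritten below with the
# observation-specific linear parameters eta_y1, the layout that
# `ising_sample` appears to expect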
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing one to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
### Prediction Errors: (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
# linspace <- seq(-1, 1, length.out = pj)
# matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = first 8 digits of Euler's constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing one to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)