Compare commits

...

3 Commits

Author SHA1 Message Date
Daniel Kapla 399f878fbb fix: stability of gmlm_ising and added slicing,
add: higher-order approx.kron,
add: paper.tex - simulation plots and tensor normal MLE estimation
2023-11-21 12:21:43 +01:00
Daniel Kapla 6477d78190 wip: paper.tex 2023-11-16 19:17:35 +01:00
Daniel Kapla 90cd46e209 dev 2023-11-14 14:35:43 +01:00
51 changed files with 4711 additions and 1356 deletions

View File

@ -90,10 +90,11 @@
%%% Custom operators with ether one or two arguments (limits) %%% Custom operators with ether one or two arguments (limits)
\makeatletter \makeatletter
%%% Multi-Linear Multiplication %%% Multi-Linear Multiplication
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
% Save first argument as \arg@one % Save first argument as \arg@one
\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i} \def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
% Check for second argument % Check for second argument
\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi} \def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
% specialization for one or two arguments, both versions use saved first argument % specialization for one or two arguments, both versions use saved first argument
\def\mlm@one{\mathchoice% \def\mlm@one{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
@ -101,14 +102,31 @@
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
} }
% this commands single argument is the second argument of \mlm % this commands single argument is the second argument of \mlm, it gobbles the `^`
\def\mlm@two#1{\mathchoice% \def\mlm@two^#1{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
} }
%%% Big Circle (Iterated Outer Product)
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
\def\outer@one{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
}
\def\outer@two#1{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Kronecker Product (with overflowing limits) %%% Big Kronecker Product (with overflowing limits)
% Save first argument as \arg@one % Save first argument as \arg@one
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i} \def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
@ -134,17 +152,15 @@
\newcommand{\algorithmicbreak}{\textbf{break}} \newcommand{\algorithmicbreak}{\textbf{break}}
\newcommand{\Break}{\State \algorithmicbreak} \newcommand{\Break}{\State \algorithmicbreak}
\begin{document} \begin{document}
\maketitle \maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract} \begin{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons. We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided. This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
\end{abstract} \end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -256,23 +272,23 @@ A straight forward idea for parameter estimation is to use Gradient Descent. For
\begin{algorithm}[ht] \begin{algorithm}[ht]
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent} \caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
\begin{algorithmic}[1] \begin{algorithmic}[1]
\State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$ \State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$ \State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
\State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$ \State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\\ \\
\State $t \leftarrow 1$ \State $t \leftarrow 1$
\Comment{step counter} \Comment{step counter}
\State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$ \State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
\Comment{artificial first step} \Comment{artificial first step}
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$ \State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
\Comment{momentum extrapolation weights} \Comment{momentum extrapolation weights}
\\ \\
\Repeat \Comment{repeat untill convergence} \Repeat \Comment{repeat untill convergence}
\State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation} \State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search} \For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
\State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$ \State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
\If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition} \If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$ \State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
\State $\delta^{(t + 1)} \leftarrow \delta$ \State $\delta^{(t + 1)} \leftarrow \delta$
\Break \Break
\EndIf \EndIf
@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
\begin{figure}[!ht] \begin{figure}[!ht]
\centering \centering
\includegraphics[width = \textwidth]{sim-normal-20221012.png} \includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
\caption{\label{fig:sim-normal}Simulation Normal} \caption{\label{fig:sim-normal}Simulation Normal}
\end{figure} \end{figure}
@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
\begin{figure}[!ht] \begin{figure}[!ht]
\centering \centering
\includegraphics[width = \textwidth]{sim-ising-small-20221012.png} \includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
\caption{\label{fig:sim-ising-small}Simulation Ising Small} \caption{\label{fig:sim-ising-small}Simulation Ising Small}
\end{figure} \end{figure}
@ -433,7 +449,7 @@ where each individual block is given by
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$. For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by
\begin{displaymath} \begin{displaymath}
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y}) l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
\end{displaymath} \end{displaymath}
with with
\begin{align*} \begin{align*}
@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t
% %
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\ \d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\ &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\ &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\ &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}} &\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
\end{align*}}% \end{align*}}%
The next step is to identify the Hessians from the second differentials in a similar manner as befor. The next step is to identify the Hessians from the second differentials in a similar manner as befor.
{\allowdisplaybreaks\begin{align*} {\allowdisplaybreaks\begin{align*}
@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a sim
\qquad{\color{gray} (p \times p)} \qquad{\color{gray} (p \times p)}
\\ \\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\ &\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
\qquad{\color{gray} (p_j q_j \times p)} \qquad{\color{gray} (p_j q_j \times p)}
\\ \\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\ &\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
\qquad{\color{gray} (p_j^2 \times p)} \qquad{\color{gray} (p_j^2 \times p)}
\\ \\
&\d^2 l(\mat{\alpha}_j) \\ &\d^2 l(\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\ &= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\ &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ &= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ &= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))} &\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
\qquad{\color{gray} (p_j q_j \times p_j q_j)} \qquad{\color{gray} (p_j q_j \times p_j q_j)}
\\ \\
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\ &\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\ &\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\ &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\ &= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\ &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ &\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\ &\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\qquad \begin{aligned} &\qquad \begin{aligned}
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &= \Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ -c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])} &\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
\qquad{\color{gray} (p_j q_j \times p_l q_l)} \qquad{\color{gray} (p_j q_j \times p_l q_l)}
\end{aligned} \end{aligned}
\\ \\
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\ &\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ &= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ &= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\ &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\ &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\ &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}} &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j q_j \times p_l^2)} % \qquad {\color{gray} (p_j q_j \times p_l^2)}
\\ \\
&\d^2 l(\mat{\Omega}_j) \\ &\d^2 l(\mat{\Omega}_j) \\
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ &= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\ &= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\ &= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}} &\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
%\qquad {\color{gray} (p_j^2 \times p_j^2)} %\qquad {\color{gray} (p_j^2 \times p_j^2)}
\\ \\
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\ &\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a sim
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ &\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\ &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ &\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ &\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ &= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ &\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &= &\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ \mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}} &\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j^2 \times p_l^2)} % \qquad {\color{gray} (p_j^2 \times p_l^2)}
\end{aligned} \end{aligned}
\end{align*}}% \end{align*}}%
@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
\begin{align*} \begin{align*}
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\ \mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y \mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
&= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\ &= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
\mathcal{I}_{1,j+r+1} \mathcal{I}_{1,j+r+1}
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ &= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
\mathcal{I}_{j+1,l+1} \mathcal{I}_{j+1,l+1}
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ &= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
\mathcal{I}_{j+1,l+r+1} \mathcal{I}_{j+1,l+r+1}
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\ &= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
\mathcal{I}_{j+r+1,l+r+1} \mathcal{I}_{j+r+1,l+r+1}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)} &= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
\end{align*} \end{align*}
@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati
\begin{theorem}\label{thm:mlm_mat} \begin{theorem}\label{thm:mlm_mat}
Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds
\begin{displaymath} \begin{displaymath}
\Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})} \Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big). = \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
\end{displaymath} \end{displaymath}
\end{theorem} \end{theorem}
A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
\begin{displaymath} \begin{displaymath}
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}. \vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
\end{displaymath} \end{displaymath}
Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention. Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then
\begin{displaymath} \begin{displaymath}
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k \t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
\equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}. \equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
\end{displaymath} \end{displaymath}
\end{theorem} \end{theorem}
A special case of above Theorem is given for tensors represented as a Kronecker product. Therefore, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
\begin{displaymath}
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
=
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
=
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
\end{displaymath}
In case of $r = 2$ this means
\begin{align*}
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
\end{align*}
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
\begin{displaymath}
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
\end{displaymath}
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$ .
\todo{continue} \todo{continue}
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as % Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
% \begin{displaymath} % \begin{displaymath}
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))}) % \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
\newcommand{\somedrawing} { % \newcommand{\somedrawing}{
\coordinate (a) at (-2,-2,-2); % \coordinate (a) at (-2,-2,-2);
\coordinate (b) at (-2,-2,2); % \coordinate (b) at (-2,-2,2);
\coordinate (c) at (-2,2,-2); % \coordinate (c) at (-2,2,-2);
\coordinate (d) at (-2,2,2); % \coordinate (d) at (-2,2,2);
\coordinate (e) at (2,-2,-2); % \coordinate (e) at (2,-2,-2);
\coordinate (f) at (2,-2,2); % \coordinate (f) at (2,-2,2);
\coordinate (g) at (2,2,-2); % \coordinate (g) at (2,2,-2);
\coordinate (h) at (2,2,2); % \coordinate (h) at (2,2,2);
\draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h); % \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
\fill (a) circle (0.1cm); % \fill (a) circle (0.1cm);
\fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm); % \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
} % }
% \begin{figure}[ht!] % \begin{figure}[ht!]
% \centering % \centering
@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example considure two matrices $\mat{A},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $f$ be an $r$ times differentiable function, then Let $f$ be an $r$ times differentiable function, then
\begin{displaymath} \begin{displaymath}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}} \d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
\qquad\Leftrightarrow\qquad \qquad\Leftrightarrow\qquad
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)} \D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
\end{displaymath} \end{displaymath}
@ -1372,12 +1412,12 @@ The differentials up to the 4'th are
\begin{align*} \begin{align*}
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\ \d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\ \d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\ \d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm{k = 1}{4} \d{t} \d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t}
\end{align*} \end{align*}
Using the differentials to derivative identification identity Using the differentials to derivative identification identity
\begin{displaymath} \begin{displaymath}
\d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t} \d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
\qquad\Leftrightarrow\qquad \qquad\Leftrightarrow\qquad
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)} \D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
\end{displaymath} \end{displaymath}
@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\ M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)} M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
\end{align*} \end{align*}
which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$) which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
\begin{align*} \begin{align*}
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\ H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
&= \Sigma \\ &= \Sigma \\

View File

@ -9,6 +9,17 @@
publisher = {[Royal Statistical Society, Wiley]} publisher = {[Royal Statistical Society, Wiley]}
} }
@inproceedings{Nesterov1983,
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
author = {Nesterov, Yurii Evgen'evich},
booktitle = {Doklady Akademii Nauk},
volume = {269},
number = {3},
pages = {543--547},
year = {1983},
organization= {Russian Academy of Sciences}
}
@book{StatInf-CasellaBerger2002, @book{StatInf-CasellaBerger2002,
title = {{Statistical Inference}}, title = {{Statistical Inference}},
author = {Casella, George and Berger, Roger L.}, author = {Casella, George and Berger, Roger L.},
@ -27,6 +38,20 @@
isbn = {0-471-98632-1} isbn = {0-471-98632-1}
} }
@article{SymMatandJacobians-MagnusNeudecker1986,
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
author = {Magnus, Jan R. and Neudecker, Heinz},
ISSN = {02664666, 14694360},
URL = {http://www.jstor.org/stable/3532421},
journal = {Econometric Theory},
number = {2},
pages = {157--190},
publisher = {Cambridge University Press},
urldate = {2023-10-03},
volume = {2},
year = {1986}
}
@book{MatrixAlgebra-AbadirMagnus2005, @book{MatrixAlgebra-AbadirMagnus2005,
title = {Matrix Algebra}, title = {Matrix Algebra},
author = {Abadir, Karim M. and Magnus, Jan R.}, author = {Abadir, Karim M. and Magnus, Jan R.},
@ -83,6 +108,31 @@
doi = {10.1080/01621459.2015.1093944} doi = {10.1080/01621459.2015.1093944}
} }
@article{FisherLectures-Cook2007,
author = {Cook, R. Dennis},
journal = {Statistical Science},
month = {02},
number = {1},
pages = {1--26},
publisher = {The Institute of Mathematical Statistics},
title = {{Fisher Lecture: Dimension Reduction in Regression}},
volume = {22},
year = {2007},
doi = {10.1214/088342306000000682}
}
@article{asymptoticMLE-BuraEtAl2018,
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued},
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
journal = {Statistics},
volume = {52},
number = {5},
pages = {1005-1024},
year = {2018},
publisher = {Taylor \& Francis},
doi = {10.1080/02331888.2018.1467420},
}
@article{tsir-DingCook2015, @article{tsir-DingCook2015,
author = {Shanshan Ding and R. Dennis Cook}, author = {Shanshan Ding and R. Dennis Cook},
title = {Tensor sliced inverse regression}, title = {Tensor sliced inverse regression},
@ -117,3 +167,106 @@
isbn = {978-94-015-8196-7}, isbn = {978-94-015-8196-7},
doi = {10.1007/978-94-015-8196-7_17} doi = {10.1007/978-94-015-8196-7_17}
} }
@book{asymStats-van_der_Vaart1998,
title = {Asymptotic Statistics},
author = {{van der Vaart}, A.W.},
series = {Asymptotic Statistics},
year = {1998},
publisher = {Cambridge University Press},
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
isbn = {0-521-49603-9}
}
@book{measureTheory-Kusolitsch2011,
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
subtitle = {{E}ine {E}inf{\"u}hrung},
author = {Kusolitsch, Norbert},
series = {Springer-Lehrbuch},
year = {2011},
publisher = {Springer Vienna},
isbn = {978-3-7091-0684-6},
doi = {10.1007/978-3-7091-0685-3}
}
@book{optimMatrixMani-AbsilEtAl2007,
title = {{Optimization Algorithms on Matrix Manifolds}},
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
year = {2007},
publisher = {Princeton University Press},
isbn = {9780691132983},
note = {Full Online Text \url{https://press.princeton.edu/absil}}
}
@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
year = {2020},
publisher = {Springer International Publishing},
address = {Cham},
pages = {261--313},
isbn = {978-3-030-31351-7},
doi = {10.1007/978-3-030-31351-7_9}
}
@book{introToSmoothMani-Lee2012,
title = {Introduction to Smooth Manifolds},
author = {Lee, John M.},
year = {2012},
journal = {Graduate Texts in Mathematics},
publisher = {Springer New York},
doi = {10.1007/978-1-4419-9982-5}
}
@book{introToRiemannianMani-Lee2018,
title = {Introduction to Riemannian Manifolds},
author = {Lee, John M.},
year = {2018},
journal = {Graduate Texts in Mathematics},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-319-91755-9}
}
@misc{MLEonManifolds-HajriEtAl2017,
title = {Maximum Likelihood Estimators on Manifolds},
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
year = {2017},
journal = {Lecture Notes in Computer Science},
publisher = {Springer International Publishing},
pages = {692-700},
doi = {10.1007/978-3-319-68445-1_80}
}
@article{relativity-Einstain1916,
author = {Einstein, Albert},
title = {Die Grundlage der allgemeinen Relativitätstheorie},
year = {1916},
journal = {Annalen der Physik},
volume = {354},
number = {7},
pages = {769-822},
doi = {10.1002/andp.19163540702}
}
@article{MultilinearOperators-Kolda2006,
title = {Multilinear operators for higher-order decompositions.},
author = {Kolda, Tamara Gibson},
doi = {10.2172/923081},
url = {https://www.osti.gov/biblio/923081},
place = {United States},
year = {2006},
month = {4},
type = {Technical Report}
}
@book{aufbauAnalysis-kaltenbaeck2021,
title = {Aufbau Analysis},
author = {Kaltenb\"ack, Michael},
isbn = {978-3-88538-127-3},
series = {Berliner Studienreihe zur Mathematik},
edition = {27},
year = {2021},
publisher = {Heldermann Verlag}
}

1479
LaTeX/paper.tex Normal file

File diff suppressed because it is too large Load Diff

151
LaTeX/plots/sim-ising.tex Normal file
View File

@ -0,0 +1,151 @@
%%% R code to generate the input data files from corresponding simulation logs
% R> setwd("~/Work/tensorPredictors")
% R>
% R> for (sim.name in c("2a")) {
% R> pattern <- paste0("sim\\_", sim.name, "\\_ising\\-[0-9T]*\\.csv")
% R> log.file <- sort(
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
% R> decreasing = TRUE
% R> )[[1]]
% R>
% R> sim <- read.csv(log.file)
% R>
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
% R>
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-ising.csv"), row.names = FALSE, quote = FALSE)
% R> }
\documentclass[border=0cm]{standalone}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{bm}
\definecolor{gmlm}{RGB}{0,0,0}
\definecolor{mgcca}{RGB}{86,180,233}
\definecolor{tsir}{RGB}{0,158,115}
\definecolor{pca}{RGB}{240,228,66}
\definecolor{hopca}{RGB}{230,159,0}
\definecolor{lpca}{RGB}{127,127,127}
\definecolor{clpca}{RGB}{191,191,191}
\pgfplotsset{
every axis/.style={
xtick={100,200,300,500,750},
ymin=-0.05, ymax=1.05,
grid=both,
grid style={gray, dotted}
},
every axis plot/.append style={
mark = *,
mark size = 1pt,
line width=0.8pt
}
}
\tikzset{
legend entry/.style={
mark = *,
mark size = 1pt,
mark indices = {2},
line width=0.8pt
}
}
\begin{document}
\begin{tikzpicture}[>=latex]
\begin{axis}[
name=sim-2a
]
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-2a-ising.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-2a-ising.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-2a-ising.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-2a-ising.csv};
\addplot[color = lpca] table[x = sample.size, y = dist.subspace.lpca] {aggr-2a-ising.csv};
\addplot[color = clpca] table[x = sample.size, y = dist.subspace.clpca] {aggr-2a-ising.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-2a.north west) {
a: small
};
% \begin{axis}[
% name=sim-1b,
% anchor=north west, at={(sim-2a.right of north east)}, xshift=0.1cm,
% xticklabel=\empty,
% ylabel near ticks, yticklabel pos=right
% ]
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
% \end{axis}
% \node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
% b: cubic dependence on $y$
% };
% \begin{axis}[
% name=sim-1c,
% anchor=north west, at={(sim-2a.below south west)}, yshift=-.8em,
% xticklabel=\empty
% ]
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
% \end{axis}
% \node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
% c: rank $1$ $\boldsymbol{\beta}$'s
% };
% \begin{axis}[
% name=sim-1d,
% anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
% ylabel near ticks, yticklabel pos=right
% ]
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
% \end{axis}
% \node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
% d: tri-diagonal $\boldsymbol{\Omega}$'s
% };
% \begin{axis}[
% name=sim-1e,
% anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
% ]
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
% \end{axis}
% \node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
% e: missspecified
% };
\matrix[anchor = west] at (sim-2a.right of east) {
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
\draw[color=lpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {LPCA}; \\
\draw[color=clpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {CLPCA}; \\
};
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
\end{tikzpicture}
\end{document}

148
LaTeX/plots/sim-normal.tex Normal file
View File

@ -0,0 +1,148 @@
%%% R code to generate the input data files from corresponding simulation logs
% R> setwd("~/Work/tensorPredictors")
% R>
% R> for (sim.name in c("1a", "1b", "1c", "1d", "1e")) {
% R> pattern <- paste0("sim\\_", sim.name, "\\_normal\\-[0-9T]*\\.csv")
% R> log.file <- sort(
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
% R> decreasing = TRUE
% R> )[[1]]
% R>
% R> sim <- read.csv(log.file)
% R>
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
% R>
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-normal.csv"), row.names = FALSE, quote = FALSE)
% R> }
\documentclass[border=0cm]{standalone}
\usepackage{tikz}
\usepackage{pgfplots}
\usepackage{bm}
\definecolor{gmlm}{RGB}{0,0,0}
\definecolor{mgcca}{RGB}{86,180,233}
\definecolor{tsir}{RGB}{0,158,115}
\definecolor{hopca}{RGB}{230,159,0}
\definecolor{pca}{RGB}{240,228,66}
\definecolor{lpca}{RGB}{0,114,178}
\definecolor{clpca}{RGB}{213,94,0}
\pgfplotsset{
every axis/.style={
xtick={100,200,300,500,750},
ymin=-0.05, ymax=1.05,
grid=both,
grid style={gray, dotted}
},
every axis plot/.append style={
mark = *,
mark size = 1pt,
line width=0.8pt
}
}
\tikzset{
legend entry/.style={
mark = *,
mark size = 1pt,
mark indices = {2},
line width=0.8pt
}
}
\begin{document}
\begin{tikzpicture}[>=latex]
\begin{axis}[
name=sim-1a,
xticklabel=\empty
]
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1a-normal.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1a-normal.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1a-normal.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1a-normal.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-1a.north west) {
a: linear dependence on $\mathcal{F}_y \equiv y$
};
\begin{axis}[
name=sim-1b,
anchor=north west, at={(sim-1a.right of north east)}, xshift=0.1cm,
xticklabel=\empty,
ylabel near ticks, yticklabel pos=right
]
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
b: cubic dependence on $y$
};
\begin{axis}[
name=sim-1c,
anchor=north west, at={(sim-1a.below south west)}, yshift=-.8em,
xticklabel=\empty
]
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
c: rank $1$ $\boldsymbol{\beta}$'s
};
\begin{axis}[
name=sim-1d,
anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
ylabel near ticks, yticklabel pos=right
]
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
d: tri-diagonal $\boldsymbol{\Omega}$'s
};
\begin{axis}[
name=sim-1e,
anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
]
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
\end{axis}
\node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
e: missspecified
};
\matrix[anchor = center] at (sim-1e.right of east -| sim-1d.south) {
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
};
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
\end{tikzpicture}
\end{document}

View File

@ -23,7 +23,7 @@
* *
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued * with the parameter vector `theta` and a statistic `T` of `y`. The real valued
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic * parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
* `T` has the same dimensions as a binary vector given by * `T` has the same dimensions as the parameter vector given by
* *
* T(y) = vech(y y'). * T(y) = vech(y y').
* *

View File

@ -1,204 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # qj ignored, its 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session informatin allowing to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()` which the following is addapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
### Prediction Errors: (using new independend sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, its 2
# linspace <- seq(-1, 1, length.out = pj)
# matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, its 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = first 8 digits Euler's constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # preditor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session informatin allowing to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()` which the following is addapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors: (using new independend sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,131 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = leaf node count of a full chess search tree of depth 6 from the start pos
# > position startpos
# > go perft 6
set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # preditor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,171 +0,0 @@
library(tensorPredictors)
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-normal"
reps <- 100 # number of simulation replications
max.iter <- 10000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # preditor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
r <- length(p)
# initial consistency checks
stopifnot(exprs = {
r == length(p)
r == length(q)
all(outer(p, sample.sizes, `<`))
})
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session informatin allowing to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()` which the following is addapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- dist.projection(B.true, B.tsir)
### Prediction Errors: (using new independend sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,96 +0,0 @@
library(tensorPredictors)
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3, 5) # preditor dimensions
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
# initial consistency checks
stopifnot(exprs = {
length(p) == length(q)
})
# setup model parameters
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
eta1 <- 0 # intercept
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
r <- length(alphas) # tensor order
# generate response (sample axis is last axis)
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
Fy <- Fy - c(rowMeans(Fy, dims = r))
# sample predictors as X | Y = y (sample axis is last axis)
Deltas <- Map(solve, Omegas) # normal covariances
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
# permute axis to requested get the sample axis
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 1) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
)
)
# Iter: loss - dist
# 7190: 50.583 - 0.057
# MSE eta1
# 0.02694658
# subspace distances of alphas
# 0.043 0.035 0.034
# Frob. norm of Omega differences
# 0.815 1.777 12.756
# time user system elapsed
# 342.279 555.630 183.653

View File

@ -1,100 +0,0 @@
if (!endsWith(getwd(), "/sim")) {
setwd("sim")
}
sim.plot <- function(file.prefix, date, to.file = FALSE) {
# file.prefix <- "sim-ising-small"
# # file.prefix <- "sim-normal"
# date <- "20221012" # yyyymmdd, to match all "[0-9]{6}"
time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}"
colors <- c(
PCA = "#a11414",
HOPCA = "#2a62b6",
TSIR = "#9313b9",
GMLM = "#247407"
)
line.width <- 1.75
margins <- c(5.1, 4.1, 4.1, 0.1)
sim <- Reduce(rbind, Map(function(path) {
df <- read.csv(path)
df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1))
df
}, list.files(".", pattern = paste0(
"^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
))))
stats <- aggregate(. ~ n, sim, mean)
q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
if (to.file) {
width <- 720
png(paste(file.prefix, "-", date, ".png", sep = ""),
width = width, height = round((6 / 11) * width, -1),
pointsize = 12)
}
layout(mat = matrix(c(
1, 2,
3, 3
), 2, 2, byrow = TRUE),
widths = c(1, 1),
heights = c(12, 1), respect = FALSE)
# layout.show(3)
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "Subspace Distance",
xlab = "Sample Size", ylab = "Error")
lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width)
lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width)
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
})
with(stats, {
par(mar = margins)
plot(range(n), 0:1,
type = "n", bty = "n", main = "RMSE (Prediction Error)",
xlab = "Sample Size", ylab = "Error")
xn <- c(q75$n, rev(q25$n))
polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)),
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width)
lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width)
lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width)
lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width)
})
par(mar = rep(0, 4))
plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
legend("center", legend = names(colors), col = colors, lwd = line.width,
lty = 1, bty = "n", horiz = TRUE)
if (to.file) {
dev.off()
}
}
sim.plot("sim-ising-small", "20221012", TRUE)
sim.plot("sim-normal", "20221012", TRUE)

View File

@ -5,9 +5,14 @@ export("%x_1%")
export("%x_2%") export("%x_2%")
export("%x_3%") export("%x_3%")
export("%x_4%") export("%x_4%")
export(.projBand)
export(.projPSD)
export(.projRank)
export(.projSymBand)
export(.projSymRank)
export(CISE) export(CISE)
export(D) export(Dup)
export(D.pinv) export(Dup.pinv)
export(GMLM) export(GMLM)
export(GMLM.default) export(GMLM.default)
export(HOPCA) export(HOPCA)
@ -17,6 +22,8 @@ export(ICU)
export(K) export(K)
export(K.perm) export(K.perm)
export(LSIR) export(LSIR)
export(La.det)
export(La.solve)
export(N) export(N)
export(NAGD) export(NAGD)
export(PCA2d) export(PCA2d)
@ -25,20 +32,30 @@ export(RMReg)
export(RMap) export(RMap)
export(S) export(S)
export(TSIR) export(TSIR)
export(approx.kron)
export(approx.kronecker) export(approx.kronecker)
export(colKronecker) export(colKronecker)
export(decompose.kronecker)
export(dist.kron.norm) export(dist.kron.norm)
export(dist.kron.tr) export(dist.kron.tr)
export(dist.projection) export(dist.projection)
export(dist.subspace) export(dist.subspace)
export(exprs.all.equal) export(exprs.all.equal)
export(gmlm_ising)
export(gmlm_mlm)
export(gmlm_tensor_normal)
export(icu_tensor_normal)
export(ising_m2)
export(ising_sample)
export(kpir.approx) export(kpir.approx)
export(kpir.base) export(kpir.base)
export(kpir.ls) export(kpir.ls)
export(kpir.mle) export(kpir.mle)
export(kpir.momentum) export(kpir.momentum)
export(kpir.new) export(kpir.new)
export(kronperm)
export(mat) export(mat)
export(matProj)
export(matpow) export(matpow)
export(matrixImage) export(matrixImage)
export(mcov) export(mcov)
@ -49,10 +66,20 @@ export(mtvk)
export(num.deriv) export(num.deriv)
export(num.deriv.function) export(num.deriv.function)
export(num.deriv2) export(num.deriv2)
export(pinv)
export(projDiag)
export(projStiefel)
export(projSym)
export(reduce) export(reduce)
export(riccati)
export(rowKronecker) export(rowKronecker)
export(rtensornorm) export(rtensornorm)
export(slice.assign.expr)
export(slice.expr)
export(slice.select)
export(sylvester)
export(tensor_predictor) export(tensor_predictor)
export(tsym)
export(ttm) export(ttm)
export(ttt) export(ttt)
export(vech) export(vech)

View File

@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
# optimize likelihood using Nesterov Excelerated Gradient Descent # optimize likelihood using Nesterov Excelerated Gradient Descent
params.fit <- NAGD( params.fit <- NAGD(
fun.loss = function(params) { fun.loss = function(params) {
# scaled negative lig-likelihood # scaled negative log-likelihood
# eta1 alphas Omegas # eta1 alphas Omegas
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]]) family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
}, },
fun.grad = function(params) { fun.grad = function(params) {
# gradient of the scaled negative lig-likelihood # gradient of the scaled negative log-likelihood
# eta1 alphas Omegas # eta1 alphas Omegas
family$grad(X, Fy, params[[1]], params[[2]], params[[3]]) family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
}, },

View File

@ -10,7 +10,7 @@
#' `i`th axis excluding the sample axis. #' `i`th axis excluding the sample axis.
#' #'
#' @export #' @export
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) { HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) {
# observation index numbers (all axis except the sample axis) # observation index numbers (all axis except the sample axis)
modes <- seq_along(dim(X))[-sample.axis] modes <- seq_along(dim(X))[-sample.axis]

View File

@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
list(alphas = alphas, Deltas = fun.Deltas(alphas)) list(alphas = alphas, Deltas = fun.Deltas(alphas))
} }
#' HPOIR subroutine for the MLE estimation given proprocessed data and initial #' HPOIR subroutine for the MLE estimation given preprocessed data and initial
#' alphas, Deltas paramters #' alpha, Delta parameters
#' #'
#' @keywords internal #' @keywords internal
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) { HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {

View File

@ -7,7 +7,7 @@
#' #'
#' @export #' @export
HOSVD <- function(X, nu = NULL, eps = 1e-07) { HOSVD <- function(X, nu = NULL, eps = 1e-07) {
if (!missing(nu)) { if (!is.null(nu)) {
stopifnot(all(nu <= dim(X))) stopifnot(all(nu <= dim(X)))
} }
@ -21,3 +21,5 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) {
list(C = C, Us = Us) list(C = C, Us = Us)
} }
SVD <- function(A) .Call("C_svd", A)

View File

@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
stop("Initial loss is non-finite (", loss, ")") stop("Initial loss is non-finite (", loss, ")")
} }
# initialize "previous" iterate parameters # initialize "previous" iterate parameters
params.last <- params prev.params <- params
# Gradient Descent Loop # Gradient Descent Loop
line.search.tag <- FALSE # init line search state as "failure" line.search.tag <- FALSE # init line search state as "failure"
@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
} }
# Extrapolation form previous position (momentum) # Extrapolation form previous position (momentum)
# `params.moment <- (1 + moment) * params - moment * param.last` # `params.moment <- (1 + moment) * params - moment * prev.params`
moment <- (m[1] - 1) / m[2] moment <- (m[1] - 1) / m[2]
params.moment <- fun.lincomb(1 + moment, params, -moment, params.last) params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params)
# Compute gradient at extrapolated position # Compute gradient at extrapolated position
if (missing(more.params)) { if (missing(more.params)) {
@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
} }
# keep track of previous parameters # keep track of previous parameters
params.last <- params prev.params <- params
# check line search outcome # check line search outcome
if (is.na(line.search.tag)) { if (is.na(line.search.tag)) {

View File

@ -4,11 +4,21 @@
TSIR <- function(X, y, d, sample.axis = 1L, TSIR <- function(X, y, d, sample.axis = 1L,
nr.slices = 10L, # default slices, ignored if y is a factor or integer nr.slices = 10L, # default slices, ignored if y is a factor or integer
max.iter = 50L, max.iter = 50L,
eps = sqrt(.Machine$double.eps) eps = sqrt(.Machine$double.eps),
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
) { ) {
if (!(is.factor(y) || is.integer(y))) { if (!(is.factor(y) || is.integer(y))) {
y <- cut(y, nr.slices) slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
# ensure there are no empty slices
if (any(table(y) == 0)) {
y <- as.factor(as.integer(y))
}
}
} }
stopifnot(exprs = { stopifnot(exprs = {

View File

@ -1,4 +1,32 @@
#' Approximates kronecker product decomposition. #' @examples
#' nrows <- c(3, 2, 5)
#' ncols <- c(2, 4, 8)
#'
#' A <- Reduce(kronecker, Map(matrix, Map(rnorm, nrows * ncols), nrows))
#'
#' all.equal(A, Reduce(kronecker, approx.kron(A, nrows, ncols)))
#'
#' @export
approx.kron <- function(A, nrows, ncols) {
# rearrange kronecker product `A` into outer product `R`
dim(A) <- c(rev(nrows), rev(ncols))
axis.perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2L))[, rev(seq_along(nrows))])
R <- aperm(A, axis.perm, resize = FALSE)
dim(R) <- nrows * ncols
# scaling factor for every product component
s <- sum(A^2)^(1 / length(dim(A)))
# higher order SVD on `R` with one singular vector
Map(function(mode, nr, nc) {
s * `dim<-`(La.svd(mcrossprod(R, mode = mode), 1L, 0L)$u, c(nr, nc))
}, seq_along(nrows), nrows, ncols)
}
#' Approximates Kronecker Product decomposition.
#' #'
#' Approximates the matrices `A` and `B` such that #' Approximates the matrices `A` and `B` such that
#' C = A %x% B #' C = A %x% B
@ -21,7 +49,7 @@
#' 123 (2000) 85-100 (pp. 93-95) #' 123 (2000) 85-100 (pp. 93-95)
#' #'
#' @export #' @export
approx.kronecker <- function(C, dimA, dimB) { approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L]) dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
R <- aperm(C, c(2L, 4L, 1L, 3L)) R <- aperm(C, c(2L, 4L, 1L, 3L))
@ -33,8 +61,115 @@ approx.kronecker <- function(C, dimA, dimB) {
svdR <- svd(R, 1L, 1L) svdR <- svd(R, 1L, 1L)
} }
return(list( list(
A = array(sqrt(svdR$d[1]) * svdR$u, dimA), A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
B = array(sqrt(svdR$d[1]) * svdR$v, dimB) B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
)
}
#' Kronecker Product Decomposition.
#'
#' Computes the components summation components `A_i`, `B_i` of a sum of
#' Kronecker products
#' C = sum_i A_i %x% B_i
#' with the minimal estimated number of summands.
#'
#' @param C desired kronecker product result.
#' @param dimA length 2 vector of dimensions of \code{A}.
#' @param dimB length 2 vector of dimensions of \code{B}.
#' @param tol tolerance of singular values of \code{C} to determin the number of
#' needed summands.
#'
#' @return list of lenghth with estimated number of summation components, each
#' entry consists of a list with named entries \code{"A"} and \code{"B"} of
#' dimensions \code{dimA} and \code{dimB}.
#'
#' @examples
#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE)
#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE)
#' C <- Reduce(`+`, Map(kronecker, As, Bs))
#'
#' decomposition <- decompose.kronecker(C, c(2, 7))
#'
#' reconstruction <- Reduce(`+`, Map(function(summand) {
#' kronecker(summand[[1]], summand[[2]])
#' }, decomposition), array(0, dim(C)))
#'
#' stopifnot(all.equal(C, reconstruction))
#'
#' @export
decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) {
# convert the equivalent outer product
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE)
dim(C) <- c(prod(dimA), prod(dimB))
# Singular Valued Decomposition
svdC <- La.svd(C)
# Sum of Kronecker Components
lapply(seq_len(sum(svdC$d > tol)), function(i) list(
A = matrix(svdC$d[i] * svdC$u[, i], dimA),
B = matrix(svdC$vt[i, ], dimB)
)) ))
} }
### Given that C is a Kronecker product this is a fast method but a bit
### unreliable in full generality.
# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
# R <- aperm(C, c(2L, 4L, 1L, 3L))
# dim(R) <- c(prod(dimA), prod(dimB))
# max.index <- which.max(abs(R))
# row.index <- ((max.index - 1L) %% nrow(R)) + 1L
# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L
# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1
# list(
# A = array(R[, col.index], dimA),
# B = array(R[row.index, ] / max.elem, dimB)
# )
# }
# kron <- function(A, B) {
# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1]))
# K <- aperm(outer(A, B), perm)
# dim(K) <- dim(A) * dim(B)
# K
# }
# kronperm <- function(A) {
# # force A to have even number of dimensions
# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L))
# # compute axis permutation
# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1]))
# # permute elements of A
# K <- aperm(A, perm, resize = FALSE)
# # collapse/set dimensions
# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2)
# K
# }
# p <- c(2, 3, 5)
# q <- c(3, 4, 7)
# A <- array(rnorm(prod(p)), p)
# B <- array(rnorm(prod(q)), q)
# all.equal(kronperm(outer(A, B)), kronecker(A, B))
# all.equal(kron(A, B), kronecker(A, B))
# dA <- c(2, 3, 5)
# dB <- c(3, 4, 7)
# A <- array(rnorm(prod(dA)), dA)
# B <- array(rnorm(prod(dB)), dB)
# X <- outer(A, B)
# r <- length(dim(X)) / 2
# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r))))
# K <- apply(rbind(
# (I - 1) %/% tail(dim(X), r) + 1,
# (I - 1) %% tail(dim(X), r) + 1
# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1])
# dim(K) <- head(dim(X), r) * tail(dim(X), r)
# all.equal(kronecker(A, B), K)

View File

@ -3,7 +3,7 @@
#' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F #' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
#' #'
#' This is equivalent to the expression #' This is equivalent to the expression
#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster. #' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster.
#' #'
#' @examples #' @examples
#' A1 <- matrix(rnorm(5^2), 5) #' A1 <- matrix(rnorm(5^2), 5)
@ -16,21 +16,21 @@
#' )) #' ))
#' #'
#' p <- c(3, 7, 5, 2) #' p <- c(3, 7, 5, 2)
#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p) #' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p) #' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
#' stopifnot(all.equal( #' stopifnot(all.equal(
#' dist.kron.norm(A, B), #' dist.kron.norm(As, Bs),
#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F") #' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")
#' )) #' ))
#' #'
#' @export #' @export
dist.kron.norm <- function(A, B, eps = .Machine$double.eps) { dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) {
if (is.list(A) && is.list(B)) { if (is.list(As) && is.list(Bs)) {
norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) - norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) -
2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) + 2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) +
prod(unlist(Map(function(x) sum(x^2), B))) prod(unlist(Map(function(x) sum(x^2), Bs)))
} else if (is.matrix(A) && is.matrix(B)) { } else if (is.matrix(As) && is.matrix(Bs)) {
norm2 <- sum((A - B)^2) norm2 <- sum((As - Bs)^2)
} else { } else {
stop("Unexpected input") stop("Unexpected input")
} }

View File

@ -1,4 +1,4 @@
#' Porjection Distance of two matrices #' Projection Distance of two matrices
#' #'
#' Defined as sine of the maximum principal angle between the column spaces #' Defined as sine of the maximum principal angle between the column spaces
#' of the matrices #' of the matrices

View File

@ -0,0 +1,406 @@
#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
#'
#' @todo TODO: Add beta and Omega projections
#'
#' @export
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
# proj.betas = ..., proj.Omegas = ..., # TODO: this
max.iter = 1000L,
eps = sqrt(.Machine$double.eps),
step.size = 1e-3,
zig.zag.threashold = 20L,
patience = 3L,
nr.slices = 20L, # only for univariate `F(y) = y`
slice.method = c("cut", "ecdf", "none"), # only for univariate `F(y) = y` and `y` is a factor or integer
logger = function(...) { }
) {
# Get problem dimensions
dimX <- dim(X)[-sample.axis]
# threat scalar `F` as a tensor
if (is.null(dim(F))) {
dimF <- rep(1L, length(dimX))
dim(F) <- ifelse(seq_along(dim(X)) == sample.axis, sample.size, 1L)
} else {
dimF <- dim(F)[-sample.axis]
}
sample.size <- dim(X)[sample.axis]
# rearrange `X`, `F` such that the last axis enumerates observations
if (sample.axis != length(dim(X))) {
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
X <- aperm(X, axis.perm)
F <- aperm(F, axis.perm)
sample.axis <- length(dim(X))
}
modes <- seq_along(dimX)
# Special case for univariate response `vec F(y) = y`
# Due to high computational costs we use slicing
slice.method <- match.arg(slice.method)
slices.ind <- if ((slice.method != "none") && (length(F) == prod(dim(F)))) {
y <- as.vector(F)
if (!(is.factor(y) || is.integer(y))) {
slice.method <- match.arg(slice.method)
if (slice.method == "ecdf") {
y <- cut(ecdf(y)(y), nr.slices)
} else {
y <- cut(y, nr.slices)
}
}
split(seq_len(sample.size), y, drop = TRUE)
} else {
seq_len(sample.size)
}
# initialize betas with tensor normal estimate (ignoring data being binary)
fit_normal <- gmlm_tensor_normal(X, F, sample.axis = length(dim(X)))
betas <- fit_normal$betas
Omegas <- Omegas.init <- Map(function(mode) {
n <- prod(dim(X)[-mode])
prob2 <- mcrossprod(X, mode = mode) / n
prob2[prob2 == 0] <- 1 / n
prob1 <- diag(prob2)
`prob1^2` <- outer(prob1, prob1)
`diag<-`(log(((1 - `prob1^2`) / `prob1^2`) * prob2 / (1 - prob2)), 0)
}, modes)
# Determin degenerate combinations, that are variables which are exclusive
# in the data set
matX <- mat(X, sample.axis)
degen <- crossprod(matX) == 0
degen.mask <- which(degen)
# If there are degenerate combination, compute an (arbitrary) bound the
# log odds parameters of those combinations
if (any(degen.mask)) {
degen.ind <- arrayInd(degen.mask, dim(degen))
meanX <- colMeans(matX)
prodX <- meanX[degen.ind[, 1]] * meanX[degen.ind[, 2]]
degen.bounds <- log((1 - prodX) / (prodX * sample.size))
# Component indices in Omegas of degenerate two-way interactions
degen.ind <- arrayInd(degen.mask, rep(dimX, 2))
degen.ind <- Map(function(d, m) {
degen.ind[, m] + dimX[m] * (degen.ind[, m + length(dimX)] - 1L)
}, dimX, seq_along(dimX))
## Enforce initial value degeneracy interaction param. constraints
# Extract parameters corresponding to degenerate interactions
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
# Degeneracy Constrained Parameters (sign is dropped)
DCP <- mapply(function(vals, bound) {
logVals <- log(abs(vals))
err <- max(0, sum(logVals) - log(abs(bound)))
exp(logVals - (err / length(vals)))
}, split(degen.params, col(degen.params)), degen.bounds)
# Update values in Omegas such that all degeneracy constraints hold
Omegas <- Map(function(Omega, cp, ind) {
# Combine multiple constraints for every element into single
# constraint value per element
cp <- mapply(min, split(abs(cp), ind))
ind <- as.integer(names(cp))
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
}, Omegas, split(DCP, row(DCP)), degen.ind)
}
# Initialize mean squared gradients
grad2_betas <- Map(array, 0, Map(dim, betas))
grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
# Keep track of the last loss to accumulate loss difference sign changes
# indicating optimization instabilities as a sign to stop
last_loss <- Inf
accum_sign <- 1
# non improving iteration counter
non_improving <- 0L
# technical access points to dynamicaly access a multi-dimensional array
`X[..., i]` <- slice.expr(X, sample.axis, index = i, drop = FALSE)
`F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
# Iterate till a break condition triggers or till max. nr. of iterations
for (iter in seq_len(max.iter)) {
grad_betas <- Map(matrix, 0, dimX, dimF)
Omega <- Reduce(kronecker, rev(Omegas))
# second order residuals accumulator
# `sum_i (X_i o X_i - E[X o X | Y = y_i])`
R2 <- array(0, dim = c(dimX, dimX))
# negative log-likelihood
loss <- 0
for (i in slices.ind) {
# slice size (nr. of objects in the slice)
n_i <- length(i)
sumF_i <- rowSums(eval(`F[..., i]`), dims = length(dimF))
diag_params_i <- mlm(sumF_i / n_i, betas)
params_i <- Omega + diag(as.vector(diag_params_i))
m2_i <- ising_m2(params_i)
# accumulate loss
matX_i <- mat(eval(`X[..., i]`), modes)
loss <- loss - (
sum(matX_i * (params_i %*% matX_i)) + n_i * log(attr(m2_i, "prob_0"))
)
R2_i <- tcrossprod(matX_i) - n_i * m2_i
R1_i <- diag(R2_i)
dim(R1_i) <- dimX
for (j in modes) {
grad_betas[[j]] <- grad_betas[[j]] +
mcrossprod(R1_i, mlm(sumF_i, betas[-j], modes[-j]), j)
}
R2 <- R2 + as.vector(R2_i)
}
grad_Omegas <- Map(function(j) {
grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
dim(grad) <- dim(Omegas[[j]])
grad
}, modes)
# update optimization behavioral trackers
accum_sign <- sign(last_loss - loss) - accum_sign
non_improving <- max(0L, non_improving - 1L + 2L * (last_loss < loss))
# check break conditions
if (abs(accum_sign) > zig.zag.threashold) { break }
if (non_improving > patience) { break }
if (abs(last_loss - loss) < eps * last_loss) { break }
# store current loss for the next iteration
last_loss <- loss
# Accumulate root mean squared gradiends
grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_betas, grad_betas)
grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
grad2_Omegas, grad_Omegas)
# logging (before parameter update)
logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)
# Update Parameters
betas <- Map(function(beta, grad, m2) {
beta + (step.size / (sqrt(m2) + eps)) * grad
}, betas, grad_betas, grad2_betas)
Omegas <- Map(function(Omega, grad, m2) {
Omega + (step.size / (sqrt(m2) + eps)) * grad
}, Omegas, grad_Omegas, grad2_Omegas)
# Enforce degeneracy parameter constraints
if (any(degen.mask)) {
# Extract parameters corresponding to degenerate interactions
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
# Degeneracy Constrained Parameters (sign is dropped)
DCP <- mapply(function(vals, bound) {
logVals <- log(abs(vals))
err <- max(0, sum(logVals) - log(abs(bound)))
exp(logVals - (err / length(vals)))
}, split(degen.params, col(degen.params)), degen.bounds)
# Update values in Omegas such that all degeneracy constraints hold
Omegas <- Map(function(Omega, cp, ind) {
# Combine multiple constraints for every element into single
# constraint value per element
cp <- mapply(min, split(abs(cp), ind))
ind <- as.integer(names(cp))
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
}, Omegas, split(DCP, row(DCP)), degen.ind)
}
}
structure(
list(eta1 = array(0, dimX), betas = betas, Omegas = Omegas),
tensor_normal = fit_normal,
Omegas.init = Omegas.init,
degen.mask = degen.mask
)
}
################################################################################
### Development Interactive Block (Delete / Make sim / TODO: ...) ###
################################################################################
if (FALSE) { # interactive()
par(bg = "#1d1d1d",
fg = "lightgray",
col = "#d5d5d5",
col.axis = "#d5d5d5",
col.lab = "#d5d5d5",
col.main = "#d5d5d5",
col.sub = "#d5d5d5", # col.sub = "#2467d0"
pch = 16
)
cex <- 1.25
col <- colorRampPalette(c("#f15050", "#1d1d1d", "#567DCA"))(256)
.logger <- function() {
iter <- 0L
assign("log", data.frame(
iter = rep(NA_integer_, 100000),
loss = rep(NA_real_, 100000),
dist.B = rep(NA_real_, 100000),
dist.Omega = rep(NA_real_, 100000),
norm.grad.B = rep(NA_real_, 100000),
norm.grad.Omega = rep(NA_real_, 100000)
), envir = .GlobalEnv)
assign("B.gmlm", NULL, .GlobalEnv)
assign("Omega.gmlm", NULL, .GlobalEnv)
function(it, loss, betas, Omegas, grad_betas, grad_Omegas) {
# Store in global namespace (allows to stop and get the results)
B.gmlm <- Reduce(kronecker, rev(betas))
assign("B.gmlm", B.gmlm, .GlobalEnv)
Omega.gmlm <- Reduce(kronecker, rev(Omegas))
assign("Omega.gmlm", Omega.gmlm, .GlobalEnv)
dist.B <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.Omega <- norm(Omega.true - Omega.gmlm, "F")
norm.grad.B <- sqrt(sum(mapply(norm, grad_betas, "F")^2))
norm.grad.Omega <- sqrt(sum(mapply(norm, grad_Omegas, "F")^2))
log[iter <<- iter + 1L, ] <<- list(
it, loss, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega
)
cat(sprintf("\r%3d - d(B): %.3f, d(O): %.3f, |g(B)|: %.3f, |g(O)|: %.3f, loss: %.3f\033[K",
it, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega, loss))
}
}
sample.size <- 1000
dimX <- c(2, 3) # predictor `X` dimension
dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension
betas <- Map(diag, 1, dimX, dimF)
Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
B.true <- Reduce(kronecker, rev(betas))
Omega.true <- Reduce(kronecker, rev(Omegas))
# data sampling routine
c(X, F, y, sample.axis) %<-% (sample.data <- function(sample.size, betas, Omegas) {
dimX <- mapply(nrow, betas)
dimF <- mapply(ncol, betas)
# generate response (sample axis is last axis)
y <- runif(prod(sample.size, dimF), -2, 2)
F <- array(y, dim = c(dimF, sample.size)) # ~ U[-1, 1]
Omega <- Reduce(kronecker, rev(Omegas))
X <- apply(F, length(dim(F)), function(Fi) {
dim(Fi) <- dimF
params <- diag(as.vector(mlm(Fi, betas))) + Omega
tensorPredictors::ising_sample(1, params)
})
dim(X) <- c(dimX, sample.size)
list(X = X, F = F, y = y, sample.axis = length(dim(X)))
})(sample.size, betas, Omegas)
local({
X.proto <- array(seq_len(prod(dimX)), dimX)
interactions <- crossprod(mat(X, sample.axis))
dimnames(interactions) <- rep(list(
do.call(paste0, c("X", Map(slice.index, list(X.proto), seq_along(dimX))))
), 2)
cat("Sample Size: ", sample.size, "\n")
print.table(interactions, zero.print = ".")
})
# system.time({
# fit.gmlm <- gmlm_ising(X, y, logger = .logger())
# })
Rprof()
gmlm_ising(X, y)
Rprof(NULL)
summaryRprof()
B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas))
Omega.gmlm <- Reduce(kronecker, rev(fit.gmlm$Omegas))
B.normal <- Reduce(kronecker, rev(attr(fit.gmlm, "tensor_normal")$betas))
Omega.init <- Reduce(kronecker, rev(attr(fit.gmlm, "Omegas.init")))
degen.mask <- attr(fit.gmlm, "degen.mask")
local({
layout(matrix(c(
1, 2, 3, 3, 3,
1, 4, 5, 6, 7
), nrow = 2, byrow = TRUE), width = c(6, 3, 1, 1, 1))
with(na.omit(log), {
plot(range(iter), c(0, 1), type = "n", bty = "n",
xlab = "Iterations", ylab = "Distance")
lines(iter, dist.B, col = "red", lwd = 2)
lines(iter, dist.Omega / max(dist.Omega), col = "blue", lwd = 2)
lines(iter, (loss - min(loss)) / diff(range(loss)), col = "darkgreen", lwd = 2)
norm.grad <- sqrt(norm.grad.B^2 + norm.grad.Omega^2)
# Scale all gradient norms
norm.grad.B <- norm.grad.B / max(norm.grad)
norm.grad.Omega <- norm.grad.Omega / max(norm.grad)
norm.grad <- norm.grad / max(norm.grad)
lines(iter, norm.grad.B, lty = 2, col = "red")
lines(iter, norm.grad.Omega, lty = 2, col = "blue")
lines(iter, norm.grad, lty = 2, col = "darkgreen")
axis(4, at = c(
tail(dist.B, 1),
min(dist.B)
), labels = round(c(
tail(dist.B, 1),
min(dist.B)
), 2), col = NA, col.ticks = "red", las = 1)
axis(4, at = c(
1,
tail(dist.Omega, 1) / max(dist.Omega),
min(dist.Omega) / max(dist.Omega)
), labels = round(c(
max(dist.Omega),
tail(dist.Omega, 1),
min(dist.Omega)
), 2), col = NA, col.ticks = "blue", las = 1)
abline(h = c(tail(dist.B, 1), min(dist.B)),
lty = "dotted", col = "red")
abline(h = c(max(dist.Omega), tail(dist.Omega, 1), min(dist.Omega)) / max(dist.Omega),
lty = "dotted", col = "blue")
})
legend("topright", col = c("red", "blue", "darkgreen"), lty = 1, lwd = 2,
legend = c("dist.B", "dist.Omega", "loss"), bty = "n")
zlim <- max(abs(range(Omega.true, Omega.init, Omega.gmlm))) * c(-1, 1)
matrixImage(Omega.true, main = "true", zlim = zlim, add.values = TRUE, col = col, cex = cex)
matrixImage(round(Omega.init, 2), main = "init (cond. prob.)", zlim = zlim, add.values = TRUE, col = col, cex = cex)
mtext(round(norm(Omega.true - Omega.init, "F"), 3), 3)
matrixImage(round(Omega.gmlm, 2), main = "gmlm (ising)", zlim = zlim, add.values = TRUE, col = col, cex = cex,
col.values = c(par("col"), "red")[`[<-`(array(1, rep(prod(dim(X)[-sample.axis]), 2)), degen.mask, 2)])
mtext(round(norm(Omega.true - Omega.gmlm, "F"), 3), 3)
zlim <- max(abs(range(B.true, B.normal, B.gmlm))) * c(-1, 1)
matrixImage(B.true, main = "true",
zlim = zlim, add.values = TRUE, col = col, cex = cex)
matrixImage(round(B.normal, 2), main = "init (normal)",
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
mtext(round(dist.subspace(B.true, B.normal, normalize = TRUE), 3), 3)
matrixImage(round(B.gmlm, 2), main = "gmlm (ising)",
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
mtext(round(dist.subspace(B.true, B.gmlm, normalize = TRUE), 3), 3)
})
}

View File

@ -0,0 +1,134 @@
#' Specialized version of GMLM for the tensor normal model
#'
#' The underlying algorithm is an ``iterative (block) coordinate descent'' method
#'
#' @export
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
cond.threshold = 25, eps = 1e-6
) {
# rearrange `X`, `F` such that the last axis enumerates observations
if (!missing(sample.axis)) {
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
X <- aperm(X, axis.perm)
F <- aperm(F, axis.perm)
sample.axis <- length(dim(X))
}
# Get problem dimensions (observation dimensions)
dimX <- head(dim(X), -1)
dimF <- head(dim(F), -1)
modes <- seq_along(dimX)
# Ensure the Omega and beta projections lists are lists
if (!is.list(proj.Omegas)) {
proj.Omegas <- rep(NULL, length(modes))
}
if (!is.list(proj.betas)) {
proj.betas <- rep(NULL, length(modes))
}
### Phase 1: Computing initial values
meanX <- rowMeans(X, dims = length(dimX))
meanF <- rowMeans(F, dims = length(dimF))
# center X and F
X <- X - as.vector(meanX)
F <- F - as.vector(meanF)
# initialize Omega estimates as mode-wise, unconditional covariance estimates
Sigmas <- Map(diag, dimX)
Omegas <- Map(diag, dimX)
# Per mode covariance directions
# Note: (the directions are transposed!)
dirsX <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(X, sample.axis, center = FALSE))
dirsF <- Map(function(Sigma) {
SVD <- La.svd(Sigma, nu = 0)
sqrt(SVD$d) * SVD$vt
}, mcov(F, sample.axis, center = FALSE))
# initialization of betas ``covariance direction mappings``
betas <- betas.init <- Map(function(dX, dF) {
s <- min(ncol(dX), nrow(dF))
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
}, dirsX, dirsF)
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Initial value of the log-likelihood (scaled and constants dropped)
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = 0L, betas = betas, Omegas = Omegas,
resid = R, loss = loss
))
### Phase 2: (Block) Coordinate Descent
for (iter in seq_len(max.iter)) {
# update every beta (in random order)
for (j in sample.int(length(betas))) {
FxB_j <- mlm(F, betas[-j], modes[-j])
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
# Project `betas` onto their manifold
if (is.function(proj_j <- proj.betas[[j]])) {
betas[[j]] <- proj_j(betas[[j]])
}
}
# Residuals
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
Sigmas <- mcov(R, sample.axis, center = FALSE)
# Computing `Omega_j`s, the j'th mode presition matrices, in conjunction
# with regularization of the j'th mode covariance estimate `Sigma_j`
for (j in seq_along(Sigmas)) {
# Compute min and max eigen values
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
if (min_max[2] > cond.threshold * min_max[1]) {
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
}
# Compute (unconstraint but regularized) Omega_j as covariance inverse
Omegas[[j]] <- solve(Sigmas[[j]])
# Project Omega_j to the Omega_j's manifold
if (is.function(proj_j <- proj.Omegas[[j]])) {
Omegas[[j]] <- proj_j(Omegas[[j]])
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
# Order of projecting Omega_j and then recomputing Sigma_j is importent
Sigmas[[j]] <- solve(Omegas[[j]])
}
}
# store last loss and compute new value
loss.last <- loss
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
# invoke the logger
if (is.function(logger)) do.call(logger, list(
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
))
# check the break consition
if (abs(loss.last - loss) < eps * loss.last) {
break
}
}
structure(
list(eta1 = mlm(meanX, Sigmas), betas = betas, Omegas = Omegas),
betas.init = betas.init
)
}

View File

@ -0,0 +1,18 @@
#' @export
ising_m2 <- function(
params, use_MC = NULL, nr_samples = 10000L,
warmup = 15L, nr_threads = 1L
) {
if (missing(use_MC)) {
use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params)
}
m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
PACKAGE = "tensorPredictors"
)
M2 <- vech.pinv(m2)
attr(M2, "prob_0") <- attr(m2, "prob_0")
M2
}

View File

@ -0,0 +1,11 @@
#' Sample from the Ising model
#'
#' @param nr number of samples
#' @param params Ising model parameters (numeric vector of size `p (p + 1) / 2`
#' for `p` dimensional random binary vectors)
#' @param warmup Monte-Carlo chain length before retreaving a sample
#'
#' @export
ising_sample <- function(nr, params, warmup = 15L) {
.Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors")
}

View File

@ -124,6 +124,30 @@ make.gmlm.family <- function(name) {
) )
} }
# conditional covariance of the sufficient statistic
# Cov(t(X) | Y = y)
# Note: fy is a single observation!
cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) {
Deltas <- Map(solve, Omegas)
E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas))
# H11 = Cov(vec X | Y = y)
H11 <- Reduce(`%x%`, rev(Deltas))
# H21 = Cov(vec X %x% vec X, vec X | Y = y)
H21 <- kronecker(E1, H11) + kronecker(H11, E1)
# H22 = Cov(vec X %x% vec X | Y = y)
H22 <- local({
e1e1 <- tcrossprod(E1, E1)
h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1)
aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2))
})
# Combine into single covariance matrix
cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2)))
}
# mean conditional Fisher Information # mean conditional Fisher Information
fisher.info <- function(Fy, eta1, alphas, Omegas) { fisher.info <- function(Fy, eta1, alphas, Omegas) {
# retrieve dimensions # retrieve dimensions

View File

@ -9,6 +9,15 @@
#' or tensor of dimensions \code{dims} iff \code{inv} is true. #' or tensor of dimensions \code{dims} iff \code{inv} is true.
#' #'
#' @examples #' @examples
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5)) #' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
#' stopifnot(exprs = { #' stopifnot(exprs = {
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE)) #' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
@ -22,15 +31,6 @@
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2))) #' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
#' }) #' })
#' #'
#' stopifnot(all.equal(
#' mat(1:12, 2, dims = c(2, 3, 2)),
#' matrix(c(
#' 1, 2, 7, 8,
#' 3, 4, 9, 10,
#' 5, 6, 11, 12
#' ), 3, 4, byrow = TRUE)
#' ))
#'
#' @export #' @export
mat <- function(T, modes, dims = dim(T), inv = FALSE) { mat <- function(T, modes, dims = dim(T), inv = FALSE) {
modes <- as.integer(modes) modes <- as.integer(modes)

View File

@ -7,6 +7,8 @@
#' @param sub sub-title of the plot #' @param sub sub-title of the plot
#' @param interpolate a logical vector (or scalar) indicating whether to apply #' @param interpolate a logical vector (or scalar) indicating whether to apply
#' linear interpolation to the image when drawing. #' linear interpolation to the image when drawing.
#' @param new.plot Recreating the plot area (clearing the plot device). can be
#' used to update a plot but _not_ recreate it. Leads to smoother updating.
#' @param ... further arguments passed to \code{\link{rasterImage}} #' @param ... further arguments passed to \code{\link{rasterImage}}
#' #'
#' @examples #' @examples
@ -16,13 +18,16 @@
#' @export #' @export
matrixImage <- function(A, add.values = FALSE, matrixImage <- function(A, add.values = FALSE,
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA, main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE), axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE),
digits = getOption("digits") col.values = par("col"), cex = 1,
digits = getOption("digits"), new.plot = TRUE
) { ) {
# plot raster image # plot raster image
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black", if (new.plot) {
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub, plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
asp = asp) xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
asp = asp)
}
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0. # Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
if (missing(zlim)) { if (missing(zlim)) {
@ -41,7 +46,7 @@ matrixImage <- function(A, add.values = FALSE,
# X/Y axes index (matches coordinates to matrix indices) # X/Y axes index (matches coordinates to matrix indices)
x <- seq(1, ncol(A), by = 1) x <- seq(1, ncol(A), by = 1)
y <- seq(1, nrow(A)) y <- seq(1, nrow(A))
if (axes) { if (axes && new.plot) {
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1) axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1) axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
} }
@ -52,6 +57,7 @@ matrixImage <- function(A, add.values = FALSE,
A[!add.values] <- NA A[!add.values] <- NA
A[add.values] <- format(A[add.values], digits = digits) A[add.values] <- format(A[add.values], digits = digits)
} }
text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A, adj = 0.5) text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A,
adj = 0.5, cex = cex, col = col.values)
} }
} }

View File

@ -12,9 +12,13 @@
#' #'
#' @param X multi-dimensional array #' @param X multi-dimensional array
#' @param sample.axis observation axis index #' @param sample.axis observation axis index
#' @param center logical, if `TRUE` (the default) compute the centered second
#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment
#' of each mode is computed. This is specifically usefull in case `X` is
#' already centered.
#' #'
#' @export #' @export
mcov <- function(X, sample.axis = 1L) { mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
# observation modes (axis indices) # observation modes (axis indices)
modes <- seq_along(dim(X))[-sample.axis] modes <- seq_along(dim(X))[-sample.axis]
# observation dimensions # observation dimensions
@ -26,16 +30,18 @@ mcov <- function(X, sample.axis = 1L) {
if (sample.axis != r + 1L) { if (sample.axis != r + 1L) {
X <- aperm(X, c(modes, sample.axis)) X <- aperm(X, c(modes, sample.axis))
} }
# centering: Z = X - E[X] # centering: X <- X - E[X]
Z <- X - c(rowMeans(X, dims = r)) if (center) {
X <- X - c(rowMeans(X, dims = r))
}
# estimes (unscaled) covariances for each mode # estimes (unscaled) covariances for each mode
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z)) Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X))
# scale by per mode "sample" size # scale by per mode "sample" size
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL) Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
# estimate trace of Kronecker product of covariances # estimate trace of Kronecker product of covariances
tr.est <- prod(p) * mean(Z^2) tr.est <- prod(p) * mean(X^2)
# as well as the current trace of the unscaled covariances # as well as the current trace of the unscaled covariances
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL))) tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))

View File

@ -44,16 +44,20 @@
#' )) #' ))
#' #'
#' @export #' @export
mcrossprod <- function(A, B, mode) { mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) {
storage.mode(A) <- "double" storage.mode(A) <- "double"
if (is.null(dim(A))) { if (!missing(dimA)) {
dim(A) <- dimA
} else if (is.null(dim(A))) {
dim(A) <- length(A) dim(A) <- length(A)
} }
if (missing(B)) { if (missing(B)) {
.Call("C_mcrossprod_sym", A, as.integer(mode)) .Call("C_mcrossprod_sym", A, as.integer(mode))
} else { } else {
storage.mode(B) <- "double" storage.mode(B) <- "double"
if (is.null(dim(B))) { if (!missing(dimB)) {
dim(B) <- dimB
} else if (is.null(dim(B))) {
dim(B) <- length(B) dim(B) <- length(B)
} }
.Call("C_mcrossprod", A, B, as.integer(mode)) .Call("C_mcrossprod", A, B, as.integer(mode))

View File

@ -75,23 +75,63 @@
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4} #' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As))) #' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
#' #'
#' # Equivalent to
#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
#' # Collect all matrices in `B`
#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs
#'
#' # replicate transposition if of length one only
#' transposed <- if (length(transposed) == 1) {
#' rep(as.logical(transposed), length(Bs))
#' } else {
#' as.logical(transposed)
#' }
#'
#' # iteratively apply Tensor Times Matrix multiplication over modes
#' for (i in seq_along(modes)) {
#' A <- ttm(A, Bs[[i]], modes[i], transposed[i])
#' }
#'
#' # return result tensor
#' A
#' }
#'
#' @export #' @export
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) { mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
# Collect all matrices in `B` # Collect all matrices in `B`
Bs <- if (is.matrix(Bs)) list(Bs) else Bs Bs <- if (!is.list(Bs)) list(Bs) else Bs
# ensure all `B`s are matrices
Bs <- Map(as.matrix, Bs)
# replicate transposition if of length one only # replicate transposition if of length one only
transposed <- if (length(transposed) == 1) { transposed <- if (length(transposed) == 1) {
rep(as.logical(transposed), length(Bs)) rep(as.logical(transposed), length(Bs))
} else { } else if (length(transposed) == length(modes)) {
as.logical(transposed) as.logical(transposed)
} else {
stop("Dim missmatch of param. `transposed`")
} }
# iteratively apply Tensor Times Matrix multiplication over modes .Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors")
for (i in seq_along(modes)) {
A <- ttm(A, Bs[[i]], modes[i], transposed[i])
}
# return result tensor
A
} }
# # general usage
# dimA <- c(3, 17, 19, 2)
# dimC <- c(7, 11, 13, 5)
# A <- array(rnorm(prod(dimA)), dim = dimA)
# trans <- c(TRUE, FALSE, TRUE, FALSE)
# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA))
# C <- mlm(A, Bs, transposed = trans)
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# microbenchmark::microbenchmark(
# mlm(A, Bs, transposed = trans),
# mlm_reference(A, Bs, transposed = trans)
# )
# microbenchmark::microbenchmark(
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]),
# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
# )

View File

@ -1,14 +1,14 @@
#' Duplication Matrix #' Duplication Matrix
#' #'
#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric #' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric
#' @examples #' @examples
#' p <- 8 #' p <- 8
#' A <- matrix(rnorm(p^2), p, p) #' A <- matrix(rnorm(p^2), p, p)
#' A <- A + t(A) #' A <- A + t(A)
#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A))) #' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A)))
#' #'
#' @export #' @export
D <- function(p) { Dup <- function(p) {
# setup `vec` and `vech` element indices (zero indexed) # setup `vec` and `vech` element indices (zero indexed)
vec <- matrix(NA_integer_, p, p) vec <- matrix(NA_integer_, p, p)
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2) vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
@ -27,10 +27,10 @@ D <- function(p) {
#' #'
#' @examples #' @examples
#' p <- 5 #' p <- 5
#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p))) #' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p)))
#' #'
#' @export #' @export
D.pinv <- function(p) { Dup.pinv <- function(p) {
Dp <- D(p) Dp <- D(p)
solve(crossprod(Dp), t(Dp)) solve(crossprod(Dp), t(Dp))
} }
@ -119,7 +119,7 @@ K <- function(dim, mode) {
#' #'
#' @examples #' @examples
#' p <- 7 #' p <- 7
#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p))) #' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p)))
#' #'
#' @export #' @export
N <- function(p) { N <- function(p) {

View File

@ -11,6 +11,34 @@
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if #' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
#' \code{transposed} is true. #' \code{transposed} is true.
#' #'
#' @examples
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' nrowB <- sample.int(10, 1)
#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB)
#'
#' C <- ttm(A, B, mode)
#'
#' dimC <- ifelse(seq_along(dims) != mode, dimA, nrowB)
#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' for (mode in 1:4) {
#' dimA <- sample.int(10, 4, replace = TRUE)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' ncolB <- sample.int(10, 1)
#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode])
#'
#' C <- ttm(A, B, mode, transposed = TRUE)
#'
#' C.ref <- ttm(A, t(B), mode)
#'
#' stopifnot(all.equal(C, C.ref))
#' }
#'
#' @export #' @export
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) { ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
storage.mode(T) <- storage.mode(M) <- "double" storage.mode(T) <- storage.mode(M) <- "double"

View File

@ -31,3 +31,52 @@ vech.pinv <- function(a) {
# de-vectorized matrix # de-vectorized matrix
matrix(a[vech.pinv.index(p)], p, p) matrix(a[vech.pinv.index(p)], p, p)
} }
# vech <- function(A) {
# stopifnot(all(nrow(A) == dim(A)))
# axis <- seq_len(nrow(A))
# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# A[rowSums(grid - 1) < nrow(A)]
# }
# p <- 4
# X <- matrix(rnorm(p^2), p)
# vech(outer(X, X))
# vech(outer(c(X), c(X)))
# sort(unique(c(sym(outer(X, X)))))
# sort(vech(sym(outer(X, X))))
# # (I <- matrix(c(
# # 1, 1, 1,
# # 2, 1, 1,
# # 3, 1, 1,
# # 2, 2, 1,
# # 3, 2, 1,
# # 3, 3, 1,
# # 2, 1, 2,
# # 3, 1, 2,
# # 3, 2, 2,
# # 3, 1, 3
# # ), ncol = 3, byrow = TRUE))
# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1)
# # p <- 3
# # ord <- 4
# # A <- array(seq_len(p^ord), rep(p, ord))
# # axis <- seq_len(nrow(A))
# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
# # array(rowSums(grid - 1), dim(A))
# # A[rowSums(grid - 1) < nrow(A)]
# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i))))

View File

@ -1,2 +1,3 @@
PKG_LIBS = $(BLAS_LIBS) $(FLIBS) PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS)
# PKG_CFLAGS = -pg

View File

@ -0,0 +1,138 @@
#ifndef INCLUDE_GUARD_R_API_H
#define INCLUDE_GUARD_R_API_H
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
#define R_NO_REMAP
#include <stdint.h> // uint32_t, uint64_t, ...
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// NULL pointer (not defined memory, array, vector, ...)
#ifndef NULL
#define NULL ((void*)0)
#endif
// Truth values
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
// Convenience convertion function similar to `asInteger`, `asReal`, ...
#define NA_UNSIGNED (-((unsigned long)1))
static inline unsigned long asUnsigned(SEXP _val) {
int val = Rf_asInteger(_val);
if (val == NA_INTEGER || val < 0) {
return NA_UNSIGNED;
}
return (unsigned long)val;
}
// Remap BLAS and LAPACK bindings (being consistent with my own interface and I
// don't like to pass scalars by reference (memory address))
/** y <- a x + y */
static inline void axpy(
const int dim,
const double a,
const double* x, const int incx,
double* y, const int incy
) {
F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
}
/** Scale a 1d array `x <- a x` */
static inline void scale(
const int dim,
const double a,
double *x, const int incx
) {
F77_CALL(dscal)(&dim, &a, x, &incx);
}
/** Dot product */
static inline double dot(
const int dim,
const double* x, const int incx,
const double* y, const int incy
) {
return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
}
/** 1d array linear combination `z <- a x + b y` */
// TODO: optimize! (iff needed?, to optimize of at all?)
static inline void lincomb(
const int dim,
const double a, const double* x, const int incx,
const double b, const double* y, const int incy,
double* z, const int incz
) {
for (int i = 0; i < dim; ++i) {
z[i * incz] = a * x[i * incx] + b * y[i * incy];
}
}
/**
* A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
*
* With parameterized seed/state this custom PRGN can be used in a thread save!
*
* For single threaded operations the PRNG provided by `R` are prefered. But they
* are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
* application.
*
* See TODO: ...https://en.wikipedia.org/wiki/Xorshift
* SchachHoernchen
*/
static inline uint64_t rot64(uint64_t val, int shift) {
return (val << shift) | (val >> (64 - shift));
}
// (internal) PRGN state/seed type
typedef uint64_t rng_seed_t[4];
// Hookup the PRNG via its seed to R's random number generation utilities
static inline void init_seed(rng_seed_t seed) {
GetRNGstate();
for (size_t i = 0; i < 4; ++i) {
seed[i] = 0;
for (size_t j = 0; j < 64; ++j) {
seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
}
}
PutRNGstate();
}
// PRNG of the Xorshift family
// The least significant 32 bits are not reliable, use most significant 32 bits
static inline uint64_t rand_u64(rng_seed_t seed) {
uint64_t e = seed[0] - rot64(seed[1], 7);
seed[0] = seed[1] ^ rot64(seed[1], 13);
seed[1] = seed[2] + rot64(seed[3], 37);
seed[2] = seed[3] + e;
seed[3] = e + seed[0];
return seed[3];
}
// With external supplied seed, every thread can have its own seed and as such
// we can use this as a thread save alternative to R's `unif_rand()`.
static inline double unif_rand_thrd(rng_seed_t seed) {
return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
}
#endif /* INCLUDE_GUARD_R_API_H */

View File

@ -0,0 +1,185 @@
#ifndef INCLUDE_GUARD_BIT_UTILS_H
#define INCLUDE_GUARD_BIT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
#if (defined(__GNUC__) && defined(__BMI2__))
#include <x86intrin.h>
#include <bmi2intrin.h> // _pdep_u32
#endif
#ifdef _MSC_VER
#include <intrin.h> // _BitScanReverse
#endif
/**
* Computes the parity of a word (0 for even bit count and 1 otherwise)
*/
#ifdef __GNUC__
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
#else
static inline int bitParity32(uint32_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
static inline int bitParity64(uint64_t x) {
int p = (x != 0);
while (x &= x - 1) { p = !p; }
return p;
}
#endif
/**
* Counts the number of set bits (`1`s in binary) in the number `x`
*/
#ifdef __GNUC__ /* POPulation COUNT */
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
#else
static inline int bitCount32(uint32_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
static inline int bitCount64(uint64_t x) {
int count = 0; // counts set bits
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
return count;
}
#endif
/**
* Gets the index of the LSB (least significant bit)
*
* @condition `x != 0`, for `x == 0` undefined behaviour
*/
#ifdef __GNUC__
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
#elif _MSC_VER
static inline int bitScanLS32(uint32_t x) {
unsigned long bsr;
_BitScanReverse(&bsr, x);
return 31 - bsr;
}
static inline int bitScanLS64(uint64_t x) {
unsigned long bsr;
_BitScanReverse64(&bsr, x);
return 63 - bsr;
}
#else
static inline int bitScanLS32(uint32_t x) {
int ctz = 0; // result storing the Count of Trailing Zeros
bool empty; // boolean variable storing if a bit has not found (search area is empty)
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint32_t)( 1)));
return ctz;
}
static inline int bitScanLS64(uint64_t x) {
int ctz = 0;
bool empty;
// logarithmic search for LSB bit index (-1)
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
x >>= 32 * empty;
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
x >>= 16 * empty;
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
x >>= 8 * empty;
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
x >>= 4 * empty;
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
x >>= 2 * empty;
ctz += (empty = !(x & (uint64_t)( 1)));
return ctz;
}
#endif
/**
* Parallel DEPosit (aka PDEP)
*
* Writes the `val` bits into the positions of the set bits in `mask`.
*
* Example:
* val: **** **** **** 1.1.
* mask: 1... 1... 1... 1...
* res: 1... .... 1... ....
*/
#if (defined(__GNUC__) && defined(__BMI2__))
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
return _pdep_u32(val, mask);
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
return _pdep_u64(val, mask);
}
#else
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
uint32_t res = 0;
for (uint32_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
uint64_t res = 0;
for (uint64_t pos = 1; mask; pos <<= 1) {
if (val & pos) {
res |= mask & -mask;
}
mask &= mask - 1;
}
return res;
}
#endif
/**
* Gets the next lexicographically ordered permutation of an n-bit word.
*
* Let `val` be a bit-word with `n` bits set, then this procedire computes a
* `n` bit word wich is the next element in the lexicographically ordered
* sequence of `n` bit words. For example
*
* val -> bitNextPerm(val)
* 00010011 -> 00010101
* 00010101 -> 00010110
* 00010110 -> 00011001
* 00011001 -> 00011010
* 00011010 -> 00011100
* 00011100 -> 00100011
*
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
*
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
*/
static inline uint32_t bitNextPerm32(uint32_t val) {
// Sets all least significant 0-bits of val to 1
uint32_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
}
static inline uint64_t bitNextPerm64(uint64_t val) {
// Sets all least significant 0-bits of val to 1
uint64_t t = val | (val - 1);
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
}
#endif /* BIT_UTILS_INCLUDE_GUARD_H */

View File

@ -7,27 +7,96 @@
// ); // );
/* Tensor Times Matrix a.k.a. Mode Product */ /* Tensor Times Matrix a.k.a. Mode Product */
extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op); extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
/* Multi Linear Multiplication (iterated mode products) */
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops);
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */ /* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
extern SEXP mtvk(SEXP A, SEXP Bs); extern SEXP mtvk(SEXP A, SEXP Bs);
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */ /* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode); extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode);
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */ /* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
extern SEXP mcrossprod_sym(SEXP A, SEXP mode); extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode);
// /* Higher Order PCA */
// extern SEXP hopca(SEXP X);
/* Singular Value Decomposition */
extern SEXP R_svd(SEXP A);
// /* Iterative Cyclic Updating for the Tensor Normal Distribution */
// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter);
// /* Generalized tensor normal using NAGD */
// extern SEXP R_gmlm_tensor_normal(
// SEXP X, SEXP Fy,
// SEXP eta_bar, SEXP alphas, SEXP Omegas,
// SEXP max_iter, SEXP max_line_iter
// );
/* Solve linear equation system A X = B */
extern SEXP R_solve(SEXP A, SEXP B);
/* Determinant of a matrix */
extern SEXP R_det(SEXP A);
// /* Unscaled PMF of the Ising model with natural parameters `_params` */
// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_partition_func_exact(SEXP _params);
// /* Estimated partition function of the Ising model with natural parameters `_params` */
// extern SEXP R_ising_partition_func_MC(SEXP _params);
// /* Exact computation of the partition function of the Ising model with natural
// parameters `_params` */
// extern SEXP R_ising_m2_exact(SEXP _params);
// // extern SEXP R_ising_m2_MC64(SEXP _params);
// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup);
// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads);
/* Sample from the Ising model */
extern SEXP R_ising_sample(SEXP, SEXP, SEXP);
// Interface to the Ising second moment function
extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP);
/* List of registered routines (a.k.a. C entry points) */ /* List of registered routines (a.k.a. C entry points) */
static const R_CallMethodDef CallEntries[] = { static const R_CallMethodDef CallEntries[] = {
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED {"C_ttm", (DL_FUNC) &R_ttm, 4},
{"C_ttm", (DL_FUNC) &ttm, 4}, {"C_mlm", (DL_FUNC) &R_mlm, 4},
{"C_mtvk", (DL_FUNC) &mtvk, 2}, {"C_mtvk", (DL_FUNC) &mtvk, 2},
{"C_mcrossprod", (DL_FUNC) &mcrossprod, 3}, {"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3},
{"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2}, {"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2},
{"C_svd", (DL_FUNC) &R_svd, 1},
{"C_solve", (DL_FUNC) &R_solve, 2},
{"C_det", (DL_FUNC) &R_det, 1},
{"C_ising_sample", (DL_FUNC) &R_ising_sample, 3},
{"C_ising_m2", (DL_FUNC) &R_ising_m2, 5},
// {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2},
// {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1},
// {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1},
// {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1},
// {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3},
// {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2},
// {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1},
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
// {"C_hopca", (DL_FUNC) &hopca, 1},
// {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT
// {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT
{NULL, NULL, 0} {NULL, NULL, 0}
}; };
/* Restrict C entry points to registered routines. */ /**
* Restrict C entry points to registered routines.
*
* NOTE: Naming convention: `R_init_<PACKAGE-NAME>`
*/
void R_init_tensorPredictors(DllInfo *dll) { void R_init_tensorPredictors(DllInfo *dll) {
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
R_useDynamicSymbols(dll, FALSE); R_useDynamicSymbols(dll, FALSE);

View File

@ -0,0 +1,77 @@
#ifndef INCLUDE_GUARD_INT_UTILS_H
#define INCLUDE_GUARD_INT_UTILS_H
#include <stdint.h> // uint32_t, uint64_t
// /**
// * Integer logarithm, the biggest power `p` such that `2^p <= x`.
// */
// static int ilog2(uint64_t x) {
// int log = 0;
// while (x >>= 1) {
// log++;
// }
// return log;
// }
/**
* Integer Square root of `y`, that is `ceiling(sqrt(y))`
*/
static uint64_t isqrt(uint64_t x) {
// implements a binary search
uint64_t root = 0;
uint64_t left = 0; // left boundary
uint64_t right = x + 1; // right boundary
while(left + 1UL < right) {
root = (left + right) / 2UL;
if (root * root <= x) {
left = root;
} else {
right = root;
}
}
return left;
}
/**
* Inverse to the triangular numbers
*
* Given a positive number `x = p (p + 1) / 2` it computes `p` if possible.
* In case there is no positive integer solution `0` is returned.
*
* Note: this follows immediately from the quadratic equation.
*/
static uint32_t invTriag(uint32_t x) {
uint64_t root = isqrt(8UL * (uint64_t)x + 1UL);
if (root * root != 8UL * x + 1UL) {
return 0;
}
return (root - 1) / 2;
}
// /**
// * Number of sub-sets (including empty set) of max-size
// *
// * It computes, with `binom` beeing the binomial coefficient, the following sum
// *
// * sum_{i = 0}^k binom(n, i)
// */
// static uint64_t nrSubSets(uint64_t n, uint64_t k) {
// uint64_t sum = 1, binom = 1;
// for (uint64_t i = 1; i <= k; ++i) {
// binom *= n--;
// binom /= i;
// sum += binom;
// }
// return sum;
// }
#endif /* INCLUDE_GUARD_INT_UTILS_H */

View File

@ -0,0 +1,181 @@
#ifndef INCLUDE_GUARD_ISING_MCMC_H
#define INCLUDE_GUARD_ISING_MCMC_H
#include "R_api.h"
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
* method given the Ising model parameters in compact half-vectorized form.
*
* f(x) = p0(params) exp(vech(x x')' params)
*
* @important requires to be wrapped between `GetRNGstate()` and `PutRNGState()`
* calls. For example
*
* GetRNGstate()
* for (size_t sample = 0; sample < nr_samples; ++sample) {
* ising_mcmc_vech(warmup, dim, dim, params, &X[sample * dim]);
* }
* PutRNGState()
*/
static inline void ising_mcmc_vech(
const size_t warmup,
const size_t dim, const double* params,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand() * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
// Allways add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// Thread save version of `ising_mcmc` (using thread save PRNG)
static inline void ising_mcmc_vech_thrd(
const size_t warmup,
const size_t dim, const double* params,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
double invProb = 1.0 + exp(-params[I]);
X[i] = unif_rand_thrd(seed) * invProb < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
// Tracks position in half-vectorized storage
size_t J = i;
// Sum all log-odds for `j < i` (two way interactions)
for (size_t j = 0; j < i; J += dim - ++j) {
log_odds += X[j] ? params[J] : 0.0;
}
// Allways add log-odds for `i = j` (self-interaction)
log_odds += params[J];
// Continue adding log-odds for `j > i` (two way interactions)
J++;
for (size_t j = i + 1; j < dim; ++j, ++J) {
log_odds += X[j] ? params[J] : 0.0;
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
* method given the Ising model parameters in symmetric matrix form `A`.
*
* f(x) = prob_0(A) exp(x' A x)
*
* The relation to the natural parameters `params` as used in the half-vectorized
* version `ising_mcmc_vech()`
*
* f(x) = prob_0(params) exp(vech(x x')' params)
*
* is illustrated via an example with `dim = 5`;
*
* p0 [ p0 p1/2 p2/2 p3/2 p4/2 ]
* p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ]
* params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ]
* p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ]
* p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ]
*
* or the other way arroung given the symmetric matrix form `A` converted to
* the natural parameters
*
* [ a00 a10 a20 a30 a40 ] a00
* [ a10 a11 a21 a31 a41 ] 2*a10 a11
* A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22
* [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33
* [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44
*
*/
static inline void ising_mcmc_mat(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
// thread save version of `ising_mcmc_mat()` (using thread save PRNG)
static inline void ising_mcmc_mat_thrd(
const size_t warmup,
const size_t dim, const size_t ldA, const double* A,
int* X, rng_seed_t seed
) {
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
for (size_t i = 0; i < dim; ++i) {
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
}
// Skip the first samples of the Markov-Chain (warmup)
for (size_t skip = 0; skip < warmup; ++skip) {
// For every component
for (size_t i = 0; i < dim; ++i) {
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
double log_odds = 0.0;
for (size_t j = 0; j < i; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
log_odds += A[i * (ldA + 1)];
for (size_t j = i + 1; j < dim; ++j) {
log_odds += (2 * X[j]) * A[i * ldA + j];
}
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
}
}
}
#endif /* INCLUDE_GUARD_ISING_MCMC_H */

View File

@ -0,0 +1,401 @@
/** Methods computing or estimating the second moment of the Ising model given
* the natural parameters of the p.m.f. in exponential family form. That is
* f(x) = p0(params) exp(vech(x x')' params)
* where `params` are the natural parameters. The scaling constant `p0(params)`
* is also the zero event `x = (0, ..., 0)` probability.
*
* Three different version are provided.
* - exact: Exact method computing the true second moment `E[X X']` given the
* parameters `params`. This method has a runtime of `O(2^dim)` where
* `dim` is the dimension of the random variable `X`. This means it is
* infeasible for bigger (like 25 and above) values if `dim`
* - MC: Monte-Carlo method used in case the exact method is not feasible.
* - MC Thrd: Multi-Threaded Monte-Carlo method.
*
* The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
* dimensional numeric vector containing the log-odds. The indexing schema into
* the parameters corresponding to the log-odds of single or two way interactions
* with indicex `i, j` are according to the half-vectorization schema
*
* i = 4'th row => Symmetry => Half-Vectorized Storage
*
* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]
* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]
* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]
* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]
* [ * * * * * * ] [ * * * 16 * * ] [ * * ]
* [ * * * * * * ] [ * * * 17 * * ] [ * ]
*
*/
#include "R_api.h"
#include "bit_utils.h"
#include "int_utils.h"
#include "ising_MCMC.h"
#ifndef __STDC_NO_THREADS__
#include <threads.h>
#endif
////////////////////////////////////////////////////////////////////////////////
// TODO: read //
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
// and
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
// as well as do the following PROPERLLY
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
// void ising_m2_exact_mat(
// const size_t dim, const size_t ldA, const double* A,
// double* M2
// ) {
// double sum_0 = 1.0;
// const uint32_t max_event = (uint32_t)1 << dim;
// (void)memset(M2, 0, dim * dim * sizeof(double));
// for (uint32_t X = 1; X < max_event; ++X) {
// // Evaluate quadratic form `X' A X` using symmetry of `A`
// double XtAX = 0.0;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// XtAX += A[i * (ldA + 1)];
// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
// }
// }
// const double prob_X = exp(XtAX);
// sum_0 += prob_X;
// for (uint32_t Y = X; Y; Y &= Y - 1) {
// const int i = bitScanLS32(Y);
// for (uint32_t Z = Y; Z; Z &= Z - 1) {
// M2[i * dim + bitScanLS32(Z)] += prob_X;
// }
// }
// }
// const double prob_0 = 1.0 / sum_0;
// for (size_t j = 0; j < dim; ++j) {
// for (size_t i = 0; i < j; ++i) {
// M2[j * dim + i] = M2[i * dim + j];
// }
// for (size_t i = j; i < dim; ++i) {
// M2[i * dim + j] *= prob_0;
// }
// }
// }
double ising_m2_exact(const size_t dim, const double* params, double* M2) {
// Accumulator of sum of all (unscaled) probabilities
double sum_0 = 1.0;
// max event (actually upper bound)
const uint32_t max_event = (uint32_t)1 << dim;
// Length of parameters
const size_t len = dim * (dim + 1) / 2;
// Initialize M2 to zero
(void)memset(M2, 0, len * sizeof(double));
// Iterate all `2^dim` binary vectors `X`
for (uint32_t X = 1; X < max_event; ++X) {
// Dot product `<vech(X X'), params>`
double dot_X = 0.0;
// all bits in `X`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
// add single and two way interaction log odds
for (uint32_t Z = Y; Z; Z &= Z - 1) {
dot_X += params[I + bitScanLS32(Z)];
}
}
// (unscaled) probability of current event `X`
const double prob_X = exp(dot_X);
sum_0 += prob_X;
// Accumulate set bits probability for the first end second moment `E[X X']`
for (uint32_t Y = X; Y; Y &= Y - 1) {
const int i = bitScanLS32(Y);
const int I = (i * (2 * dim - 1 - i)) / 2;
for (uint32_t Z = Y; Z; Z &= Z - 1) {
M2[I + bitScanLS32(Z)] += prob_X;
}
}
}
// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
const double prob_0 = 1.0 / sum_0;
for (size_t i = 0; i < len; ++i) {
M2[i] *= prob_0;
}
return prob_0;
}
// Monte-Carlo method using Gibbs sampling for the second moment
double ising_m2_MC(
/* options */ const size_t nr_samples, const size_t warmup,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Dirty trick (reuse output memory for precise counting)
_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
uint64_t* counts = (uint64_t*)M2;
// Initialize counts to zero
(void)memset(counts, 0, len * sizeof(uint64_t));
// Allocate memory to store Markov-Chain value
int* X = (int*)R_alloc(dim, sizeof(int));
// Create/Update R's internal PRGN state
GetRNGstate();
// Spawn chains
for (size_t sample = 0; sample < nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech(warmup, dim, params, X);
// Accumulate component counts
uint64_t* count = counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
// Write R's internal PRNG state back (if needed)
PutRNGstate();
// Compute means from counts
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)(nr_samples);
}
return -1.0; // TODO: also compute prob_0
}
// in case the compile supports standard C threads
#ifndef __STDC_NO_THREADS__
// Worker thread to calling thread communication struct
typedef struct thrd_data {
rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value
size_t nr_samples; // Nr. of samples this worker handles
size_t warmup; // Monte-Carlo Chain burne-in length
size_t dim; // Random variable dimension
const double* params; // Ising model parameters
int* X; // Working memory to store current binary sample
uint32_t* counts; // (output) count of single and two way interactions
double prob_0; // (output) zero event probability estimate
} thrd_data_t;
// Worker thread function
int thrd_worker(thrd_data_t* data) {
// Extract data as thread local variables (for convenience)
const size_t dim = data->dim;
int* X = data->X;
// Initialize counts to zero
(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));
// Spawn Monte-Carlo Chains (one foe every sample)
for (size_t sample = 0; sample < data->nr_samples; ++sample) {
// Draw random sample `X ~ Ising(params)`
ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);
// Accumulate component counts
uint32_t* count = data->counts;
for (size_t i = 0; i < dim; ++i) {
for (size_t j = i; j < dim; ++j) {
*(count++) += X[i] & X[j];
}
}
}
return 0;
}
double ising_m2_MC_thrd(
/* options */ const size_t nr_samples, const size_t warmup,
const size_t nr_threads,
/* dimension */ const size_t dim,
/* vectors */ const double* params, double* M2
) {
// Length of `params` and `M2`
const size_t len = (dim * (dim + 1)) / 2;
// Allocate working and self memory for worker threads
int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
// Provide instruction wor worker and dispatch them
for (size_t tid = 0; tid < nr_threads; ++tid) {
// Every thread needs its own PRNG seed!
init_seed(threads_data[tid].seed);
// divide work among workers (more or less) equaly with the first worker
// (tid = 0) having the additional remainder of divided work (`nr_samples`)
threads_data[tid].nr_samples = nr_samples / nr_threads + (
tid ? 0 : nr_samples % nr_threads
);
threads_data[tid].warmup = warmup;
threads_data[tid].dim = dim;
threads_data[tid].params = params;
// Every worker gets its disjint working memory
threads_data[tid].X = &Xs[dim * tid];
threads_data[tid].counts = &counts[len * tid];
// dispatch the worker
thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
}
// Join (Wait for) all worker
for (size_t tid = 0; tid < nr_threads; ++tid) {
thrd_join(threads[tid], NULL);
}
// Accumulate worker results into first (tid = 0) worker counts result
for (size_t tid = 1; tid < nr_threads; ++tid) {
for (size_t i = 0; i < len; ++i) {
counts[i] += counts[tid * len + i];
}
}
// convert discreat counts into means
for (size_t i = 0; i < len; ++i) {
M2[i] = (double)counts[i] / (double)nr_samples;
}
return -2.0; // TODO: also compute prob_0
}
#endif /* !__STDC_NO_THREADS__ */
/* Implementation of `.Call` entry point */
////////////////////////////////////////////////////////////////////////////////
// TODO: make specialized version using the parameters in given sym mat form //
// similar to `ising_sample()` //
////////////////////////////////////////////////////////////////////////////////
extern SEXP R_ising_m2(
SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
) {
// Extract and validate arguments
size_t protect_count = 0;
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
protect_count++;
}
// Depending on parameters given in symmetric matrix form or natural
// or natrual parameters in the half-vectorized memory layout
size_t dim;
double* params;
if (Rf_isMatrix(_params)) {
if (Rf_nrows(_params) != Rf_ncols(_params)) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Convert to natural parameters
dim = Rf_nrows(_params);
params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));
double* A = REAL(_params);
for (size_t j = 0, I = 0; j < dim; ++j) {
for (size_t i = j; i < dim; ++i, ++I) {
params[I] = (1 + (i != j)) * A[i + j * dim];
}
}
} else {
// Get Ising random variable dimension `dim` from "half-vectorized" parameters
// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
dim = invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
" `p` is the dimension of the random variable");
}
params = REAL(_params);
}
// Determin the method to use and validate if possible
const int use_MC = Rf_asLogical(_use_MC);
if (use_MC == NA_LOGICAL) {
Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
}
// Allocate result vector
SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
++protect_count;
// asside computed zero event probability (inverse partition function), the
// scaling factor for the Ising model p.m.f.
double prob_0 = -1.0;
if (use_MC) {
// Convert and validate arguments for the Monte-Carlo methods
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
const size_t nr_threads = asUnsigned(_nr_threads);
if (nr_threads == 0 || nr_threads > 256) {
Rf_error("Invalid 'nr_thread' value");
}
if (nr_threads == 1) {
// Single threaded Monte-Carlo method
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
} else {
// Multi-Threaded Monte-Carlo method if provided, otherwise use
// the single threaded version with a warning
#ifdef __STDC_NO_THREADS__
Rf_warning("Multi-Threading NOT supported, using fallback.");
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
#else
prob_0 = ising_m2_MC_thrd(
nr_samples, warmup, nr_threads,
dim, params, REAL(_M2)
);
#endif
}
} else {
// Exact method (ignore other arguments), only validate dimension
if (25 < dim) {
Rf_error("Dimension '%d' too big for exact method (max 24)", dim);
}
// and call the exact method
prob_0 = ising_m2_exact(dim, params, REAL(_M2));
}
// Set log-lokelihood as an attribute to the computed second moment
SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
++protect_count;
Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);
// release SEPXs to the garbage collector
UNPROTECT(protect_count);
return _M2;
}

View File

@ -0,0 +1,72 @@
#ifndef INCLUDE_GUARD_ISING_SAMPLE_H
#define INCLUDE_GUARD_ISING_SAMPLE_H
#include "R_api.h"
#include "int_utils.h"
#include "ising_MCMC.h"
// .Call interface to draw from sample from the Ising model
extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) {
// Counts number of protected SEXP's to give them back to the garbage collector
size_t protect_count = 0;
// Parse and validate arguments
const size_t nr_samples = asUnsigned(_nr_samples);
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
}
const size_t warmup = asUnsigned(_warmup);
if (warmup == NA_UNSIGNED) {
Rf_error("Invalid 'warmup' value, expected non-negative integer");
}
// Determin parameter mode (natural parameter vector or symmetric matrix)
// Ether `m` for "Matrix" or `v` for "Vector"
const char param_type = Rf_isMatrix(_params) ? 'm' : 'v';
// In case of matrix parameters check for square matrix
if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) {
Rf_error("Invalid 'params' value, exected square matrix");
}
// Get problem dimension from parameter size
const size_t dim = (param_type == 'm')
? Rf_nrows(_params)
: invTriag(Rf_length(_params));
if (!dim) {
Rf_error("Error determining dimension.");
}
// Ensure parameters are numeric
if (!Rf_isReal(_params)) {
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
++protect_count;
}
double* params = REAL(_params);
// Allocate result sample
SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples));
++protect_count;
int* X = INTEGER(_X);
// Call appropriate sampling routine for every sample to generate
GetRNGstate();
if (param_type == 'm') {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]);
}
} else {
for (size_t sample = 0; sample < nr_samples; ++sample) {
ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
}
}
PutRNGstate();
// Release protected SEXPs to the garbage collector
UNPROTECT(protect_count);
return _X;
}
#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */

View File

@ -8,6 +8,51 @@
#define FCONE #define FCONE
#endif #endif
void mcrossprod(
const int rank,
const double* A, const int* dimA,
const double* B, const int* dimB,
const int mode,
double* C
) {
// the strides
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
// Note: Middle stride is ignored (to be consistent with sym version)
int stride[3] = {1, 0, 1};
for (int i = 0; i < rank; ++i) {
int size = dimA[i];
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
}
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: special case C = A_(1) B_(1)^T
// C = 1 A B^T + 0 C
F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2],
&one, A, &dimA[mode], B, &dimB[mode],
&zero, C, &dimA[mode] FCONE FCONE);
} else {
// Other modes writen as accumulated sum of matrix products
// initialize C to zero
memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double));
// Sum over all modes > mode
for (int i2 = 0; i2 < stride[2]; ++i2) {
// C = 1 A^T B + 1 C
F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0],
&one, &A[i2 * stride[0] * dimA[mode]], &stride[0],
&B[i2 * stride[0] * dimB[mode]], &stride[0],
&one, C, &dimA[mode] FCONE FCONE);
}
}
}
/** /**
* Tensor Mode Crossproduct * Tensor Mode Crossproduct
* *
@ -21,72 +66,58 @@
* @param B multi-dimensional array * @param B multi-dimensional array
* @param m mode index (1-indexed) * @param m mode index (1-indexed)
*/ */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) { extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
// get zero indexed mode // get zero indexed mode
int mode = asInteger(m) - 1; int mode = asInteger(m) - 1;
// get dimension attributes // Check if both `A` and `B` are real-valued
SEXP dimA = getAttrib(A, R_DimSymbol); if (!isReal(A) || !isReal(B)) {
SEXP dimB = getAttrib(B, R_DimSymbol); error("Type missmatch, both `A` and `B` must be real-valued");
}
// get dimension attributes
SEXP dimA_sexp = getAttrib(A, R_DimSymbol);
SEXP dimB_sexp = getAttrib(B, R_DimSymbol);
// dimension type validation
if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) {
error("Dimensions must be integer vectors");
}
// validate dimensions
int rank = length(dimA_sexp);
if (rank != length(dimB_sexp)) {
error("Dimension mismatch");
}
// validate mode (0-indexed, must be smaller than the tensor order) // validate mode (0-indexed, must be smaller than the tensor order)
if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) { if (mode < 0 || rank <= mode) {
error("Illegal mode"); error("Illegal mode");
} }
// the strides // get raw pointers to dimensions
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])` int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP));
// `stride[1] <- dim(A)[mode]` int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP));
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
// Note: Middle stride is ignored (to be consistent with sym version) // finaly, check for `A` and `B` dimensions to match
int stride[3] = {1, 0, 1}; for (int i = 0; i < rank; ++i) {
for (int i = 0; i < length(dimA); ++i) { if (i != mode && dimA[i] != dimB[i]) {
int size = INTEGER(dimA)[i]; error("Dimension mismatch");
stride[0] *= (i < mode) ? size : 1; }
stride[2] *= (i > mode) ? size : 1;
// check margin matching of `A` and `B`
if (i != mode && size != INTEGER(dimB)[i]) {
error("Dimension missmatch");
}
} }
// Result dimensions
int nrowC = INTEGER(dimA)[mode];
int ncolC = INTEGER(dimB)[mode];
// create response matrix C // create response matrix C
SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC)); SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode]));
// raw data access pointers // Call C mode crossprod subroutine
double* a = REAL(A); mcrossprod(
double* b = REAL(B); rank, // tensor rank of both `A` and `B`
double* c = REAL(C); REAL(A), dimA, // mem. addr. of A, dim(A)
REAL(B), dimB, // mem. addr. of B, dim(B)
mode, // the crossproduct mode to compute
REAL(C) // return value memory addr.
);
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation // release C to garbage collector
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) {
// mode 1: special case C = A_(1) B_(1)^T
// C = 1 A B^T + 0 C
F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2],
&one, a, &nrowC, b, &ncolC,
&zero, c, &nrowC FCONE FCONE);
} else {
// Other modes writen as accumulated sum of matrix products
// initialize C to zero
memset(c, 0, nrowC * ncolC * sizeof(double));
// Sum over all modes > mode
for (int i2 = 0; i2 < stride[2]; ++i2) {
// C = 1 A^T B + 1 C
F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0],
&one, &a[i2 * stride[0] * nrowC], &stride[0],
&b[i2 * stride[0] * ncolC], &stride[0],
&one, c, &nrowC FCONE FCONE);
}
}
// release C to grabage collector
UNPROTECT(1); UNPROTECT(1);
return C; return C;
@ -104,7 +135,7 @@ extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
* @param A multi-dimensional array * @param A multi-dimensional array
* @param m mode index (1-indexed) * @param m mode index (1-indexed)
*/ */
extern SEXP mcrossprod_sym(SEXP A, SEXP m) { extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) {
// get zero indexed mode // get zero indexed mode
int mode = asInteger(m) - 1; int mode = asInteger(m) - 1;

160
tensorPredictors/src/mlm.c Normal file
View File

@ -0,0 +1,160 @@
#include "R_api.h"
#include "ttm.h"
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
) {
// Compute total size of `A`, `C` and required working memory
int sizeA = 1; // `prod(dim(A))`
int sizeC = 1; // `prod(dim(C))`
int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included)
for (int i = 0; i < ord; ++i) {
sizeA *= dimA[i];
sizeC *= dimC[i];
work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i];
}
// In requesting working memory size stop here and return size
if (work_mem == NULL) {
return work_size + ord;
}
// Copy dimensions of A to intermediate temp. dim
int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer
memcpy(dimT, dimA, ord * sizeof(int));
// Setup work memory hooks (two swapable blocks)
double* tmp1 = work_mem;
double* tmp2 = work_mem + (work_size >> 1);
// Multi Linear Multiplication is an iterated application of TTM
for (int i = 0; i < nrhs; ++i) {
// Get current `B` dimensions (only implicitly given)
int nrowB = dimC[modes[i]];
int ncolB = dimA[modes[i]];
if (trans && trans[i]) {
nrowB = dimA[modes[i]];
ncolB = dimC[modes[i]];
}
// Tensor Times Matrix (`modes[i]` mode products)
ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB,
1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0,
tmp1);
// Update intermediate temp dim
dimT[modes[i]] = dimC[modes[i]];
// Swap tmp1 <-> tmp2
double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
}
// dAXPY (a x + y) with `x = tmp2` and `y = beta C`
if (beta != 0.0) {
for (int i = 0; i < sizeC; ++i) {
C[i] *= beta;
}
} else {
memset(C, 0, sizeC * sizeof(double));
}
axpy(sizeC, alpha, tmp2, 1, C, 1);
return 0;
}
/**
* Multi Linear Multiplication (`R` binding)
*/
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) {
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
int ord = Rf_length(dimA);
// Check if `A` is a real valued tensor
if (!Rf_isReal(A) || Rf_isNull(dimA)) {
Rf_error("Param. `A` need to be a real valued array");
}
// Validate that `Bs` is a list
if (!Rf_isNewList(Bs)) {
Rf_error("Param. `Bs` need to be a list of matrices");
}
if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) {
Rf_error("Param. type missmatch, expected modes as ints and ops logical");
}
// Number of `B` matrices (Nr of Right Hand Side objects)
int nrhs = Rf_length(Bs);
// further parameter validations
if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) {
Rf_error("Dimension missmatch (Params length differ)");
}
// Get modes and operations for Bs (transposed or not)
int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms`
int* trans = INTEGER(ops);
// Compute result dimensions `dim(C)` while extracting `B` data pointers
SEXP dimC = PROTECT(Rf_duplicate(dimA));
const double** bs = (const double**)R_alloc(nrhs, sizeof(double*));
for (int i = 0; i < nrhs; ++i) {
// Extract right hand side matrices (`B` matrices from list)
SEXP B = VECTOR_ELT(Bs, i);
if (!(Rf_isMatrix(B) && Rf_isReal(B))) {
UNPROTECT(1);
Rf_error("Param. `Bs` need to be a list of real matrices");
}
int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol));
bs[i] = REAL(B);
// Convert 1-indexed modes to be 0-indexed
modes[i] = INTEGER(ms)[i] - 1;
// Check if mode is out or range (indexing a non-existing mode of `A`)
if (!((0 <= modes[i]) && (modes[i] < ord))) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1);
}
// Check if `i`th mode of `A` matches corresponding `B` dimension
if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) {
UNPROTECT(1);
Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1);
}
INTEGER(dimC)[modes[i]] = dimB[!!trans[i]];
}
// Now, compute `C`s size `prod(dim(C))`
int sizeC = 1;
for (int i = 0; i < ord; ++i) {
sizeC *= INTEGER(dimC)[i];
}
// Allocate response `C`
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
Rf_setAttrib(C, R_DimSymbol, dimC);
// allocate working memory size
int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL);
double* work_memory = (double*)R_alloc(work_size, sizeof(double));
// Compute Multi-Linear Multiplication (call subroutine)
(void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory);
// release C to the hands of the garbage collector
UNPROTECT(2);
return C;
}

View File

@ -0,0 +1,35 @@
#ifndef INCLUDE_GUARD_MLM_H
#define INCLUDE_GUARD_MLM_H
/**
* Multi Linear Multiplication
*
* C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs] op(Bs[nrhs]) + beta C
*
* @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix
* is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff
* `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is
* always the identity.
* @param modes integer vector of length `nrhs` specifying the product modes.
*
* @todo TODO: continue doc. !!!
*
* @param alpha scaling factor
* @param beta scaling factor
* @param C output memory addr.
*
* @param work_mem NULL or temporary working memory of size
* `2 * prod(pmax(dim(A), dim(C))) + ord`
*/
int mlm(
/* options */ const int* trans, const int* modes, const int nrhs,
/* dims */ const int* dimA, const int* dimC, const int ord,
/* scalars */ const double alpha,
/* tensor */ const double* A,
/* matrices */ const double** Bs, const int* ldBs,
/* scalar */ const double beta,
/* tensor */ double* C,
double* work_mem
);
#endif /* INCLUDE_GUARD_MLM_H */

View File

@ -0,0 +1,37 @@
// /**
// * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
// *
// * For single threaded operations the PRNG provided by `R` are prefered. But they
// * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
// * application.
// *
// * See TODO: ...https://en.wikipedia.org/wiki/Xorshift
// * SchachHoernchen
// */
// #ifndef INCLUDE_GUARD_RANDOM_H
// #define INCLUDE_GUARD_RANDOM_H
// #include <stdint.h> // uint32_t, uint64_t
// static inline uint64_t rot64(uint64_t val, int shift) {
// return (val << shift) | (val >> (64 - shift));
// }
// // PRNG of the Xorshift family
// // @note the least significant 32 bits are not reliable, use most significant 32 bits
// static inline uint64_t rand_u64(uint64_t seed[4]) {
// uint64_t e = seed[0] - rot64(seed[1], 7);
// seed[0] = seed[1] ^ rot64(seed[1], 13);
// seed[1] = seed[2] + rot64(seed[3], 37);
// seed[2] = seed[3] + e;
// seed[3] = e + seed[0];
// return seed[3];
// }
// static inline double unif_rand_u64(uint64_t seed[4]) {
// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
// }
// #endif

112
tensorPredictors/src/svd.c Normal file
View File

@ -0,0 +1,112 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#include <R_ext/Lapack.h>
#ifndef FCONE
#define FCONE
#endif
// Singular Value Decomposition
// @note assumes passed arguments to be "clean", "properly checked"
int svd(
const int p, const int q,
double* A, const int ldA,
double* d,
double* U, const int ldU,
double* Vt, const int ldVt,
double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0
) {
// LAPACK information return variable (in case of an error, LAPACK sets this
// to a non-zero value)
int info = 0;
// check if work memory is supplied, if _not_ query for appropriate size and
// allocate the memory
if (!work_mem || work_size < 1 || !i_work) {
// create (declare) work memory
i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int));
double tmp; // variable to store work memory size query result
work_mem = &tmp; // point to `tmp` as it will contain the result of
// the work memory size query
// request appropriate work memory size
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
// allocate work memory
work_size = (int)tmp; // "read" work memory size query result
work_mem = (double*)R_alloc(work_size, sizeof(double));
// in case of an error, return error code stored in `info`
if (info) { return info; }
}
// actual SVD computation
F77_CALL(dgesdd)(
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
work_mem, &work_size, i_work, &info
FCONE);
return info;
}
// Singular Valued Decomposition (R binding)
// @note educational purpose, same as `La.svd`
extern SEXP R_svd(SEXP A) {
// Check if we got a real-valued matrix
if (!isMatrix(A) || !isReal(A)) {
error("Require a real-valued matrix");
}
// extract matrix dimensions
int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP));
int p = dims[0]; // = nrow(A)
int q = dims[1]; // = ncol(A)
int m = p < q ? p : q; // = min(p, q) = min(dim(A))
// Check if dimensions are not degenerate
if (p < 1 || q < 1) {
error("Expected positive matrix dimensions");
}
// create R objects to store the SVD result `A = U diag(d) V^T`
SEXP U = PROTECT(allocMatrix(REALSXP, p, m));
SEXP d = PROTECT(allocVector(REALSXP, m));
SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q));
// Call C SVD routine
int info = svd(
p, q, // nrow(A), ncol(A)
REAL(A), p, // mem. addr. of A, ldA (leading dimension of A)
REAL(d), // mem. addr. of d
REAL(U), p, // mem. addr. of U, ldU (leading dimension of U)
REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T)
0, -1, 0 // work mem. pointer, work mem. size and int work mem
); // set to `0, -1, 0` to indicate "unknown"
// Check LAPACK info
if (info) {
error("error code %d from LAPACK routine 'dgesdd'", info);
}
// Create R list containint SVD components
SEXP result = PROTECT(allocVector(VECSXP, 3));
SEXP names = PROTECT(allocVector(STRSXP, 3));
SET_VECTOR_ELT(result, 0, d);
SET_VECTOR_ELT(result, 1, U);
SET_VECTOR_ELT(result, 2, Vt);
SET_STRING_ELT(names, 0, mkChar("d"));
SET_STRING_ELT(names, 1, mkChar("u"));
SET_STRING_ELT(names, 2, mkChar("vt"));
setAttrib(result, R_NamesSymbol, names);
// Release created objects to the garbage collector
UNPROTECT(5);
return result;
}

View File

@ -1,104 +1,48 @@
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string #include "ttm.h"
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
#define USE_FC_LEN_T
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
#define R_NO_REMAP
#include <R.h>
#include <Rinternals.h>
#include <R_ext/BLAS.h>
#ifndef FCONE
#define FCONE
#endif
/** void ttm(
* Tensor Times Matrix a.k.a. Mode Product const int transB, const int mode,
* const int* dimA, const int ordA, const int nrowB, const int ncolB,
* @param A multi-dimensional array const double alpha,
* @param B matrix const double* A,
* @param m mode index (1-indexed) const double* B, const int ldB, // TODO: ldB is IGNORED!!!
* @param op boolean if `B` is transposed const double beta,
*/ double* C
extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) { ) {
// get zero indexed mode // Strides are the "leading" and "trailing" dimensions of the matricized
const int mode = Rf_asInteger(m) - 1; // tensor `A` in the following matrix-matrix multiplications
// get dimension attribute of A
SEXP dim = Rf_getAttrib(A, R_DimSymbol);
// operation on `B` (transposed or not)
const int trans = Rf_asLogical(op);
// as well as `B`s dimensions
const int nrow = Rf_nrows(B);
const int ncol = Rf_ncols(B);
// validate mode (mode must be smaller than the nr of dimensions)
if (mode < 0 || Rf_length(dim) <= mode) {
Rf_error("Illegal mode");
}
// and check if B is a matrix of non degenetate size
if (!Rf_isMatrix(B)) {
Rf_error("Expected a matrix as second argument");
}
if (!Rf_nrows(B) || !Rf_ncols(B)) {
Rf_error("Zero dimension detected");
}
// check matching of dimensions
if (INTEGER(dim)[mode] != (trans ? nrow : ncol)) {
Rf_error("Dimension missmatch");
}
// calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C),
int sizeC = 1;
// and the strides
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])` // `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
// `stride[1] <- dim(A)[mode]` // `stride[1] <- dim(A)[mode]`
// `stride[2] <- prod(dim(A)[-seq_len(mode)])` // `stride[2] <- prod(dim(A)[-seq_len(mode)])`
int stride[3] = {1, INTEGER(dim)[mode], 1}; int stride[3] = {1, dimA[mode], 1};
for (int i = 0; i < Rf_length(dim); ++i) { for (int i = 0; i < ordA; ++i) {
int size = INTEGER(dim)[i]; stride[0] *= (i < mode) ? dimA[i] : 1;
// check for non-degenetate dimensions stride[2] *= (i > mode) ? dimA[i] : 1;
if (!size) {
Rf_error("Zero dimension detected");
}
sizeC *= (i == mode) ? (trans ? ncol : nrow) : size;
stride[0] *= (i < mode) ? size : 1;
stride[2] *= (i > mode) ? size : 1;
} }
// create response object C
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
// raw data access pointers
double* a = REAL(A);
double* b = REAL(B);
double* c = REAL(C);
// Tensor Times Matrix / Mode Product
const double zero = 0.0;
const double one = 1.0;
if (mode == 0) { if (mode == 0) {
// mode 1: (A x_1 op(B))_(1) = op(B) A_(1) as a single Matrix-Matrix // mode 1: C = alpha (A x_1 op(B))_(1) + beta C
// multiplication // = alpha op(B) A_(1) + beta C
F77_CALL(dgemm)(trans ? "T" : "N", "N", // as a single Matrix-Matrix multiplication
(trans ? &ncol : &nrow), &stride[2], &stride[1], &one, F77_CALL(dgemm)(transB ? "T" : "N", "N",
b, &nrow, a, &stride[1], (transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha,
&zero, c, (trans ? &ncol : &nrow) B, &nrowB, A, &stride[1],
&beta, C, (transB ? &ncolB : &nrowB)
FCONE FCONE); FCONE FCONE);
} else { } else {
// Other modes can be written as blocks of matrix multiplications // Other modes can be written as blocks of matrix multiplications
// (A x_m op(B))_(m)' = A_(m)' op(B)' // C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2
// = alpha A_(m)' op(B)' + beta C_:,:,i2
for (int i2 = 0; i2 < stride[2]; ++i2) { for (int i2 = 0; i2 < stride[2]; ++i2) {
F77_CALL(dgemm)("N", trans ? "N" : "T", F77_CALL(dgemm)("N", transB ? "N" : "T",
&stride[0], (trans ? &ncol : &nrow), &stride[1], &one, &stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha,
&a[i2 * stride[0] * stride[1]], &stride[0], b, &nrow, &A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB,
&zero, &c[i2 * stride[0] * (trans ? ncol : nrow)], &stride[0] &beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0]
FCONE FCONE); FCONE FCONE);
} }
} }
/* /*
// (reference implementation) // (reference implementation)
// Tensor Times Matrix / Mode Product for `op(B) == B` // Tensor Times Matrix / Mode Product for `op(B) == B`
@ -114,13 +58,76 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
} }
} }
*/ */
}
/**
* Tensor Times Matrix a.k.a. Mode Product
*
* @param A multi-dimensional array
* @param B matrix
* @param m mode index (1-indexed)
* @param op boolean if `B` is transposed
*/
extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
// get zero indexed mode
const int mode = Rf_asInteger(m) - 1;
// get dimension attribute of A
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
// operation on `B` (transposed or not)
const int transB = Rf_asLogical(op);
// as well as `B`s dimensions
const int nrowB = Rf_nrows(B);
const int ncolB = Rf_ncols(B);
// validate mode (mode must be smaller than the nr of dimensions)
if (mode < 0 || Rf_length(dimA) <= mode) {
Rf_error("Illegal mode");
}
// and check if B is a matrix of non degenetate size
if (!Rf_isMatrix(B)) {
Rf_error("Expected a matrix as second argument");
}
if (!Rf_nrows(B) || !Rf_ncols(B)) {
Rf_error("Zero dimension detected");
}
// check matching of dimensions
if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) {
Rf_error("Dimension missmatch");
}
// calc nr of response elements (size of C)
// `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)`
int sizeC = 1;
for (int i = 0; i < Rf_length(dimA); ++i) {
int size = INTEGER(dimA)[i];
// check for non-degenetate dimensions
if (!size) {
Rf_error("Zero dimension detected");
}
sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size;
}
// create response object C
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
// Tensor Times Matrix / Mode Product
ttm(transB, mode,
INTEGER(dimA), Rf_length(dimA), nrowB, ncolB,
1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C));
// finally, set result dimensions // finally, set result dimensions
SEXP newdim = PROTECT(Rf_allocVector(INTSXP, Rf_length(dim))); SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA)));
for (int i = 0; i < Rf_length(dim); ++i) { for (int i = 0; i < Rf_length(dimA); ++i) {
INTEGER(newdim)[i] = (i == mode) ? (trans ? ncol : nrow) : INTEGER(dim)[i]; INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i];
} }
Rf_setAttrib(C, R_DimSymbol, newdim); Rf_setAttrib(C, R_DimSymbol, dimC);
// release C to the hands of the garbage collector // release C to the hands of the garbage collector
UNPROTECT(2); UNPROTECT(2);

View File

@ -0,0 +1,21 @@
#ifndef INCLUDE_GUARD_TTM_H
#define INCLUDE_GUARD_TTM_H
#include "R_api.h"
/**
* Tensor Times Matrix Subroutine
*
* @attention Assumes all parameters to be correct!
*/
void ttm(
/* options */ const int transB, const int mode,
/* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB,
/* scalar */ const double alpha,
/* tensor */ const double* A,
/* matrix */ const double* B, const int ldB,
/* scalar */ const double beta,
/* tensor */ double* C
);
#endif /* INCLUDE_GUARD_TTM_H */