Compare commits
No commits in common. "399f878fbb7733bc3c5639e5da0117d879ad8929" and "40132c25658b8950129c4749eba109b62e5c2d1e" have entirely different histories.
399f878fbb
...
40132c2565
218
LaTeX/GMLM.tex
218
LaTeX/GMLM.tex
|
@ -90,11 +90,10 @@
|
||||||
%%% Custom operators with ether one or two arguments (limits)
|
%%% Custom operators with ether one or two arguments (limits)
|
||||||
\makeatletter
|
\makeatletter
|
||||||
%%% Multi-Linear Multiplication
|
%%% Multi-Linear Multiplication
|
||||||
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
|
|
||||||
% Save first argument as \arg@one
|
% Save first argument as \arg@one
|
||||||
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
|
\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i}
|
||||||
% Check for second argument
|
% Check for second argument
|
||||||
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
|
\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi}
|
||||||
% specialization for one or two arguments, both versions use saved first argument
|
% specialization for one or two arguments, both versions use saved first argument
|
||||||
\def\mlm@one{\mathchoice%
|
\def\mlm@one{\mathchoice%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
|
||||||
|
@ -102,31 +101,14 @@
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
||||||
}
|
}
|
||||||
% this commands single argument is the second argument of \mlm, it gobbles the `^`
|
% this commands single argument is the second argument of \mlm
|
||||||
\def\mlm@two^#1{\mathchoice%
|
\def\mlm@two#1{\mathchoice%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||||
}
|
}
|
||||||
|
|
||||||
%%% Big Circle (Iterated Outer Product)
|
|
||||||
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
|
|
||||||
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
|
|
||||||
\def\outer@one{\mathchoice%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
|
||||||
}
|
|
||||||
\def\outer@two#1{\mathchoice%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
|
||||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
%%% Big Kronecker Product (with overflowing limits)
|
%%% Big Kronecker Product (with overflowing limits)
|
||||||
% Save first argument as \arg@one
|
% Save first argument as \arg@one
|
||||||
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
|
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
|
||||||
|
@ -152,15 +134,17 @@
|
||||||
\newcommand{\algorithmicbreak}{\textbf{break}}
|
\newcommand{\algorithmicbreak}{\textbf{break}}
|
||||||
\newcommand{\Break}{\State \algorithmicbreak}
|
\newcommand{\Break}{\State \algorithmicbreak}
|
||||||
|
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
\maketitle
|
\maketitle
|
||||||
|
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
\begin{abstract}
|
\begin{abstract}
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
|
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons.
|
||||||
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
|
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
|
||||||
\end{abstract}
|
\end{abstract}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
@ -272,23 +256,23 @@ A straight forward idea for parameter estimation is to use Gradient Descent. For
|
||||||
\begin{algorithm}[ht]
|
\begin{algorithm}[ht]
|
||||||
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
|
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
|
||||||
\begin{algorithmic}[1]
|
\begin{algorithmic}[1]
|
||||||
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
|
\State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$
|
||||||
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
|
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
|
||||||
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
|
\State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
|
||||||
\\
|
\\
|
||||||
\State $t \leftarrow 1$
|
\State $t \leftarrow 1$
|
||||||
\Comment{step counter}
|
\Comment{step counter}
|
||||||
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
|
\State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$
|
||||||
\Comment{artificial first step}
|
\Comment{artificial first step}
|
||||||
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
|
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
|
||||||
\Comment{momentum extrapolation weights}
|
\Comment{momentum extrapolation weights}
|
||||||
\\
|
\\
|
||||||
\Repeat \Comment{repeat untill convergence}
|
\Repeat \Comment{repeat untill convergence}
|
||||||
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
|
\State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation}
|
||||||
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
|
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
|
||||||
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
|
\State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$
|
||||||
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
|
\If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
|
||||||
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
|
\State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$
|
||||||
\State $\delta^{(t + 1)} \leftarrow \delta$
|
\State $\delta^{(t + 1)} \leftarrow \delta$
|
||||||
\Break
|
\Break
|
||||||
\EndIf
|
\EndIf
|
||||||
|
@ -415,7 +399,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
|
||||||
|
|
||||||
\begin{figure}[!ht]
|
\begin{figure}[!ht]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
|
\includegraphics[width = \textwidth]{sim-normal-20221012.png}
|
||||||
\caption{\label{fig:sim-normal}Simulation Normal}
|
\caption{\label{fig:sim-normal}Simulation Normal}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
@ -423,7 +407,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
|
||||||
|
|
||||||
\begin{figure}[!ht]
|
\begin{figure}[!ht]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
|
\includegraphics[width = \textwidth]{sim-ising-small-20221012.png}
|
||||||
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
|
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
|
@ -449,7 +433,7 @@ where each individual block is given by
|
||||||
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
|
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
|
||||||
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by
|
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
|
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
with
|
with
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
|
@ -512,10 +496,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t
|
||||||
%
|
%
|
||||||
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
|
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
|
||||||
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
|
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
|
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
|
||||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
|
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
|
||||||
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||||
\end{align*}}%
|
\end{align*}}%
|
||||||
The next step is to identify the Hessians from the second differentials in a similar manner as befor.
|
The next step is to identify the Hessians from the second differentials in a similar manner as befor.
|
||||||
{\allowdisplaybreaks\begin{align*}
|
{\allowdisplaybreaks\begin{align*}
|
||||||
|
@ -525,62 +509,62 @@ The next step is to identify the Hessians from the second differentials in a sim
|
||||||
\qquad{\color{gray} (p \times p)}
|
\qquad{\color{gray} (p \times p)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
|
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
|
||||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
|
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
|
||||||
\qquad{\color{gray} (p_j q_j \times p)}
|
\qquad{\color{gray} (p_j q_j \times p)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
|
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
|
||||||
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
|
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
|
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
|
||||||
\qquad{\color{gray} (p_j^2 \times p)}
|
\qquad{\color{gray} (p_j^2 \times p)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\mat{\alpha}_j) \\
|
&\d^2 l(\mat{\alpha}_j) \\
|
||||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
|
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
|
||||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
|
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
|
||||||
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||||
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
|
||||||
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
|
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
|
||||||
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
|
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
|
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
|
||||||
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
|
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
|
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||||
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
|
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
|
||||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
|
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
|
||||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||||
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||||
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
|
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
|
||||||
&\qquad \begin{aligned}
|
&\qquad \begin{aligned}
|
||||||
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
|
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
|
||||||
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||||
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
|
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
|
||||||
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
|
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
|
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
|
||||||
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||||
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
|
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
|
||||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
|
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
|
||||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
|
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
|
||||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||||
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
|
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\mat{\Omega}_j) \\
|
&\d^2 l(\mat{\Omega}_j) \\
|
||||||
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||||
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
|
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||||
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
|
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
|
||||||
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||||
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
|
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
|
||||||
\\
|
\\
|
||||||
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
|
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
|
||||||
|
@ -589,13 +573,13 @@ The next step is to identify the Hessians from the second differentials in a sim
|
||||||
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||||
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
|
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
|
||||||
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||||
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||||
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||||
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||||
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
|
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
|
||||||
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||||
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||||
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
|
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{align*}}%
|
\end{align*}}%
|
||||||
|
@ -628,15 +612,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
|
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
|
||||||
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
|
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
|
||||||
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
|
&= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
|
||||||
\mathcal{I}_{1,j+r+1}
|
\mathcal{I}_{1,j+r+1}
|
||||||
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||||
\mathcal{I}_{j+1,l+1}
|
\mathcal{I}_{j+1,l+1}
|
||||||
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||||
\mathcal{I}_{j+1,l+r+1}
|
\mathcal{I}_{j+1,l+r+1}
|
||||||
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
|
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
|
||||||
\mathcal{I}_{j+r+1,l+r+1}
|
\mathcal{I}_{j+r+1,l+r+1}
|
||||||
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
|
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
|
||||||
\end{align*}
|
\end{align*}
|
||||||
|
|
||||||
|
|
||||||
|
@ -649,14 +633,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati
|
||||||
\begin{theorem}\label{thm:mlm_mat}
|
\begin{theorem}\label{thm:mlm_mat}
|
||||||
Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds
|
Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
|
\Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
|
||||||
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
|
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
|
|
||||||
A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
|
A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
|
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}.
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
|
Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
|
||||||
|
|
||||||
|
@ -729,37 +713,13 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
|
||||||
Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then
|
Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
|
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
|
||||||
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
|
\equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}.
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
\end{theorem}
|
\end{theorem}
|
||||||
|
|
||||||
A special case of above Theorem is given for tensors represented as a Kronecker product. Therefore, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
|
|
||||||
\begin{displaymath}
|
|
||||||
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
|
|
||||||
=
|
|
||||||
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
|
|
||||||
=
|
|
||||||
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
|
|
||||||
\end{displaymath}
|
|
||||||
In case of $r = 2$ this means
|
|
||||||
\begin{align*}
|
|
||||||
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
|
|
||||||
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
|
|
||||||
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
|
|
||||||
\end{align*}
|
|
||||||
|
|
||||||
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
|
|
||||||
\begin{displaymath}
|
|
||||||
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
|
|
||||||
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
|
|
||||||
\end{displaymath}
|
|
||||||
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$ .
|
|
||||||
|
|
||||||
\todo{continue}
|
\todo{continue}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
|
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
|
||||||
% \begin{displaymath}
|
% \begin{displaymath}
|
||||||
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
|
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
|
||||||
|
@ -920,19 +880,19 @@ which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\oti
|
||||||
\end{tikzpicture}
|
\end{tikzpicture}
|
||||||
\end{center}
|
\end{center}
|
||||||
|
|
||||||
% \newcommand{\somedrawing}{
|
\newcommand{\somedrawing} {
|
||||||
% \coordinate (a) at (-2,-2,-2);
|
\coordinate (a) at (-2,-2,-2);
|
||||||
% \coordinate (b) at (-2,-2,2);
|
\coordinate (b) at (-2,-2,2);
|
||||||
% \coordinate (c) at (-2,2,-2);
|
\coordinate (c) at (-2,2,-2);
|
||||||
% \coordinate (d) at (-2,2,2);
|
\coordinate (d) at (-2,2,2);
|
||||||
% \coordinate (e) at (2,-2,-2);
|
\coordinate (e) at (2,-2,-2);
|
||||||
% \coordinate (f) at (2,-2,2);
|
\coordinate (f) at (2,-2,2);
|
||||||
% \coordinate (g) at (2,2,-2);
|
\coordinate (g) at (2,2,-2);
|
||||||
% \coordinate (h) at (2,2,2);
|
\coordinate (h) at (2,2,2);
|
||||||
% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
|
\draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
|
||||||
% \fill (a) circle (0.1cm);
|
\fill (a) circle (0.1cm);
|
||||||
% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
|
\fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
|
||||||
% }
|
}
|
||||||
|
|
||||||
% \begin{figure}[ht!]
|
% \begin{figure}[ht!]
|
||||||
% \centering
|
% \centering
|
||||||
|
@ -1193,7 +1153,7 @@ where $\circ$ is the outer product. For example considure two matrices $\mat{A},
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
Let $f$ be an $r$ times differentiable function, then
|
Let $f$ be an $r$ times differentiable function, then
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
|
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}}
|
||||||
\qquad\Leftrightarrow\qquad
|
\qquad\Leftrightarrow\qquad
|
||||||
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
|
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
|
@ -1412,12 +1372,12 @@ The differentials up to the 4'th are
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
|
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
|
||||||
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
|
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
|
||||||
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
|
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\
|
||||||
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t}
|
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm{k = 1}{4} \d{t}
|
||||||
\end{align*}
|
\end{align*}
|
||||||
Using the differentials to derivative identification identity
|
Using the differentials to derivative identification identity
|
||||||
\begin{displaymath}
|
\begin{displaymath}
|
||||||
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
|
\d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t}
|
||||||
\qquad\Leftrightarrow\qquad
|
\qquad\Leftrightarrow\qquad
|
||||||
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
|
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
|
||||||
\end{displaymath}
|
\end{displaymath}
|
||||||
|
@ -1428,7 +1388,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati
|
||||||
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
|
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
|
||||||
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
|
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
|
||||||
\end{align*}
|
\end{align*}
|
||||||
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
|
which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$)
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
|
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
|
||||||
&= \Sigma \\
|
&= \Sigma \\
|
||||||
|
|
153
LaTeX/main.bib
153
LaTeX/main.bib
|
@ -9,17 +9,6 @@
|
||||||
publisher = {[Royal Statistical Society, Wiley]}
|
publisher = {[Royal Statistical Society, Wiley]}
|
||||||
}
|
}
|
||||||
|
|
||||||
@inproceedings{Nesterov1983,
|
|
||||||
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
|
|
||||||
author = {Nesterov, Yurii Evgen'evich},
|
|
||||||
booktitle = {Doklady Akademii Nauk},
|
|
||||||
volume = {269},
|
|
||||||
number = {3},
|
|
||||||
pages = {543--547},
|
|
||||||
year = {1983},
|
|
||||||
organization= {Russian Academy of Sciences}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{StatInf-CasellaBerger2002,
|
@book{StatInf-CasellaBerger2002,
|
||||||
title = {{Statistical Inference}},
|
title = {{Statistical Inference}},
|
||||||
author = {Casella, George and Berger, Roger L.},
|
author = {Casella, George and Berger, Roger L.},
|
||||||
|
@ -38,20 +27,6 @@
|
||||||
isbn = {0-471-98632-1}
|
isbn = {0-471-98632-1}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{SymMatandJacobians-MagnusNeudecker1986,
|
|
||||||
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
|
|
||||||
author = {Magnus, Jan R. and Neudecker, Heinz},
|
|
||||||
ISSN = {02664666, 14694360},
|
|
||||||
URL = {http://www.jstor.org/stable/3532421},
|
|
||||||
journal = {Econometric Theory},
|
|
||||||
number = {2},
|
|
||||||
pages = {157--190},
|
|
||||||
publisher = {Cambridge University Press},
|
|
||||||
urldate = {2023-10-03},
|
|
||||||
volume = {2},
|
|
||||||
year = {1986}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{MatrixAlgebra-AbadirMagnus2005,
|
@book{MatrixAlgebra-AbadirMagnus2005,
|
||||||
title = {Matrix Algebra},
|
title = {Matrix Algebra},
|
||||||
author = {Abadir, Karim M. and Magnus, Jan R.},
|
author = {Abadir, Karim M. and Magnus, Jan R.},
|
||||||
|
@ -108,31 +83,6 @@
|
||||||
doi = {10.1080/01621459.2015.1093944}
|
doi = {10.1080/01621459.2015.1093944}
|
||||||
}
|
}
|
||||||
|
|
||||||
@article{FisherLectures-Cook2007,
|
|
||||||
author = {Cook, R. Dennis},
|
|
||||||
journal = {Statistical Science},
|
|
||||||
month = {02},
|
|
||||||
number = {1},
|
|
||||||
pages = {1--26},
|
|
||||||
publisher = {The Institute of Mathematical Statistics},
|
|
||||||
title = {{Fisher Lecture: Dimension Reduction in Regression}},
|
|
||||||
volume = {22},
|
|
||||||
year = {2007},
|
|
||||||
doi = {10.1214/088342306000000682}
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{asymptoticMLE-BuraEtAl2018,
|
|
||||||
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued},
|
|
||||||
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
|
|
||||||
journal = {Statistics},
|
|
||||||
volume = {52},
|
|
||||||
number = {5},
|
|
||||||
pages = {1005-1024},
|
|
||||||
year = {2018},
|
|
||||||
publisher = {Taylor \& Francis},
|
|
||||||
doi = {10.1080/02331888.2018.1467420},
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{tsir-DingCook2015,
|
@article{tsir-DingCook2015,
|
||||||
author = {Shanshan Ding and R. Dennis Cook},
|
author = {Shanshan Ding and R. Dennis Cook},
|
||||||
title = {Tensor sliced inverse regression},
|
title = {Tensor sliced inverse regression},
|
||||||
|
@ -167,106 +117,3 @@
|
||||||
isbn = {978-94-015-8196-7},
|
isbn = {978-94-015-8196-7},
|
||||||
doi = {10.1007/978-94-015-8196-7_17}
|
doi = {10.1007/978-94-015-8196-7_17}
|
||||||
}
|
}
|
||||||
|
|
||||||
@book{asymStats-van_der_Vaart1998,
|
|
||||||
title = {Asymptotic Statistics},
|
|
||||||
author = {{van der Vaart}, A.W.},
|
|
||||||
series = {Asymptotic Statistics},
|
|
||||||
year = {1998},
|
|
||||||
publisher = {Cambridge University Press},
|
|
||||||
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
|
|
||||||
isbn = {0-521-49603-9}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{measureTheory-Kusolitsch2011,
|
|
||||||
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
|
|
||||||
subtitle = {{E}ine {E}inf{\"u}hrung},
|
|
||||||
author = {Kusolitsch, Norbert},
|
|
||||||
series = {Springer-Lehrbuch},
|
|
||||||
year = {2011},
|
|
||||||
publisher = {Springer Vienna},
|
|
||||||
isbn = {978-3-7091-0684-6},
|
|
||||||
doi = {10.1007/978-3-7091-0685-3}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{optimMatrixMani-AbsilEtAl2007,
|
|
||||||
title = {{Optimization Algorithms on Matrix Manifolds}},
|
|
||||||
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
|
|
||||||
year = {2007},
|
|
||||||
publisher = {Princeton University Press},
|
|
||||||
isbn = {9780691132983},
|
|
||||||
note = {Full Online Text \url{https://press.princeton.edu/absil}}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
|
|
||||||
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
|
|
||||||
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
|
|
||||||
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
|
|
||||||
bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
|
|
||||||
year = {2020},
|
|
||||||
publisher = {Springer International Publishing},
|
|
||||||
address = {Cham},
|
|
||||||
pages = {261--313},
|
|
||||||
isbn = {978-3-030-31351-7},
|
|
||||||
doi = {10.1007/978-3-030-31351-7_9}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{introToSmoothMani-Lee2012,
|
|
||||||
title = {Introduction to Smooth Manifolds},
|
|
||||||
author = {Lee, John M.},
|
|
||||||
year = {2012},
|
|
||||||
journal = {Graduate Texts in Mathematics},
|
|
||||||
publisher = {Springer New York},
|
|
||||||
doi = {10.1007/978-1-4419-9982-5}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{introToRiemannianMani-Lee2018,
|
|
||||||
title = {Introduction to Riemannian Manifolds},
|
|
||||||
author = {Lee, John M.},
|
|
||||||
year = {2018},
|
|
||||||
journal = {Graduate Texts in Mathematics},
|
|
||||||
publisher = {Springer International Publishing},
|
|
||||||
doi = {10.1007/978-3-319-91755-9}
|
|
||||||
}
|
|
||||||
|
|
||||||
@misc{MLEonManifolds-HajriEtAl2017,
|
|
||||||
title = {Maximum Likelihood Estimators on Manifolds},
|
|
||||||
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
|
|
||||||
year = {2017},
|
|
||||||
journal = {Lecture Notes in Computer Science},
|
|
||||||
publisher = {Springer International Publishing},
|
|
||||||
pages = {692-700},
|
|
||||||
doi = {10.1007/978-3-319-68445-1_80}
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{relativity-Einstain1916,
|
|
||||||
author = {Einstein, Albert},
|
|
||||||
title = {Die Grundlage der allgemeinen Relativitätstheorie},
|
|
||||||
year = {1916},
|
|
||||||
journal = {Annalen der Physik},
|
|
||||||
volume = {354},
|
|
||||||
number = {7},
|
|
||||||
pages = {769-822},
|
|
||||||
doi = {10.1002/andp.19163540702}
|
|
||||||
}
|
|
||||||
|
|
||||||
@article{MultilinearOperators-Kolda2006,
|
|
||||||
title = {Multilinear operators for higher-order decompositions.},
|
|
||||||
author = {Kolda, Tamara Gibson},
|
|
||||||
doi = {10.2172/923081},
|
|
||||||
url = {https://www.osti.gov/biblio/923081},
|
|
||||||
place = {United States},
|
|
||||||
year = {2006},
|
|
||||||
month = {4},
|
|
||||||
type = {Technical Report}
|
|
||||||
}
|
|
||||||
|
|
||||||
@book{aufbauAnalysis-kaltenbaeck2021,
|
|
||||||
title = {Aufbau Analysis},
|
|
||||||
author = {Kaltenb\"ack, Michael},
|
|
||||||
isbn = {978-3-88538-127-3},
|
|
||||||
series = {Berliner Studienreihe zur Mathematik},
|
|
||||||
edition = {27},
|
|
||||||
year = {2021},
|
|
||||||
publisher = {Heldermann Verlag}
|
|
||||||
}
|
|
||||||
|
|
1479
LaTeX/paper.tex
1479
LaTeX/paper.tex
File diff suppressed because it is too large
Load Diff
|
@ -1,151 +0,0 @@
|
||||||
%%% R code to generate the input data files from corresponding simulation logs
|
|
||||||
% R> setwd("~/Work/tensorPredictors")
|
|
||||||
% R>
|
|
||||||
% R> for (sim.name in c("2a")) {
|
|
||||||
% R> pattern <- paste0("sim\\_", sim.name, "\\_ising\\-[0-9T]*\\.csv")
|
|
||||||
% R> log.file <- sort(
|
|
||||||
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
|
|
||||||
% R> decreasing = TRUE
|
|
||||||
% R> )[[1]]
|
|
||||||
% R>
|
|
||||||
% R> sim <- read.csv(log.file)
|
|
||||||
% R>
|
|
||||||
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
|
|
||||||
% R>
|
|
||||||
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-ising.csv"), row.names = FALSE, quote = FALSE)
|
|
||||||
% R> }
|
|
||||||
\documentclass[border=0cm]{standalone}
|
|
||||||
|
|
||||||
\usepackage{tikz}
|
|
||||||
\usepackage{pgfplots}
|
|
||||||
\usepackage{bm}
|
|
||||||
|
|
||||||
\definecolor{gmlm}{RGB}{0,0,0}
|
|
||||||
\definecolor{mgcca}{RGB}{86,180,233}
|
|
||||||
\definecolor{tsir}{RGB}{0,158,115}
|
|
||||||
\definecolor{pca}{RGB}{240,228,66}
|
|
||||||
\definecolor{hopca}{RGB}{230,159,0}
|
|
||||||
\definecolor{lpca}{RGB}{127,127,127}
|
|
||||||
\definecolor{clpca}{RGB}{191,191,191}
|
|
||||||
|
|
||||||
\pgfplotsset{
|
|
||||||
every axis/.style={
|
|
||||||
xtick={100,200,300,500,750},
|
|
||||||
ymin=-0.05, ymax=1.05,
|
|
||||||
grid=both,
|
|
||||||
grid style={gray, dotted}
|
|
||||||
},
|
|
||||||
every axis plot/.append style={
|
|
||||||
mark = *,
|
|
||||||
mark size = 1pt,
|
|
||||||
line width=0.8pt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
\tikzset{
|
|
||||||
legend entry/.style={
|
|
||||||
mark = *,
|
|
||||||
mark size = 1pt,
|
|
||||||
mark indices = {2},
|
|
||||||
line width=0.8pt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
\begin{document}
|
|
||||||
\begin{tikzpicture}[>=latex]
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-2a
|
|
||||||
]
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-2a-ising.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-2a-ising.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-2a-ising.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-2a-ising.csv};
|
|
||||||
\addplot[color = lpca] table[x = sample.size, y = dist.subspace.lpca] {aggr-2a-ising.csv};
|
|
||||||
\addplot[color = clpca] table[x = sample.size, y = dist.subspace.clpca] {aggr-2a-ising.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-2a.north west) {
|
|
||||||
a: small
|
|
||||||
};
|
|
||||||
|
|
||||||
% \begin{axis}[
|
|
||||||
% name=sim-1b,
|
|
||||||
% anchor=north west, at={(sim-2a.right of north east)}, xshift=0.1cm,
|
|
||||||
% xticklabel=\empty,
|
|
||||||
% ylabel near ticks, yticklabel pos=right
|
|
||||||
% ]
|
|
||||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
|
|
||||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
|
|
||||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
|
|
||||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
|
|
||||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
|
|
||||||
% \end{axis}
|
|
||||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
|
|
||||||
% b: cubic dependence on $y$
|
|
||||||
% };
|
|
||||||
|
|
||||||
% \begin{axis}[
|
|
||||||
% name=sim-1c,
|
|
||||||
% anchor=north west, at={(sim-2a.below south west)}, yshift=-.8em,
|
|
||||||
% xticklabel=\empty
|
|
||||||
% ]
|
|
||||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
|
|
||||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
|
|
||||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
|
|
||||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
|
|
||||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
|
|
||||||
% \end{axis}
|
|
||||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
|
|
||||||
% c: rank $1$ $\boldsymbol{\beta}$'s
|
|
||||||
% };
|
|
||||||
|
|
||||||
% \begin{axis}[
|
|
||||||
% name=sim-1d,
|
|
||||||
% anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
|
|
||||||
% ylabel near ticks, yticklabel pos=right
|
|
||||||
% ]
|
|
||||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
|
|
||||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
|
|
||||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
|
|
||||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
|
|
||||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
|
|
||||||
% \end{axis}
|
|
||||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
|
|
||||||
% d: tri-diagonal $\boldsymbol{\Omega}$'s
|
|
||||||
% };
|
|
||||||
|
|
||||||
% \begin{axis}[
|
|
||||||
% name=sim-1e,
|
|
||||||
% anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
|
|
||||||
% ]
|
|
||||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
|
|
||||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
|
|
||||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
|
|
||||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
|
|
||||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
|
|
||||||
% \end{axis}
|
|
||||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
|
|
||||||
% e: missspecified
|
|
||||||
% };
|
|
||||||
|
|
||||||
|
|
||||||
\matrix[anchor = west] at (sim-2a.right of east) {
|
|
||||||
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
|
|
||||||
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
|
|
||||||
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
|
|
||||||
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
|
|
||||||
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
|
|
||||||
\draw[color=lpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {LPCA}; \\
|
|
||||||
\draw[color=clpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {CLPCA}; \\
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
|
|
||||||
|
|
||||||
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
|
|
||||||
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
|
|
||||||
|
|
||||||
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
|
|
||||||
|
|
||||||
\end{tikzpicture}
|
|
||||||
\end{document}
|
|
|
@ -1,148 +0,0 @@
|
||||||
%%% R code to generate the input data files from corresponding simulation logs
|
|
||||||
% R> setwd("~/Work/tensorPredictors")
|
|
||||||
% R>
|
|
||||||
% R> for (sim.name in c("1a", "1b", "1c", "1d", "1e")) {
|
|
||||||
% R> pattern <- paste0("sim\\_", sim.name, "\\_normal\\-[0-9T]*\\.csv")
|
|
||||||
% R> log.file <- sort(
|
|
||||||
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
|
|
||||||
% R> decreasing = TRUE
|
|
||||||
% R> )[[1]]
|
|
||||||
% R>
|
|
||||||
% R> sim <- read.csv(log.file)
|
|
||||||
% R>
|
|
||||||
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
|
|
||||||
% R>
|
|
||||||
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-normal.csv"), row.names = FALSE, quote = FALSE)
|
|
||||||
% R> }
|
|
||||||
\documentclass[border=0cm]{standalone}
|
|
||||||
|
|
||||||
\usepackage{tikz}
|
|
||||||
\usepackage{pgfplots}
|
|
||||||
\usepackage{bm}
|
|
||||||
|
|
||||||
\definecolor{gmlm}{RGB}{0,0,0}
|
|
||||||
\definecolor{mgcca}{RGB}{86,180,233}
|
|
||||||
\definecolor{tsir}{RGB}{0,158,115}
|
|
||||||
\definecolor{hopca}{RGB}{230,159,0}
|
|
||||||
\definecolor{pca}{RGB}{240,228,66}
|
|
||||||
\definecolor{lpca}{RGB}{0,114,178}
|
|
||||||
\definecolor{clpca}{RGB}{213,94,0}
|
|
||||||
|
|
||||||
\pgfplotsset{
|
|
||||||
every axis/.style={
|
|
||||||
xtick={100,200,300,500,750},
|
|
||||||
ymin=-0.05, ymax=1.05,
|
|
||||||
grid=both,
|
|
||||||
grid style={gray, dotted}
|
|
||||||
},
|
|
||||||
every axis plot/.append style={
|
|
||||||
mark = *,
|
|
||||||
mark size = 1pt,
|
|
||||||
line width=0.8pt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
\tikzset{
|
|
||||||
legend entry/.style={
|
|
||||||
mark = *,
|
|
||||||
mark size = 1pt,
|
|
||||||
mark indices = {2},
|
|
||||||
line width=0.8pt
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
\begin{document}
|
|
||||||
\begin{tikzpicture}[>=latex]
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-1a,
|
|
||||||
xticklabel=\empty
|
|
||||||
]
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1a-normal.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1a-normal.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1a-normal.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1a-normal.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-1a.north west) {
|
|
||||||
a: linear dependence on $\mathcal{F}_y \equiv y$
|
|
||||||
};
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-1b,
|
|
||||||
anchor=north west, at={(sim-1a.right of north east)}, xshift=0.1cm,
|
|
||||||
xticklabel=\empty,
|
|
||||||
ylabel near ticks, yticklabel pos=right
|
|
||||||
]
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
|
|
||||||
b: cubic dependence on $y$
|
|
||||||
};
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-1c,
|
|
||||||
anchor=north west, at={(sim-1a.below south west)}, yshift=-.8em,
|
|
||||||
xticklabel=\empty
|
|
||||||
]
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
|
|
||||||
c: rank $1$ $\boldsymbol{\beta}$'s
|
|
||||||
};
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-1d,
|
|
||||||
anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
|
|
||||||
ylabel near ticks, yticklabel pos=right
|
|
||||||
]
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
|
|
||||||
d: tri-diagonal $\boldsymbol{\Omega}$'s
|
|
||||||
};
|
|
||||||
|
|
||||||
\begin{axis}[
|
|
||||||
name=sim-1e,
|
|
||||||
anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
|
|
||||||
]
|
|
||||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
|
|
||||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
|
|
||||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
|
|
||||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
|
|
||||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
|
|
||||||
\end{axis}
|
|
||||||
\node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
|
|
||||||
e: missspecified
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
\matrix[anchor = center] at (sim-1e.right of east -| sim-1d.south) {
|
|
||||||
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
|
|
||||||
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
|
|
||||||
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
|
|
||||||
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
|
|
||||||
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
|
|
||||||
|
|
||||||
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
|
|
||||||
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
|
|
||||||
|
|
||||||
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
|
|
||||||
|
|
||||||
\end{tikzpicture}
|
|
||||||
\end{document}
|
|
|
@ -23,7 +23,7 @@
|
||||||
*
|
*
|
||||||
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
|
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
|
||||||
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
|
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
|
||||||
* `T` has the same dimensions as the parameter vector given by
|
* `T` has the same dimensions as a binary vector given by
|
||||||
*
|
*
|
||||||
* T(y) = vech(y y').
|
* T(y) = vech(y y').
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,204 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
library(mvbernoulli)
|
||||||
|
|
||||||
|
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
file.prefix <- "sim-ising"
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
max.iter <- 100 # maximum number of iterations for GMLM
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
|
||||||
|
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
|
||||||
|
r <- length(p)
|
||||||
|
# parameter configuration
|
||||||
|
rho <- -0.55
|
||||||
|
c1 <- 1
|
||||||
|
c2 <- 1
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
r == 2
|
||||||
|
all.equal(p, c(4, 4))
|
||||||
|
all.equal(q, c(2, 2))
|
||||||
|
})
|
||||||
|
|
||||||
|
### small helpers
|
||||||
|
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||||
|
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||||
|
# Auto-Regression Covariance Matrix
|
||||||
|
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||||
|
# Inverse of the AR matrix
|
||||||
|
AR.inv <- function(rho, dim) {
|
||||||
|
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||||
|
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||||
|
A / (1 - rho^2)
|
||||||
|
}
|
||||||
|
# projection matrix `P_A` as a projection onto the span of `A`
|
||||||
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||||
|
|
||||||
|
### setup Ising parameters (to get reasonable parameters)
|
||||||
|
eta1 <- 0
|
||||||
|
alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||||
|
linspace <- seq(-1, 1, length.out = pj)
|
||||||
|
matrix(c(linspace, linspace^2), pj, 2)
|
||||||
|
}, p, q)
|
||||||
|
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||||
|
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
|
||||||
|
dim(Fy) <- c(2, 2, n)
|
||||||
|
|
||||||
|
# natural exponential family parameters
|
||||||
|
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||||
|
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||||
|
|
||||||
|
# conditional Ising model parameters
|
||||||
|
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||||
|
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||||
|
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||||
|
theta_y[diagonal, ] <- eta_y1
|
||||||
|
|
||||||
|
# Sample X from conditional distribution
|
||||||
|
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||||
|
# convert (from compressed integer vector) to array data
|
||||||
|
attr(X, "p") <- prod(p)
|
||||||
|
X <- t(as.mvbmatrix(X))
|
||||||
|
dim(X) <- c(p, n)
|
||||||
|
storage.mode(X) <- "double"
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
### Logging Errors and Warnings
|
||||||
|
# Register a global warning and error handler for logging warnings/errors with
|
||||||
|
# current simulation repetition session informatin allowing to reproduce problems
|
||||||
|
exceptionLogger <- function(ex) {
|
||||||
|
# retrieve current simulation repetition information
|
||||||
|
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||||
|
# setup an error log file with the same name as `file`
|
||||||
|
log <- paste0(rep.info$file, ".log")
|
||||||
|
# Write (append) condition message with reproduction info to the log
|
||||||
|
cat("\n\n------------------------------------------------------------\n",
|
||||||
|
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||||
|
rep.info$file, rep.info$n, rep.info$rep,
|
||||||
|
paste(rep.info$.Random.seed, collapse = ","),
|
||||||
|
as.character.error(ex)
|
||||||
|
), sep = "", file = log, append = TRUE)
|
||||||
|
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||||
|
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||||
|
if (n == 0L) {
|
||||||
|
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||||
|
} else {
|
||||||
|
for (i in 1L:n) {
|
||||||
|
xi <- x[[i]]
|
||||||
|
label <- paste0(n - i + 1L, ": ")
|
||||||
|
m <- length(xi)
|
||||||
|
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||||
|
srcfile <- attr(srcref, "srcfile")
|
||||||
|
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||||
|
}
|
||||||
|
if (isTRUE(attr(xi, "truncated"))) {
|
||||||
|
xi <- c(xi, " ...")
|
||||||
|
m <- length(xi)
|
||||||
|
}
|
||||||
|
if (!is.null(srcloc)) {
|
||||||
|
xi[m] <- paste0(xi[m], srcloc)
|
||||||
|
}
|
||||||
|
if (m > 1) {
|
||||||
|
label <- c(label, rep(substr(" ", 1L,
|
||||||
|
nchar(label, type = "w")), m - 1L))
|
||||||
|
}
|
||||||
|
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
globalCallingHandlers(list(
|
||||||
|
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
|
### for every sample size
|
||||||
|
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
|
for (n in sample.sizes) {
|
||||||
|
### write new simulation result file
|
||||||
|
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||||
|
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||||
|
header <- outer(
|
||||||
|
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||||
|
paste, sep = ".")
|
||||||
|
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||||
|
|
||||||
|
### repeated simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
### Repetition session state info
|
||||||
|
# Stores specific session variables before starting the current
|
||||||
|
# simulation replication. This allows to log state information which
|
||||||
|
# can be used to replicate a specific simulation repetition in case of
|
||||||
|
# errors/warnings from the logs
|
||||||
|
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
### Fit data using different methods
|
||||||
|
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
|
||||||
|
max.iter = max.iter, family = "ising")
|
||||||
|
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||||
|
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
|
||||||
|
|
||||||
|
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
|
||||||
|
|
||||||
|
### Prediction Errors: (using new independend sample of size `N`)
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||||
|
# centered model matrix of vectorized `X`s
|
||||||
|
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||||
|
P.true <- proj(B.true)
|
||||||
|
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||||
|
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||||
|
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||||
|
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
|
||||||
|
|
||||||
|
# format estimation/prediction errors and write to file and console
|
||||||
|
line <- paste0(Map(get, header), collapse = ",")
|
||||||
|
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||||
|
# report progress
|
||||||
|
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||||
|
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,134 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
library(mvbernoulli)
|
||||||
|
|
||||||
|
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||||
|
n <- 100 # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
|
||||||
|
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
|
||||||
|
r <- length(p)
|
||||||
|
# parameter configuration
|
||||||
|
rho <- -0.55
|
||||||
|
c1 <- 1
|
||||||
|
c2 <- 1
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
r == 2
|
||||||
|
all.equal(p, c(4, 4))
|
||||||
|
all.equal(q, c(2, 2))
|
||||||
|
})
|
||||||
|
|
||||||
|
### small helpers
|
||||||
|
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||||
|
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||||
|
# Auto-Regression Covariance Matrix
|
||||||
|
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||||
|
# Inverse of the AR matrix
|
||||||
|
AR.inv <- function(rho, dim) {
|
||||||
|
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||||
|
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||||
|
A / (1 - rho^2)
|
||||||
|
}
|
||||||
|
# projection matrix `P_A` as a projection onto the span of `A`
|
||||||
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||||
|
|
||||||
|
### setup Ising parameters (to get reasonable parameters)
|
||||||
|
eta1 <- 0
|
||||||
|
# alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||||
|
# linspace <- seq(-1, 1, length.out = pj)
|
||||||
|
# matrix(c(linspace, rev(linspace)), pj, 2)
|
||||||
|
# }, p, q)
|
||||||
|
alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||||
|
linspace <- seq(-1, 1, length.out = pj)
|
||||||
|
matrix(c(linspace, linspace^2), pj, 2)
|
||||||
|
}, p, q)
|
||||||
|
# alphas <- Map(function(pj, qj) {
|
||||||
|
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
|
||||||
|
# }, p, q)
|
||||||
|
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||||
|
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
|
||||||
|
dim(Fy) <- c(2, 2, n)
|
||||||
|
|
||||||
|
# natural exponential family parameters
|
||||||
|
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||||
|
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||||
|
|
||||||
|
# conditional Ising model parameters
|
||||||
|
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||||
|
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||||
|
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||||
|
theta_y[diagonal, ] <- eta_y1
|
||||||
|
|
||||||
|
# Sample X from conditional distribution
|
||||||
|
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||||
|
# convert (from compressed integer vector) to array data
|
||||||
|
attr(X, "p") <- prod(p)
|
||||||
|
X <- t(as.mvbmatrix(X))
|
||||||
|
dim(X) <- c(p, n)
|
||||||
|
storage.mode(X) <- "double"
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
### Fit data using GMLM with logging
|
||||||
|
|
||||||
|
# logger to log iterative change in the estimation process of GMLM
|
||||||
|
# log <- data.frame()
|
||||||
|
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||||
|
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||||
|
|
||||||
|
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||||
|
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||||
|
|
||||||
|
if (iter > 0) { cat("\033[9A") }
|
||||||
|
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||||
|
iter,
|
||||||
|
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||||
|
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||||
|
),
|
||||||
|
"\033[2mMSE eta1\033[0m",
|
||||||
|
mean((eta1 - eta1.est)^2),
|
||||||
|
"\033[2msubspace distances of alphas\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
sep = "\n "
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# now call the GMLM fitting routine with performance profiling
|
||||||
|
tryCatch({
|
||||||
|
system.time( # profvis::profvis(
|
||||||
|
fit.gmlm <- GMLM.default(
|
||||||
|
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
|
||||||
|
family = "ising", logger = logger
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
traceback()
|
||||||
|
})
|
|
@ -0,0 +1,207 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
library(mvbernoulli)
|
||||||
|
|
||||||
|
# seed = first 8 digits Euler's constant gamma = 0.57721 56649 01532 86060
|
||||||
|
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
file.prefix <- "sim-ising-small"
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(2, 3) # preditor dimensions
|
||||||
|
q <- c(1, 1) # response dimensions
|
||||||
|
r <- length(p)
|
||||||
|
# parameter configuration
|
||||||
|
rho <- -0.55
|
||||||
|
c1 <- 1
|
||||||
|
c2 <- 1
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
r == 2
|
||||||
|
length(p) == r
|
||||||
|
all(q == 1)
|
||||||
|
})
|
||||||
|
|
||||||
|
### small helpers
|
||||||
|
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||||
|
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||||
|
# Auto-Regression Covariance Matrix
|
||||||
|
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||||
|
# Inverse of the AR matrix
|
||||||
|
AR.inv <- function(rho, dim) {
|
||||||
|
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||||
|
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||||
|
A / (1 - rho^2)
|
||||||
|
}
|
||||||
|
# projection matrix `P_A` as a projection onto the span of `A`
|
||||||
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||||
|
|
||||||
|
### setup Ising parameters (to get reasonable parameters)
|
||||||
|
eta1 <- 0
|
||||||
|
alphas <- Map(function(pj, qj) {
|
||||||
|
data <- linspace <- seq(-1, 1, len = pj)
|
||||||
|
for (k in (seq_len(qj - 1) + 1)) {
|
||||||
|
data <- c(data, linspace^k)
|
||||||
|
}
|
||||||
|
matrix(data, nrow = pj)
|
||||||
|
}, p, q)
|
||||||
|
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||||
|
Fy <- array(sin(pi * y), dim = c(q, n))
|
||||||
|
|
||||||
|
# natural exponential family parameters
|
||||||
|
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||||
|
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||||
|
|
||||||
|
# conditional Ising model parameters
|
||||||
|
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||||
|
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||||
|
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||||
|
theta_y[diagonal, ] <- eta_y1
|
||||||
|
|
||||||
|
# Sample X from conditional distribution
|
||||||
|
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||||
|
# convert (from compressed integer vector) to array data
|
||||||
|
attr(X, "p") <- prod(p)
|
||||||
|
X <- t(as.mvbmatrix(X))
|
||||||
|
dim(X) <- c(p, n)
|
||||||
|
storage.mode(X) <- "double"
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
### Logging Errors and Warnings
|
||||||
|
# Register a global warning and error handler for logging warnings/errors with
|
||||||
|
# current simulation repetition session informatin allowing to reproduce problems
|
||||||
|
exceptionLogger <- function(ex) {
|
||||||
|
# retrieve current simulation repetition information
|
||||||
|
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||||
|
# setup an error log file with the same name as `file`
|
||||||
|
log <- paste0(rep.info$file, ".log")
|
||||||
|
# Write (append) condition message with reproduction info to the log
|
||||||
|
cat("\n\n------------------------------------------------------------\n",
|
||||||
|
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||||
|
rep.info$file, rep.info$n, rep.info$rep,
|
||||||
|
paste(rep.info$.Random.seed, collapse = ","),
|
||||||
|
as.character.error(ex)
|
||||||
|
), sep = "", file = log, append = TRUE)
|
||||||
|
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||||
|
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||||
|
if (n == 0L) {
|
||||||
|
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||||
|
} else {
|
||||||
|
for (i in 1L:n) {
|
||||||
|
xi <- x[[i]]
|
||||||
|
label <- paste0(n - i + 1L, ": ")
|
||||||
|
m <- length(xi)
|
||||||
|
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||||
|
srcfile <- attr(srcref, "srcfile")
|
||||||
|
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||||
|
}
|
||||||
|
if (isTRUE(attr(xi, "truncated"))) {
|
||||||
|
xi <- c(xi, " ...")
|
||||||
|
m <- length(xi)
|
||||||
|
}
|
||||||
|
if (!is.null(srcloc)) {
|
||||||
|
xi[m] <- paste0(xi[m], srcloc)
|
||||||
|
}
|
||||||
|
if (m > 1) {
|
||||||
|
label <- c(label, rep(substr(" ", 1L,
|
||||||
|
nchar(label, type = "w")), m - 1L))
|
||||||
|
}
|
||||||
|
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
globalCallingHandlers(list(
|
||||||
|
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
|
### for every sample size
|
||||||
|
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
|
for (n in sample.sizes) {
|
||||||
|
### write new simulation result file
|
||||||
|
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||||
|
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||||
|
header <- outer(
|
||||||
|
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||||
|
paste, sep = ".")
|
||||||
|
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||||
|
|
||||||
|
### repeated simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
### Repetition session state info
|
||||||
|
# Stores specific session variables before starting the current
|
||||||
|
# simulation replication. This allows to log state information which
|
||||||
|
# can be used to replicate a specific simulation repetition in case of
|
||||||
|
# errors/warnings from the logs
|
||||||
|
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
### Fit data using different methods
|
||||||
|
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
|
||||||
|
max.iter = max.iter, family = "ising")
|
||||||
|
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||||
|
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
|
||||||
|
|
||||||
|
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
|
||||||
|
### Prediction Errors: (using new independend sample of size `N`)
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||||
|
# centered model matrix of vectorized `X`s
|
||||||
|
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||||
|
P.true <- proj(B.true)
|
||||||
|
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||||
|
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||||
|
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||||
|
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
|
||||||
|
|
||||||
|
# format estimation/prediction errors and write to file and console
|
||||||
|
line <- paste0(Map(get, header), collapse = ",")
|
||||||
|
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||||
|
# report progress
|
||||||
|
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||||
|
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,131 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
library(mvbernoulli)
|
||||||
|
|
||||||
|
# seed = leaf node count of a full chess search tree of depth 6 from the start pos
|
||||||
|
# > position startpos
|
||||||
|
# > go perft 6
|
||||||
|
set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||||
|
n <- 100 # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(2, 3) # preditor dimensions
|
||||||
|
q <- c(1, 1) # response dimensions
|
||||||
|
r <- length(p)
|
||||||
|
# parameter configuration
|
||||||
|
rho <- -0.55
|
||||||
|
c1 <- 1
|
||||||
|
c2 <- 1
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
r == 2
|
||||||
|
length(p) == r
|
||||||
|
all(q == 1)
|
||||||
|
})
|
||||||
|
|
||||||
|
### small helpers
|
||||||
|
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||||
|
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||||
|
# Auto-Regression Covariance Matrix
|
||||||
|
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||||
|
# Inverse of the AR matrix
|
||||||
|
AR.inv <- function(rho, dim) {
|
||||||
|
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||||
|
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||||
|
A / (1 - rho^2)
|
||||||
|
}
|
||||||
|
# projection matrix `P_A` as a projection onto the span of `A`
|
||||||
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||||
|
|
||||||
|
### setup Ising parameters (to get reasonable parameters)
|
||||||
|
eta1 <- 0
|
||||||
|
alphas <- Map(function(pj, qj) {
|
||||||
|
data <- linspace <- seq(-1, 1, len = pj)
|
||||||
|
for (k in (seq_len(qj - 1) + 1)) {
|
||||||
|
data <- c(data, linspace^k)
|
||||||
|
}
|
||||||
|
matrix(data, nrow = pj)
|
||||||
|
}, p, q)
|
||||||
|
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||||
|
Fy <- array(sin(pi * y), dim = c(q, n))
|
||||||
|
|
||||||
|
# natural exponential family parameters
|
||||||
|
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||||
|
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||||
|
|
||||||
|
# conditional Ising model parameters
|
||||||
|
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||||
|
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||||
|
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||||
|
theta_y[diagonal, ] <- eta_y1
|
||||||
|
|
||||||
|
# Sample X from conditional distribution
|
||||||
|
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||||
|
# convert (from compressed integer vector) to array data
|
||||||
|
attr(X, "p") <- prod(p)
|
||||||
|
X <- t(as.mvbmatrix(X))
|
||||||
|
dim(X) <- c(p, n)
|
||||||
|
storage.mode(X) <- "double"
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# logger to log iterative change in the estimation process of GMLM
|
||||||
|
# log <- data.frame()
|
||||||
|
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||||
|
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||||
|
|
||||||
|
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||||
|
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||||
|
|
||||||
|
if (iter > 0) { cat("\033[9A") }
|
||||||
|
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||||
|
iter,
|
||||||
|
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||||
|
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||||
|
),
|
||||||
|
"\033[2mMSE eta1\033[0m",
|
||||||
|
mean((eta1 - eta1.est)^2),
|
||||||
|
"\033[2msubspace distances of alphas\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
sep = "\n "
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
# now call the GMLM fitting routine with performance profiling
|
||||||
|
tryCatch({
|
||||||
|
system.time( # profvis::profvis(
|
||||||
|
fit.gmlm <- GMLM.default(
|
||||||
|
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
|
||||||
|
family = "ising", logger = logger
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}, error = function(ex) {
|
||||||
|
print(ex)
|
||||||
|
traceback()
|
||||||
|
})
|
|
@ -0,0 +1,171 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
|
||||||
|
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
file.prefix <- "sim-normal"
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
max.iter <- 10000 # maximum number of iterations for GMLM
|
||||||
|
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(2, 3, 5) # preditor dimensions
|
||||||
|
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
|
||||||
|
r <- length(p)
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
r == length(p)
|
||||||
|
r == length(q)
|
||||||
|
all(outer(p, sample.sizes, `<`))
|
||||||
|
})
|
||||||
|
|
||||||
|
# projection matrix `P_A` as a projection onto the span of `A`
|
||||||
|
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||||
|
|
||||||
|
# setup model parameters
|
||||||
|
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
|
||||||
|
eta1 <- 0 # intercept
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
|
||||||
|
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
|
||||||
|
Fy <- Fy - c(rowMeans(Fy, dims = r))
|
||||||
|
|
||||||
|
# sample predictors as X | Y = y (sample axis is last axis)
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
### Logging Errors and Warnings
|
||||||
|
# Register a global warning and error handler for logging warnings/errors with
|
||||||
|
# current simulation repetition session informatin allowing to reproduce problems
|
||||||
|
exceptionLogger <- function(ex) {
|
||||||
|
# retrieve current simulation repetition information
|
||||||
|
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||||
|
# setup an error log file with the same name as `file`
|
||||||
|
log <- paste0(rep.info$file, ".log")
|
||||||
|
# Write (append) condition message with reproduction info to the log
|
||||||
|
cat("\n\n------------------------------------------------------------\n",
|
||||||
|
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||||
|
rep.info$file, rep.info$n, rep.info$rep,
|
||||||
|
paste(rep.info$.Random.seed, collapse = ","),
|
||||||
|
as.character.error(ex)
|
||||||
|
), sep = "", file = log, append = TRUE)
|
||||||
|
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||||
|
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||||
|
if (n == 0L) {
|
||||||
|
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||||
|
} else {
|
||||||
|
for (i in 1L:n) {
|
||||||
|
xi <- x[[i]]
|
||||||
|
label <- paste0(n - i + 1L, ": ")
|
||||||
|
m <- length(xi)
|
||||||
|
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||||
|
srcfile <- attr(srcref, "srcfile")
|
||||||
|
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||||
|
}
|
||||||
|
if (isTRUE(attr(xi, "truncated"))) {
|
||||||
|
xi <- c(xi, " ...")
|
||||||
|
m <- length(xi)
|
||||||
|
}
|
||||||
|
if (!is.null(srcloc)) {
|
||||||
|
xi[m] <- paste0(xi[m], srcloc)
|
||||||
|
}
|
||||||
|
if (m > 1) {
|
||||||
|
label <- c(label, rep(substr(" ", 1L,
|
||||||
|
nchar(label, type = "w")), m - 1L))
|
||||||
|
}
|
||||||
|
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
globalCallingHandlers(list(
|
||||||
|
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
|
### for every sample size
|
||||||
|
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||||
|
for (n in sample.sizes) {
|
||||||
|
### write new simulation result file
|
||||||
|
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||||
|
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||||
|
header <- outer(
|
||||||
|
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||||
|
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||||
|
paste, sep = ".")
|
||||||
|
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||||
|
|
||||||
|
### repeated simulation
|
||||||
|
for (rep in seq_len(reps)) {
|
||||||
|
### Repetition session state info
|
||||||
|
# Stores specific session variables before starting the current
|
||||||
|
# simulation replication. This allows to log state information which
|
||||||
|
# can be used to replicate a specific simulation repetition in case of
|
||||||
|
# errors/warnings from the logs
|
||||||
|
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
### Fit data using different methods
|
||||||
|
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
|
||||||
|
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||||
|
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||||
|
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
|
||||||
|
|
||||||
|
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||||
|
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||||
|
B.pca <- fit.pca$rotation
|
||||||
|
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||||
|
|
||||||
|
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||||
|
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||||
|
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||||
|
# range `[0, 1]`.
|
||||||
|
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||||
|
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||||
|
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||||
|
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||||
|
|
||||||
|
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||||
|
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||||
|
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||||
|
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||||
|
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||||
|
|
||||||
|
### Prediction Errors: (using new independend sample of size `N`)
|
||||||
|
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||||
|
# centered model matrix of vectorized `X`s
|
||||||
|
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||||
|
P.true <- proj(B.true)
|
||||||
|
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||||
|
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||||
|
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||||
|
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
|
||||||
|
|
||||||
|
# format estimation/prediction errors and write to file and console
|
||||||
|
line <- paste0(Map(get, header), collapse = ",")
|
||||||
|
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||||
|
# report progress
|
||||||
|
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||||
|
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,96 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
|
||||||
|
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
|
||||||
|
|
||||||
|
### simulation configuration
|
||||||
|
reps <- 100 # number of simulation replications
|
||||||
|
n <- 100 # sample sizes `n`
|
||||||
|
N <- 2000 # validation set size
|
||||||
|
p <- c(2, 3, 5) # preditor dimensions
|
||||||
|
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
|
||||||
|
|
||||||
|
# initial consistency checks
|
||||||
|
stopifnot(exprs = {
|
||||||
|
length(p) == length(q)
|
||||||
|
})
|
||||||
|
|
||||||
|
# setup model parameters
|
||||||
|
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
|
||||||
|
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
|
||||||
|
eta1 <- 0 # intercept
|
||||||
|
|
||||||
|
# data sampling routine
|
||||||
|
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
|
||||||
|
r <- length(alphas) # tensor order
|
||||||
|
|
||||||
|
# generate response (sample axis is last axis)
|
||||||
|
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
|
||||||
|
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
|
||||||
|
Fy <- Fy - c(rowMeans(Fy, dims = r))
|
||||||
|
|
||||||
|
# sample predictors as X | Y = y (sample axis is last axis)
|
||||||
|
Deltas <- Map(solve, Omegas) # normal covariances
|
||||||
|
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
|
||||||
|
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
|
||||||
|
|
||||||
|
# permute axis to requested get the sample axis
|
||||||
|
if (sample.axis != r + 1L) {
|
||||||
|
perm <- integer(r + 1L)
|
||||||
|
perm[sample.axis] <- r + 1L
|
||||||
|
perm[-sample.axis] <- seq_len(r)
|
||||||
|
X <- aperm(X, perm)
|
||||||
|
Fy <- aperm(Fy, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||||
|
}
|
||||||
|
|
||||||
|
### sample (training) data
|
||||||
|
c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||||
|
|
||||||
|
### Fit data using GMLM with logging
|
||||||
|
|
||||||
|
# logger to log iterative change in the estimation process of GMLM
|
||||||
|
# log <- data.frame()
|
||||||
|
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
|
||||||
|
B.true <- Reduce(`%x%`, rev(alphas))
|
||||||
|
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||||
|
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||||
|
|
||||||
|
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||||
|
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||||
|
|
||||||
|
if (iter > 1) { cat("\033[9A") }
|
||||||
|
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||||
|
iter,
|
||||||
|
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||||
|
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||||
|
),
|
||||||
|
"\033[2mMSE eta1\033[0m",
|
||||||
|
mean((eta1 - eta1.est)^2),
|
||||||
|
"\033[2msubspace distances of alphas\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||||
|
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||||
|
sep = "\n "
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# now call the GMLM fitting routine with performance profiling
|
||||||
|
system.time( # profvis::profvis(
|
||||||
|
fit.gmlm <- GMLM.default(
|
||||||
|
X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Iter: loss - dist
|
||||||
|
# 7190: 50.583 - 0.057
|
||||||
|
# MSE eta1
|
||||||
|
# 0.02694658
|
||||||
|
# subspace distances of alphas
|
||||||
|
# 0.043 0.035 0.034
|
||||||
|
# Frob. norm of Omega differences
|
||||||
|
# 0.815 1.777 12.756
|
||||||
|
# time user system elapsed
|
||||||
|
# 342.279 555.630 183.653
|
|
@ -0,0 +1,100 @@
|
||||||
|
|
||||||
|
if (!endsWith(getwd(), "/sim")) {
|
||||||
|
setwd("sim")
|
||||||
|
}
|
||||||
|
|
||||||
|
sim.plot <- function(file.prefix, date, to.file = FALSE) {
|
||||||
|
|
||||||
|
# file.prefix <- "sim-ising-small"
|
||||||
|
# # file.prefix <- "sim-normal"
|
||||||
|
# date <- "20221012" # yyyymmdd, to match all "[0-9]{6}"
|
||||||
|
time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}"
|
||||||
|
colors <- c(
|
||||||
|
PCA = "#a11414",
|
||||||
|
HOPCA = "#2a62b6",
|
||||||
|
TSIR = "#9313b9",
|
||||||
|
GMLM = "#247407"
|
||||||
|
)
|
||||||
|
line.width <- 1.75
|
||||||
|
margins <- c(5.1, 4.1, 4.1, 0.1)
|
||||||
|
|
||||||
|
sim <- Reduce(rbind, Map(function(path) {
|
||||||
|
df <- read.csv(path)
|
||||||
|
df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1))
|
||||||
|
df
|
||||||
|
}, list.files(".", pattern = paste0(
|
||||||
|
"^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
|
||||||
|
))))
|
||||||
|
|
||||||
|
stats <- aggregate(. ~ n, sim, mean)
|
||||||
|
q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
|
||||||
|
q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
|
||||||
|
|
||||||
|
if (to.file) {
|
||||||
|
width <- 720
|
||||||
|
png(paste(file.prefix, "-", date, ".png", sep = ""),
|
||||||
|
width = width, height = round((6 / 11) * width, -1),
|
||||||
|
pointsize = 12)
|
||||||
|
}
|
||||||
|
|
||||||
|
layout(mat = matrix(c(
|
||||||
|
1, 2,
|
||||||
|
3, 3
|
||||||
|
), 2, 2, byrow = TRUE),
|
||||||
|
widths = c(1, 1),
|
||||||
|
heights = c(12, 1), respect = FALSE)
|
||||||
|
# layout.show(3)
|
||||||
|
|
||||||
|
with(stats, {
|
||||||
|
par(mar = margins)
|
||||||
|
plot(range(n), 0:1,
|
||||||
|
type = "n", bty = "n", main = "Subspace Distance",
|
||||||
|
xlab = "Sample Size", ylab = "Error")
|
||||||
|
lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width)
|
||||||
|
lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width)
|
||||||
|
lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width)
|
||||||
|
lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width)
|
||||||
|
|
||||||
|
xn <- c(q75$n, rev(q25$n))
|
||||||
|
polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)),
|
||||||
|
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)),
|
||||||
|
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)),
|
||||||
|
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)),
|
||||||
|
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
|
||||||
|
})
|
||||||
|
|
||||||
|
with(stats, {
|
||||||
|
par(mar = margins)
|
||||||
|
plot(range(n), 0:1,
|
||||||
|
type = "n", bty = "n", main = "RMSE (Prediction Error)",
|
||||||
|
xlab = "Sample Size", ylab = "Error")
|
||||||
|
xn <- c(q75$n, rev(q25$n))
|
||||||
|
polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
|
||||||
|
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
|
||||||
|
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
|
||||||
|
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
|
||||||
|
polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)),
|
||||||
|
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
|
||||||
|
lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width)
|
||||||
|
lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width)
|
||||||
|
lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width)
|
||||||
|
lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width)
|
||||||
|
})
|
||||||
|
|
||||||
|
par(mar = rep(0, 4))
|
||||||
|
plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
|
||||||
|
legend("center", legend = names(colors), col = colors, lwd = line.width,
|
||||||
|
lty = 1, bty = "n", horiz = TRUE)
|
||||||
|
|
||||||
|
if (to.file) {
|
||||||
|
dev.off()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sim.plot("sim-ising-small", "20221012", TRUE)
|
||||||
|
sim.plot("sim-normal", "20221012", TRUE)
|
|
@ -5,14 +5,9 @@ export("%x_1%")
|
||||||
export("%x_2%")
|
export("%x_2%")
|
||||||
export("%x_3%")
|
export("%x_3%")
|
||||||
export("%x_4%")
|
export("%x_4%")
|
||||||
export(.projBand)
|
|
||||||
export(.projPSD)
|
|
||||||
export(.projRank)
|
|
||||||
export(.projSymBand)
|
|
||||||
export(.projSymRank)
|
|
||||||
export(CISE)
|
export(CISE)
|
||||||
export(Dup)
|
export(D)
|
||||||
export(Dup.pinv)
|
export(D.pinv)
|
||||||
export(GMLM)
|
export(GMLM)
|
||||||
export(GMLM.default)
|
export(GMLM.default)
|
||||||
export(HOPCA)
|
export(HOPCA)
|
||||||
|
@ -22,8 +17,6 @@ export(ICU)
|
||||||
export(K)
|
export(K)
|
||||||
export(K.perm)
|
export(K.perm)
|
||||||
export(LSIR)
|
export(LSIR)
|
||||||
export(La.det)
|
|
||||||
export(La.solve)
|
|
||||||
export(N)
|
export(N)
|
||||||
export(NAGD)
|
export(NAGD)
|
||||||
export(PCA2d)
|
export(PCA2d)
|
||||||
|
@ -32,30 +25,20 @@ export(RMReg)
|
||||||
export(RMap)
|
export(RMap)
|
||||||
export(S)
|
export(S)
|
||||||
export(TSIR)
|
export(TSIR)
|
||||||
export(approx.kron)
|
|
||||||
export(approx.kronecker)
|
export(approx.kronecker)
|
||||||
export(colKronecker)
|
export(colKronecker)
|
||||||
export(decompose.kronecker)
|
|
||||||
export(dist.kron.norm)
|
export(dist.kron.norm)
|
||||||
export(dist.kron.tr)
|
export(dist.kron.tr)
|
||||||
export(dist.projection)
|
export(dist.projection)
|
||||||
export(dist.subspace)
|
export(dist.subspace)
|
||||||
export(exprs.all.equal)
|
export(exprs.all.equal)
|
||||||
export(gmlm_ising)
|
|
||||||
export(gmlm_mlm)
|
|
||||||
export(gmlm_tensor_normal)
|
|
||||||
export(icu_tensor_normal)
|
|
||||||
export(ising_m2)
|
|
||||||
export(ising_sample)
|
|
||||||
export(kpir.approx)
|
export(kpir.approx)
|
||||||
export(kpir.base)
|
export(kpir.base)
|
||||||
export(kpir.ls)
|
export(kpir.ls)
|
||||||
export(kpir.mle)
|
export(kpir.mle)
|
||||||
export(kpir.momentum)
|
export(kpir.momentum)
|
||||||
export(kpir.new)
|
export(kpir.new)
|
||||||
export(kronperm)
|
|
||||||
export(mat)
|
export(mat)
|
||||||
export(matProj)
|
|
||||||
export(matpow)
|
export(matpow)
|
||||||
export(matrixImage)
|
export(matrixImage)
|
||||||
export(mcov)
|
export(mcov)
|
||||||
|
@ -66,20 +49,10 @@ export(mtvk)
|
||||||
export(num.deriv)
|
export(num.deriv)
|
||||||
export(num.deriv.function)
|
export(num.deriv.function)
|
||||||
export(num.deriv2)
|
export(num.deriv2)
|
||||||
export(pinv)
|
|
||||||
export(projDiag)
|
|
||||||
export(projStiefel)
|
|
||||||
export(projSym)
|
|
||||||
export(reduce)
|
export(reduce)
|
||||||
export(riccati)
|
|
||||||
export(rowKronecker)
|
export(rowKronecker)
|
||||||
export(rtensornorm)
|
export(rtensornorm)
|
||||||
export(slice.assign.expr)
|
|
||||||
export(slice.expr)
|
|
||||||
export(slice.select)
|
|
||||||
export(sylvester)
|
|
||||||
export(tensor_predictor)
|
export(tensor_predictor)
|
||||||
export(tsym)
|
|
||||||
export(ttm)
|
export(ttm)
|
||||||
export(ttt)
|
export(ttt)
|
||||||
export(vech)
|
export(vech)
|
||||||
|
|
|
@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
|
||||||
# optimize likelihood using Nesterov Excelerated Gradient Descent
|
# optimize likelihood using Nesterov Excelerated Gradient Descent
|
||||||
params.fit <- NAGD(
|
params.fit <- NAGD(
|
||||||
fun.loss = function(params) {
|
fun.loss = function(params) {
|
||||||
# scaled negative log-likelihood
|
# scaled negative lig-likelihood
|
||||||
# eta1 alphas Omegas
|
# eta1 alphas Omegas
|
||||||
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
|
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
|
||||||
},
|
},
|
||||||
fun.grad = function(params) {
|
fun.grad = function(params) {
|
||||||
# gradient of the scaled negative log-likelihood
|
# gradient of the scaled negative lig-likelihood
|
||||||
# eta1 alphas Omegas
|
# eta1 alphas Omegas
|
||||||
family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
|
family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
|
||||||
},
|
},
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#' `i`th axis excluding the sample axis.
|
#' `i`th axis excluding the sample axis.
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) {
|
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) {
|
||||||
# observation index numbers (all axis except the sample axis)
|
# observation index numbers (all axis except the sample axis)
|
||||||
modes <- seq_along(dim(X))[-sample.axis]
|
modes <- seq_along(dim(X))[-sample.axis]
|
||||||
|
|
||||||
|
|
|
@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
|
||||||
list(alphas = alphas, Deltas = fun.Deltas(alphas))
|
list(alphas = alphas, Deltas = fun.Deltas(alphas))
|
||||||
}
|
}
|
||||||
|
|
||||||
#' HPOIR subroutine for the MLE estimation given preprocessed data and initial
|
#' HPOIR subroutine for the MLE estimation given proprocessed data and initial
|
||||||
#' alpha, Delta parameters
|
#' alphas, Deltas paramters
|
||||||
#'
|
#'
|
||||||
#' @keywords internal
|
#' @keywords internal
|
||||||
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {
|
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
HOSVD <- function(X, nu = NULL, eps = 1e-07) {
|
HOSVD <- function(X, nu = NULL, eps = 1e-07) {
|
||||||
if (!is.null(nu)) {
|
if (!missing(nu)) {
|
||||||
stopifnot(all(nu <= dim(X)))
|
stopifnot(all(nu <= dim(X)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,5 +21,3 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) {
|
||||||
|
|
||||||
list(C = C, Us = Us)
|
list(C = C, Us = Us)
|
||||||
}
|
}
|
||||||
|
|
||||||
SVD <- function(A) .Call("C_svd", A)
|
|
||||||
|
|
|
@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
||||||
stop("Initial loss is non-finite (", loss, ")")
|
stop("Initial loss is non-finite (", loss, ")")
|
||||||
}
|
}
|
||||||
# initialize "previous" iterate parameters
|
# initialize "previous" iterate parameters
|
||||||
prev.params <- params
|
params.last <- params
|
||||||
|
|
||||||
# Gradient Descent Loop
|
# Gradient Descent Loop
|
||||||
line.search.tag <- FALSE # init line search state as "failure"
|
line.search.tag <- FALSE # init line search state as "failure"
|
||||||
|
@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extrapolation form previous position (momentum)
|
# Extrapolation form previous position (momentum)
|
||||||
# `params.moment <- (1 + moment) * params - moment * prev.params`
|
# `params.moment <- (1 + moment) * params - moment * param.last`
|
||||||
moment <- (m[1] - 1) / m[2]
|
moment <- (m[1] - 1) / m[2]
|
||||||
params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params)
|
params.moment <- fun.lincomb(1 + moment, params, -moment, params.last)
|
||||||
|
|
||||||
# Compute gradient at extrapolated position
|
# Compute gradient at extrapolated position
|
||||||
if (missing(more.params)) {
|
if (missing(more.params)) {
|
||||||
|
@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
||||||
}
|
}
|
||||||
|
|
||||||
# keep track of previous parameters
|
# keep track of previous parameters
|
||||||
prev.params <- params
|
params.last <- params
|
||||||
|
|
||||||
# check line search outcome
|
# check line search outcome
|
||||||
if (is.na(line.search.tag)) {
|
if (is.na(line.search.tag)) {
|
||||||
|
|
|
@ -4,21 +4,11 @@
|
||||||
TSIR <- function(X, y, d, sample.axis = 1L,
|
TSIR <- function(X, y, d, sample.axis = 1L,
|
||||||
nr.slices = 10L, # default slices, ignored if y is a factor or integer
|
nr.slices = 10L, # default slices, ignored if y is a factor or integer
|
||||||
max.iter = 50L,
|
max.iter = 50L,
|
||||||
eps = sqrt(.Machine$double.eps),
|
eps = sqrt(.Machine$double.eps)
|
||||||
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
|
|
||||||
) {
|
) {
|
||||||
|
|
||||||
if (!(is.factor(y) || is.integer(y))) {
|
if (!(is.factor(y) || is.integer(y))) {
|
||||||
slice.method <- match.arg(slice.method)
|
y <- cut(y, nr.slices)
|
||||||
if (slice.method == "ecdf") {
|
|
||||||
y <- cut(ecdf(y)(y), nr.slices)
|
|
||||||
} else {
|
|
||||||
y <- cut(y, nr.slices)
|
|
||||||
# ensure there are no empty slices
|
|
||||||
if (any(table(y) == 0)) {
|
|
||||||
y <- as.factor(as.integer(y))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
stopifnot(exprs = {
|
stopifnot(exprs = {
|
||||||
|
|
|
@ -1,32 +1,4 @@
|
||||||
#' @examples
|
#' Approximates kronecker product decomposition.
|
||||||
#' nrows <- c(3, 2, 5)
|
|
||||||
#' ncols <- c(2, 4, 8)
|
|
||||||
#'
|
|
||||||
#' A <- Reduce(kronecker, Map(matrix, Map(rnorm, nrows * ncols), nrows))
|
|
||||||
#'
|
|
||||||
#' all.equal(A, Reduce(kronecker, approx.kron(A, nrows, ncols)))
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
approx.kron <- function(A, nrows, ncols) {
|
|
||||||
|
|
||||||
# rearrange kronecker product `A` into outer product `R`
|
|
||||||
dim(A) <- c(rev(nrows), rev(ncols))
|
|
||||||
axis.perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2L))[, rev(seq_along(nrows))])
|
|
||||||
R <- aperm(A, axis.perm, resize = FALSE)
|
|
||||||
dim(R) <- nrows * ncols
|
|
||||||
|
|
||||||
# scaling factor for every product component
|
|
||||||
s <- sum(A^2)^(1 / length(dim(A)))
|
|
||||||
|
|
||||||
# higher order SVD on `R` with one singular vector
|
|
||||||
Map(function(mode, nr, nc) {
|
|
||||||
s * `dim<-`(La.svd(mcrossprod(R, mode = mode), 1L, 0L)$u, c(nr, nc))
|
|
||||||
}, seq_along(nrows), nrows, ncols)
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#' Approximates Kronecker Product decomposition.
|
|
||||||
#'
|
#'
|
||||||
#' Approximates the matrices `A` and `B` such that
|
#' Approximates the matrices `A` and `B` such that
|
||||||
#' C = A %x% B
|
#' C = A %x% B
|
||||||
|
@ -49,7 +21,7 @@ approx.kron <- function(A, nrows, ncols) {
|
||||||
#' 123 (2000) 85-100 (pp. 93-95)
|
#' 123 (2000) 85-100 (pp. 93-95)
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
|
approx.kronecker <- function(C, dimA, dimB) {
|
||||||
|
|
||||||
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
||||||
R <- aperm(C, c(2L, 4L, 1L, 3L))
|
R <- aperm(C, c(2L, 4L, 1L, 3L))
|
||||||
|
@ -61,115 +33,8 @@ approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
|
||||||
svdR <- svd(R, 1L, 1L)
|
svdR <- svd(R, 1L, 1L)
|
||||||
}
|
}
|
||||||
|
|
||||||
list(
|
return(list(
|
||||||
A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
|
A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
|
||||||
B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
|
B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#' Kronecker Product Decomposition.
|
|
||||||
#'
|
|
||||||
#' Computes the components summation components `A_i`, `B_i` of a sum of
|
|
||||||
#' Kronecker products
|
|
||||||
#' C = sum_i A_i %x% B_i
|
|
||||||
#' with the minimal estimated number of summands.
|
|
||||||
#'
|
|
||||||
#' @param C desired kronecker product result.
|
|
||||||
#' @param dimA length 2 vector of dimensions of \code{A}.
|
|
||||||
#' @param dimB length 2 vector of dimensions of \code{B}.
|
|
||||||
#' @param tol tolerance of singular values of \code{C} to determin the number of
|
|
||||||
#' needed summands.
|
|
||||||
#'
|
|
||||||
#' @return list of lenghth with estimated number of summation components, each
|
|
||||||
#' entry consists of a list with named entries \code{"A"} and \code{"B"} of
|
|
||||||
#' dimensions \code{dimA} and \code{dimB}.
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE)
|
|
||||||
#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE)
|
|
||||||
#' C <- Reduce(`+`, Map(kronecker, As, Bs))
|
|
||||||
#'
|
|
||||||
#' decomposition <- decompose.kronecker(C, c(2, 7))
|
|
||||||
#'
|
|
||||||
#' reconstruction <- Reduce(`+`, Map(function(summand) {
|
|
||||||
#' kronecker(summand[[1]], summand[[2]])
|
|
||||||
#' }, decomposition), array(0, dim(C)))
|
|
||||||
#'
|
|
||||||
#' stopifnot(all.equal(C, reconstruction))
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) {
|
|
||||||
# convert the equivalent outer product
|
|
||||||
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
|
||||||
C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE)
|
|
||||||
dim(C) <- c(prod(dimA), prod(dimB))
|
|
||||||
|
|
||||||
# Singular Valued Decomposition
|
|
||||||
svdC <- La.svd(C)
|
|
||||||
|
|
||||||
# Sum of Kronecker Components
|
|
||||||
lapply(seq_len(sum(svdC$d > tol)), function(i) list(
|
|
||||||
A = matrix(svdC$d[i] * svdC$u[, i], dimA),
|
|
||||||
B = matrix(svdC$vt[i, ], dimB)
|
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
### Given that C is a Kronecker product this is a fast method but a bit
|
|
||||||
### unreliable in full generality.
|
|
||||||
# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
|
|
||||||
# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
|
||||||
# R <- aperm(C, c(2L, 4L, 1L, 3L))
|
|
||||||
# dim(R) <- c(prod(dimA), prod(dimB))
|
|
||||||
# max.index <- which.max(abs(R))
|
|
||||||
# row.index <- ((max.index - 1L) %% nrow(R)) + 1L
|
|
||||||
# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L
|
|
||||||
# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1
|
|
||||||
# list(
|
|
||||||
# A = array(R[, col.index], dimA),
|
|
||||||
# B = array(R[row.index, ] / max.elem, dimB)
|
|
||||||
# )
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
# kron <- function(A, B) {
|
|
||||||
# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1]))
|
|
||||||
# K <- aperm(outer(A, B), perm)
|
|
||||||
# dim(K) <- dim(A) * dim(B)
|
|
||||||
# K
|
|
||||||
# }
|
|
||||||
|
|
||||||
# kronperm <- function(A) {
|
|
||||||
# # force A to have even number of dimensions
|
|
||||||
# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L))
|
|
||||||
# # compute axis permutation
|
|
||||||
# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1]))
|
|
||||||
# # permute elements of A
|
|
||||||
# K <- aperm(A, perm, resize = FALSE)
|
|
||||||
# # collapse/set dimensions
|
|
||||||
# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2)
|
|
||||||
# K
|
|
||||||
# }
|
|
||||||
|
|
||||||
# p <- c(2, 3, 5)
|
|
||||||
# q <- c(3, 4, 7)
|
|
||||||
# A <- array(rnorm(prod(p)), p)
|
|
||||||
# B <- array(rnorm(prod(q)), q)
|
|
||||||
# all.equal(kronperm(outer(A, B)), kronecker(A, B))
|
|
||||||
# all.equal(kron(A, B), kronecker(A, B))
|
|
||||||
|
|
||||||
|
|
||||||
# dA <- c(2, 3, 5)
|
|
||||||
# dB <- c(3, 4, 7)
|
|
||||||
# A <- array(rnorm(prod(dA)), dA)
|
|
||||||
# B <- array(rnorm(prod(dB)), dB)
|
|
||||||
# X <- outer(A, B)
|
|
||||||
|
|
||||||
# r <- length(dim(X)) / 2
|
|
||||||
# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r))))
|
|
||||||
# K <- apply(rbind(
|
|
||||||
# (I - 1) %/% tail(dim(X), r) + 1,
|
|
||||||
# (I - 1) %% tail(dim(X), r) + 1
|
|
||||||
# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1])
|
|
||||||
# dim(K) <- head(dim(X), r) * tail(dim(X), r)
|
|
||||||
|
|
||||||
# all.equal(kronecker(A, B), K)
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
|
#' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
|
||||||
#'
|
#'
|
||||||
#' This is equivalent to the expression
|
#' This is equivalent to the expression
|
||||||
#' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster.
|
#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' A1 <- matrix(rnorm(5^2), 5)
|
#' A1 <- matrix(rnorm(5^2), 5)
|
||||||
|
@ -16,21 +16,21 @@
|
||||||
#' ))
|
#' ))
|
||||||
#'
|
#'
|
||||||
#' p <- c(3, 7, 5, 2)
|
#' p <- c(3, 7, 5, 2)
|
||||||
#' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||||
#' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||||
#' stopifnot(all.equal(
|
#' stopifnot(all.equal(
|
||||||
#' dist.kron.norm(As, Bs),
|
#' dist.kron.norm(A, B),
|
||||||
#' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")
|
#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")
|
||||||
#' ))
|
#' ))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) {
|
dist.kron.norm <- function(A, B, eps = .Machine$double.eps) {
|
||||||
if (is.list(As) && is.list(Bs)) {
|
if (is.list(A) && is.list(B)) {
|
||||||
norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) -
|
norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) -
|
||||||
2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) +
|
2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) +
|
||||||
prod(unlist(Map(function(x) sum(x^2), Bs)))
|
prod(unlist(Map(function(x) sum(x^2), B)))
|
||||||
} else if (is.matrix(As) && is.matrix(Bs)) {
|
} else if (is.matrix(A) && is.matrix(B)) {
|
||||||
norm2 <- sum((As - Bs)^2)
|
norm2 <- sum((A - B)^2)
|
||||||
} else {
|
} else {
|
||||||
stop("Unexpected input")
|
stop("Unexpected input")
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#' Projection Distance of two matrices
|
#' Porjection Distance of two matrices
|
||||||
#'
|
#'
|
||||||
#' Defined as sine of the maximum principal angle between the column spaces
|
#' Defined as sine of the maximum principal angle between the column spaces
|
||||||
#' of the matrices
|
#' of the matrices
|
||||||
|
|
|
@ -1,406 +0,0 @@
|
||||||
#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
|
|
||||||
#'
|
|
||||||
#' @todo TODO: Add beta and Omega projections
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
|
|
||||||
# proj.betas = ..., proj.Omegas = ..., # TODO: this
|
|
||||||
max.iter = 1000L,
|
|
||||||
eps = sqrt(.Machine$double.eps),
|
|
||||||
step.size = 1e-3,
|
|
||||||
zig.zag.threashold = 20L,
|
|
||||||
patience = 3L,
|
|
||||||
nr.slices = 20L, # only for univariate `F(y) = y`
|
|
||||||
slice.method = c("cut", "ecdf", "none"), # only for univariate `F(y) = y` and `y` is a factor or integer
|
|
||||||
logger = function(...) { }
|
|
||||||
) {
|
|
||||||
# Get problem dimensions
|
|
||||||
dimX <- dim(X)[-sample.axis]
|
|
||||||
# threat scalar `F` as a tensor
|
|
||||||
if (is.null(dim(F))) {
|
|
||||||
dimF <- rep(1L, length(dimX))
|
|
||||||
dim(F) <- ifelse(seq_along(dim(X)) == sample.axis, sample.size, 1L)
|
|
||||||
} else {
|
|
||||||
dimF <- dim(F)[-sample.axis]
|
|
||||||
}
|
|
||||||
sample.size <- dim(X)[sample.axis]
|
|
||||||
|
|
||||||
# rearrange `X`, `F` such that the last axis enumerates observations
|
|
||||||
if (sample.axis != length(dim(X))) {
|
|
||||||
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
|
|
||||||
X <- aperm(X, axis.perm)
|
|
||||||
F <- aperm(F, axis.perm)
|
|
||||||
sample.axis <- length(dim(X))
|
|
||||||
}
|
|
||||||
modes <- seq_along(dimX)
|
|
||||||
|
|
||||||
# Special case for univariate response `vec F(y) = y`
|
|
||||||
# Due to high computational costs we use slicing
|
|
||||||
slice.method <- match.arg(slice.method)
|
|
||||||
slices.ind <- if ((slice.method != "none") && (length(F) == prod(dim(F)))) {
|
|
||||||
y <- as.vector(F)
|
|
||||||
if (!(is.factor(y) || is.integer(y))) {
|
|
||||||
slice.method <- match.arg(slice.method)
|
|
||||||
if (slice.method == "ecdf") {
|
|
||||||
y <- cut(ecdf(y)(y), nr.slices)
|
|
||||||
} else {
|
|
||||||
y <- cut(y, nr.slices)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
split(seq_len(sample.size), y, drop = TRUE)
|
|
||||||
} else {
|
|
||||||
seq_len(sample.size)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# initialize betas with tensor normal estimate (ignoring data being binary)
|
|
||||||
fit_normal <- gmlm_tensor_normal(X, F, sample.axis = length(dim(X)))
|
|
||||||
betas <- fit_normal$betas
|
|
||||||
|
|
||||||
Omegas <- Omegas.init <- Map(function(mode) {
|
|
||||||
n <- prod(dim(X)[-mode])
|
|
||||||
prob2 <- mcrossprod(X, mode = mode) / n
|
|
||||||
prob2[prob2 == 0] <- 1 / n
|
|
||||||
prob1 <- diag(prob2)
|
|
||||||
`prob1^2` <- outer(prob1, prob1)
|
|
||||||
|
|
||||||
`diag<-`(log(((1 - `prob1^2`) / `prob1^2`) * prob2 / (1 - prob2)), 0)
|
|
||||||
}, modes)
|
|
||||||
|
|
||||||
# Determin degenerate combinations, that are variables which are exclusive
|
|
||||||
# in the data set
|
|
||||||
matX <- mat(X, sample.axis)
|
|
||||||
degen <- crossprod(matX) == 0
|
|
||||||
degen.mask <- which(degen)
|
|
||||||
# If there are degenerate combination, compute an (arbitrary) bound the
|
|
||||||
# log odds parameters of those combinations
|
|
||||||
if (any(degen.mask)) {
|
|
||||||
degen.ind <- arrayInd(degen.mask, dim(degen))
|
|
||||||
meanX <- colMeans(matX)
|
|
||||||
prodX <- meanX[degen.ind[, 1]] * meanX[degen.ind[, 2]]
|
|
||||||
degen.bounds <- log((1 - prodX) / (prodX * sample.size))
|
|
||||||
# Component indices in Omegas of degenerate two-way interactions
|
|
||||||
degen.ind <- arrayInd(degen.mask, rep(dimX, 2))
|
|
||||||
degen.ind <- Map(function(d, m) {
|
|
||||||
degen.ind[, m] + dimX[m] * (degen.ind[, m + length(dimX)] - 1L)
|
|
||||||
}, dimX, seq_along(dimX))
|
|
||||||
|
|
||||||
## Enforce initial value degeneracy interaction param. constraints
|
|
||||||
# Extract parameters corresponding to degenerate interactions
|
|
||||||
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
|
|
||||||
# Degeneracy Constrained Parameters (sign is dropped)
|
|
||||||
DCP <- mapply(function(vals, bound) {
|
|
||||||
logVals <- log(abs(vals))
|
|
||||||
err <- max(0, sum(logVals) - log(abs(bound)))
|
|
||||||
exp(logVals - (err / length(vals)))
|
|
||||||
}, split(degen.params, col(degen.params)), degen.bounds)
|
|
||||||
# Update values in Omegas such that all degeneracy constraints hold
|
|
||||||
Omegas <- Map(function(Omega, cp, ind) {
|
|
||||||
# Combine multiple constraints for every element into single
|
|
||||||
# constraint value per element
|
|
||||||
cp <- mapply(min, split(abs(cp), ind))
|
|
||||||
ind <- as.integer(names(cp))
|
|
||||||
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
|
|
||||||
}, Omegas, split(DCP, row(DCP)), degen.ind)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Initialize mean squared gradients
|
|
||||||
grad2_betas <- Map(array, 0, Map(dim, betas))
|
|
||||||
grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
|
|
||||||
|
|
||||||
# Keep track of the last loss to accumulate loss difference sign changes
|
|
||||||
# indicating optimization instabilities as a sign to stop
|
|
||||||
last_loss <- Inf
|
|
||||||
accum_sign <- 1
|
|
||||||
|
|
||||||
# non improving iteration counter
|
|
||||||
non_improving <- 0L
|
|
||||||
|
|
||||||
# technical access points to dynamicaly access a multi-dimensional array
|
|
||||||
`X[..., i]` <- slice.expr(X, sample.axis, index = i, drop = FALSE)
|
|
||||||
`F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
|
|
||||||
|
|
||||||
# Iterate till a break condition triggers or till max. nr. of iterations
|
|
||||||
for (iter in seq_len(max.iter)) {
|
|
||||||
|
|
||||||
grad_betas <- Map(matrix, 0, dimX, dimF)
|
|
||||||
Omega <- Reduce(kronecker, rev(Omegas))
|
|
||||||
|
|
||||||
# second order residuals accumulator
|
|
||||||
# `sum_i (X_i o X_i - E[X o X | Y = y_i])`
|
|
||||||
R2 <- array(0, dim = c(dimX, dimX))
|
|
||||||
|
|
||||||
# negative log-likelihood
|
|
||||||
loss <- 0
|
|
||||||
|
|
||||||
for (i in slices.ind) {
|
|
||||||
# slice size (nr. of objects in the slice)
|
|
||||||
n_i <- length(i)
|
|
||||||
|
|
||||||
sumF_i <- rowSums(eval(`F[..., i]`), dims = length(dimF))
|
|
||||||
|
|
||||||
diag_params_i <- mlm(sumF_i / n_i, betas)
|
|
||||||
params_i <- Omega + diag(as.vector(diag_params_i))
|
|
||||||
m2_i <- ising_m2(params_i)
|
|
||||||
|
|
||||||
# accumulate loss
|
|
||||||
matX_i <- mat(eval(`X[..., i]`), modes)
|
|
||||||
loss <- loss - (
|
|
||||||
sum(matX_i * (params_i %*% matX_i)) + n_i * log(attr(m2_i, "prob_0"))
|
|
||||||
)
|
|
||||||
|
|
||||||
R2_i <- tcrossprod(matX_i) - n_i * m2_i
|
|
||||||
R1_i <- diag(R2_i)
|
|
||||||
dim(R1_i) <- dimX
|
|
||||||
|
|
||||||
for (j in modes) {
|
|
||||||
grad_betas[[j]] <- grad_betas[[j]] +
|
|
||||||
mcrossprod(R1_i, mlm(sumF_i, betas[-j], modes[-j]), j)
|
|
||||||
}
|
|
||||||
R2 <- R2 + as.vector(R2_i)
|
|
||||||
}
|
|
||||||
|
|
||||||
grad_Omegas <- Map(function(j) {
|
|
||||||
grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
|
|
||||||
dim(grad) <- dim(Omegas[[j]])
|
|
||||||
grad
|
|
||||||
}, modes)
|
|
||||||
|
|
||||||
|
|
||||||
# update optimization behavioral trackers
|
|
||||||
accum_sign <- sign(last_loss - loss) - accum_sign
|
|
||||||
non_improving <- max(0L, non_improving - 1L + 2L * (last_loss < loss))
|
|
||||||
|
|
||||||
# check break conditions
|
|
||||||
if (abs(accum_sign) > zig.zag.threashold) { break }
|
|
||||||
if (non_improving > patience) { break }
|
|
||||||
if (abs(last_loss - loss) < eps * last_loss) { break }
|
|
||||||
|
|
||||||
# store current loss for the next iteration
|
|
||||||
last_loss <- loss
|
|
||||||
|
|
||||||
# Accumulate root mean squared gradiends
|
|
||||||
grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
|
|
||||||
grad2_betas, grad_betas)
|
|
||||||
grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
|
|
||||||
grad2_Omegas, grad_Omegas)
|
|
||||||
|
|
||||||
# logging (before parameter update)
|
|
||||||
logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)
|
|
||||||
|
|
||||||
# Update Parameters
|
|
||||||
betas <- Map(function(beta, grad, m2) {
|
|
||||||
beta + (step.size / (sqrt(m2) + eps)) * grad
|
|
||||||
}, betas, grad_betas, grad2_betas)
|
|
||||||
Omegas <- Map(function(Omega, grad, m2) {
|
|
||||||
Omega + (step.size / (sqrt(m2) + eps)) * grad
|
|
||||||
}, Omegas, grad_Omegas, grad2_Omegas)
|
|
||||||
|
|
||||||
# Enforce degeneracy parameter constraints
|
|
||||||
if (any(degen.mask)) {
|
|
||||||
# Extract parameters corresponding to degenerate interactions
|
|
||||||
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
|
|
||||||
# Degeneracy Constrained Parameters (sign is dropped)
|
|
||||||
DCP <- mapply(function(vals, bound) {
|
|
||||||
logVals <- log(abs(vals))
|
|
||||||
err <- max(0, sum(logVals) - log(abs(bound)))
|
|
||||||
exp(logVals - (err / length(vals)))
|
|
||||||
}, split(degen.params, col(degen.params)), degen.bounds)
|
|
||||||
# Update values in Omegas such that all degeneracy constraints hold
|
|
||||||
Omegas <- Map(function(Omega, cp, ind) {
|
|
||||||
# Combine multiple constraints for every element into single
|
|
||||||
# constraint value per element
|
|
||||||
cp <- mapply(min, split(abs(cp), ind))
|
|
||||||
ind <- as.integer(names(cp))
|
|
||||||
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
|
|
||||||
}, Omegas, split(DCP, row(DCP)), degen.ind)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
structure(
|
|
||||||
list(eta1 = array(0, dimX), betas = betas, Omegas = Omegas),
|
|
||||||
tensor_normal = fit_normal,
|
|
||||||
Omegas.init = Omegas.init,
|
|
||||||
degen.mask = degen.mask
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
### Development Interactive Block (Delete / Make sim / TODO: ...) ###
|
|
||||||
################################################################################
|
|
||||||
if (FALSE) { # interactive()
|
|
||||||
|
|
||||||
par(bg = "#1d1d1d",
|
|
||||||
fg = "lightgray",
|
|
||||||
col = "#d5d5d5",
|
|
||||||
col.axis = "#d5d5d5",
|
|
||||||
col.lab = "#d5d5d5",
|
|
||||||
col.main = "#d5d5d5",
|
|
||||||
col.sub = "#d5d5d5", # col.sub = "#2467d0"
|
|
||||||
pch = 16
|
|
||||||
)
|
|
||||||
cex <- 1.25
|
|
||||||
col <- colorRampPalette(c("#f15050", "#1d1d1d", "#567DCA"))(256)
|
|
||||||
|
|
||||||
|
|
||||||
.logger <- function() {
|
|
||||||
iter <- 0L
|
|
||||||
assign("log", data.frame(
|
|
||||||
iter = rep(NA_integer_, 100000),
|
|
||||||
loss = rep(NA_real_, 100000),
|
|
||||||
dist.B = rep(NA_real_, 100000),
|
|
||||||
dist.Omega = rep(NA_real_, 100000),
|
|
||||||
norm.grad.B = rep(NA_real_, 100000),
|
|
||||||
norm.grad.Omega = rep(NA_real_, 100000)
|
|
||||||
), envir = .GlobalEnv)
|
|
||||||
assign("B.gmlm", NULL, .GlobalEnv)
|
|
||||||
assign("Omega.gmlm", NULL, .GlobalEnv)
|
|
||||||
|
|
||||||
function(it, loss, betas, Omegas, grad_betas, grad_Omegas) {
|
|
||||||
# Store in global namespace (allows to stop and get the results)
|
|
||||||
B.gmlm <- Reduce(kronecker, rev(betas))
|
|
||||||
assign("B.gmlm", B.gmlm, .GlobalEnv)
|
|
||||||
Omega.gmlm <- Reduce(kronecker, rev(Omegas))
|
|
||||||
assign("Omega.gmlm", Omega.gmlm, .GlobalEnv)
|
|
||||||
|
|
||||||
dist.B <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
|
||||||
dist.Omega <- norm(Omega.true - Omega.gmlm, "F")
|
|
||||||
norm.grad.B <- sqrt(sum(mapply(norm, grad_betas, "F")^2))
|
|
||||||
norm.grad.Omega <- sqrt(sum(mapply(norm, grad_Omegas, "F")^2))
|
|
||||||
|
|
||||||
log[iter <<- iter + 1L, ] <<- list(
|
|
||||||
it, loss, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega
|
|
||||||
)
|
|
||||||
cat(sprintf("\r%3d - d(B): %.3f, d(O): %.3f, |g(B)|: %.3f, |g(O)|: %.3f, loss: %.3f\033[K",
|
|
||||||
it, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega, loss))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
sample.size <- 1000
|
|
||||||
dimX <- c(2, 3) # predictor `X` dimension
|
|
||||||
dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
|
||||||
|
|
||||||
betas <- Map(diag, 1, dimX, dimF)
|
|
||||||
Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
|
|
||||||
|
|
||||||
B.true <- Reduce(kronecker, rev(betas))
|
|
||||||
Omega.true <- Reduce(kronecker, rev(Omegas))
|
|
||||||
|
|
||||||
# data sampling routine
|
|
||||||
c(X, F, y, sample.axis) %<-% (sample.data <- function(sample.size, betas, Omegas) {
|
|
||||||
dimX <- mapply(nrow, betas)
|
|
||||||
dimF <- mapply(ncol, betas)
|
|
||||||
|
|
||||||
# generate response (sample axis is last axis)
|
|
||||||
y <- runif(prod(sample.size, dimF), -2, 2)
|
|
||||||
F <- array(y, dim = c(dimF, sample.size)) # ~ U[-1, 1]
|
|
||||||
|
|
||||||
Omega <- Reduce(kronecker, rev(Omegas))
|
|
||||||
|
|
||||||
X <- apply(F, length(dim(F)), function(Fi) {
|
|
||||||
dim(Fi) <- dimF
|
|
||||||
params <- diag(as.vector(mlm(Fi, betas))) + Omega
|
|
||||||
tensorPredictors::ising_sample(1, params)
|
|
||||||
})
|
|
||||||
dim(X) <- c(dimX, sample.size)
|
|
||||||
|
|
||||||
list(X = X, F = F, y = y, sample.axis = length(dim(X)))
|
|
||||||
})(sample.size, betas, Omegas)
|
|
||||||
|
|
||||||
local({
|
|
||||||
X.proto <- array(seq_len(prod(dimX)), dimX)
|
|
||||||
interactions <- crossprod(mat(X, sample.axis))
|
|
||||||
dimnames(interactions) <- rep(list(
|
|
||||||
do.call(paste0, c("X", Map(slice.index, list(X.proto), seq_along(dimX))))
|
|
||||||
), 2)
|
|
||||||
cat("Sample Size: ", sample.size, "\n")
|
|
||||||
print.table(interactions, zero.print = ".")
|
|
||||||
})
|
|
||||||
|
|
||||||
# system.time({
|
|
||||||
# fit.gmlm <- gmlm_ising(X, y, logger = .logger())
|
|
||||||
# })
|
|
||||||
Rprof()
|
|
||||||
gmlm_ising(X, y)
|
|
||||||
Rprof(NULL)
|
|
||||||
summaryRprof()
|
|
||||||
|
|
||||||
B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas))
|
|
||||||
Omega.gmlm <- Reduce(kronecker, rev(fit.gmlm$Omegas))
|
|
||||||
|
|
||||||
B.normal <- Reduce(kronecker, rev(attr(fit.gmlm, "tensor_normal")$betas))
|
|
||||||
Omega.init <- Reduce(kronecker, rev(attr(fit.gmlm, "Omegas.init")))
|
|
||||||
degen.mask <- attr(fit.gmlm, "degen.mask")
|
|
||||||
|
|
||||||
local({
|
|
||||||
layout(matrix(c(
|
|
||||||
1, 2, 3, 3, 3,
|
|
||||||
1, 4, 5, 6, 7
|
|
||||||
), nrow = 2, byrow = TRUE), width = c(6, 3, 1, 1, 1))
|
|
||||||
|
|
||||||
with(na.omit(log), {
|
|
||||||
plot(range(iter), c(0, 1), type = "n", bty = "n",
|
|
||||||
xlab = "Iterations", ylab = "Distance")
|
|
||||||
|
|
||||||
lines(iter, dist.B, col = "red", lwd = 2)
|
|
||||||
lines(iter, dist.Omega / max(dist.Omega), col = "blue", lwd = 2)
|
|
||||||
lines(iter, (loss - min(loss)) / diff(range(loss)), col = "darkgreen", lwd = 2)
|
|
||||||
|
|
||||||
norm.grad <- sqrt(norm.grad.B^2 + norm.grad.Omega^2)
|
|
||||||
# Scale all gradient norms
|
|
||||||
norm.grad.B <- norm.grad.B / max(norm.grad)
|
|
||||||
norm.grad.Omega <- norm.grad.Omega / max(norm.grad)
|
|
||||||
norm.grad <- norm.grad / max(norm.grad)
|
|
||||||
lines(iter, norm.grad.B, lty = 2, col = "red")
|
|
||||||
lines(iter, norm.grad.Omega, lty = 2, col = "blue")
|
|
||||||
lines(iter, norm.grad, lty = 2, col = "darkgreen")
|
|
||||||
|
|
||||||
axis(4, at = c(
|
|
||||||
tail(dist.B, 1),
|
|
||||||
min(dist.B)
|
|
||||||
), labels = round(c(
|
|
||||||
tail(dist.B, 1),
|
|
||||||
min(dist.B)
|
|
||||||
), 2), col = NA, col.ticks = "red", las = 1)
|
|
||||||
axis(4, at = c(
|
|
||||||
1,
|
|
||||||
tail(dist.Omega, 1) / max(dist.Omega),
|
|
||||||
min(dist.Omega) / max(dist.Omega)
|
|
||||||
), labels = round(c(
|
|
||||||
max(dist.Omega),
|
|
||||||
tail(dist.Omega, 1),
|
|
||||||
min(dist.Omega)
|
|
||||||
), 2), col = NA, col.ticks = "blue", las = 1)
|
|
||||||
|
|
||||||
abline(h = c(tail(dist.B, 1), min(dist.B)),
|
|
||||||
lty = "dotted", col = "red")
|
|
||||||
abline(h = c(max(dist.Omega), tail(dist.Omega, 1), min(dist.Omega)) / max(dist.Omega),
|
|
||||||
lty = "dotted", col = "blue")
|
|
||||||
|
|
||||||
})
|
|
||||||
legend("topright", col = c("red", "blue", "darkgreen"), lty = 1, lwd = 2,
|
|
||||||
legend = c("dist.B", "dist.Omega", "loss"), bty = "n")
|
|
||||||
|
|
||||||
zlim <- max(abs(range(Omega.true, Omega.init, Omega.gmlm))) * c(-1, 1)
|
|
||||||
matrixImage(Omega.true, main = "true", zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
|
||||||
matrixImage(round(Omega.init, 2), main = "init (cond. prob.)", zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
|
||||||
mtext(round(norm(Omega.true - Omega.init, "F"), 3), 3)
|
|
||||||
matrixImage(round(Omega.gmlm, 2), main = "gmlm (ising)", zlim = zlim, add.values = TRUE, col = col, cex = cex,
|
|
||||||
col.values = c(par("col"), "red")[`[<-`(array(1, rep(prod(dim(X)[-sample.axis]), 2)), degen.mask, 2)])
|
|
||||||
mtext(round(norm(Omega.true - Omega.gmlm, "F"), 3), 3)
|
|
||||||
|
|
||||||
zlim <- max(abs(range(B.true, B.normal, B.gmlm))) * c(-1, 1)
|
|
||||||
matrixImage(B.true, main = "true",
|
|
||||||
zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
|
||||||
matrixImage(round(B.normal, 2), main = "init (normal)",
|
|
||||||
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
|
|
||||||
mtext(round(dist.subspace(B.true, B.normal, normalize = TRUE), 3), 3)
|
|
||||||
matrixImage(round(B.gmlm, 2), main = "gmlm (ising)",
|
|
||||||
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
|
|
||||||
mtext(round(dist.subspace(B.true, B.gmlm, normalize = TRUE), 3), 3)
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,134 +0,0 @@
|
||||||
#' Specialized version of GMLM for the tensor normal model
|
|
||||||
#'
|
|
||||||
#' The underlying algorithm is an ``iterative (block) coordinate descent'' method
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
|
||||||
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
|
|
||||||
cond.threshold = 25, eps = 1e-6
|
|
||||||
) {
|
|
||||||
# rearrange `X`, `F` such that the last axis enumerates observations
|
|
||||||
if (!missing(sample.axis)) {
|
|
||||||
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
|
|
||||||
X <- aperm(X, axis.perm)
|
|
||||||
F <- aperm(F, axis.perm)
|
|
||||||
sample.axis <- length(dim(X))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get problem dimensions (observation dimensions)
|
|
||||||
dimX <- head(dim(X), -1)
|
|
||||||
dimF <- head(dim(F), -1)
|
|
||||||
modes <- seq_along(dimX)
|
|
||||||
|
|
||||||
# Ensure the Omega and beta projections lists are lists
|
|
||||||
if (!is.list(proj.Omegas)) {
|
|
||||||
proj.Omegas <- rep(NULL, length(modes))
|
|
||||||
}
|
|
||||||
if (!is.list(proj.betas)) {
|
|
||||||
proj.betas <- rep(NULL, length(modes))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
### Phase 1: Computing initial values
|
|
||||||
meanX <- rowMeans(X, dims = length(dimX))
|
|
||||||
meanF <- rowMeans(F, dims = length(dimF))
|
|
||||||
|
|
||||||
# center X and F
|
|
||||||
X <- X - as.vector(meanX)
|
|
||||||
F <- F - as.vector(meanF)
|
|
||||||
|
|
||||||
# initialize Omega estimates as mode-wise, unconditional covariance estimates
|
|
||||||
Sigmas <- Map(diag, dimX)
|
|
||||||
Omegas <- Map(diag, dimX)
|
|
||||||
|
|
||||||
# Per mode covariance directions
|
|
||||||
# Note: (the directions are transposed!)
|
|
||||||
dirsX <- Map(function(Sigma) {
|
|
||||||
SVD <- La.svd(Sigma, nu = 0)
|
|
||||||
sqrt(SVD$d) * SVD$vt
|
|
||||||
}, mcov(X, sample.axis, center = FALSE))
|
|
||||||
dirsF <- Map(function(Sigma) {
|
|
||||||
SVD <- La.svd(Sigma, nu = 0)
|
|
||||||
sqrt(SVD$d) * SVD$vt
|
|
||||||
}, mcov(F, sample.axis, center = FALSE))
|
|
||||||
|
|
||||||
# initialization of betas ``covariance direction mappings``
|
|
||||||
betas <- betas.init <- Map(function(dX, dF) {
|
|
||||||
s <- min(ncol(dX), nrow(dF))
|
|
||||||
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
|
|
||||||
}, dirsX, dirsF)
|
|
||||||
|
|
||||||
# Residuals
|
|
||||||
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
|
||||||
|
|
||||||
# Initial value of the log-likelihood (scaled and constants dropped)
|
|
||||||
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
|
||||||
|
|
||||||
# invoke the logger
|
|
||||||
if (is.function(logger)) do.call(logger, list(
|
|
||||||
iter = 0L, betas = betas, Omegas = Omegas,
|
|
||||||
resid = R, loss = loss
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
### Phase 2: (Block) Coordinate Descent
|
|
||||||
for (iter in seq_len(max.iter)) {
|
|
||||||
|
|
||||||
# update every beta (in random order)
|
|
||||||
for (j in sample.int(length(betas))) {
|
|
||||||
FxB_j <- mlm(F, betas[-j], modes[-j])
|
|
||||||
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
|
|
||||||
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
|
|
||||||
# Project `betas` onto their manifold
|
|
||||||
if (is.function(proj_j <- proj.betas[[j]])) {
|
|
||||||
betas[[j]] <- proj_j(betas[[j]])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Residuals
|
|
||||||
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
|
||||||
|
|
||||||
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
|
|
||||||
Sigmas <- mcov(R, sample.axis, center = FALSE)
|
|
||||||
|
|
||||||
# Computing `Omega_j`s, the j'th mode presition matrices, in conjunction
|
|
||||||
# with regularization of the j'th mode covariance estimate `Sigma_j`
|
|
||||||
for (j in seq_along(Sigmas)) {
|
|
||||||
# Compute min and max eigen values
|
|
||||||
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
|
|
||||||
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
|
|
||||||
if (min_max[2] > cond.threshold * min_max[1]) {
|
|
||||||
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
|
|
||||||
}
|
|
||||||
# Compute (unconstraint but regularized) Omega_j as covariance inverse
|
|
||||||
Omegas[[j]] <- solve(Sigmas[[j]])
|
|
||||||
# Project Omega_j to the Omega_j's manifold
|
|
||||||
if (is.function(proj_j <- proj.Omegas[[j]])) {
|
|
||||||
Omegas[[j]] <- proj_j(Omegas[[j]])
|
|
||||||
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
|
|
||||||
# Order of projecting Omega_j and then recomputing Sigma_j is importent
|
|
||||||
Sigmas[[j]] <- solve(Omegas[[j]])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# store last loss and compute new value
|
|
||||||
loss.last <- loss
|
|
||||||
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
|
||||||
|
|
||||||
# invoke the logger
|
|
||||||
if (is.function(logger)) do.call(logger, list(
|
|
||||||
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
|
|
||||||
))
|
|
||||||
|
|
||||||
# check the break consition
|
|
||||||
if (abs(loss.last - loss) < eps * loss.last) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
structure(
|
|
||||||
list(eta1 = mlm(meanX, Sigmas), betas = betas, Omegas = Omegas),
|
|
||||||
betas.init = betas.init
|
|
||||||
)
|
|
||||||
}
|
|
|
@ -1,18 +0,0 @@
|
||||||
#' @export
|
|
||||||
ising_m2 <- function(
|
|
||||||
params, use_MC = NULL, nr_samples = 10000L,
|
|
||||||
warmup = 15L, nr_threads = 1L
|
|
||||||
) {
|
|
||||||
if (missing(use_MC)) {
|
|
||||||
use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params)
|
|
||||||
}
|
|
||||||
|
|
||||||
m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
|
|
||||||
PACKAGE = "tensorPredictors"
|
|
||||||
)
|
|
||||||
|
|
||||||
M2 <- vech.pinv(m2)
|
|
||||||
attr(M2, "prob_0") <- attr(m2, "prob_0")
|
|
||||||
|
|
||||||
M2
|
|
||||||
}
|
|
|
@ -1,11 +0,0 @@
|
||||||
#' Sample from the Ising model
|
|
||||||
#'
|
|
||||||
#' @param nr number of samples
|
|
||||||
#' @param params Ising model parameters (numeric vector of size `p (p + 1) / 2`
|
|
||||||
#' for `p` dimensional random binary vectors)
|
|
||||||
#' @param warmup Monte-Carlo chain length before retreaving a sample
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
ising_sample <- function(nr, params, warmup = 15L) {
|
|
||||||
.Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors")
|
|
||||||
}
|
|
|
@ -124,30 +124,6 @@ make.gmlm.family <- function(name) {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# conditional covariance of the sufficient statistic
|
|
||||||
# Cov(t(X) | Y = y)
|
|
||||||
# Note: fy is a single observation!
|
|
||||||
cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) {
|
|
||||||
Deltas <- Map(solve, Omegas)
|
|
||||||
E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas))
|
|
||||||
|
|
||||||
# H11 = Cov(vec X | Y = y)
|
|
||||||
H11 <- Reduce(`%x%`, rev(Deltas))
|
|
||||||
|
|
||||||
# H21 = Cov(vec X %x% vec X, vec X | Y = y)
|
|
||||||
H21 <- kronecker(E1, H11) + kronecker(H11, E1)
|
|
||||||
|
|
||||||
# H22 = Cov(vec X %x% vec X | Y = y)
|
|
||||||
H22 <- local({
|
|
||||||
e1e1 <- tcrossprod(E1, E1)
|
|
||||||
h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1)
|
|
||||||
aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2))
|
|
||||||
})
|
|
||||||
|
|
||||||
# Combine into single covariance matrix
|
|
||||||
cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2)))
|
|
||||||
}
|
|
||||||
|
|
||||||
# mean conditional Fisher Information
|
# mean conditional Fisher Information
|
||||||
fisher.info <- function(Fy, eta1, alphas, Omegas) {
|
fisher.info <- function(Fy, eta1, alphas, Omegas) {
|
||||||
# retrieve dimensions
|
# retrieve dimensions
|
||||||
|
|
|
@ -9,15 +9,6 @@
|
||||||
#' or tensor of dimensions \code{dims} iff \code{inv} is true.
|
#' or tensor of dimensions \code{dims} iff \code{inv} is true.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' stopifnot(all.equal(
|
|
||||||
#' mat(1:12, 2, dims = c(2, 3, 2)),
|
|
||||||
#' matrix(c(
|
|
||||||
#' 1, 2, 7, 8,
|
|
||||||
#' 3, 4, 9, 10,
|
|
||||||
#' 5, 6, 11, 12
|
|
||||||
#' ), 3, 4, byrow = TRUE)
|
|
||||||
#' ))
|
|
||||||
#'
|
|
||||||
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
|
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
|
||||||
#' stopifnot(exprs = {
|
#' stopifnot(exprs = {
|
||||||
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
|
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
|
||||||
|
@ -31,6 +22,15 @@
|
||||||
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
|
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
|
||||||
#' })
|
#' })
|
||||||
#'
|
#'
|
||||||
|
#' stopifnot(all.equal(
|
||||||
|
#' mat(1:12, 2, dims = c(2, 3, 2)),
|
||||||
|
#' matrix(c(
|
||||||
|
#' 1, 2, 7, 8,
|
||||||
|
#' 3, 4, 9, 10,
|
||||||
|
#' 5, 6, 11, 12
|
||||||
|
#' ), 3, 4, byrow = TRUE)
|
||||||
|
#' ))
|
||||||
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
mat <- function(T, modes, dims = dim(T), inv = FALSE) {
|
mat <- function(T, modes, dims = dim(T), inv = FALSE) {
|
||||||
modes <- as.integer(modes)
|
modes <- as.integer(modes)
|
||||||
|
|
|
@ -7,8 +7,6 @@
|
||||||
#' @param sub sub-title of the plot
|
#' @param sub sub-title of the plot
|
||||||
#' @param interpolate a logical vector (or scalar) indicating whether to apply
|
#' @param interpolate a logical vector (or scalar) indicating whether to apply
|
||||||
#' linear interpolation to the image when drawing.
|
#' linear interpolation to the image when drawing.
|
||||||
#' @param new.plot Recreating the plot area (clearing the plot device). can be
|
|
||||||
#' used to update a plot but _not_ recreate it. Leads to smoother updating.
|
|
||||||
#' @param ... further arguments passed to \code{\link{rasterImage}}
|
#' @param ... further arguments passed to \code{\link{rasterImage}}
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
@ -18,16 +16,13 @@
|
||||||
#' @export
|
#' @export
|
||||||
matrixImage <- function(A, add.values = FALSE,
|
matrixImage <- function(A, add.values = FALSE,
|
||||||
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
|
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
|
||||||
axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE),
|
axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE),
|
||||||
col.values = par("col"), cex = 1,
|
digits = getOption("digits")
|
||||||
digits = getOption("digits"), new.plot = TRUE
|
|
||||||
) {
|
) {
|
||||||
# plot raster image
|
# plot raster image
|
||||||
if (new.plot) {
|
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
|
||||||
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
|
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
|
||||||
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
|
asp = asp)
|
||||||
asp = asp)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
|
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
|
||||||
if (missing(zlim)) {
|
if (missing(zlim)) {
|
||||||
|
@ -46,7 +41,7 @@ matrixImage <- function(A, add.values = FALSE,
|
||||||
# X/Y axes index (matches coordinates to matrix indices)
|
# X/Y axes index (matches coordinates to matrix indices)
|
||||||
x <- seq(1, ncol(A), by = 1)
|
x <- seq(1, ncol(A), by = 1)
|
||||||
y <- seq(1, nrow(A))
|
y <- seq(1, nrow(A))
|
||||||
if (axes && new.plot) {
|
if (axes) {
|
||||||
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
|
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
|
||||||
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
|
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
|
||||||
}
|
}
|
||||||
|
@ -57,7 +52,6 @@ matrixImage <- function(A, add.values = FALSE,
|
||||||
A[!add.values] <- NA
|
A[!add.values] <- NA
|
||||||
A[add.values] <- format(A[add.values], digits = digits)
|
A[add.values] <- format(A[add.values], digits = digits)
|
||||||
}
|
}
|
||||||
text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A,
|
text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A, adj = 0.5)
|
||||||
adj = 0.5, cex = cex, col = col.values)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,13 +12,9 @@
|
||||||
#'
|
#'
|
||||||
#' @param X multi-dimensional array
|
#' @param X multi-dimensional array
|
||||||
#' @param sample.axis observation axis index
|
#' @param sample.axis observation axis index
|
||||||
#' @param center logical, if `TRUE` (the default) compute the centered second
|
|
||||||
#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment
|
|
||||||
#' of each mode is computed. This is specifically usefull in case `X` is
|
|
||||||
#' already centered.
|
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
|
mcov <- function(X, sample.axis = 1L) {
|
||||||
# observation modes (axis indices)
|
# observation modes (axis indices)
|
||||||
modes <- seq_along(dim(X))[-sample.axis]
|
modes <- seq_along(dim(X))[-sample.axis]
|
||||||
# observation dimensions
|
# observation dimensions
|
||||||
|
@ -30,18 +26,16 @@ mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
|
||||||
if (sample.axis != r + 1L) {
|
if (sample.axis != r + 1L) {
|
||||||
X <- aperm(X, c(modes, sample.axis))
|
X <- aperm(X, c(modes, sample.axis))
|
||||||
}
|
}
|
||||||
# centering: X <- X - E[X]
|
# centering: Z = X - E[X]
|
||||||
if (center) {
|
Z <- X - c(rowMeans(X, dims = r))
|
||||||
X <- X - c(rowMeans(X, dims = r))
|
|
||||||
}
|
|
||||||
|
|
||||||
# estimes (unscaled) covariances for each mode
|
# estimes (unscaled) covariances for each mode
|
||||||
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X))
|
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z))
|
||||||
# scale by per mode "sample" size
|
# scale by per mode "sample" size
|
||||||
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
|
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
|
||||||
|
|
||||||
# estimate trace of Kronecker product of covariances
|
# estimate trace of Kronecker product of covariances
|
||||||
tr.est <- prod(p) * mean(X^2)
|
tr.est <- prod(p) * mean(Z^2)
|
||||||
|
|
||||||
# as well as the current trace of the unscaled covariances
|
# as well as the current trace of the unscaled covariances
|
||||||
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))
|
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))
|
||||||
|
|
|
@ -44,20 +44,16 @@
|
||||||
#' ))
|
#' ))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) {
|
mcrossprod <- function(A, B, mode) {
|
||||||
storage.mode(A) <- "double"
|
storage.mode(A) <- "double"
|
||||||
if (!missing(dimA)) {
|
if (is.null(dim(A))) {
|
||||||
dim(A) <- dimA
|
|
||||||
} else if (is.null(dim(A))) {
|
|
||||||
dim(A) <- length(A)
|
dim(A) <- length(A)
|
||||||
}
|
}
|
||||||
if (missing(B)) {
|
if (missing(B)) {
|
||||||
.Call("C_mcrossprod_sym", A, as.integer(mode))
|
.Call("C_mcrossprod_sym", A, as.integer(mode))
|
||||||
} else {
|
} else {
|
||||||
storage.mode(B) <- "double"
|
storage.mode(B) <- "double"
|
||||||
if (!missing(dimB)) {
|
if (is.null(dim(B))) {
|
||||||
dim(B) <- dimB
|
|
||||||
} else if (is.null(dim(B))) {
|
|
||||||
dim(B) <- length(B)
|
dim(B) <- length(B)
|
||||||
}
|
}
|
||||||
.Call("C_mcrossprod", A, B, as.integer(mode))
|
.Call("C_mcrossprod", A, B, as.integer(mode))
|
||||||
|
|
|
@ -75,63 +75,23 @@
|
||||||
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
|
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
|
||||||
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
|
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
|
||||||
#'
|
#'
|
||||||
#' # Equivalent to
|
|
||||||
#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
|
|
||||||
#' # Collect all matrices in `B`
|
|
||||||
#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs
|
|
||||||
#'
|
|
||||||
#' # replicate transposition if of length one only
|
|
||||||
#' transposed <- if (length(transposed) == 1) {
|
|
||||||
#' rep(as.logical(transposed), length(Bs))
|
|
||||||
#' } else {
|
|
||||||
#' as.logical(transposed)
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' # iteratively apply Tensor Times Matrix multiplication over modes
|
|
||||||
#' for (i in seq_along(modes)) {
|
|
||||||
#' A <- ttm(A, Bs[[i]], modes[i], transposed[i])
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' # return result tensor
|
|
||||||
#' A
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' @export
|
#' @export
|
||||||
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
|
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
|
||||||
# Collect all matrices in `B`
|
# Collect all matrices in `B`
|
||||||
Bs <- if (!is.list(Bs)) list(Bs) else Bs
|
Bs <- if (is.matrix(Bs)) list(Bs) else Bs
|
||||||
# ensure all `B`s are matrices
|
|
||||||
Bs <- Map(as.matrix, Bs)
|
|
||||||
|
|
||||||
# replicate transposition if of length one only
|
# replicate transposition if of length one only
|
||||||
transposed <- if (length(transposed) == 1) {
|
transposed <- if (length(transposed) == 1) {
|
||||||
rep(as.logical(transposed), length(Bs))
|
rep(as.logical(transposed), length(Bs))
|
||||||
} else if (length(transposed) == length(modes)) {
|
|
||||||
as.logical(transposed)
|
|
||||||
} else {
|
} else {
|
||||||
stop("Dim missmatch of param. `transposed`")
|
as.logical(transposed)
|
||||||
}
|
}
|
||||||
|
|
||||||
.Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors")
|
# iteratively apply Tensor Times Matrix multiplication over modes
|
||||||
|
for (i in seq_along(modes)) {
|
||||||
|
A <- ttm(A, Bs[[i]], modes[i], transposed[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
# return result tensor
|
||||||
|
A
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# # general usage
|
|
||||||
# dimA <- c(3, 17, 19, 2)
|
|
||||||
# dimC <- c(7, 11, 13, 5)
|
|
||||||
# A <- array(rnorm(prod(dimA)), dim = dimA)
|
|
||||||
# trans <- c(TRUE, FALSE, TRUE, FALSE)
|
|
||||||
# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA))
|
|
||||||
|
|
||||||
# C <- mlm(A, Bs, transposed = trans)
|
|
||||||
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
|
|
||||||
|
|
||||||
# microbenchmark::microbenchmark(
|
|
||||||
# mlm(A, Bs, transposed = trans),
|
|
||||||
# mlm_reference(A, Bs, transposed = trans)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# microbenchmark::microbenchmark(
|
|
||||||
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]),
|
|
||||||
# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
|
|
||||||
# )
|
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
#' Duplication Matrix
|
#' Duplication Matrix
|
||||||
#'
|
#'
|
||||||
#' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric
|
#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric
|
||||||
#' @examples
|
#' @examples
|
||||||
#' p <- 8
|
#' p <- 8
|
||||||
#' A <- matrix(rnorm(p^2), p, p)
|
#' A <- matrix(rnorm(p^2), p, p)
|
||||||
#' A <- A + t(A)
|
#' A <- A + t(A)
|
||||||
#' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A)))
|
#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A)))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
Dup <- function(p) {
|
D <- function(p) {
|
||||||
# setup `vec` and `vech` element indices (zero indexed)
|
# setup `vec` and `vech` element indices (zero indexed)
|
||||||
vec <- matrix(NA_integer_, p, p)
|
vec <- matrix(NA_integer_, p, p)
|
||||||
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
|
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
|
||||||
|
@ -27,10 +27,10 @@ Dup <- function(p) {
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' p <- 5
|
#' p <- 5
|
||||||
#' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p)))
|
#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p)))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
Dup.pinv <- function(p) {
|
D.pinv <- function(p) {
|
||||||
Dp <- D(p)
|
Dp <- D(p)
|
||||||
solve(crossprod(Dp), t(Dp))
|
solve(crossprod(Dp), t(Dp))
|
||||||
}
|
}
|
||||||
|
@ -119,7 +119,7 @@ K <- function(dim, mode) {
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' p <- 7
|
#' p <- 7
|
||||||
#' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p)))
|
#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p)))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
N <- function(p) {
|
N <- function(p) {
|
||||||
|
|
|
@ -11,34 +11,6 @@
|
||||||
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
|
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
|
||||||
#' \code{transposed} is true.
|
#' \code{transposed} is true.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
|
||||||
#' for (mode in 1:4) {
|
|
||||||
#' dimA <- sample.int(10, 4, replace = TRUE)
|
|
||||||
#' A <- array(rnorm(prod(dimA)), dim = dimA)
|
|
||||||
#' nrowB <- sample.int(10, 1)
|
|
||||||
#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB)
|
|
||||||
#'
|
|
||||||
#' C <- ttm(A, B, mode)
|
|
||||||
#'
|
|
||||||
#' dimC <- ifelse(seq_along(dims) != mode, dimA, nrowB)
|
|
||||||
#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE)
|
|
||||||
#'
|
|
||||||
#' stopifnot(all.equal(C, C.ref))
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' for (mode in 1:4) {
|
|
||||||
#' dimA <- sample.int(10, 4, replace = TRUE)
|
|
||||||
#' A <- array(rnorm(prod(dimA)), dim = dimA)
|
|
||||||
#' ncolB <- sample.int(10, 1)
|
|
||||||
#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode])
|
|
||||||
#'
|
|
||||||
#' C <- ttm(A, B, mode, transposed = TRUE)
|
|
||||||
#'
|
|
||||||
#' C.ref <- ttm(A, t(B), mode)
|
|
||||||
#'
|
|
||||||
#' stopifnot(all.equal(C, C.ref))
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' @export
|
#' @export
|
||||||
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
|
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
|
||||||
storage.mode(T) <- storage.mode(M) <- "double"
|
storage.mode(T) <- storage.mode(M) <- "double"
|
||||||
|
|
|
@ -31,52 +31,3 @@ vech.pinv <- function(a) {
|
||||||
# de-vectorized matrix
|
# de-vectorized matrix
|
||||||
matrix(a[vech.pinv.index(p)], p, p)
|
matrix(a[vech.pinv.index(p)], p, p)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# vech <- function(A) {
|
|
||||||
# stopifnot(all(nrow(A) == dim(A)))
|
|
||||||
|
|
||||||
# axis <- seq_len(nrow(A))
|
|
||||||
# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
|
|
||||||
|
|
||||||
# A[rowSums(grid - 1) < nrow(A)]
|
|
||||||
# }
|
|
||||||
|
|
||||||
# p <- 4
|
|
||||||
# X <- matrix(rnorm(p^2), p)
|
|
||||||
|
|
||||||
# vech(outer(X, X))
|
|
||||||
|
|
||||||
# vech(outer(c(X), c(X)))
|
|
||||||
|
|
||||||
# sort(unique(c(sym(outer(X, X)))))
|
|
||||||
# sort(vech(sym(outer(X, X))))
|
|
||||||
|
|
||||||
# # (I <- matrix(c(
|
|
||||||
# # 1, 1, 1,
|
|
||||||
# # 2, 1, 1,
|
|
||||||
# # 3, 1, 1,
|
|
||||||
# # 2, 2, 1,
|
|
||||||
# # 3, 2, 1,
|
|
||||||
# # 3, 3, 1,
|
|
||||||
# # 2, 1, 2,
|
|
||||||
# # 3, 1, 2,
|
|
||||||
# # 3, 2, 2,
|
|
||||||
# # 3, 1, 3
|
|
||||||
# # ), ncol = 3, byrow = TRUE))
|
|
||||||
|
|
||||||
# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1)
|
|
||||||
|
|
||||||
# # p <- 3
|
|
||||||
# # ord <- 4
|
|
||||||
# # A <- array(seq_len(p^ord), rep(p, ord))
|
|
||||||
|
|
||||||
# # axis <- seq_len(nrow(A))
|
|
||||||
# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
|
|
||||||
# # array(rowSums(grid - 1), dim(A))
|
|
||||||
|
|
||||||
# # A[rowSums(grid - 1) < nrow(A)]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i))))
|
|
||||||
|
|
|
@ -1,3 +1,2 @@
|
||||||
|
|
||||||
PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS)
|
PKG_LIBS = $(BLAS_LIBS) $(FLIBS)
|
||||||
# PKG_CFLAGS = -pg
|
|
||||||
|
|
|
@ -1,138 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_R_API_H
|
|
||||||
#define INCLUDE_GUARD_R_API_H
|
|
||||||
|
|
||||||
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
|
||||||
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
|
||||||
#define USE_FC_LEN_T
|
|
||||||
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
|
|
||||||
#define R_NO_REMAP
|
|
||||||
|
|
||||||
#include <stdint.h> // uint32_t, uint64_t, ...
|
|
||||||
|
|
||||||
#include <R.h>
|
|
||||||
#include <Rinternals.h>
|
|
||||||
#include <R_ext/BLAS.h>
|
|
||||||
#include <R_ext/Lapack.h>
|
|
||||||
|
|
||||||
#ifndef FCONE
|
|
||||||
#define FCONE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// NULL pointer (not defined memory, array, vector, ...)
|
|
||||||
#ifndef NULL
|
|
||||||
#define NULL ((void*)0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Truth values
|
|
||||||
#ifndef FALSE
|
|
||||||
#define FALSE 0
|
|
||||||
#endif
|
|
||||||
#ifndef TRUE
|
|
||||||
#define TRUE 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Convenience convertion function similar to `asInteger`, `asReal`, ...
|
|
||||||
#define NA_UNSIGNED (-((unsigned long)1))
|
|
||||||
static inline unsigned long asUnsigned(SEXP _val) {
|
|
||||||
int val = Rf_asInteger(_val);
|
|
||||||
if (val == NA_INTEGER || val < 0) {
|
|
||||||
return NA_UNSIGNED;
|
|
||||||
}
|
|
||||||
return (unsigned long)val;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Remap BLAS and LAPACK bindings (being consistent with my own interface and I
|
|
||||||
// don't like to pass scalars by reference (memory address))
|
|
||||||
/** y <- a x + y */
|
|
||||||
static inline void axpy(
|
|
||||||
const int dim,
|
|
||||||
const double a,
|
|
||||||
const double* x, const int incx,
|
|
||||||
double* y, const int incy
|
|
||||||
) {
|
|
||||||
F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Scale a 1d array `x <- a x` */
|
|
||||||
static inline void scale(
|
|
||||||
const int dim,
|
|
||||||
const double a,
|
|
||||||
double *x, const int incx
|
|
||||||
) {
|
|
||||||
F77_CALL(dscal)(&dim, &a, x, &incx);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Dot product */
|
|
||||||
static inline double dot(
|
|
||||||
const int dim,
|
|
||||||
const double* x, const int incx,
|
|
||||||
const double* y, const int incy
|
|
||||||
) {
|
|
||||||
return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** 1d array linear combination `z <- a x + b y` */
|
|
||||||
// TODO: optimize! (iff needed?, to optimize of at all?)
|
|
||||||
static inline void lincomb(
|
|
||||||
const int dim,
|
|
||||||
const double a, const double* x, const int incx,
|
|
||||||
const double b, const double* y, const int incy,
|
|
||||||
double* z, const int incz
|
|
||||||
) {
|
|
||||||
for (int i = 0; i < dim; ++i) {
|
|
||||||
z[i * incz] = a * x[i * incx] + b * y[i * incy];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
|
|
||||||
*
|
|
||||||
* With parameterized seed/state this custom PRGN can be used in a thread save!
|
|
||||||
*
|
|
||||||
* For single threaded operations the PRNG provided by `R` are prefered. But they
|
|
||||||
* are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
|
|
||||||
* application.
|
|
||||||
*
|
|
||||||
* See TODO: ...https://en.wikipedia.org/wiki/Xorshift
|
|
||||||
* SchachHoernchen
|
|
||||||
*/
|
|
||||||
static inline uint64_t rot64(uint64_t val, int shift) {
|
|
||||||
return (val << shift) | (val >> (64 - shift));
|
|
||||||
}
|
|
||||||
|
|
||||||
// (internal) PRGN state/seed type
|
|
||||||
typedef uint64_t rng_seed_t[4];
|
|
||||||
|
|
||||||
// Hookup the PRNG via its seed to R's random number generation utilities
|
|
||||||
static inline void init_seed(rng_seed_t seed) {
|
|
||||||
GetRNGstate();
|
|
||||||
for (size_t i = 0; i < 4; ++i) {
|
|
||||||
seed[i] = 0;
|
|
||||||
for (size_t j = 0; j < 64; ++j) {
|
|
||||||
seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PutRNGstate();
|
|
||||||
}
|
|
||||||
|
|
||||||
// PRNG of the Xorshift family
|
|
||||||
// The least significant 32 bits are not reliable, use most significant 32 bits
|
|
||||||
static inline uint64_t rand_u64(rng_seed_t seed) {
|
|
||||||
uint64_t e = seed[0] - rot64(seed[1], 7);
|
|
||||||
seed[0] = seed[1] ^ rot64(seed[1], 13);
|
|
||||||
seed[1] = seed[2] + rot64(seed[3], 37);
|
|
||||||
seed[2] = seed[3] + e;
|
|
||||||
seed[3] = e + seed[0];
|
|
||||||
return seed[3];
|
|
||||||
}
|
|
||||||
|
|
||||||
// With external supplied seed, every thread can have its own seed and as such
|
|
||||||
// we can use this as a thread save alternative to R's `unif_rand()`.
|
|
||||||
static inline double unif_rand_thrd(rng_seed_t seed) {
|
|
||||||
return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_R_API_H */
|
|
|
@ -1,185 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_BIT_UTILS_H
|
|
||||||
#define INCLUDE_GUARD_BIT_UTILS_H
|
|
||||||
|
|
||||||
#include <stdint.h> // uint32_t, uint64_t
|
|
||||||
|
|
||||||
#if (defined(__GNUC__) && defined(__BMI2__))
|
|
||||||
#include <x86intrin.h>
|
|
||||||
#include <bmi2intrin.h> // _pdep_u32
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
#include <intrin.h> // _BitScanReverse
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Computes the parity of a word (0 for even bit count and 1 otherwise)
|
|
||||||
*/
|
|
||||||
#ifdef __GNUC__
|
|
||||||
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
|
|
||||||
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
|
|
||||||
#else
|
|
||||||
static inline int bitParity32(uint32_t x) {
|
|
||||||
int p = (x != 0);
|
|
||||||
while (x &= x - 1) { p = !p; }
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
static inline int bitParity64(uint64_t x) {
|
|
||||||
int p = (x != 0);
|
|
||||||
while (x &= x - 1) { p = !p; }
|
|
||||||
return p;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Counts the number of set bits (`1`s in binary) in the number `x`
|
|
||||||
*/
|
|
||||||
#ifdef __GNUC__ /* POPulation COUNT */
|
|
||||||
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
|
|
||||||
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
|
|
||||||
#else
|
|
||||||
static inline int bitCount32(uint32_t x) {
|
|
||||||
int count = 0; // counts set bits
|
|
||||||
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
static inline int bitCount64(uint64_t x) {
|
|
||||||
int count = 0; // counts set bits
|
|
||||||
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the index of the LSB (least significant bit)
|
|
||||||
*
|
|
||||||
* @condition `x != 0`, for `x == 0` undefined behaviour
|
|
||||||
*/
|
|
||||||
#ifdef __GNUC__
|
|
||||||
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
|
|
||||||
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
|
|
||||||
#elif _MSC_VER
|
|
||||||
static inline int bitScanLS32(uint32_t x) {
|
|
||||||
unsigned long bsr;
|
|
||||||
_BitScanReverse(&bsr, x);
|
|
||||||
return 31 - bsr;
|
|
||||||
}
|
|
||||||
static inline int bitScanLS64(uint64_t x) {
|
|
||||||
unsigned long bsr;
|
|
||||||
_BitScanReverse64(&bsr, x);
|
|
||||||
return 63 - bsr;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline int bitScanLS32(uint32_t x) {
|
|
||||||
int ctz = 0; // result storing the Count of Trailing Zeros
|
|
||||||
bool empty; // boolean variable storing if a bit has not found (search area is empty)
|
|
||||||
|
|
||||||
// logarithmic search for LSB bit index (-1)
|
|
||||||
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
|
|
||||||
x >>= 16 * empty;
|
|
||||||
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
|
|
||||||
x >>= 8 * empty;
|
|
||||||
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
|
|
||||||
x >>= 4 * empty;
|
|
||||||
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
|
|
||||||
x >>= 2 * empty;
|
|
||||||
ctz += (empty = !(x & (uint32_t)( 1)));
|
|
||||||
|
|
||||||
return ctz;
|
|
||||||
}
|
|
||||||
static inline int bitScanLS64(uint64_t x) {
|
|
||||||
int ctz = 0;
|
|
||||||
bool empty;
|
|
||||||
|
|
||||||
// logarithmic search for LSB bit index (-1)
|
|
||||||
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
|
|
||||||
x >>= 32 * empty;
|
|
||||||
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
|
|
||||||
x >>= 16 * empty;
|
|
||||||
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
|
|
||||||
x >>= 8 * empty;
|
|
||||||
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
|
|
||||||
x >>= 4 * empty;
|
|
||||||
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
|
|
||||||
x >>= 2 * empty;
|
|
||||||
ctz += (empty = !(x & (uint64_t)( 1)));
|
|
||||||
|
|
||||||
return ctz;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parallel DEPosit (aka PDEP)
|
|
||||||
*
|
|
||||||
* Writes the `val` bits into the positions of the set bits in `mask`.
|
|
||||||
*
|
|
||||||
* Example:
|
|
||||||
* val: **** **** **** 1.1.
|
|
||||||
* mask: 1... 1... 1... 1...
|
|
||||||
* res: 1... .... 1... ....
|
|
||||||
*/
|
|
||||||
#if (defined(__GNUC__) && defined(__BMI2__))
|
|
||||||
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
|
|
||||||
return _pdep_u32(val, mask);
|
|
||||||
}
|
|
||||||
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
|
|
||||||
return _pdep_u64(val, mask);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
|
|
||||||
uint32_t res = 0;
|
|
||||||
for (uint32_t pos = 1; mask; pos <<= 1) {
|
|
||||||
if (val & pos) {
|
|
||||||
res |= mask & -mask;
|
|
||||||
}
|
|
||||||
mask &= mask - 1;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
|
|
||||||
uint64_t res = 0;
|
|
||||||
for (uint64_t pos = 1; mask; pos <<= 1) {
|
|
||||||
if (val & pos) {
|
|
||||||
res |= mask & -mask;
|
|
||||||
}
|
|
||||||
mask &= mask - 1;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the next lexicographically ordered permutation of an n-bit word.
|
|
||||||
*
|
|
||||||
* Let `val` be a bit-word with `n` bits set, then this procedire computes a
|
|
||||||
* `n` bit word wich is the next element in the lexicographically ordered
|
|
||||||
* sequence of `n` bit words. For example
|
|
||||||
*
|
|
||||||
* val -> bitNextPerm(val)
|
|
||||||
* 00010011 -> 00010101
|
|
||||||
* 00010101 -> 00010110
|
|
||||||
* 00010110 -> 00011001
|
|
||||||
* 00011001 -> 00011010
|
|
||||||
* 00011010 -> 00011100
|
|
||||||
* 00011100 -> 00100011
|
|
||||||
*
|
|
||||||
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
|
|
||||||
*
|
|
||||||
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
|
|
||||||
*/
|
|
||||||
static inline uint32_t bitNextPerm32(uint32_t val) {
|
|
||||||
// Sets all least significant 0-bits of val to 1
|
|
||||||
uint32_t t = val | (val - 1);
|
|
||||||
// Next set to 1 the most significant bit to change,
|
|
||||||
// set to 0 the least significant ones, and add the necessary 1 bits.
|
|
||||||
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
|
|
||||||
}
|
|
||||||
static inline uint64_t bitNextPerm64(uint64_t val) {
|
|
||||||
// Sets all least significant 0-bits of val to 1
|
|
||||||
uint64_t t = val | (val - 1);
|
|
||||||
// Next set to 1 the most significant bit to change,
|
|
||||||
// set to 0 the least significant ones, and add the necessary 1 bits.
|
|
||||||
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* BIT_UTILS_INCLUDE_GUARD_H */
|
|
|
@ -7,96 +7,27 @@
|
||||||
// );
|
// );
|
||||||
|
|
||||||
/* Tensor Times Matrix a.k.a. Mode Product */
|
/* Tensor Times Matrix a.k.a. Mode Product */
|
||||||
extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
|
extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
|
||||||
|
|
||||||
/* Multi Linear Multiplication (iterated mode products) */
|
|
||||||
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops);
|
|
||||||
|
|
||||||
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
|
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
|
||||||
extern SEXP mtvk(SEXP A, SEXP Bs);
|
extern SEXP mtvk(SEXP A, SEXP Bs);
|
||||||
|
|
||||||
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
|
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
|
||||||
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode);
|
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode);
|
||||||
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
|
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
|
||||||
extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode);
|
extern SEXP mcrossprod_sym(SEXP A, SEXP mode);
|
||||||
|
|
||||||
// /* Higher Order PCA */
|
|
||||||
// extern SEXP hopca(SEXP X);
|
|
||||||
|
|
||||||
/* Singular Value Decomposition */
|
|
||||||
extern SEXP R_svd(SEXP A);
|
|
||||||
|
|
||||||
// /* Iterative Cyclic Updating for the Tensor Normal Distribution */
|
|
||||||
// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter);
|
|
||||||
|
|
||||||
// /* Generalized tensor normal using NAGD */
|
|
||||||
// extern SEXP R_gmlm_tensor_normal(
|
|
||||||
// SEXP X, SEXP Fy,
|
|
||||||
// SEXP eta_bar, SEXP alphas, SEXP Omegas,
|
|
||||||
// SEXP max_iter, SEXP max_line_iter
|
|
||||||
// );
|
|
||||||
|
|
||||||
/* Solve linear equation system A X = B */
|
|
||||||
extern SEXP R_solve(SEXP A, SEXP B);
|
|
||||||
|
|
||||||
/* Determinant of a matrix */
|
|
||||||
extern SEXP R_det(SEXP A);
|
|
||||||
|
|
||||||
// /* Unscaled PMF of the Ising model with natural parameters `_params` */
|
|
||||||
// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params);
|
|
||||||
|
|
||||||
// /* Exact computation of the partition function of the Ising model with natural
|
|
||||||
// parameters `_params` */
|
|
||||||
// extern SEXP R_ising_partition_func_exact(SEXP _params);
|
|
||||||
|
|
||||||
// /* Estimated partition function of the Ising model with natural parameters `_params` */
|
|
||||||
// extern SEXP R_ising_partition_func_MC(SEXP _params);
|
|
||||||
|
|
||||||
// /* Exact computation of the partition function of the Ising model with natural
|
|
||||||
// parameters `_params` */
|
|
||||||
// extern SEXP R_ising_m2_exact(SEXP _params);
|
|
||||||
|
|
||||||
// // extern SEXP R_ising_m2_MC64(SEXP _params);
|
|
||||||
// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup);
|
|
||||||
// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads);
|
|
||||||
|
|
||||||
/* Sample from the Ising model */
|
|
||||||
extern SEXP R_ising_sample(SEXP, SEXP, SEXP);
|
|
||||||
|
|
||||||
// Interface to the Ising second moment function
|
|
||||||
extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP);
|
|
||||||
|
|
||||||
/* List of registered routines (a.k.a. C entry points) */
|
/* List of registered routines (a.k.a. C entry points) */
|
||||||
static const R_CallMethodDef CallEntries[] = {
|
static const R_CallMethodDef CallEntries[] = {
|
||||||
{"C_ttm", (DL_FUNC) &R_ttm, 4},
|
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
|
||||||
{"C_mlm", (DL_FUNC) &R_mlm, 4},
|
{"C_ttm", (DL_FUNC) &ttm, 4},
|
||||||
{"C_mtvk", (DL_FUNC) &mtvk, 2},
|
{"C_mtvk", (DL_FUNC) &mtvk, 2},
|
||||||
{"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3},
|
{"C_mcrossprod", (DL_FUNC) &mcrossprod, 3},
|
||||||
{"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2},
|
{"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2},
|
||||||
{"C_svd", (DL_FUNC) &R_svd, 1},
|
|
||||||
{"C_solve", (DL_FUNC) &R_solve, 2},
|
|
||||||
{"C_det", (DL_FUNC) &R_det, 1},
|
|
||||||
{"C_ising_sample", (DL_FUNC) &R_ising_sample, 3},
|
|
||||||
{"C_ising_m2", (DL_FUNC) &R_ising_m2, 5},
|
|
||||||
// {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2},
|
|
||||||
// {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1},
|
|
||||||
// {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1},
|
|
||||||
// {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1},
|
|
||||||
// {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3},
|
|
||||||
// {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2},
|
|
||||||
// {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1},
|
|
||||||
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
|
|
||||||
// {"C_hopca", (DL_FUNC) &hopca, 1},
|
|
||||||
// {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT
|
|
||||||
// {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT
|
|
||||||
{NULL, NULL, 0}
|
{NULL, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/* Restrict C entry points to registered routines. */
|
||||||
* Restrict C entry points to registered routines.
|
|
||||||
*
|
|
||||||
* NOTE: Naming convention: `R_init_<PACKAGE-NAME>`
|
|
||||||
*/
|
|
||||||
void R_init_tensorPredictors(DllInfo *dll) {
|
void R_init_tensorPredictors(DllInfo *dll) {
|
||||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||||
R_useDynamicSymbols(dll, FALSE);
|
R_useDynamicSymbols(dll, FALSE);
|
||||||
|
|
|
@ -1,77 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_INT_UTILS_H
|
|
||||||
#define INCLUDE_GUARD_INT_UTILS_H
|
|
||||||
|
|
||||||
#include <stdint.h> // uint32_t, uint64_t
|
|
||||||
|
|
||||||
|
|
||||||
// /**
|
|
||||||
// * Integer logarithm, the biggest power `p` such that `2^p <= x`.
|
|
||||||
// */
|
|
||||||
// static int ilog2(uint64_t x) {
|
|
||||||
// int log = 0;
|
|
||||||
// while (x >>= 1) {
|
|
||||||
// log++;
|
|
||||||
// }
|
|
||||||
// return log;
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Integer Square root of `y`, that is `ceiling(sqrt(y))`
|
|
||||||
*/
|
|
||||||
static uint64_t isqrt(uint64_t x) {
|
|
||||||
// implements a binary search
|
|
||||||
uint64_t root = 0;
|
|
||||||
uint64_t left = 0; // left boundary
|
|
||||||
uint64_t right = x + 1; // right boundary
|
|
||||||
|
|
||||||
while(left + 1UL < right) {
|
|
||||||
root = (left + right) / 2UL;
|
|
||||||
if (root * root <= x) {
|
|
||||||
left = root;
|
|
||||||
} else {
|
|
||||||
right = root;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return left;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Inverse to the triangular numbers
|
|
||||||
*
|
|
||||||
* Given a positive number `x = p (p + 1) / 2` it computes `p` if possible.
|
|
||||||
* In case there is no positive integer solution `0` is returned.
|
|
||||||
*
|
|
||||||
* Note: this follows immediately from the quadratic equation.
|
|
||||||
*/
|
|
||||||
static uint32_t invTriag(uint32_t x) {
|
|
||||||
uint64_t root = isqrt(8UL * (uint64_t)x + 1UL);
|
|
||||||
if (root * root != 8UL * x + 1UL) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return (root - 1) / 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// /**
|
|
||||||
// * Number of sub-sets (including empty set) of max-size
|
|
||||||
// *
|
|
||||||
// * It computes, with `binom` beeing the binomial coefficient, the following sum
|
|
||||||
// *
|
|
||||||
// * sum_{i = 0}^k binom(n, i)
|
|
||||||
// */
|
|
||||||
// static uint64_t nrSubSets(uint64_t n, uint64_t k) {
|
|
||||||
// uint64_t sum = 1, binom = 1;
|
|
||||||
|
|
||||||
// for (uint64_t i = 1; i <= k; ++i) {
|
|
||||||
// binom *= n--;
|
|
||||||
// binom /= i;
|
|
||||||
// sum += binom;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// return sum;
|
|
||||||
// }
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_INT_UTILS_H */
|
|
|
@ -1,181 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_ISING_MCMC_H
|
|
||||||
#define INCLUDE_GUARD_ISING_MCMC_H
|
|
||||||
|
|
||||||
#include "R_api.h"
|
|
||||||
|
|
||||||
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
|
|
||||||
* method given the Ising model parameters in compact half-vectorized form.
|
|
||||||
*
|
|
||||||
* f(x) = p0(params) exp(vech(x x')' params)
|
|
||||||
*
|
|
||||||
* @important requires to be wrapped between `GetRNGstate()` and `PutRNGState()`
|
|
||||||
* calls. For example
|
|
||||||
*
|
|
||||||
* GetRNGstate()
|
|
||||||
* for (size_t sample = 0; sample < nr_samples; ++sample) {
|
|
||||||
* ising_mcmc_vech(warmup, dim, dim, params, &X[sample * dim]);
|
|
||||||
* }
|
|
||||||
* PutRNGState()
|
|
||||||
*/
|
|
||||||
static inline void ising_mcmc_vech(
|
|
||||||
const size_t warmup,
|
|
||||||
const size_t dim, const double* params,
|
|
||||||
int* X
|
|
||||||
) {
|
|
||||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
|
||||||
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
|
|
||||||
double invProb = 1.0 + exp(-params[I]);
|
|
||||||
X[i] = unif_rand() * invProb < 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip the first samples of the Markov-Chain (warmup)
|
|
||||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
|
||||||
// For every component
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
|
||||||
double log_odds = 0.0;
|
|
||||||
// Tracks position in half-vectorized storage
|
|
||||||
size_t J = i;
|
|
||||||
// Sum all log-odds for `j < i` (two way interactions)
|
|
||||||
for (size_t j = 0; j < i; J += dim - ++j) {
|
|
||||||
log_odds += X[j] ? params[J] : 0.0;
|
|
||||||
}
|
|
||||||
// Allways add log-odds for `i = j` (self-interaction)
|
|
||||||
log_odds += params[J];
|
|
||||||
// Continue adding log-odds for `j > i` (two way interactions)
|
|
||||||
J++;
|
|
||||||
for (size_t j = i + 1; j < dim; ++j, ++J) {
|
|
||||||
log_odds += X[j] ? params[J] : 0.0;
|
|
||||||
}
|
|
||||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
|
||||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Thread save version of `ising_mcmc` (using thread save PRNG)
|
|
||||||
static inline void ising_mcmc_vech_thrd(
|
|
||||||
const size_t warmup,
|
|
||||||
const size_t dim, const double* params,
|
|
||||||
int* X, rng_seed_t seed
|
|
||||||
) {
|
|
||||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
|
||||||
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
|
|
||||||
double invProb = 1.0 + exp(-params[I]);
|
|
||||||
X[i] = unif_rand_thrd(seed) * invProb < 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip the first samples of the Markov-Chain (warmup)
|
|
||||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
|
||||||
// For every component
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
|
||||||
double log_odds = 0.0;
|
|
||||||
// Tracks position in half-vectorized storage
|
|
||||||
size_t J = i;
|
|
||||||
// Sum all log-odds for `j < i` (two way interactions)
|
|
||||||
for (size_t j = 0; j < i; J += dim - ++j) {
|
|
||||||
log_odds += X[j] ? params[J] : 0.0;
|
|
||||||
}
|
|
||||||
// Allways add log-odds for `i = j` (self-interaction)
|
|
||||||
log_odds += params[J];
|
|
||||||
// Continue adding log-odds for `j > i` (two way interactions)
|
|
||||||
J++;
|
|
||||||
for (size_t j = i + 1; j < dim; ++j, ++J) {
|
|
||||||
log_odds += X[j] ? params[J] : 0.0;
|
|
||||||
}
|
|
||||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
|
||||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
|
|
||||||
* method given the Ising model parameters in symmetric matrix form `A`.
|
|
||||||
*
|
|
||||||
* f(x) = prob_0(A) exp(x' A x)
|
|
||||||
*
|
|
||||||
* The relation to the natural parameters `params` as used in the half-vectorized
|
|
||||||
* version `ising_mcmc_vech()`
|
|
||||||
*
|
|
||||||
* f(x) = prob_0(params) exp(vech(x x')' params)
|
|
||||||
*
|
|
||||||
* is illustrated via an example with `dim = 5`;
|
|
||||||
*
|
|
||||||
* p0 [ p0 p1/2 p2/2 p3/2 p4/2 ]
|
|
||||||
* p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ]
|
|
||||||
* params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ]
|
|
||||||
* p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ]
|
|
||||||
* p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ]
|
|
||||||
*
|
|
||||||
* or the other way arroung given the symmetric matrix form `A` converted to
|
|
||||||
* the natural parameters
|
|
||||||
*
|
|
||||||
* [ a00 a10 a20 a30 a40 ] a00
|
|
||||||
* [ a10 a11 a21 a31 a41 ] 2*a10 a11
|
|
||||||
* A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22
|
|
||||||
* [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33
|
|
||||||
* [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
static inline void ising_mcmc_mat(
|
|
||||||
const size_t warmup,
|
|
||||||
const size_t dim, const size_t ldA, const double* A,
|
|
||||||
int* X
|
|
||||||
) {
|
|
||||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip the first samples of the Markov-Chain (warmup)
|
|
||||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
|
||||||
// For every component
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
|
||||||
double log_odds = 0.0;
|
|
||||||
for (size_t j = 0; j < i; ++j) {
|
|
||||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
|
||||||
}
|
|
||||||
log_odds += A[i * (ldA + 1)];
|
|
||||||
for (size_t j = i + 1; j < dim; ++j) {
|
|
||||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
|
||||||
}
|
|
||||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
|
||||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// thread save version of `ising_mcmc_mat()` (using thread save PRNG)
|
|
||||||
static inline void ising_mcmc_mat_thrd(
|
|
||||||
const size_t warmup,
|
|
||||||
const size_t dim, const size_t ldA, const double* A,
|
|
||||||
int* X, rng_seed_t seed
|
|
||||||
) {
|
|
||||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip the first samples of the Markov-Chain (warmup)
|
|
||||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
|
||||||
// For every component
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
|
||||||
double log_odds = 0.0;
|
|
||||||
for (size_t j = 0; j < i; ++j) {
|
|
||||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
|
||||||
}
|
|
||||||
log_odds += A[i * (ldA + 1)];
|
|
||||||
for (size_t j = i + 1; j < dim; ++j) {
|
|
||||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
|
||||||
}
|
|
||||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
|
||||||
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_ISING_MCMC_H */
|
|
|
@ -1,401 +0,0 @@
|
||||||
/** Methods computing or estimating the second moment of the Ising model given
|
|
||||||
* the natural parameters of the p.m.f. in exponential family form. That is
|
|
||||||
* f(x) = p0(params) exp(vech(x x')' params)
|
|
||||||
* where `params` are the natural parameters. The scaling constant `p0(params)`
|
|
||||||
* is also the zero event `x = (0, ..., 0)` probability.
|
|
||||||
*
|
|
||||||
* Three different version are provided.
|
|
||||||
* - exact: Exact method computing the true second moment `E[X X']` given the
|
|
||||||
* parameters `params`. This method has a runtime of `O(2^dim)` where
|
|
||||||
* `dim` is the dimension of the random variable `X`. This means it is
|
|
||||||
* infeasible for bigger (like 25 and above) values if `dim`
|
|
||||||
* - MC: Monte-Carlo method used in case the exact method is not feasible.
|
|
||||||
* - MC Thrd: Multi-Threaded Monte-Carlo method.
|
|
||||||
*
|
|
||||||
* The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
|
|
||||||
* dimensional numeric vector containing the log-odds. The indexing schema into
|
|
||||||
* the parameters corresponding to the log-odds of single or two way interactions
|
|
||||||
* with indicex `i, j` are according to the half-vectorization schema
|
|
||||||
*
|
|
||||||
* i = 4'th row => Symmetry => Half-Vectorized Storage
|
|
||||||
*
|
|
||||||
* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]
|
|
||||||
* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]
|
|
||||||
* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]
|
|
||||||
* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]
|
|
||||||
* [ * * * * * * ] [ * * * 16 * * ] [ * * ]
|
|
||||||
* [ * * * * * * ] [ * * * 17 * * ] [ * ]
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#include "R_api.h"
|
|
||||||
#include "bit_utils.h"
|
|
||||||
#include "int_utils.h"
|
|
||||||
#include "ising_MCMC.h"
|
|
||||||
|
|
||||||
#ifndef __STDC_NO_THREADS__
|
|
||||||
#include <threads.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// TODO: read //
|
|
||||||
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
|
|
||||||
// and
|
|
||||||
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
|
|
||||||
// as well as do the following PROPERLLY
|
|
||||||
// TODO: make specialized version using the parameters in given sym mat form //
|
|
||||||
// similar to `ising_sample()` //
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
|
|
||||||
// void ising_m2_exact_mat(
|
|
||||||
// const size_t dim, const size_t ldA, const double* A,
|
|
||||||
// double* M2
|
|
||||||
// ) {
|
|
||||||
// double sum_0 = 1.0;
|
|
||||||
|
|
||||||
// const uint32_t max_event = (uint32_t)1 << dim;
|
|
||||||
|
|
||||||
// (void)memset(M2, 0, dim * dim * sizeof(double));
|
|
||||||
|
|
||||||
// for (uint32_t X = 1; X < max_event; ++X) {
|
|
||||||
// // Evaluate quadratic form `X' A X` using symmetry of `A`
|
|
||||||
// double XtAX = 0.0;
|
|
||||||
// for (uint32_t Y = X; Y; Y &= Y - 1) {
|
|
||||||
// const int i = bitScanLS32(Y);
|
|
||||||
// XtAX += A[i * (ldA + 1)];
|
|
||||||
// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
|
|
||||||
// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// const double prob_X = exp(XtAX);
|
|
||||||
// sum_0 += prob_X;
|
|
||||||
|
|
||||||
// for (uint32_t Y = X; Y; Y &= Y - 1) {
|
|
||||||
// const int i = bitScanLS32(Y);
|
|
||||||
// for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
|
||||||
// M2[i * dim + bitScanLS32(Z)] += prob_X;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// const double prob_0 = 1.0 / sum_0;
|
|
||||||
// for (size_t j = 0; j < dim; ++j) {
|
|
||||||
// for (size_t i = 0; i < j; ++i) {
|
|
||||||
// M2[j * dim + i] = M2[i * dim + j];
|
|
||||||
// }
|
|
||||||
// for (size_t i = j; i < dim; ++i) {
|
|
||||||
// M2[i * dim + j] *= prob_0;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
double ising_m2_exact(const size_t dim, const double* params, double* M2) {
|
|
||||||
// Accumulator of sum of all (unscaled) probabilities
|
|
||||||
double sum_0 = 1.0;
|
|
||||||
|
|
||||||
// max event (actually upper bound)
|
|
||||||
const uint32_t max_event = (uint32_t)1 << dim;
|
|
||||||
|
|
||||||
// Length of parameters
|
|
||||||
const size_t len = dim * (dim + 1) / 2;
|
|
||||||
|
|
||||||
// Initialize M2 to zero
|
|
||||||
(void)memset(M2, 0, len * sizeof(double));
|
|
||||||
|
|
||||||
// Iterate all `2^dim` binary vectors `X`
|
|
||||||
for (uint32_t X = 1; X < max_event; ++X) {
|
|
||||||
// Dot product `<vech(X X'), params>`
|
|
||||||
double dot_X = 0.0;
|
|
||||||
// all bits in `X`
|
|
||||||
for (uint32_t Y = X; Y; Y &= Y - 1) {
|
|
||||||
const int i = bitScanLS32(Y);
|
|
||||||
const int I = (i * (2 * dim - 1 - i)) / 2;
|
|
||||||
// add single and two way interaction log odds
|
|
||||||
for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
|
||||||
dot_X += params[I + bitScanLS32(Z)];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// (unscaled) probability of current event `X`
|
|
||||||
const double prob_X = exp(dot_X);
|
|
||||||
sum_0 += prob_X;
|
|
||||||
|
|
||||||
// Accumulate set bits probability for the first end second moment `E[X X']`
|
|
||||||
for (uint32_t Y = X; Y; Y &= Y - 1) {
|
|
||||||
const int i = bitScanLS32(Y);
|
|
||||||
const int I = (i * (2 * dim - 1 - i)) / 2;
|
|
||||||
for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
|
||||||
M2[I + bitScanLS32(Z)] += prob_X;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
|
|
||||||
const double prob_0 = 1.0 / sum_0;
|
|
||||||
for (size_t i = 0; i < len; ++i) {
|
|
||||||
M2[i] *= prob_0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return prob_0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Monte-Carlo method using Gibbs sampling for the second moment
|
|
||||||
double ising_m2_MC(
|
|
||||||
/* options */ const size_t nr_samples, const size_t warmup,
|
|
||||||
/* dimension */ const size_t dim,
|
|
||||||
/* vectors */ const double* params, double* M2
|
|
||||||
) {
|
|
||||||
|
|
||||||
// Length of `params` and `M2`
|
|
||||||
const size_t len = (dim * (dim + 1)) / 2;
|
|
||||||
|
|
||||||
// Dirty trick (reuse output memory for precise counting)
|
|
||||||
_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
|
|
||||||
uint64_t* counts = (uint64_t*)M2;
|
|
||||||
|
|
||||||
// Initialize counts to zero
|
|
||||||
(void)memset(counts, 0, len * sizeof(uint64_t));
|
|
||||||
|
|
||||||
// Allocate memory to store Markov-Chain value
|
|
||||||
int* X = (int*)R_alloc(dim, sizeof(int));
|
|
||||||
|
|
||||||
// Create/Update R's internal PRGN state
|
|
||||||
GetRNGstate();
|
|
||||||
|
|
||||||
// Spawn chains
|
|
||||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
|
||||||
// Draw random sample `X ~ Ising(params)`
|
|
||||||
ising_mcmc_vech(warmup, dim, params, X);
|
|
||||||
|
|
||||||
// Accumulate component counts
|
|
||||||
uint64_t* count = counts;
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
for (size_t j = i; j < dim; ++j) {
|
|
||||||
*(count++) += X[i] & X[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write R's internal PRNG state back (if needed)
|
|
||||||
PutRNGstate();
|
|
||||||
|
|
||||||
// Compute means from counts
|
|
||||||
for (size_t i = 0; i < len; ++i) {
|
|
||||||
M2[i] = (double)counts[i] / (double)(nr_samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1.0; // TODO: also compute prob_0
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// in case the compile supports standard C threads
|
|
||||||
#ifndef __STDC_NO_THREADS__
|
|
||||||
|
|
||||||
// Worker thread to calling thread communication struct
|
|
||||||
typedef struct thrd_data {
|
|
||||||
rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value
|
|
||||||
size_t nr_samples; // Nr. of samples this worker handles
|
|
||||||
size_t warmup; // Monte-Carlo Chain burne-in length
|
|
||||||
size_t dim; // Random variable dimension
|
|
||||||
const double* params; // Ising model parameters
|
|
||||||
int* X; // Working memory to store current binary sample
|
|
||||||
uint32_t* counts; // (output) count of single and two way interactions
|
|
||||||
double prob_0; // (output) zero event probability estimate
|
|
||||||
} thrd_data_t;
|
|
||||||
|
|
||||||
// Worker thread function
|
|
||||||
int thrd_worker(thrd_data_t* data) {
|
|
||||||
// Extract data as thread local variables (for convenience)
|
|
||||||
const size_t dim = data->dim;
|
|
||||||
int* X = data->X;
|
|
||||||
|
|
||||||
// Initialize counts to zero
|
|
||||||
(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));
|
|
||||||
|
|
||||||
// Spawn Monte-Carlo Chains (one foe every sample)
|
|
||||||
for (size_t sample = 0; sample < data->nr_samples; ++sample) {
|
|
||||||
// Draw random sample `X ~ Ising(params)`
|
|
||||||
ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);
|
|
||||||
|
|
||||||
// Accumulate component counts
|
|
||||||
uint32_t* count = data->counts;
|
|
||||||
for (size_t i = 0; i < dim; ++i) {
|
|
||||||
for (size_t j = i; j < dim; ++j) {
|
|
||||||
*(count++) += X[i] & X[j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
double ising_m2_MC_thrd(
|
|
||||||
/* options */ const size_t nr_samples, const size_t warmup,
|
|
||||||
const size_t nr_threads,
|
|
||||||
/* dimension */ const size_t dim,
|
|
||||||
/* vectors */ const double* params, double* M2
|
|
||||||
) {
|
|
||||||
// Length of `params` and `M2`
|
|
||||||
const size_t len = (dim * (dim + 1)) / 2;
|
|
||||||
|
|
||||||
// Allocate working and self memory for worker threads
|
|
||||||
int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
|
|
||||||
uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
|
|
||||||
thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
|
|
||||||
thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
|
|
||||||
|
|
||||||
// Provide instruction wor worker and dispatch them
|
|
||||||
for (size_t tid = 0; tid < nr_threads; ++tid) {
|
|
||||||
// Every thread needs its own PRNG seed!
|
|
||||||
init_seed(threads_data[tid].seed);
|
|
||||||
// divide work among workers (more or less) equaly with the first worker
|
|
||||||
// (tid = 0) having the additional remainder of divided work (`nr_samples`)
|
|
||||||
threads_data[tid].nr_samples = nr_samples / nr_threads + (
|
|
||||||
tid ? 0 : nr_samples % nr_threads
|
|
||||||
);
|
|
||||||
threads_data[tid].warmup = warmup;
|
|
||||||
threads_data[tid].dim = dim;
|
|
||||||
threads_data[tid].params = params;
|
|
||||||
// Every worker gets its disjint working memory
|
|
||||||
threads_data[tid].X = &Xs[dim * tid];
|
|
||||||
threads_data[tid].counts = &counts[len * tid];
|
|
||||||
// dispatch the worker
|
|
||||||
thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Join (Wait for) all worker
|
|
||||||
for (size_t tid = 0; tid < nr_threads; ++tid) {
|
|
||||||
thrd_join(threads[tid], NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accumulate worker results into first (tid = 0) worker counts result
|
|
||||||
for (size_t tid = 1; tid < nr_threads; ++tid) {
|
|
||||||
for (size_t i = 0; i < len; ++i) {
|
|
||||||
counts[i] += counts[tid * len + i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// convert discreat counts into means
|
|
||||||
for (size_t i = 0; i < len; ++i) {
|
|
||||||
M2[i] = (double)counts[i] / (double)nr_samples;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -2.0; // TODO: also compute prob_0
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* !__STDC_NO_THREADS__ */
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Implementation of `.Call` entry point */
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// TODO: make specialized version using the parameters in given sym mat form //
|
|
||||||
// similar to `ising_sample()` //
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
|
||||||
extern SEXP R_ising_m2(
|
|
||||||
SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
|
|
||||||
) {
|
|
||||||
// Extract and validate arguments
|
|
||||||
size_t protect_count = 0;
|
|
||||||
if (!Rf_isReal(_params)) {
|
|
||||||
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
|
|
||||||
protect_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Depending on parameters given in symmetric matrix form or natural
|
|
||||||
// or natrual parameters in the half-vectorized memory layout
|
|
||||||
size_t dim;
|
|
||||||
double* params;
|
|
||||||
if (Rf_isMatrix(_params)) {
|
|
||||||
if (Rf_nrows(_params) != Rf_ncols(_params)) {
|
|
||||||
Rf_error("Invalid 'params' value, exected square matrix");
|
|
||||||
}
|
|
||||||
// Convert to natural parameters
|
|
||||||
dim = Rf_nrows(_params);
|
|
||||||
params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));
|
|
||||||
|
|
||||||
double* A = REAL(_params);
|
|
||||||
for (size_t j = 0, I = 0; j < dim; ++j) {
|
|
||||||
for (size_t i = j; i < dim; ++i, ++I) {
|
|
||||||
params[I] = (1 + (i != j)) * A[i + j * dim];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Get Ising random variable dimension `dim` from "half-vectorized" parameters
|
|
||||||
// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
|
|
||||||
dim = invTriag(Rf_length(_params));
|
|
||||||
if (!dim) {
|
|
||||||
Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
|
|
||||||
" `p` is the dimension of the random variable");
|
|
||||||
}
|
|
||||||
params = REAL(_params);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determin the method to use and validate if possible
|
|
||||||
const int use_MC = Rf_asLogical(_use_MC);
|
|
||||||
if (use_MC == NA_LOGICAL) {
|
|
||||||
Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate result vector
|
|
||||||
SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
|
|
||||||
++protect_count;
|
|
||||||
|
|
||||||
// asside computed zero event probability (inverse partition function), the
|
|
||||||
// scaling factor for the Ising model p.m.f.
|
|
||||||
double prob_0 = -1.0;
|
|
||||||
|
|
||||||
if (use_MC) {
|
|
||||||
// Convert and validate arguments for the Monte-Carlo methods
|
|
||||||
const size_t nr_samples = asUnsigned(_nr_samples);
|
|
||||||
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
|
|
||||||
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
|
|
||||||
}
|
|
||||||
const size_t warmup = asUnsigned(_warmup);
|
|
||||||
if (warmup == NA_UNSIGNED) {
|
|
||||||
Rf_error("Invalid 'warmup' value, expected non-negative integer");
|
|
||||||
}
|
|
||||||
const size_t nr_threads = asUnsigned(_nr_threads);
|
|
||||||
if (nr_threads == 0 || nr_threads > 256) {
|
|
||||||
Rf_error("Invalid 'nr_thread' value");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nr_threads == 1) {
|
|
||||||
// Single threaded Monte-Carlo method
|
|
||||||
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
|
|
||||||
} else {
|
|
||||||
// Multi-Threaded Monte-Carlo method if provided, otherwise use
|
|
||||||
// the single threaded version with a warning
|
|
||||||
#ifdef __STDC_NO_THREADS__
|
|
||||||
Rf_warning("Multi-Threading NOT supported, using fallback.");
|
|
||||||
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
|
|
||||||
#else
|
|
||||||
prob_0 = ising_m2_MC_thrd(
|
|
||||||
nr_samples, warmup, nr_threads,
|
|
||||||
dim, params, REAL(_M2)
|
|
||||||
);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Exact method (ignore other arguments), only validate dimension
|
|
||||||
if (25 < dim) {
|
|
||||||
Rf_error("Dimension '%d' too big for exact method (max 24)", dim);
|
|
||||||
}
|
|
||||||
|
|
||||||
// and call the exact method
|
|
||||||
prob_0 = ising_m2_exact(dim, params, REAL(_M2));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set log-lokelihood as an attribute to the computed second moment
|
|
||||||
SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
|
|
||||||
++protect_count;
|
|
||||||
Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);
|
|
||||||
|
|
||||||
// release SEPXs to the garbage collector
|
|
||||||
UNPROTECT(protect_count);
|
|
||||||
|
|
||||||
return _M2;
|
|
||||||
}
|
|
|
@ -1,72 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_ISING_SAMPLE_H
|
|
||||||
#define INCLUDE_GUARD_ISING_SAMPLE_H
|
|
||||||
|
|
||||||
#include "R_api.h"
|
|
||||||
#include "int_utils.h"
|
|
||||||
#include "ising_MCMC.h"
|
|
||||||
|
|
||||||
|
|
||||||
// .Call interface to draw from sample from the Ising model
|
|
||||||
extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) {
|
|
||||||
// Counts number of protected SEXP's to give them back to the garbage collector
|
|
||||||
size_t protect_count = 0;
|
|
||||||
|
|
||||||
// Parse and validate arguments
|
|
||||||
const size_t nr_samples = asUnsigned(_nr_samples);
|
|
||||||
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
|
|
||||||
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
|
|
||||||
}
|
|
||||||
const size_t warmup = asUnsigned(_warmup);
|
|
||||||
if (warmup == NA_UNSIGNED) {
|
|
||||||
Rf_error("Invalid 'warmup' value, expected non-negative integer");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determin parameter mode (natural parameter vector or symmetric matrix)
|
|
||||||
// Ether `m` for "Matrix" or `v` for "Vector"
|
|
||||||
const char param_type = Rf_isMatrix(_params) ? 'm' : 'v';
|
|
||||||
|
|
||||||
// In case of matrix parameters check for square matrix
|
|
||||||
if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) {
|
|
||||||
Rf_error("Invalid 'params' value, exected square matrix");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get problem dimension from parameter size
|
|
||||||
const size_t dim = (param_type == 'm')
|
|
||||||
? Rf_nrows(_params)
|
|
||||||
: invTriag(Rf_length(_params));
|
|
||||||
if (!dim) {
|
|
||||||
Rf_error("Error determining dimension.");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure parameters are numeric
|
|
||||||
if (!Rf_isReal(_params)) {
|
|
||||||
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
|
|
||||||
++protect_count;
|
|
||||||
}
|
|
||||||
double* params = REAL(_params);
|
|
||||||
|
|
||||||
// Allocate result sample
|
|
||||||
SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples));
|
|
||||||
++protect_count;
|
|
||||||
int* X = INTEGER(_X);
|
|
||||||
|
|
||||||
// Call appropriate sampling routine for every sample to generate
|
|
||||||
GetRNGstate();
|
|
||||||
if (param_type == 'm') {
|
|
||||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
|
||||||
ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
|
||||||
ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PutRNGstate();
|
|
||||||
|
|
||||||
// Release protected SEXPs to the garbage collector
|
|
||||||
UNPROTECT(protect_count);
|
|
||||||
|
|
||||||
return _X;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */
|
|
|
@ -8,51 +8,6 @@
|
||||||
#define FCONE
|
#define FCONE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void mcrossprod(
|
|
||||||
const int rank,
|
|
||||||
const double* A, const int* dimA,
|
|
||||||
const double* B, const int* dimB,
|
|
||||||
const int mode,
|
|
||||||
double* C
|
|
||||||
) {
|
|
||||||
// the strides
|
|
||||||
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
|
||||||
// `stride[1] <- dim(A)[mode]`
|
|
||||||
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
|
||||||
// Note: Middle stride is ignored (to be consistent with sym version)
|
|
||||||
int stride[3] = {1, 0, 1};
|
|
||||||
for (int i = 0; i < rank; ++i) {
|
|
||||||
int size = dimA[i];
|
|
||||||
stride[0] *= (i < mode) ? size : 1;
|
|
||||||
stride[2] *= (i > mode) ? size : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
|
|
||||||
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
|
|
||||||
const double zero = 0.0;
|
|
||||||
const double one = 1.0;
|
|
||||||
if (mode == 0) {
|
|
||||||
// mode 1: special case C = A_(1) B_(1)^T
|
|
||||||
// C = 1 A B^T + 0 C
|
|
||||||
F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2],
|
|
||||||
&one, A, &dimA[mode], B, &dimB[mode],
|
|
||||||
&zero, C, &dimA[mode] FCONE FCONE);
|
|
||||||
} else {
|
|
||||||
// Other modes writen as accumulated sum of matrix products
|
|
||||||
// initialize C to zero
|
|
||||||
memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double));
|
|
||||||
|
|
||||||
// Sum over all modes > mode
|
|
||||||
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
|
||||||
// C = 1 A^T B + 1 C
|
|
||||||
F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0],
|
|
||||||
&one, &A[i2 * stride[0] * dimA[mode]], &stride[0],
|
|
||||||
&B[i2 * stride[0] * dimB[mode]], &stride[0],
|
|
||||||
&one, C, &dimA[mode] FCONE FCONE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tensor Mode Crossproduct
|
* Tensor Mode Crossproduct
|
||||||
*
|
*
|
||||||
|
@ -66,58 +21,72 @@ void mcrossprod(
|
||||||
* @param B multi-dimensional array
|
* @param B multi-dimensional array
|
||||||
* @param m mode index (1-indexed)
|
* @param m mode index (1-indexed)
|
||||||
*/
|
*/
|
||||||
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
|
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
|
||||||
// get zero indexed mode
|
// get zero indexed mode
|
||||||
int mode = asInteger(m) - 1;
|
int mode = asInteger(m) - 1;
|
||||||
|
|
||||||
// Check if both `A` and `B` are real-valued
|
|
||||||
if (!isReal(A) || !isReal(B)) {
|
|
||||||
error("Type missmatch, both `A` and `B` must be real-valued");
|
|
||||||
}
|
|
||||||
|
|
||||||
// get dimension attributes
|
// get dimension attributes
|
||||||
SEXP dimA_sexp = getAttrib(A, R_DimSymbol);
|
SEXP dimA = getAttrib(A, R_DimSymbol);
|
||||||
SEXP dimB_sexp = getAttrib(B, R_DimSymbol);
|
SEXP dimB = getAttrib(B, R_DimSymbol);
|
||||||
|
|
||||||
// dimension type validation
|
|
||||||
if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) {
|
|
||||||
error("Dimensions must be integer vectors");
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate dimensions
|
|
||||||
int rank = length(dimA_sexp);
|
|
||||||
if (rank != length(dimB_sexp)) {
|
|
||||||
error("Dimension mismatch");
|
|
||||||
}
|
|
||||||
// validate mode (0-indexed, must be smaller than the tensor order)
|
// validate mode (0-indexed, must be smaller than the tensor order)
|
||||||
if (mode < 0 || rank <= mode) {
|
if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) {
|
||||||
error("Illegal mode");
|
error("Illegal mode");
|
||||||
}
|
}
|
||||||
|
|
||||||
// get raw pointers to dimensions
|
// the strides
|
||||||
int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP));
|
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
||||||
int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP));
|
// `stride[1] <- dim(A)[mode]`
|
||||||
|
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
||||||
// finaly, check for `A` and `B` dimensions to match
|
// Note: Middle stride is ignored (to be consistent with sym version)
|
||||||
for (int i = 0; i < rank; ++i) {
|
int stride[3] = {1, 0, 1};
|
||||||
if (i != mode && dimA[i] != dimB[i]) {
|
for (int i = 0; i < length(dimA); ++i) {
|
||||||
error("Dimension mismatch");
|
int size = INTEGER(dimA)[i];
|
||||||
}
|
stride[0] *= (i < mode) ? size : 1;
|
||||||
|
stride[2] *= (i > mode) ? size : 1;
|
||||||
|
// check margin matching of `A` and `B`
|
||||||
|
if (i != mode && size != INTEGER(dimB)[i]) {
|
||||||
|
error("Dimension missmatch");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// Result dimensions
|
||||||
|
int nrowC = INTEGER(dimA)[mode];
|
||||||
|
int ncolC = INTEGER(dimB)[mode];
|
||||||
|
|
||||||
// create response matrix C
|
// create response matrix C
|
||||||
SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode]));
|
SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC));
|
||||||
|
|
||||||
// Call C mode crossprod subroutine
|
// raw data access pointers
|
||||||
mcrossprod(
|
double* a = REAL(A);
|
||||||
rank, // tensor rank of both `A` and `B`
|
double* b = REAL(B);
|
||||||
REAL(A), dimA, // mem. addr. of A, dim(A)
|
double* c = REAL(C);
|
||||||
REAL(B), dimB, // mem. addr. of B, dim(B)
|
|
||||||
mode, // the crossproduct mode to compute
|
|
||||||
REAL(C) // return value memory addr.
|
|
||||||
);
|
|
||||||
|
|
||||||
// release C to garbage collector
|
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
|
||||||
|
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
|
||||||
|
const double zero = 0.0;
|
||||||
|
const double one = 1.0;
|
||||||
|
if (mode == 0) {
|
||||||
|
// mode 1: special case C = A_(1) B_(1)^T
|
||||||
|
// C = 1 A B^T + 0 C
|
||||||
|
F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2],
|
||||||
|
&one, a, &nrowC, b, &ncolC,
|
||||||
|
&zero, c, &nrowC FCONE FCONE);
|
||||||
|
} else {
|
||||||
|
// Other modes writen as accumulated sum of matrix products
|
||||||
|
// initialize C to zero
|
||||||
|
memset(c, 0, nrowC * ncolC * sizeof(double));
|
||||||
|
|
||||||
|
// Sum over all modes > mode
|
||||||
|
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
||||||
|
// C = 1 A^T B + 1 C
|
||||||
|
F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0],
|
||||||
|
&one, &a[i2 * stride[0] * nrowC], &stride[0],
|
||||||
|
&b[i2 * stride[0] * ncolC], &stride[0],
|
||||||
|
&one, c, &nrowC FCONE FCONE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// release C to grabage collector
|
||||||
UNPROTECT(1);
|
UNPROTECT(1);
|
||||||
|
|
||||||
return C;
|
return C;
|
||||||
|
@ -135,7 +104,7 @@ extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
|
||||||
* @param A multi-dimensional array
|
* @param A multi-dimensional array
|
||||||
* @param m mode index (1-indexed)
|
* @param m mode index (1-indexed)
|
||||||
*/
|
*/
|
||||||
extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) {
|
extern SEXP mcrossprod_sym(SEXP A, SEXP m) {
|
||||||
// get zero indexed mode
|
// get zero indexed mode
|
||||||
int mode = asInteger(m) - 1;
|
int mode = asInteger(m) - 1;
|
||||||
|
|
||||||
|
|
|
@ -1,160 +0,0 @@
|
||||||
#include "R_api.h"
|
|
||||||
#include "ttm.h"
|
|
||||||
|
|
||||||
int mlm(
|
|
||||||
/* options */ const int* trans, const int* modes, const int nrhs,
|
|
||||||
/* dims */ const int* dimA, const int* dimC, const int ord,
|
|
||||||
/* scalars */ const double alpha,
|
|
||||||
/* tensor */ const double* A,
|
|
||||||
/* matrices */ const double** Bs, const int* ldBs,
|
|
||||||
/* scalar */ const double beta,
|
|
||||||
/* tensor */ double* C,
|
|
||||||
double* work_mem
|
|
||||||
) {
|
|
||||||
// Compute total size of `A`, `C` and required working memory
|
|
||||||
int sizeA = 1; // `prod(dim(A))`
|
|
||||||
int sizeC = 1; // `prod(dim(C))`
|
|
||||||
int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included)
|
|
||||||
for (int i = 0; i < ord; ++i) {
|
|
||||||
sizeA *= dimA[i];
|
|
||||||
sizeC *= dimC[i];
|
|
||||||
work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// In requesting working memory size stop here and return size
|
|
||||||
if (work_mem == NULL) {
|
|
||||||
return work_size + ord;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy dimensions of A to intermediate temp. dim
|
|
||||||
int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer
|
|
||||||
memcpy(dimT, dimA, ord * sizeof(int));
|
|
||||||
|
|
||||||
// Setup work memory hooks (two swapable blocks)
|
|
||||||
double* tmp1 = work_mem;
|
|
||||||
double* tmp2 = work_mem + (work_size >> 1);
|
|
||||||
|
|
||||||
// Multi Linear Multiplication is an iterated application of TTM
|
|
||||||
for (int i = 0; i < nrhs; ++i) {
|
|
||||||
// Get current `B` dimensions (only implicitly given)
|
|
||||||
int nrowB = dimC[modes[i]];
|
|
||||||
int ncolB = dimA[modes[i]];
|
|
||||||
if (trans && trans[i]) {
|
|
||||||
nrowB = dimA[modes[i]];
|
|
||||||
ncolB = dimC[modes[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tensor Times Matrix (`modes[i]` mode products)
|
|
||||||
ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB,
|
|
||||||
1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0,
|
|
||||||
tmp1);
|
|
||||||
|
|
||||||
// Update intermediate temp dim
|
|
||||||
dimT[modes[i]] = dimC[modes[i]];
|
|
||||||
|
|
||||||
// Swap tmp1 <-> tmp2
|
|
||||||
double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// dAXPY (a x + y) with `x = tmp2` and `y = beta C`
|
|
||||||
if (beta != 0.0) {
|
|
||||||
for (int i = 0; i < sizeC; ++i) {
|
|
||||||
C[i] *= beta;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
memset(C, 0, sizeC * sizeof(double));
|
|
||||||
}
|
|
||||||
axpy(sizeC, alpha, tmp2, 1, C, 1);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Multi Linear Multiplication (`R` binding)
|
|
||||||
*/
|
|
||||||
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) {
|
|
||||||
|
|
||||||
// get dimension attribute of A
|
|
||||||
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
|
|
||||||
int ord = Rf_length(dimA);
|
|
||||||
|
|
||||||
// Check if `A` is a real valued tensor
|
|
||||||
if (!Rf_isReal(A) || Rf_isNull(dimA)) {
|
|
||||||
Rf_error("Param. `A` need to be a real valued array");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate that `Bs` is a list
|
|
||||||
if (!Rf_isNewList(Bs)) {
|
|
||||||
Rf_error("Param. `Bs` need to be a list of matrices");
|
|
||||||
}
|
|
||||||
if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) {
|
|
||||||
Rf_error("Param. type missmatch, expected modes as ints and ops logical");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Number of `B` matrices (Nr of Right Hand Side objects)
|
|
||||||
int nrhs = Rf_length(Bs);
|
|
||||||
|
|
||||||
// further parameter validations
|
|
||||||
if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) {
|
|
||||||
Rf_error("Dimension missmatch (Params length differ)");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get modes and operations for Bs (transposed or not)
|
|
||||||
int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms`
|
|
||||||
int* trans = INTEGER(ops);
|
|
||||||
|
|
||||||
// Compute result dimensions `dim(C)` while extracting `B` data pointers
|
|
||||||
SEXP dimC = PROTECT(Rf_duplicate(dimA));
|
|
||||||
const double** bs = (const double**)R_alloc(nrhs, sizeof(double*));
|
|
||||||
for (int i = 0; i < nrhs; ++i) {
|
|
||||||
// Extract right hand side matrices (`B` matrices from list)
|
|
||||||
SEXP B = VECTOR_ELT(Bs, i);
|
|
||||||
if (!(Rf_isMatrix(B) && Rf_isReal(B))) {
|
|
||||||
UNPROTECT(1);
|
|
||||||
Rf_error("Param. `Bs` need to be a list of real matrices");
|
|
||||||
}
|
|
||||||
int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol));
|
|
||||||
bs[i] = REAL(B);
|
|
||||||
|
|
||||||
// Convert 1-indexed modes to be 0-indexed
|
|
||||||
modes[i] = INTEGER(ms)[i] - 1;
|
|
||||||
|
|
||||||
// Check if mode is out or range (indexing a non-existing mode of `A`)
|
|
||||||
if (!((0 <= modes[i]) && (modes[i] < ord))) {
|
|
||||||
UNPROTECT(1);
|
|
||||||
Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if `i`th mode of `A` matches corresponding `B` dimension
|
|
||||||
if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) {
|
|
||||||
UNPROTECT(1);
|
|
||||||
Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
INTEGER(dimC)[modes[i]] = dimB[!!trans[i]];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now, compute `C`s size `prod(dim(C))`
|
|
||||||
int sizeC = 1;
|
|
||||||
for (int i = 0; i < ord; ++i) {
|
|
||||||
sizeC *= INTEGER(dimC)[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate response `C`
|
|
||||||
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
|
||||||
Rf_setAttrib(C, R_DimSymbol, dimC);
|
|
||||||
|
|
||||||
// allocate working memory size
|
|
||||||
int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
|
|
||||||
1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL);
|
|
||||||
double* work_memory = (double*)R_alloc(work_size, sizeof(double));
|
|
||||||
|
|
||||||
// Compute Multi-Linear Multiplication (call subroutine)
|
|
||||||
(void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
|
|
||||||
1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory);
|
|
||||||
|
|
||||||
// release C to the hands of the garbage collector
|
|
||||||
UNPROTECT(2);
|
|
||||||
|
|
||||||
return C;
|
|
||||||
}
|
|
|
@ -1,35 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_MLM_H
|
|
||||||
#define INCLUDE_GUARD_MLM_H
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Multi Linear Multiplication
|
|
||||||
*
|
|
||||||
* C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs] op(Bs[nrhs]) + beta C
|
|
||||||
*
|
|
||||||
* @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix
|
|
||||||
* is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff
|
|
||||||
* `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is
|
|
||||||
* always the identity.
|
|
||||||
* @param modes integer vector of length `nrhs` specifying the product modes.
|
|
||||||
*
|
|
||||||
* @todo TODO: continue doc. !!!
|
|
||||||
*
|
|
||||||
* @param alpha scaling factor
|
|
||||||
* @param beta scaling factor
|
|
||||||
* @param C output memory addr.
|
|
||||||
*
|
|
||||||
* @param work_mem NULL or temporary working memory of size
|
|
||||||
* `2 * prod(pmax(dim(A), dim(C))) + ord`
|
|
||||||
*/
|
|
||||||
int mlm(
|
|
||||||
/* options */ const int* trans, const int* modes, const int nrhs,
|
|
||||||
/* dims */ const int* dimA, const int* dimC, const int ord,
|
|
||||||
/* scalars */ const double alpha,
|
|
||||||
/* tensor */ const double* A,
|
|
||||||
/* matrices */ const double** Bs, const int* ldBs,
|
|
||||||
/* scalar */ const double beta,
|
|
||||||
/* tensor */ double* C,
|
|
||||||
double* work_mem
|
|
||||||
);
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_MLM_H */
|
|
|
@ -1,37 +0,0 @@
|
||||||
// /**
|
|
||||||
// * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
|
|
||||||
// *
|
|
||||||
// * For single threaded operations the PRNG provided by `R` are prefered. But they
|
|
||||||
// * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
|
|
||||||
// * application.
|
|
||||||
// *
|
|
||||||
// * See TODO: ...https://en.wikipedia.org/wiki/Xorshift
|
|
||||||
// * SchachHoernchen
|
|
||||||
// */
|
|
||||||
|
|
||||||
// #ifndef INCLUDE_GUARD_RANDOM_H
|
|
||||||
// #define INCLUDE_GUARD_RANDOM_H
|
|
||||||
|
|
||||||
// #include <stdint.h> // uint32_t, uint64_t
|
|
||||||
|
|
||||||
|
|
||||||
// static inline uint64_t rot64(uint64_t val, int shift) {
|
|
||||||
// return (val << shift) | (val >> (64 - shift));
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // PRNG of the Xorshift family
|
|
||||||
// // @note the least significant 32 bits are not reliable, use most significant 32 bits
|
|
||||||
// static inline uint64_t rand_u64(uint64_t seed[4]) {
|
|
||||||
// uint64_t e = seed[0] - rot64(seed[1], 7);
|
|
||||||
// seed[0] = seed[1] ^ rot64(seed[1], 13);
|
|
||||||
// seed[1] = seed[2] + rot64(seed[3], 37);
|
|
||||||
// seed[2] = seed[3] + e;
|
|
||||||
// seed[3] = e + seed[0];
|
|
||||||
// return seed[3];
|
|
||||||
// }
|
|
||||||
|
|
||||||
// static inline double unif_rand_u64(uint64_t seed[4]) {
|
|
||||||
// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #endif
|
|
|
@ -1,112 +0,0 @@
|
||||||
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
|
||||||
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
|
||||||
#define USE_FC_LEN_T
|
|
||||||
#include <R.h>
|
|
||||||
#include <Rinternals.h>
|
|
||||||
#include <R_ext/BLAS.h>
|
|
||||||
#include <R_ext/Lapack.h>
|
|
||||||
#ifndef FCONE
|
|
||||||
#define FCONE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Singular Value Decomposition
|
|
||||||
// @note assumes passed arguments to be "clean", "properly checked"
|
|
||||||
int svd(
|
|
||||||
const int p, const int q,
|
|
||||||
double* A, const int ldA,
|
|
||||||
double* d,
|
|
||||||
double* U, const int ldU,
|
|
||||||
double* Vt, const int ldVt,
|
|
||||||
double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0
|
|
||||||
) {
|
|
||||||
// LAPACK information return variable (in case of an error, LAPACK sets this
|
|
||||||
// to a non-zero value)
|
|
||||||
int info = 0;
|
|
||||||
|
|
||||||
// check if work memory is supplied, if _not_ query for appropriate size and
|
|
||||||
// allocate the memory
|
|
||||||
if (!work_mem || work_size < 1 || !i_work) {
|
|
||||||
// create (declare) work memory
|
|
||||||
i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int));
|
|
||||||
double tmp; // variable to store work memory size query result
|
|
||||||
work_mem = &tmp; // point to `tmp` as it will contain the result of
|
|
||||||
// the work memory size query
|
|
||||||
|
|
||||||
// request appropriate work memory size
|
|
||||||
F77_CALL(dgesdd)(
|
|
||||||
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
|
|
||||||
work_mem, &work_size, i_work, &info
|
|
||||||
FCONE);
|
|
||||||
|
|
||||||
// allocate work memory
|
|
||||||
work_size = (int)tmp; // "read" work memory size query result
|
|
||||||
work_mem = (double*)R_alloc(work_size, sizeof(double));
|
|
||||||
|
|
||||||
// in case of an error, return error code stored in `info`
|
|
||||||
if (info) { return info; }
|
|
||||||
}
|
|
||||||
|
|
||||||
// actual SVD computation
|
|
||||||
F77_CALL(dgesdd)(
|
|
||||||
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
|
|
||||||
work_mem, &work_size, i_work, &info
|
|
||||||
FCONE);
|
|
||||||
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Singular Valued Decomposition (R binding)
|
|
||||||
// @note educational purpose, same as `La.svd`
|
|
||||||
extern SEXP R_svd(SEXP A) {
|
|
||||||
// Check if we got a real-valued matrix
|
|
||||||
if (!isMatrix(A) || !isReal(A)) {
|
|
||||||
error("Require a real-valued matrix");
|
|
||||||
}
|
|
||||||
|
|
||||||
// extract matrix dimensions
|
|
||||||
int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP));
|
|
||||||
int p = dims[0]; // = nrow(A)
|
|
||||||
int q = dims[1]; // = ncol(A)
|
|
||||||
int m = p < q ? p : q; // = min(p, q) = min(dim(A))
|
|
||||||
|
|
||||||
// Check if dimensions are not degenerate
|
|
||||||
if (p < 1 || q < 1) {
|
|
||||||
error("Expected positive matrix dimensions");
|
|
||||||
}
|
|
||||||
|
|
||||||
// create R objects to store the SVD result `A = U diag(d) V^T`
|
|
||||||
SEXP U = PROTECT(allocMatrix(REALSXP, p, m));
|
|
||||||
SEXP d = PROTECT(allocVector(REALSXP, m));
|
|
||||||
SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q));
|
|
||||||
|
|
||||||
// Call C SVD routine
|
|
||||||
int info = svd(
|
|
||||||
p, q, // nrow(A), ncol(A)
|
|
||||||
REAL(A), p, // mem. addr. of A, ldA (leading dimension of A)
|
|
||||||
REAL(d), // mem. addr. of d
|
|
||||||
REAL(U), p, // mem. addr. of U, ldU (leading dimension of U)
|
|
||||||
REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T)
|
|
||||||
0, -1, 0 // work mem. pointer, work mem. size and int work mem
|
|
||||||
); // set to `0, -1, 0` to indicate "unknown"
|
|
||||||
|
|
||||||
// Check LAPACK info
|
|
||||||
if (info) {
|
|
||||||
error("error code %d from LAPACK routine 'dgesdd'", info);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create R list containint SVD components
|
|
||||||
SEXP result = PROTECT(allocVector(VECSXP, 3));
|
|
||||||
SEXP names = PROTECT(allocVector(STRSXP, 3));
|
|
||||||
SET_VECTOR_ELT(result, 0, d);
|
|
||||||
SET_VECTOR_ELT(result, 1, U);
|
|
||||||
SET_VECTOR_ELT(result, 2, Vt);
|
|
||||||
SET_STRING_ELT(names, 0, mkChar("d"));
|
|
||||||
SET_STRING_ELT(names, 1, mkChar("u"));
|
|
||||||
SET_STRING_ELT(names, 2, mkChar("vt"));
|
|
||||||
setAttrib(result, R_NamesSymbol, names);
|
|
||||||
|
|
||||||
// Release created objects to the garbage collector
|
|
||||||
UNPROTECT(5);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
|
@ -1,48 +1,104 @@
|
||||||
#include "ttm.h"
|
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
||||||
|
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
||||||
|
#define USE_FC_LEN_T
|
||||||
|
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
|
||||||
|
#define R_NO_REMAP
|
||||||
|
#include <R.h>
|
||||||
|
#include <Rinternals.h>
|
||||||
|
#include <R_ext/BLAS.h>
|
||||||
|
#ifndef FCONE
|
||||||
|
#define FCONE
|
||||||
|
#endif
|
||||||
|
|
||||||
void ttm(
|
/**
|
||||||
const int transB, const int mode,
|
* Tensor Times Matrix a.k.a. Mode Product
|
||||||
const int* dimA, const int ordA, const int nrowB, const int ncolB,
|
*
|
||||||
const double alpha,
|
* @param A multi-dimensional array
|
||||||
const double* A,
|
* @param B matrix
|
||||||
const double* B, const int ldB, // TODO: ldB is IGNORED!!!
|
* @param m mode index (1-indexed)
|
||||||
const double beta,
|
* @param op boolean if `B` is transposed
|
||||||
double* C
|
*/
|
||||||
) {
|
extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
|
||||||
|
|
||||||
// Strides are the "leading" and "trailing" dimensions of the matricized
|
// get zero indexed mode
|
||||||
// tensor `A` in the following matrix-matrix multiplications
|
const int mode = Rf_asInteger(m) - 1;
|
||||||
|
|
||||||
|
// get dimension attribute of A
|
||||||
|
SEXP dim = Rf_getAttrib(A, R_DimSymbol);
|
||||||
|
|
||||||
|
// operation on `B` (transposed or not)
|
||||||
|
const int trans = Rf_asLogical(op);
|
||||||
|
|
||||||
|
// as well as `B`s dimensions
|
||||||
|
const int nrow = Rf_nrows(B);
|
||||||
|
const int ncol = Rf_ncols(B);
|
||||||
|
|
||||||
|
// validate mode (mode must be smaller than the nr of dimensions)
|
||||||
|
if (mode < 0 || Rf_length(dim) <= mode) {
|
||||||
|
Rf_error("Illegal mode");
|
||||||
|
}
|
||||||
|
|
||||||
|
// and check if B is a matrix of non degenetate size
|
||||||
|
if (!Rf_isMatrix(B)) {
|
||||||
|
Rf_error("Expected a matrix as second argument");
|
||||||
|
}
|
||||||
|
if (!Rf_nrows(B) || !Rf_ncols(B)) {
|
||||||
|
Rf_error("Zero dimension detected");
|
||||||
|
}
|
||||||
|
|
||||||
|
// check matching of dimensions
|
||||||
|
if (INTEGER(dim)[mode] != (trans ? nrow : ncol)) {
|
||||||
|
Rf_error("Dimension missmatch");
|
||||||
|
}
|
||||||
|
|
||||||
|
// calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C),
|
||||||
|
int sizeC = 1;
|
||||||
|
// and the strides
|
||||||
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
||||||
// `stride[1] <- dim(A)[mode]`
|
// `stride[1] <- dim(A)[mode]`
|
||||||
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
||||||
int stride[3] = {1, dimA[mode], 1};
|
int stride[3] = {1, INTEGER(dim)[mode], 1};
|
||||||
for (int i = 0; i < ordA; ++i) {
|
for (int i = 0; i < Rf_length(dim); ++i) {
|
||||||
stride[0] *= (i < mode) ? dimA[i] : 1;
|
int size = INTEGER(dim)[i];
|
||||||
stride[2] *= (i > mode) ? dimA[i] : 1;
|
// check for non-degenetate dimensions
|
||||||
|
if (!size) {
|
||||||
|
Rf_error("Zero dimension detected");
|
||||||
|
}
|
||||||
|
sizeC *= (i == mode) ? (trans ? ncol : nrow) : size;
|
||||||
|
stride[0] *= (i < mode) ? size : 1;
|
||||||
|
stride[2] *= (i > mode) ? size : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// create response object C
|
||||||
|
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
||||||
|
|
||||||
|
// raw data access pointers
|
||||||
|
double* a = REAL(A);
|
||||||
|
double* b = REAL(B);
|
||||||
|
double* c = REAL(C);
|
||||||
|
|
||||||
|
// Tensor Times Matrix / Mode Product
|
||||||
|
const double zero = 0.0;
|
||||||
|
const double one = 1.0;
|
||||||
if (mode == 0) {
|
if (mode == 0) {
|
||||||
// mode 1: C = alpha (A x_1 op(B))_(1) + beta C
|
// mode 1: (A x_1 op(B))_(1) = op(B) A_(1) as a single Matrix-Matrix
|
||||||
// = alpha op(B) A_(1) + beta C
|
// multiplication
|
||||||
// as a single Matrix-Matrix multiplication
|
F77_CALL(dgemm)(trans ? "T" : "N", "N",
|
||||||
F77_CALL(dgemm)(transB ? "T" : "N", "N",
|
(trans ? &ncol : &nrow), &stride[2], &stride[1], &one,
|
||||||
(transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha,
|
b, &nrow, a, &stride[1],
|
||||||
B, &nrowB, A, &stride[1],
|
&zero, c, (trans ? &ncol : &nrow)
|
||||||
&beta, C, (transB ? &ncolB : &nrowB)
|
|
||||||
FCONE FCONE);
|
FCONE FCONE);
|
||||||
} else {
|
} else {
|
||||||
// Other modes can be written as blocks of matrix multiplications
|
// Other modes can be written as blocks of matrix multiplications
|
||||||
// C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2
|
// (A x_m op(B))_(m)' = A_(m)' op(B)'
|
||||||
// = alpha A_(m)' op(B)' + beta C_:,:,i2
|
|
||||||
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
||||||
F77_CALL(dgemm)("N", transB ? "N" : "T",
|
F77_CALL(dgemm)("N", trans ? "N" : "T",
|
||||||
&stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha,
|
&stride[0], (trans ? &ncol : &nrow), &stride[1], &one,
|
||||||
&A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB,
|
&a[i2 * stride[0] * stride[1]], &stride[0], b, &nrow,
|
||||||
&beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0]
|
&zero, &c[i2 * stride[0] * (trans ? ncol : nrow)], &stride[0]
|
||||||
FCONE FCONE);
|
FCONE FCONE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// (reference implementation)
|
// (reference implementation)
|
||||||
// Tensor Times Matrix / Mode Product for `op(B) == B`
|
// Tensor Times Matrix / Mode Product for `op(B) == B`
|
||||||
|
@ -58,76 +114,13 @@ void ttm(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tensor Times Matrix a.k.a. Mode Product
|
|
||||||
*
|
|
||||||
* @param A multi-dimensional array
|
|
||||||
* @param B matrix
|
|
||||||
* @param m mode index (1-indexed)
|
|
||||||
* @param op boolean if `B` is transposed
|
|
||||||
*/
|
|
||||||
extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
|
|
||||||
|
|
||||||
// get zero indexed mode
|
|
||||||
const int mode = Rf_asInteger(m) - 1;
|
|
||||||
|
|
||||||
// get dimension attribute of A
|
|
||||||
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
|
|
||||||
|
|
||||||
// operation on `B` (transposed or not)
|
|
||||||
const int transB = Rf_asLogical(op);
|
|
||||||
|
|
||||||
// as well as `B`s dimensions
|
|
||||||
const int nrowB = Rf_nrows(B);
|
|
||||||
const int ncolB = Rf_ncols(B);
|
|
||||||
|
|
||||||
// validate mode (mode must be smaller than the nr of dimensions)
|
|
||||||
if (mode < 0 || Rf_length(dimA) <= mode) {
|
|
||||||
Rf_error("Illegal mode");
|
|
||||||
}
|
|
||||||
|
|
||||||
// and check if B is a matrix of non degenetate size
|
|
||||||
if (!Rf_isMatrix(B)) {
|
|
||||||
Rf_error("Expected a matrix as second argument");
|
|
||||||
}
|
|
||||||
if (!Rf_nrows(B) || !Rf_ncols(B)) {
|
|
||||||
Rf_error("Zero dimension detected");
|
|
||||||
}
|
|
||||||
|
|
||||||
// check matching of dimensions
|
|
||||||
if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) {
|
|
||||||
Rf_error("Dimension missmatch");
|
|
||||||
}
|
|
||||||
|
|
||||||
// calc nr of response elements (size of C)
|
|
||||||
// `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)`
|
|
||||||
int sizeC = 1;
|
|
||||||
for (int i = 0; i < Rf_length(dimA); ++i) {
|
|
||||||
int size = INTEGER(dimA)[i];
|
|
||||||
// check for non-degenetate dimensions
|
|
||||||
if (!size) {
|
|
||||||
Rf_error("Zero dimension detected");
|
|
||||||
}
|
|
||||||
sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size;
|
|
||||||
}
|
|
||||||
|
|
||||||
// create response object C
|
|
||||||
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
|
||||||
|
|
||||||
// Tensor Times Matrix / Mode Product
|
|
||||||
ttm(transB, mode,
|
|
||||||
INTEGER(dimA), Rf_length(dimA), nrowB, ncolB,
|
|
||||||
1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C));
|
|
||||||
|
|
||||||
// finally, set result dimensions
|
// finally, set result dimensions
|
||||||
SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA)));
|
SEXP newdim = PROTECT(Rf_allocVector(INTSXP, Rf_length(dim)));
|
||||||
for (int i = 0; i < Rf_length(dimA); ++i) {
|
for (int i = 0; i < Rf_length(dim); ++i) {
|
||||||
INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i];
|
INTEGER(newdim)[i] = (i == mode) ? (trans ? ncol : nrow) : INTEGER(dim)[i];
|
||||||
}
|
}
|
||||||
Rf_setAttrib(C, R_DimSymbol, dimC);
|
Rf_setAttrib(C, R_DimSymbol, newdim);
|
||||||
|
|
||||||
// release C to the hands of the garbage collector
|
// release C to the hands of the garbage collector
|
||||||
UNPROTECT(2);
|
UNPROTECT(2);
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
#ifndef INCLUDE_GUARD_TTM_H
|
|
||||||
#define INCLUDE_GUARD_TTM_H
|
|
||||||
|
|
||||||
#include "R_api.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tensor Times Matrix Subroutine
|
|
||||||
*
|
|
||||||
* @attention Assumes all parameters to be correct!
|
|
||||||
*/
|
|
||||||
void ttm(
|
|
||||||
/* options */ const int transB, const int mode,
|
|
||||||
/* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB,
|
|
||||||
/* scalar */ const double alpha,
|
|
||||||
/* tensor */ const double* A,
|
|
||||||
/* matrix */ const double* B, const int ldB,
|
|
||||||
/* scalar */ const double beta,
|
|
||||||
/* tensor */ double* C
|
|
||||||
);
|
|
||||||
|
|
||||||
#endif /* INCLUDE_GUARD_TTM_H */
|
|
Loading…
Reference in New Issue