Compare commits
3 Commits
40132c2565
...
399f878fbb
Author | SHA1 | Date |
---|---|---|
Daniel Kapla | 399f878fbb | |
Daniel Kapla | 6477d78190 | |
Daniel Kapla | 90cd46e209 |
218
LaTeX/GMLM.tex
218
LaTeX/GMLM.tex
|
@ -90,10 +90,11 @@
|
|||
%%% Custom operators with ether one or two arguments (limits)
|
||||
\makeatletter
|
||||
%%% Multi-Linear Multiplication
|
||||
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
|
||||
% Save first argument as \arg@one
|
||||
\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i}
|
||||
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
|
||||
% Check for second argument
|
||||
\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi}
|
||||
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
|
||||
% specialization for one or two arguments, both versions use saved first argument
|
||||
\def\mlm@one{\mathchoice%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
|
||||
|
@ -101,14 +102,31 @@
|
|||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
|
||||
}
|
||||
% this commands single argument is the second argument of \mlm
|
||||
\def\mlm@two#1{\mathchoice%
|
||||
% this commands single argument is the second argument of \mlm, it gobbles the `^`
|
||||
\def\mlm@two^#1{\mathchoice%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
}
|
||||
|
||||
%%% Big Circle (Iterated Outer Product)
|
||||
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
|
||||
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
|
||||
\def\outer@one{\mathchoice%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
|
||||
}
|
||||
\def\outer@two#1{\mathchoice%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
|
||||
}
|
||||
|
||||
|
||||
%%% Big Kronecker Product (with overflowing limits)
|
||||
% Save first argument as \arg@one
|
||||
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
|
||||
|
@ -134,17 +152,15 @@
|
|||
\newcommand{\algorithmicbreak}{\textbf{break}}
|
||||
\newcommand{\Break}{\State \algorithmicbreak}
|
||||
|
||||
|
||||
\begin{document}
|
||||
|
||||
\maketitle
|
||||
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
\begin{abstract}
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons.
|
||||
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
|
||||
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
|
||||
This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
|
||||
\end{abstract}
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
@ -256,23 +272,23 @@ A straight forward idea for parameter estimation is to use Gradient Descent. For
|
|||
\begin{algorithm}[ht]
|
||||
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
|
||||
\begin{algorithmic}[1]
|
||||
\State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$
|
||||
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
|
||||
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
|
||||
\State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
|
||||
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
|
||||
\\
|
||||
\State $t \leftarrow 1$
|
||||
\Comment{step counter}
|
||||
\State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$
|
||||
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
|
||||
\Comment{artificial first step}
|
||||
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
|
||||
\Comment{momentum extrapolation weights}
|
||||
\\
|
||||
\Repeat \Comment{repeat untill convergence}
|
||||
\State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation}
|
||||
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
|
||||
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
|
||||
\State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$
|
||||
\If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
|
||||
\State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$
|
||||
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
|
||||
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
|
||||
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
|
||||
\State $\delta^{(t + 1)} \leftarrow \delta$
|
||||
\Break
|
||||
\EndIf
|
||||
|
@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
|
|||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width = \textwidth]{sim-normal-20221012.png}
|
||||
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
|
||||
\caption{\label{fig:sim-normal}Simulation Normal}
|
||||
\end{figure}
|
||||
|
||||
|
@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
|
|||
|
||||
\begin{figure}[!ht]
|
||||
\centering
|
||||
\includegraphics[width = \textwidth]{sim-ising-small-20221012.png}
|
||||
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
|
||||
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
|
||||
\end{figure}
|
||||
|
||||
|
@ -433,7 +449,7 @@ where each individual block is given by
|
|||
For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
|
||||
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by
|
||||
\begin{displaymath}
|
||||
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
|
||||
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
|
||||
\end{displaymath}
|
||||
with
|
||||
\begin{align*}
|
||||
|
@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t
|
|||
%
|
||||
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
|
||||
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
|
||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
|
||||
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
|
||||
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
|
||||
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||
\end{align*}}%
|
||||
The next step is to identify the Hessians from the second differentials in a similar manner as befor.
|
||||
{\allowdisplaybreaks\begin{align*}
|
||||
|
@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a sim
|
|||
\qquad{\color{gray} (p \times p)}
|
||||
\\
|
||||
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
|
||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
|
||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
|
||||
\qquad{\color{gray} (p_j q_j \times p)}
|
||||
\\
|
||||
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
|
||||
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
|
||||
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
|
||||
\qquad{\color{gray} (p_j^2 \times p)}
|
||||
\\
|
||||
&\d^2 l(\mat{\alpha}_j) \\
|
||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
|
||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
|
||||
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||
&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
|
||||
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
|
||||
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
|
||||
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
|
||||
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
|
||||
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
|
||||
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
|
||||
\\
|
||||
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
|
||||
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
|
||||
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
|
||||
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
|
||||
&\qquad \begin{aligned}
|
||||
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
|
||||
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
|
||||
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
|
||||
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
|
||||
\end{aligned}
|
||||
\\
|
||||
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
|
||||
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
|
||||
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
|
||||
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
|
||||
\\
|
||||
&\d^2 l(\mat{\Omega}_j) \\
|
||||
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
|
||||
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
|
||||
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
|
||||
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
|
||||
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
|
||||
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
|
||||
\\
|
||||
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
|
||||
|
@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a sim
|
|||
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
|
||||
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
|
||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
|
||||
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
|
||||
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
|
||||
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
|
||||
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
|
||||
\end{aligned}
|
||||
\end{align*}}%
|
||||
|
@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
|
|||
\begin{align*}
|
||||
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
|
||||
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
|
||||
&= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
|
||||
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
|
||||
\mathcal{I}_{1,j+r+1}
|
||||
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
|
||||
\mathcal{I}_{j+1,l+1}
|
||||
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
|
||||
\mathcal{I}_{j+1,l+r+1}
|
||||
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
|
||||
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
|
||||
\mathcal{I}_{j+r+1,l+r+1}
|
||||
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
|
||||
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
|
||||
\end{align*}
|
||||
|
||||
|
||||
|
@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati
|
|||
\begin{theorem}\label{thm:mlm_mat}
|
||||
Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds
|
||||
\begin{displaymath}
|
||||
\Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
|
||||
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
|
||||
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
|
||||
\end{displaymath}
|
||||
\end{theorem}
|
||||
|
||||
A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
|
||||
\begin{displaymath}
|
||||
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}.
|
||||
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
|
||||
\end{displaymath}
|
||||
Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
|
||||
|
||||
|
@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
|
|||
Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then
|
||||
\begin{displaymath}
|
||||
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
|
||||
\equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}.
|
||||
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
|
||||
\end{displaymath}
|
||||
\end{theorem}
|
||||
|
||||
A special case of above Theorem is given for tensors represented as a Kronecker product. Therefore, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
|
||||
\begin{displaymath}
|
||||
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
|
||||
=
|
||||
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
|
||||
=
|
||||
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
|
||||
\end{displaymath}
|
||||
In case of $r = 2$ this means
|
||||
\begin{align*}
|
||||
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
|
||||
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
|
||||
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
|
||||
\end{align*}
|
||||
|
||||
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
|
||||
\begin{displaymath}
|
||||
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
|
||||
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
|
||||
\end{displaymath}
|
||||
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$ .
|
||||
|
||||
\todo{continue}
|
||||
|
||||
|
||||
|
||||
|
||||
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
|
||||
% \begin{displaymath}
|
||||
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
|
||||
|
@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
|
|||
\end{tikzpicture}
|
||||
\end{center}
|
||||
|
||||
\newcommand{\somedrawing} {
|
||||
\coordinate (a) at (-2,-2,-2);
|
||||
\coordinate (b) at (-2,-2,2);
|
||||
\coordinate (c) at (-2,2,-2);
|
||||
\coordinate (d) at (-2,2,2);
|
||||
\coordinate (e) at (2,-2,-2);
|
||||
\coordinate (f) at (2,-2,2);
|
||||
\coordinate (g) at (2,2,-2);
|
||||
\coordinate (h) at (2,2,2);
|
||||
\draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
|
||||
\fill (a) circle (0.1cm);
|
||||
\fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
|
||||
}
|
||||
% \newcommand{\somedrawing}{
|
||||
% \coordinate (a) at (-2,-2,-2);
|
||||
% \coordinate (b) at (-2,-2,2);
|
||||
% \coordinate (c) at (-2,2,-2);
|
||||
% \coordinate (d) at (-2,2,2);
|
||||
% \coordinate (e) at (2,-2,-2);
|
||||
% \coordinate (f) at (2,-2,2);
|
||||
% \coordinate (g) at (2,2,-2);
|
||||
% \coordinate (h) at (2,2,2);
|
||||
% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
|
||||
% \fill (a) circle (0.1cm);
|
||||
% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
|
||||
% }
|
||||
|
||||
% \begin{figure}[ht!]
|
||||
% \centering
|
||||
|
@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example considure two matrices $\mat{A},
|
|||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
Let $f$ be an $r$ times differentiable function, then
|
||||
\begin{displaymath}
|
||||
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}}
|
||||
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
|
||||
\qquad\Leftrightarrow\qquad
|
||||
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
|
||||
\end{displaymath}
|
||||
|
@ -1372,12 +1412,12 @@ The differentials up to the 4'th are
|
|||
\begin{align*}
|
||||
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
|
||||
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
|
||||
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\
|
||||
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm{k = 1}{4} \d{t}
|
||||
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
|
||||
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t}
|
||||
\end{align*}
|
||||
Using the differentials to derivative identification identity
|
||||
\begin{displaymath}
|
||||
\d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t}
|
||||
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
|
||||
\qquad\Leftrightarrow\qquad
|
||||
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
|
||||
\end{displaymath}
|
||||
|
@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati
|
|||
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
|
||||
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
|
||||
\end{align*}
|
||||
which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$)
|
||||
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
|
||||
\begin{align*}
|
||||
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
|
||||
&= \Sigma \\
|
||||
|
|
153
LaTeX/main.bib
153
LaTeX/main.bib
|
@ -9,6 +9,17 @@
|
|||
publisher = {[Royal Statistical Society, Wiley]}
|
||||
}
|
||||
|
||||
@inproceedings{Nesterov1983,
|
||||
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
|
||||
author = {Nesterov, Yurii Evgen'evich},
|
||||
booktitle = {Doklady Akademii Nauk},
|
||||
volume = {269},
|
||||
number = {3},
|
||||
pages = {543--547},
|
||||
year = {1983},
|
||||
organization= {Russian Academy of Sciences}
|
||||
}
|
||||
|
||||
@book{StatInf-CasellaBerger2002,
|
||||
title = {{Statistical Inference}},
|
||||
author = {Casella, George and Berger, Roger L.},
|
||||
|
@ -27,6 +38,20 @@
|
|||
isbn = {0-471-98632-1}
|
||||
}
|
||||
|
||||
@article{SymMatandJacobians-MagnusNeudecker1986,
|
||||
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
|
||||
author = {Magnus, Jan R. and Neudecker, Heinz},
|
||||
ISSN = {02664666, 14694360},
|
||||
URL = {http://www.jstor.org/stable/3532421},
|
||||
journal = {Econometric Theory},
|
||||
number = {2},
|
||||
pages = {157--190},
|
||||
publisher = {Cambridge University Press},
|
||||
urldate = {2023-10-03},
|
||||
volume = {2},
|
||||
year = {1986}
|
||||
}
|
||||
|
||||
@book{MatrixAlgebra-AbadirMagnus2005,
|
||||
title = {Matrix Algebra},
|
||||
author = {Abadir, Karim M. and Magnus, Jan R.},
|
||||
|
@ -83,6 +108,31 @@
|
|||
doi = {10.1080/01621459.2015.1093944}
|
||||
}
|
||||
|
||||
@article{FisherLectures-Cook2007,
|
||||
author = {Cook, R. Dennis},
|
||||
journal = {Statistical Science},
|
||||
month = {02},
|
||||
number = {1},
|
||||
pages = {1--26},
|
||||
publisher = {The Institute of Mathematical Statistics},
|
||||
title = {{Fisher Lecture: Dimension Reduction in Regression}},
|
||||
volume = {22},
|
||||
year = {2007},
|
||||
doi = {10.1214/088342306000000682}
|
||||
}
|
||||
|
||||
@article{asymptoticMLE-BuraEtAl2018,
|
||||
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued},
|
||||
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
|
||||
journal = {Statistics},
|
||||
volume = {52},
|
||||
number = {5},
|
||||
pages = {1005-1024},
|
||||
year = {2018},
|
||||
publisher = {Taylor \& Francis},
|
||||
doi = {10.1080/02331888.2018.1467420},
|
||||
}
|
||||
|
||||
@article{tsir-DingCook2015,
|
||||
author = {Shanshan Ding and R. Dennis Cook},
|
||||
title = {Tensor sliced inverse regression},
|
||||
|
@ -117,3 +167,106 @@
|
|||
isbn = {978-94-015-8196-7},
|
||||
doi = {10.1007/978-94-015-8196-7_17}
|
||||
}
|
||||
|
||||
@book{asymStats-van_der_Vaart1998,
|
||||
title = {Asymptotic Statistics},
|
||||
author = {{van der Vaart}, A.W.},
|
||||
series = {Asymptotic Statistics},
|
||||
year = {1998},
|
||||
publisher = {Cambridge University Press},
|
||||
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
|
||||
isbn = {0-521-49603-9}
|
||||
}
|
||||
|
||||
@book{measureTheory-Kusolitsch2011,
|
||||
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
|
||||
subtitle = {{E}ine {E}inf{\"u}hrung},
|
||||
author = {Kusolitsch, Norbert},
|
||||
series = {Springer-Lehrbuch},
|
||||
year = {2011},
|
||||
publisher = {Springer Vienna},
|
||||
isbn = {978-3-7091-0684-6},
|
||||
doi = {10.1007/978-3-7091-0685-3}
|
||||
}
|
||||
|
||||
@book{optimMatrixMani-AbsilEtAl2007,
|
||||
title = {{Optimization Algorithms on Matrix Manifolds}},
|
||||
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
|
||||
year = {2007},
|
||||
publisher = {Princeton University Press},
|
||||
isbn = {9780691132983},
|
||||
note = {Full Online Text \url{https://press.princeton.edu/absil}}
|
||||
}
|
||||
|
||||
@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
|
||||
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
|
||||
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
|
||||
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
|
||||
bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
|
||||
year = {2020},
|
||||
publisher = {Springer International Publishing},
|
||||
address = {Cham},
|
||||
pages = {261--313},
|
||||
isbn = {978-3-030-31351-7},
|
||||
doi = {10.1007/978-3-030-31351-7_9}
|
||||
}
|
||||
|
||||
@book{introToSmoothMani-Lee2012,
|
||||
title = {Introduction to Smooth Manifolds},
|
||||
author = {Lee, John M.},
|
||||
year = {2012},
|
||||
journal = {Graduate Texts in Mathematics},
|
||||
publisher = {Springer New York},
|
||||
doi = {10.1007/978-1-4419-9982-5}
|
||||
}
|
||||
|
||||
@book{introToRiemannianMani-Lee2018,
|
||||
title = {Introduction to Riemannian Manifolds},
|
||||
author = {Lee, John M.},
|
||||
year = {2018},
|
||||
journal = {Graduate Texts in Mathematics},
|
||||
publisher = {Springer International Publishing},
|
||||
doi = {10.1007/978-3-319-91755-9}
|
||||
}
|
||||
|
||||
@misc{MLEonManifolds-HajriEtAl2017,
|
||||
title = {Maximum Likelihood Estimators on Manifolds},
|
||||
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
|
||||
year = {2017},
|
||||
journal = {Lecture Notes in Computer Science},
|
||||
publisher = {Springer International Publishing},
|
||||
pages = {692-700},
|
||||
doi = {10.1007/978-3-319-68445-1_80}
|
||||
}
|
||||
|
||||
@article{relativity-Einstain1916,
|
||||
author = {Einstein, Albert},
|
||||
title = {Die Grundlage der allgemeinen Relativitätstheorie},
|
||||
year = {1916},
|
||||
journal = {Annalen der Physik},
|
||||
volume = {354},
|
||||
number = {7},
|
||||
pages = {769-822},
|
||||
doi = {10.1002/andp.19163540702}
|
||||
}
|
||||
|
||||
@article{MultilinearOperators-Kolda2006,
|
||||
title = {Multilinear operators for higher-order decompositions.},
|
||||
author = {Kolda, Tamara Gibson},
|
||||
doi = {10.2172/923081},
|
||||
url = {https://www.osti.gov/biblio/923081},
|
||||
place = {United States},
|
||||
year = {2006},
|
||||
month = {4},
|
||||
type = {Technical Report}
|
||||
}
|
||||
|
||||
@book{aufbauAnalysis-kaltenbaeck2021,
|
||||
title = {Aufbau Analysis},
|
||||
author = {Kaltenb\"ack, Michael},
|
||||
isbn = {978-3-88538-127-3},
|
||||
series = {Berliner Studienreihe zur Mathematik},
|
||||
edition = {27},
|
||||
year = {2021},
|
||||
publisher = {Heldermann Verlag}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,151 @@
|
|||
%%% R code to generate the input data files from corresponding simulation logs
|
||||
% R> setwd("~/Work/tensorPredictors")
|
||||
% R>
|
||||
% R> for (sim.name in c("2a")) {
|
||||
% R> pattern <- paste0("sim\\_", sim.name, "\\_ising\\-[0-9T]*\\.csv")
|
||||
% R> log.file <- sort(
|
||||
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
|
||||
% R> decreasing = TRUE
|
||||
% R> )[[1]]
|
||||
% R>
|
||||
% R> sim <- read.csv(log.file)
|
||||
% R>
|
||||
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
|
||||
% R>
|
||||
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-ising.csv"), row.names = FALSE, quote = FALSE)
|
||||
% R> }
|
||||
\documentclass[border=0cm]{standalone}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usepackage{pgfplots}
|
||||
\usepackage{bm}
|
||||
|
||||
\definecolor{gmlm}{RGB}{0,0,0}
|
||||
\definecolor{mgcca}{RGB}{86,180,233}
|
||||
\definecolor{tsir}{RGB}{0,158,115}
|
||||
\definecolor{pca}{RGB}{240,228,66}
|
||||
\definecolor{hopca}{RGB}{230,159,0}
|
||||
\definecolor{lpca}{RGB}{127,127,127}
|
||||
\definecolor{clpca}{RGB}{191,191,191}
|
||||
|
||||
\pgfplotsset{
|
||||
every axis/.style={
|
||||
xtick={100,200,300,500,750},
|
||||
ymin=-0.05, ymax=1.05,
|
||||
grid=both,
|
||||
grid style={gray, dotted}
|
||||
},
|
||||
every axis plot/.append style={
|
||||
mark = *,
|
||||
mark size = 1pt,
|
||||
line width=0.8pt
|
||||
}
|
||||
}
|
||||
\tikzset{
|
||||
legend entry/.style={
|
||||
mark = *,
|
||||
mark size = 1pt,
|
||||
mark indices = {2},
|
||||
line width=0.8pt
|
||||
}
|
||||
}
|
||||
|
||||
\begin{document}
|
||||
\begin{tikzpicture}[>=latex]
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-2a
|
||||
]
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-2a-ising.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-2a-ising.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-2a-ising.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-2a-ising.csv};
|
||||
\addplot[color = lpca] table[x = sample.size, y = dist.subspace.lpca] {aggr-2a-ising.csv};
|
||||
\addplot[color = clpca] table[x = sample.size, y = dist.subspace.clpca] {aggr-2a-ising.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-2a.north west) {
|
||||
a: small
|
||||
};
|
||||
|
||||
% \begin{axis}[
|
||||
% name=sim-1b,
|
||||
% anchor=north west, at={(sim-2a.right of north east)}, xshift=0.1cm,
|
||||
% xticklabel=\empty,
|
||||
% ylabel near ticks, yticklabel pos=right
|
||||
% ]
|
||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
|
||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
|
||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
|
||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
|
||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
|
||||
% \end{axis}
|
||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
|
||||
% b: cubic dependence on $y$
|
||||
% };
|
||||
|
||||
% \begin{axis}[
|
||||
% name=sim-1c,
|
||||
% anchor=north west, at={(sim-2a.below south west)}, yshift=-.8em,
|
||||
% xticklabel=\empty
|
||||
% ]
|
||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
|
||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
|
||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
|
||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
|
||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
|
||||
% \end{axis}
|
||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
|
||||
% c: rank $1$ $\boldsymbol{\beta}$'s
|
||||
% };
|
||||
|
||||
% \begin{axis}[
|
||||
% name=sim-1d,
|
||||
% anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
|
||||
% ylabel near ticks, yticklabel pos=right
|
||||
% ]
|
||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
|
||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
|
||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
|
||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
|
||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
|
||||
% \end{axis}
|
||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
|
||||
% d: tri-diagonal $\boldsymbol{\Omega}$'s
|
||||
% };
|
||||
|
||||
% \begin{axis}[
|
||||
% name=sim-1e,
|
||||
% anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
|
||||
% ]
|
||||
% \addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
|
||||
% \addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
|
||||
% \addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
|
||||
% \addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
|
||||
% \addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
|
||||
% \end{axis}
|
||||
% \node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
|
||||
% e: missspecified
|
||||
% };
|
||||
|
||||
|
||||
\matrix[anchor = west] at (sim-2a.right of east) {
|
||||
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
|
||||
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
|
||||
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
|
||||
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
|
||||
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
|
||||
\draw[color=lpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {LPCA}; \\
|
||||
\draw[color=clpca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {CLPCA}; \\
|
||||
};
|
||||
|
||||
|
||||
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
|
||||
|
||||
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
|
||||
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
|
||||
|
||||
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
|
||||
|
||||
\end{tikzpicture}
|
||||
\end{document}
|
|
@ -0,0 +1,148 @@
|
|||
%%% R code to generate the input data files from corresponding simulation logs
|
||||
% R> setwd("~/Work/tensorPredictors")
|
||||
% R>
|
||||
% R> for (sim.name in c("1a", "1b", "1c", "1d", "1e")) {
|
||||
% R> pattern <- paste0("sim\\_", sim.name, "\\_normal\\-[0-9T]*\\.csv")
|
||||
% R> log.file <- sort(
|
||||
% R> list.files(path = "sim/", pattern = pattern, full.names = TRUE),
|
||||
% R> decreasing = TRUE
|
||||
% R> )[[1]]
|
||||
% R>
|
||||
% R> sim <- read.csv(log.file)
|
||||
% R>
|
||||
% R> aggr <- aggregate(sim[, names(sim) != "sample.size"], list(sample.size = sim$sample.size), mean)
|
||||
% R>
|
||||
% R> write.table(aggr, file = paste0("LaTeX/plots/aggr-", sim.name, "-normal.csv"), row.names = FALSE, quote = FALSE)
|
||||
% R> }
|
||||
\documentclass[border=0cm]{standalone}
|
||||
|
||||
\usepackage{tikz}
|
||||
\usepackage{pgfplots}
|
||||
\usepackage{bm}
|
||||
|
||||
\definecolor{gmlm}{RGB}{0,0,0}
|
||||
\definecolor{mgcca}{RGB}{86,180,233}
|
||||
\definecolor{tsir}{RGB}{0,158,115}
|
||||
\definecolor{hopca}{RGB}{230,159,0}
|
||||
\definecolor{pca}{RGB}{240,228,66}
|
||||
\definecolor{lpca}{RGB}{0,114,178}
|
||||
\definecolor{clpca}{RGB}{213,94,0}
|
||||
|
||||
\pgfplotsset{
|
||||
every axis/.style={
|
||||
xtick={100,200,300,500,750},
|
||||
ymin=-0.05, ymax=1.05,
|
||||
grid=both,
|
||||
grid style={gray, dotted}
|
||||
},
|
||||
every axis plot/.append style={
|
||||
mark = *,
|
||||
mark size = 1pt,
|
||||
line width=0.8pt
|
||||
}
|
||||
}
|
||||
\tikzset{
|
||||
legend entry/.style={
|
||||
mark = *,
|
||||
mark size = 1pt,
|
||||
mark indices = {2},
|
||||
line width=0.8pt
|
||||
}
|
||||
}
|
||||
|
||||
\begin{document}
|
||||
\begin{tikzpicture}[>=latex]
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-1a,
|
||||
xticklabel=\empty
|
||||
]
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1a-normal.csv};
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1a-normal.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1a-normal.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1a-normal.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1a-normal.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-1a.north west) {
|
||||
a: linear dependence on $\mathcal{F}_y \equiv y$
|
||||
};
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-1b,
|
||||
anchor=north west, at={(sim-1a.right of north east)}, xshift=0.1cm,
|
||||
xticklabel=\empty,
|
||||
ylabel near ticks, yticklabel pos=right
|
||||
]
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1b-normal.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1b-normal.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1b-normal.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1b-normal.csv};
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1b-normal.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-1b.north west) {
|
||||
b: cubic dependence on $y$
|
||||
};
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-1c,
|
||||
anchor=north west, at={(sim-1a.below south west)}, yshift=-.8em,
|
||||
xticklabel=\empty
|
||||
]
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1c-normal.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1c-normal.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1c-normal.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1c-normal.csv};
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1c-normal.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-1c.north west) {
|
||||
c: rank $1$ $\boldsymbol{\beta}$'s
|
||||
};
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-1d,
|
||||
anchor=north west, at={(sim-1c.right of north east)}, xshift=0.1cm,
|
||||
ylabel near ticks, yticklabel pos=right
|
||||
]
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1d-normal.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1d-normal.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1d-normal.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1d-normal.csv};
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1d-normal.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-1d.north west) {
|
||||
d: tri-diagonal $\boldsymbol{\Omega}$'s
|
||||
};
|
||||
|
||||
\begin{axis}[
|
||||
name=sim-1e,
|
||||
anchor=north west, at={(sim-1c.below south west)}, yshift=-.8em
|
||||
]
|
||||
\addplot[color = pca] table[x = sample.size, y = dist.subspace.pca] {aggr-1e-normal.csv};
|
||||
\addplot[color = hopca] table[x = sample.size, y = dist.subspace.hopca] {aggr-1e-normal.csv};
|
||||
\addplot[color = tsir] table[x = sample.size, y = dist.subspace.tsir] {aggr-1e-normal.csv};
|
||||
\addplot[color = mgcca] table[x = sample.size, y = dist.subspace.mgcca] {aggr-1e-normal.csv};
|
||||
\addplot[color = gmlm, line width=1pt] table[x = sample.size, y = dist.subspace.gmlm] {aggr-1e-normal.csv};
|
||||
\end{axis}
|
||||
\node[anchor = base west, yshift = 0.3em] at (sim-1e.north west) {
|
||||
e: missspecified
|
||||
};
|
||||
|
||||
|
||||
\matrix[anchor = center] at (sim-1e.right of east -| sim-1d.south) {
|
||||
\draw[color=gmlm, legend entry, line width=1pt] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {GMLM}; \\
|
||||
\draw[color=tsir, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {TSIR}; \\
|
||||
\draw[color=mgcca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {MGCCA}; \\
|
||||
\draw[color=hopca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {HOPCA}; \\
|
||||
\draw[color=pca, legend entry] plot coordinates {(0, 0) (.3, 0) (.6, 0)}; & \node[anchor=west] {PCA}; \\
|
||||
};
|
||||
|
||||
|
||||
\node[anchor = north] at (current bounding box.south) {Sample Size $n$};
|
||||
|
||||
\node[anchor = south, rotate = 90] at (current bounding box.west) {Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$};
|
||||
\node[anchor = south, rotate = 270] at (current bounding box.east) {\phantom{Subspace Distance $d(\boldsymbol{B}, \hat{\boldsymbol{B}})$}};
|
||||
|
||||
\node[anchor = south, font=\large] at (current bounding box.north) {Tensor Normal Simulation};
|
||||
|
||||
\end{tikzpicture}
|
||||
\end{document}
|
|
@ -23,7 +23,7 @@
|
|||
*
|
||||
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
|
||||
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
|
||||
* `T` has the same dimensions as a binary vector given by
|
||||
* `T` has the same dimensions as the parameter vector given by
|
||||
*
|
||||
* T(y) = vech(y y').
|
||||
*
|
||||
|
|
204
sim/ising.R
204
sim/ising.R
|
@ -1,204 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
library(mvbernoulli)
|
||||
|
||||
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
file.prefix <- "sim-ising"
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 100 # maximum number of iterations for GMLM
|
||||
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
|
||||
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
|
||||
r <- length(p)
|
||||
# parameter configuration
|
||||
rho <- -0.55
|
||||
c1 <- 1
|
||||
c2 <- 1
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == 2
|
||||
all.equal(p, c(4, 4))
|
||||
all.equal(q, c(2, 2))
|
||||
})
|
||||
|
||||
### small helpers
|
||||
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||
# Auto-Regression Covariance Matrix
|
||||
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||
# Inverse of the AR matrix
|
||||
AR.inv <- function(rho, dim) {
|
||||
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||
A / (1 - rho^2)
|
||||
}
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
### setup Ising parameters (to get reasonable parameters)
|
||||
eta1 <- 0
|
||||
alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||
linspace <- seq(-1, 1, length.out = pj)
|
||||
matrix(c(linspace, linspace^2), pj, 2)
|
||||
}, p, q)
|
||||
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
|
||||
dim(Fy) <- c(2, 2, n)
|
||||
|
||||
# natural exponential family parameters
|
||||
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||
|
||||
# conditional Ising model parameters
|
||||
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||
theta_y[diagonal, ] <- eta_y1
|
||||
|
||||
# Sample X from conditional distribution
|
||||
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||
# convert (from compressed integer vector) to array data
|
||||
attr(X, "p") <- prod(p)
|
||||
X <- t(as.mvbmatrix(X))
|
||||
dim(X) <- c(p, n)
|
||||
storage.mode(X) <- "double"
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### Logging Errors and Warnings
|
||||
# Register a global warning and error handler for logging warnings/errors with
|
||||
# current simulation repetition session informatin allowing to reproduce problems
|
||||
exceptionLogger <- function(ex) {
|
||||
# retrieve current simulation repetition information
|
||||
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||
# setup an error log file with the same name as `file`
|
||||
log <- paste0(rep.info$file, ".log")
|
||||
# Write (append) condition message with reproduction info to the log
|
||||
cat("\n\n------------------------------------------------------------\n",
|
||||
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||
rep.info$file, rep.info$n, rep.info$rep,
|
||||
paste(rep.info$.Random.seed, collapse = ","),
|
||||
as.character.error(ex)
|
||||
), sep = "", file = log, append = TRUE)
|
||||
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||
if (n == 0L) {
|
||||
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||
} else {
|
||||
for (i in 1L:n) {
|
||||
xi <- x[[i]]
|
||||
label <- paste0(n - i + 1L, ": ")
|
||||
m <- length(xi)
|
||||
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||
srcfile <- attr(srcref, "srcfile")
|
||||
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||
}
|
||||
if (isTRUE(attr(xi, "truncated"))) {
|
||||
xi <- c(xi, " ...")
|
||||
m <- length(xi)
|
||||
}
|
||||
if (!is.null(srcloc)) {
|
||||
xi[m] <- paste0(xi[m], srcloc)
|
||||
}
|
||||
if (m > 1) {
|
||||
label <- c(label, rep(substr(" ", 1L,
|
||||
nchar(label, type = "w")), m - 1L))
|
||||
}
|
||||
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||
}
|
||||
}
|
||||
}
|
||||
globalCallingHandlers(list(
|
||||
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||
))
|
||||
|
||||
|
||||
### for every sample size
|
||||
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||
for (n in sample.sizes) {
|
||||
### write new simulation result file
|
||||
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||
header <- outer(
|
||||
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||
paste, sep = ".")
|
||||
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||
|
||||
### repeated simulation
|
||||
for (rep in seq_len(reps)) {
|
||||
### Repetition session state info
|
||||
# Stores specific session variables before starting the current
|
||||
# simulation replication. This allows to log state information which
|
||||
# can be used to replicate a specific simulation repetition in case of
|
||||
# errors/warnings from the logs
|
||||
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
### Fit data using different methods
|
||||
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
|
||||
max.iter = max.iter, family = "ising")
|
||||
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
|
||||
|
||||
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||
B.pca <- fit.pca$rotation
|
||||
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
|
||||
|
||||
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||
# range `[0, 1]`.
|
||||
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||
|
||||
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
|
||||
|
||||
### Prediction Errors: (using new independend sample of size `N`)
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||
# centered model matrix of vectorized `X`s
|
||||
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||
P.true <- proj(B.true)
|
||||
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
|
||||
|
||||
# format estimation/prediction errors and write to file and console
|
||||
line <- paste0(Map(get, header), collapse = ",")
|
||||
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||
# report progress
|
||||
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||
}
|
||||
}
|
134
sim/ising_2.R
134
sim/ising_2.R
|
@ -1,134 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
library(mvbernoulli)
|
||||
|
||||
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||
n <- 100 # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
|
||||
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
|
||||
r <- length(p)
|
||||
# parameter configuration
|
||||
rho <- -0.55
|
||||
c1 <- 1
|
||||
c2 <- 1
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == 2
|
||||
all.equal(p, c(4, 4))
|
||||
all.equal(q, c(2, 2))
|
||||
})
|
||||
|
||||
### small helpers
|
||||
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||
# Auto-Regression Covariance Matrix
|
||||
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||
# Inverse of the AR matrix
|
||||
AR.inv <- function(rho, dim) {
|
||||
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||
A / (1 - rho^2)
|
||||
}
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
### setup Ising parameters (to get reasonable parameters)
|
||||
eta1 <- 0
|
||||
# alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||
# linspace <- seq(-1, 1, length.out = pj)
|
||||
# matrix(c(linspace, rev(linspace)), pj, 2)
|
||||
# }, p, q)
|
||||
alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||
linspace <- seq(-1, 1, length.out = pj)
|
||||
matrix(c(linspace, linspace^2), pj, 2)
|
||||
}, p, q)
|
||||
# alphas <- Map(function(pj, qj) {
|
||||
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
|
||||
# }, p, q)
|
||||
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
|
||||
dim(Fy) <- c(2, 2, n)
|
||||
|
||||
# natural exponential family parameters
|
||||
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||
|
||||
# conditional Ising model parameters
|
||||
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||
theta_y[diagonal, ] <- eta_y1
|
||||
|
||||
# Sample X from conditional distribution
|
||||
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||
# convert (from compressed integer vector) to array data
|
||||
attr(X, "p") <- prod(p)
|
||||
X <- t(as.mvbmatrix(X))
|
||||
dim(X) <- c(p, n)
|
||||
storage.mode(X) <- "double"
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
### Fit data using GMLM with logging
|
||||
|
||||
# logger to log iterative change in the estimation process of GMLM
|
||||
# log <- data.frame()
|
||||
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||
|
||||
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||
|
||||
if (iter > 0) { cat("\033[9A") }
|
||||
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||
iter,
|
||||
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||
),
|
||||
"\033[2mMSE eta1\033[0m",
|
||||
mean((eta1 - eta1.est)^2),
|
||||
"\033[2msubspace distances of alphas\033[0m",
|
||||
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
sep = "\n "
|
||||
)
|
||||
}
|
||||
|
||||
# now call the GMLM fitting routine with performance profiling
|
||||
tryCatch({
|
||||
system.time( # profvis::profvis(
|
||||
fit.gmlm <- GMLM.default(
|
||||
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
|
||||
family = "ising", logger = logger
|
||||
)
|
||||
)
|
||||
}, error = function(ex) {
|
||||
print(ex)
|
||||
traceback()
|
||||
})
|
|
@ -1,207 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
library(mvbernoulli)
|
||||
|
||||
# seed = first 8 digits Euler's constant gamma = 0.57721 56649 01532 86060
|
||||
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
file.prefix <- "sim-ising-small"
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(2, 3) # preditor dimensions
|
||||
q <- c(1, 1) # response dimensions
|
||||
r <- length(p)
|
||||
# parameter configuration
|
||||
rho <- -0.55
|
||||
c1 <- 1
|
||||
c2 <- 1
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == 2
|
||||
length(p) == r
|
||||
all(q == 1)
|
||||
})
|
||||
|
||||
### small helpers
|
||||
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||
# Auto-Regression Covariance Matrix
|
||||
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||
# Inverse of the AR matrix
|
||||
AR.inv <- function(rho, dim) {
|
||||
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||
A / (1 - rho^2)
|
||||
}
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
### setup Ising parameters (to get reasonable parameters)
|
||||
eta1 <- 0
|
||||
alphas <- Map(function(pj, qj) {
|
||||
data <- linspace <- seq(-1, 1, len = pj)
|
||||
for (k in (seq_len(qj - 1) + 1)) {
|
||||
data <- c(data, linspace^k)
|
||||
}
|
||||
matrix(data, nrow = pj)
|
||||
}, p, q)
|
||||
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||
Fy <- array(sin(pi * y), dim = c(q, n))
|
||||
|
||||
# natural exponential family parameters
|
||||
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||
|
||||
# conditional Ising model parameters
|
||||
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||
theta_y[diagonal, ] <- eta_y1
|
||||
|
||||
# Sample X from conditional distribution
|
||||
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||
# convert (from compressed integer vector) to array data
|
||||
attr(X, "p") <- prod(p)
|
||||
X <- t(as.mvbmatrix(X))
|
||||
dim(X) <- c(p, n)
|
||||
storage.mode(X) <- "double"
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### Logging Errors and Warnings
|
||||
# Register a global warning and error handler for logging warnings/errors with
|
||||
# current simulation repetition session informatin allowing to reproduce problems
|
||||
exceptionLogger <- function(ex) {
|
||||
# retrieve current simulation repetition information
|
||||
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||
# setup an error log file with the same name as `file`
|
||||
log <- paste0(rep.info$file, ".log")
|
||||
# Write (append) condition message with reproduction info to the log
|
||||
cat("\n\n------------------------------------------------------------\n",
|
||||
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||
rep.info$file, rep.info$n, rep.info$rep,
|
||||
paste(rep.info$.Random.seed, collapse = ","),
|
||||
as.character.error(ex)
|
||||
), sep = "", file = log, append = TRUE)
|
||||
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||
if (n == 0L) {
|
||||
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||
} else {
|
||||
for (i in 1L:n) {
|
||||
xi <- x[[i]]
|
||||
label <- paste0(n - i + 1L, ": ")
|
||||
m <- length(xi)
|
||||
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||
srcfile <- attr(srcref, "srcfile")
|
||||
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||
}
|
||||
if (isTRUE(attr(xi, "truncated"))) {
|
||||
xi <- c(xi, " ...")
|
||||
m <- length(xi)
|
||||
}
|
||||
if (!is.null(srcloc)) {
|
||||
xi[m] <- paste0(xi[m], srcloc)
|
||||
}
|
||||
if (m > 1) {
|
||||
label <- c(label, rep(substr(" ", 1L,
|
||||
nchar(label, type = "w")), m - 1L))
|
||||
}
|
||||
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||
}
|
||||
}
|
||||
}
|
||||
globalCallingHandlers(list(
|
||||
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||
))
|
||||
|
||||
|
||||
### for every sample size
|
||||
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||
for (n in sample.sizes) {
|
||||
### write new simulation result file
|
||||
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||
header <- outer(
|
||||
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||
paste, sep = ".")
|
||||
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||
|
||||
### repeated simulation
|
||||
for (rep in seq_len(reps)) {
|
||||
### Repetition session state info
|
||||
# Stores specific session variables before starting the current
|
||||
# simulation replication. This allows to log state information which
|
||||
# can be used to replicate a specific simulation repetition in case of
|
||||
# errors/warnings from the logs
|
||||
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
### Fit data using different methods
|
||||
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
|
||||
max.iter = max.iter, family = "ising")
|
||||
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
|
||||
|
||||
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||
B.pca <- fit.pca$rotation
|
||||
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||
|
||||
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||
# range `[0, 1]`.
|
||||
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||
|
||||
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||
|
||||
### Prediction Errors: (using new independend sample of size `N`)
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||
# centered model matrix of vectorized `X`s
|
||||
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||
P.true <- proj(B.true)
|
||||
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
|
||||
|
||||
# format estimation/prediction errors and write to file and console
|
||||
line <- paste0(Map(get, header), collapse = ",")
|
||||
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||
# report progress
|
||||
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||
}
|
||||
}
|
|
@ -1,131 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
library(mvbernoulli)
|
||||
|
||||
# seed = leaf node count of a full chess search tree of depth 6 from the start pos
|
||||
# > position startpos
|
||||
# > go perft 6
|
||||
set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 1000 # maximum number of iterations for GMLM
|
||||
n <- 100 # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(2, 3) # preditor dimensions
|
||||
q <- c(1, 1) # response dimensions
|
||||
r <- length(p)
|
||||
# parameter configuration
|
||||
rho <- -0.55
|
||||
c1 <- 1
|
||||
c2 <- 1
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == 2
|
||||
length(p) == r
|
||||
all(q == 1)
|
||||
})
|
||||
|
||||
### small helpers
|
||||
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||
# Auto-Regression Covariance Matrix
|
||||
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||
# Inverse of the AR matrix
|
||||
AR.inv <- function(rho, dim) {
|
||||
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||
A / (1 - rho^2)
|
||||
}
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
### setup Ising parameters (to get reasonable parameters)
|
||||
eta1 <- 0
|
||||
alphas <- Map(function(pj, qj) {
|
||||
data <- linspace <- seq(-1, 1, len = pj)
|
||||
for (k in (seq_len(qj - 1) + 1)) {
|
||||
data <- c(data, linspace^k)
|
||||
}
|
||||
matrix(data, nrow = pj)
|
||||
}, p, q)
|
||||
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||
Fy <- array(sin(pi * y), dim = c(q, n))
|
||||
|
||||
# natural exponential family parameters
|
||||
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||
|
||||
# conditional Ising model parameters
|
||||
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||
theta_y[diagonal, ] <- eta_y1
|
||||
|
||||
# Sample X from conditional distribution
|
||||
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||
# convert (from compressed integer vector) to array data
|
||||
attr(X, "p") <- prod(p)
|
||||
X <- t(as.mvbmatrix(X))
|
||||
dim(X) <- c(p, n)
|
||||
storage.mode(X) <- "double"
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
|
||||
# logger to log iterative change in the estimation process of GMLM
|
||||
# log <- data.frame()
|
||||
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||
|
||||
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||
|
||||
if (iter > 0) { cat("\033[9A") }
|
||||
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||
iter,
|
||||
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||
),
|
||||
"\033[2mMSE eta1\033[0m",
|
||||
mean((eta1 - eta1.est)^2),
|
||||
"\033[2msubspace distances of alphas\033[0m",
|
||||
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
sep = "\n "
|
||||
)
|
||||
}
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
# now call the GMLM fitting routine with performance profiling
|
||||
tryCatch({
|
||||
system.time( # profvis::profvis(
|
||||
fit.gmlm <- GMLM.default(
|
||||
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
|
||||
family = "ising", logger = logger
|
||||
)
|
||||
)
|
||||
}, error = function(ex) {
|
||||
print(ex)
|
||||
traceback()
|
||||
})
|
171
sim/normal.R
171
sim/normal.R
|
@ -1,171 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
|
||||
set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
file.prefix <- "sim-normal"
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 10000 # maximum number of iterations for GMLM
|
||||
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(2, 3, 5) # preditor dimensions
|
||||
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
|
||||
r <- length(p)
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == length(p)
|
||||
r == length(q)
|
||||
all(outer(p, sample.sizes, `<`))
|
||||
})
|
||||
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
# setup model parameters
|
||||
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
|
||||
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
|
||||
eta1 <- 0 # intercept
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
|
||||
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
|
||||
Fy <- Fy - c(rowMeans(Fy, dims = r))
|
||||
|
||||
# sample predictors as X | Y = y (sample axis is last axis)
|
||||
Deltas <- Map(solve, Omegas) # normal covariances
|
||||
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
|
||||
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### Logging Errors and Warnings
|
||||
# Register a global warning and error handler for logging warnings/errors with
|
||||
# current simulation repetition session informatin allowing to reproduce problems
|
||||
exceptionLogger <- function(ex) {
|
||||
# retrieve current simulation repetition information
|
||||
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||
# setup an error log file with the same name as `file`
|
||||
log <- paste0(rep.info$file, ".log")
|
||||
# Write (append) condition message with reproduction info to the log
|
||||
cat("\n\n------------------------------------------------------------\n",
|
||||
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||
rep.info$file, rep.info$n, rep.info$rep,
|
||||
paste(rep.info$.Random.seed, collapse = ","),
|
||||
as.character.error(ex)
|
||||
), sep = "", file = log, append = TRUE)
|
||||
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||
if (n == 0L) {
|
||||
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||
} else {
|
||||
for (i in 1L:n) {
|
||||
xi <- x[[i]]
|
||||
label <- paste0(n - i + 1L, ": ")
|
||||
m <- length(xi)
|
||||
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||
srcfile <- attr(srcref, "srcfile")
|
||||
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||
}
|
||||
if (isTRUE(attr(xi, "truncated"))) {
|
||||
xi <- c(xi, " ...")
|
||||
m <- length(xi)
|
||||
}
|
||||
if (!is.null(srcloc)) {
|
||||
xi[m] <- paste0(xi[m], srcloc)
|
||||
}
|
||||
if (m > 1) {
|
||||
label <- c(label, rep(substr(" ", 1L,
|
||||
nchar(label, type = "w")), m - 1L))
|
||||
}
|
||||
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||
}
|
||||
}
|
||||
}
|
||||
globalCallingHandlers(list(
|
||||
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||
))
|
||||
|
||||
|
||||
### for every sample size
|
||||
start <- format(Sys.time(), "%Y%m%dT%H%M")
|
||||
for (n in sample.sizes) {
|
||||
### write new simulation result file
|
||||
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
|
||||
# CSV header, used to ensure correct value/column mapping when writing to file
|
||||
header <- outer(
|
||||
c("dist.subspace", "dist.projection", "error.pred"), # measures
|
||||
c("gmlm", "pca", "hopca", "tsir"), # methods
|
||||
paste, sep = ".")
|
||||
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
|
||||
|
||||
### repeated simulation
|
||||
for (rep in seq_len(reps)) {
|
||||
### Repetition session state info
|
||||
# Stores specific session variables before starting the current
|
||||
# simulation replication. This allows to log state information which
|
||||
# can be used to replicate a specific simulation repetition in case of
|
||||
# errors/warnings from the logs
|
||||
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
### Fit data using different methods
|
||||
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter)
|
||||
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
|
||||
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
|
||||
fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis)
|
||||
|
||||
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
|
||||
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
|
||||
B.pca <- fit.pca$rotation
|
||||
B.tsir <- Reduce(`%x%`, rev(fit.tsir))
|
||||
|
||||
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
|
||||
# `P_A = A (A' A)^-1/2 A'` and the normalization means that with
|
||||
# respect to the dimensions of `A, B` the subspace distance is in the
|
||||
# range `[0, 1]`.
|
||||
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
|
||||
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
|
||||
dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE)
|
||||
|
||||
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
|
||||
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
|
||||
dist.projection.hopca <- dist.projection(B.true, B.hopca)
|
||||
dist.projection.pca <- dist.projection(B.true, B.pca)
|
||||
dist.projection.tsir <- dist.projection(B.true, B.tsir)
|
||||
|
||||
### Prediction Errors: (using new independend sample of size `N`)
|
||||
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
|
||||
# centered model matrix of vectorized `X`s
|
||||
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
|
||||
P.true <- proj(B.true)
|
||||
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
|
||||
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
|
||||
error.pred.pca <- norm(P.true - proj(B.pca), "2")
|
||||
error.pred.tsir <- norm(P.true - proj(B.tsir), "2")
|
||||
|
||||
# format estimation/prediction errors and write to file and console
|
||||
line <- paste0(Map(get, header), collapse = ",")
|
||||
cat(line, "\n", sep = "", file = file, append = TRUE)
|
||||
# report progress
|
||||
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
|
||||
which(n == sample.sizes), length(sample.sizes), rep, reps))
|
||||
}
|
||||
}
|
|
@ -1,96 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
|
||||
set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
reps <- 100 # number of simulation replications
|
||||
n <- 100 # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(2, 3, 5) # preditor dimensions
|
||||
q <- c(1, 2, 3) # functions of y dimensions (response dimensions)
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
length(p) == length(q)
|
||||
})
|
||||
|
||||
# setup model parameters
|
||||
alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices
|
||||
Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter
|
||||
eta1 <- 0 # intercept
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) {
|
||||
r <- length(alphas) # tensor order
|
||||
|
||||
# generate response (sample axis is last axis)
|
||||
y <- sample.int(prod(q), n, replace = TRUE) # uniform samples
|
||||
Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n))
|
||||
Fy <- Fy - c(rowMeans(Fy, dims = r))
|
||||
|
||||
# sample predictors as X | Y = y (sample axis is last axis)
|
||||
Deltas <- Map(solve, Omegas) # normal covariances
|
||||
mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean
|
||||
X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### sample (training) data
|
||||
c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
|
||||
|
||||
### Fit data using GMLM with logging
|
||||
|
||||
# logger to log iterative change in the estimation process of GMLM
|
||||
# log <- data.frame()
|
||||
log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood
|
||||
B.true <- Reduce(`%x%`, rev(alphas))
|
||||
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
|
||||
B.est <- Reduce(`%x%`, rev(alphas.est))
|
||||
|
||||
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
|
||||
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
|
||||
|
||||
if (iter > 1) { cat("\033[9A") }
|
||||
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
|
||||
iter,
|
||||
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
|
||||
dist.subspace(B.true, B.est, normalize = TRUE)
|
||||
),
|
||||
"\033[2mMSE eta1\033[0m",
|
||||
mean((eta1 - eta1.est)^2),
|
||||
"\033[2msubspace distances of alphas\033[0m",
|
||||
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
"\033[2mFrob. norm of Omega differences\033[0m",
|
||||
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
|
||||
sep = "\n "
|
||||
)
|
||||
}
|
||||
|
||||
# now call the GMLM fitting routine with performance profiling
|
||||
system.time( # profvis::profvis(
|
||||
fit.gmlm <- GMLM.default(
|
||||
X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# Iter: loss - dist
|
||||
# 7190: 50.583 - 0.057
|
||||
# MSE eta1
|
||||
# 0.02694658
|
||||
# subspace distances of alphas
|
||||
# 0.043 0.035 0.034
|
||||
# Frob. norm of Omega differences
|
||||
# 0.815 1.777 12.756
|
||||
# time user system elapsed
|
||||
# 342.279 555.630 183.653
|
100
sim/plots.R
100
sim/plots.R
|
@ -1,100 +0,0 @@
|
|||
|
||||
if (!endsWith(getwd(), "/sim")) {
|
||||
setwd("sim")
|
||||
}
|
||||
|
||||
sim.plot <- function(file.prefix, date, to.file = FALSE) {
|
||||
|
||||
# file.prefix <- "sim-ising-small"
|
||||
# # file.prefix <- "sim-normal"
|
||||
# date <- "20221012" # yyyymmdd, to match all "[0-9]{6}"
|
||||
time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}"
|
||||
colors <- c(
|
||||
PCA = "#a11414",
|
||||
HOPCA = "#2a62b6",
|
||||
TSIR = "#9313b9",
|
||||
GMLM = "#247407"
|
||||
)
|
||||
line.width <- 1.75
|
||||
margins <- c(5.1, 4.1, 4.1, 0.1)
|
||||
|
||||
sim <- Reduce(rbind, Map(function(path) {
|
||||
df <- read.csv(path)
|
||||
df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1))
|
||||
df
|
||||
}, list.files(".", pattern = paste0(
|
||||
"^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = ""
|
||||
))))
|
||||
|
||||
stats <- aggregate(. ~ n, sim, mean)
|
||||
q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75))
|
||||
q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25))
|
||||
|
||||
if (to.file) {
|
||||
width <- 720
|
||||
png(paste(file.prefix, "-", date, ".png", sep = ""),
|
||||
width = width, height = round((6 / 11) * width, -1),
|
||||
pointsize = 12)
|
||||
}
|
||||
|
||||
layout(mat = matrix(c(
|
||||
1, 2,
|
||||
3, 3
|
||||
), 2, 2, byrow = TRUE),
|
||||
widths = c(1, 1),
|
||||
heights = c(12, 1), respect = FALSE)
|
||||
# layout.show(3)
|
||||
|
||||
with(stats, {
|
||||
par(mar = margins)
|
||||
plot(range(n), 0:1,
|
||||
type = "n", bty = "n", main = "Subspace Distance",
|
||||
xlab = "Sample Size", ylab = "Error")
|
||||
lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width)
|
||||
lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width)
|
||||
lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width)
|
||||
lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width)
|
||||
|
||||
xn <- c(q75$n, rev(q25$n))
|
||||
polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)),
|
||||
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)),
|
||||
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)),
|
||||
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)),
|
||||
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
|
||||
})
|
||||
|
||||
with(stats, {
|
||||
par(mar = margins)
|
||||
plot(range(n), 0:1,
|
||||
type = "n", bty = "n", main = "RMSE (Prediction Error)",
|
||||
xlab = "Sample Size", ylab = "Error")
|
||||
xn <- c(q75$n, rev(q25$n))
|
||||
polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)),
|
||||
col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)),
|
||||
col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)),
|
||||
col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA)
|
||||
polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)),
|
||||
col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA)
|
||||
lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width)
|
||||
lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width)
|
||||
lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width)
|
||||
lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width)
|
||||
})
|
||||
|
||||
par(mar = rep(0, 4))
|
||||
plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "")
|
||||
legend("center", legend = names(colors), col = colors, lwd = line.width,
|
||||
lty = 1, bty = "n", horiz = TRUE)
|
||||
|
||||
if (to.file) {
|
||||
dev.off()
|
||||
}
|
||||
}
|
||||
|
||||
sim.plot("sim-ising-small", "20221012", TRUE)
|
||||
sim.plot("sim-normal", "20221012", TRUE)
|
|
@ -5,9 +5,14 @@ export("%x_1%")
|
|||
export("%x_2%")
|
||||
export("%x_3%")
|
||||
export("%x_4%")
|
||||
export(.projBand)
|
||||
export(.projPSD)
|
||||
export(.projRank)
|
||||
export(.projSymBand)
|
||||
export(.projSymRank)
|
||||
export(CISE)
|
||||
export(D)
|
||||
export(D.pinv)
|
||||
export(Dup)
|
||||
export(Dup.pinv)
|
||||
export(GMLM)
|
||||
export(GMLM.default)
|
||||
export(HOPCA)
|
||||
|
@ -17,6 +22,8 @@ export(ICU)
|
|||
export(K)
|
||||
export(K.perm)
|
||||
export(LSIR)
|
||||
export(La.det)
|
||||
export(La.solve)
|
||||
export(N)
|
||||
export(NAGD)
|
||||
export(PCA2d)
|
||||
|
@ -25,20 +32,30 @@ export(RMReg)
|
|||
export(RMap)
|
||||
export(S)
|
||||
export(TSIR)
|
||||
export(approx.kron)
|
||||
export(approx.kronecker)
|
||||
export(colKronecker)
|
||||
export(decompose.kronecker)
|
||||
export(dist.kron.norm)
|
||||
export(dist.kron.tr)
|
||||
export(dist.projection)
|
||||
export(dist.subspace)
|
||||
export(exprs.all.equal)
|
||||
export(gmlm_ising)
|
||||
export(gmlm_mlm)
|
||||
export(gmlm_tensor_normal)
|
||||
export(icu_tensor_normal)
|
||||
export(ising_m2)
|
||||
export(ising_sample)
|
||||
export(kpir.approx)
|
||||
export(kpir.base)
|
||||
export(kpir.ls)
|
||||
export(kpir.mle)
|
||||
export(kpir.momentum)
|
||||
export(kpir.new)
|
||||
export(kronperm)
|
||||
export(mat)
|
||||
export(matProj)
|
||||
export(matpow)
|
||||
export(matrixImage)
|
||||
export(mcov)
|
||||
|
@ -49,10 +66,20 @@ export(mtvk)
|
|||
export(num.deriv)
|
||||
export(num.deriv.function)
|
||||
export(num.deriv2)
|
||||
export(pinv)
|
||||
export(projDiag)
|
||||
export(projStiefel)
|
||||
export(projSym)
|
||||
export(reduce)
|
||||
export(riccati)
|
||||
export(rowKronecker)
|
||||
export(rtensornorm)
|
||||
export(slice.assign.expr)
|
||||
export(slice.expr)
|
||||
export(slice.select)
|
||||
export(sylvester)
|
||||
export(tensor_predictor)
|
||||
export(tsym)
|
||||
export(ttm)
|
||||
export(ttt)
|
||||
export(vech)
|
||||
|
|
|
@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L,
|
|||
# optimize likelihood using Nesterov Excelerated Gradient Descent
|
||||
params.fit <- NAGD(
|
||||
fun.loss = function(params) {
|
||||
# scaled negative lig-likelihood
|
||||
# scaled negative log-likelihood
|
||||
# eta1 alphas Omegas
|
||||
family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]])
|
||||
},
|
||||
fun.grad = function(params) {
|
||||
# gradient of the scaled negative lig-likelihood
|
||||
# gradient of the scaled negative log-likelihood
|
||||
# eta1 alphas Omegas
|
||||
family$grad(X, Fy, params[[1]], params[[2]], params[[3]])
|
||||
},
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#' `i`th axis excluding the sample axis.
|
||||
#'
|
||||
#' @export
|
||||
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) {
|
||||
HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) {
|
||||
# observation index numbers (all axis except the sample axis)
|
||||
modes <- seq_along(dim(X))[-sample.axis]
|
||||
|
||||
|
|
|
@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) {
|
|||
list(alphas = alphas, Deltas = fun.Deltas(alphas))
|
||||
}
|
||||
|
||||
#' HPOIR subroutine for the MLE estimation given proprocessed data and initial
|
||||
#' alphas, Deltas paramters
|
||||
#' HPOIR subroutine for the MLE estimation given preprocessed data and initial
|
||||
#' alpha, Delta parameters
|
||||
#'
|
||||
#' @keywords internal
|
||||
HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) {
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#'
|
||||
#' @export
|
||||
HOSVD <- function(X, nu = NULL, eps = 1e-07) {
|
||||
if (!missing(nu)) {
|
||||
if (!is.null(nu)) {
|
||||
stopifnot(all(nu <= dim(X)))
|
||||
}
|
||||
|
||||
|
@ -21,3 +21,5 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) {
|
|||
|
||||
list(C = C, Us = Us)
|
||||
}
|
||||
|
||||
SVD <- function(A) .Call("C_svd", A)
|
||||
|
|
|
@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
|||
stop("Initial loss is non-finite (", loss, ")")
|
||||
}
|
||||
# initialize "previous" iterate parameters
|
||||
params.last <- params
|
||||
prev.params <- params
|
||||
|
||||
# Gradient Descent Loop
|
||||
line.search.tag <- FALSE # init line search state as "failure"
|
||||
|
@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
|||
}
|
||||
|
||||
# Extrapolation form previous position (momentum)
|
||||
# `params.moment <- (1 + moment) * params - moment * param.last`
|
||||
# `params.moment <- (1 + moment) * params - moment * prev.params`
|
||||
moment <- (m[1] - 1) / m[2]
|
||||
params.moment <- fun.lincomb(1 + moment, params, -moment, params.last)
|
||||
params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params)
|
||||
|
||||
# Compute gradient at extrapolated position
|
||||
if (missing(more.params)) {
|
||||
|
@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL,
|
|||
}
|
||||
|
||||
# keep track of previous parameters
|
||||
params.last <- params
|
||||
prev.params <- params
|
||||
|
||||
# check line search outcome
|
||||
if (is.na(line.search.tag)) {
|
||||
|
|
|
@ -4,11 +4,21 @@
|
|||
TSIR <- function(X, y, d, sample.axis = 1L,
|
||||
nr.slices = 10L, # default slices, ignored if y is a factor or integer
|
||||
max.iter = 50L,
|
||||
eps = sqrt(.Machine$double.eps)
|
||||
eps = sqrt(.Machine$double.eps),
|
||||
slice.method = c("cut", "ecdf") # ignored if y is a factor or integer
|
||||
) {
|
||||
|
||||
if (!(is.factor(y) || is.integer(y))) {
|
||||
y <- cut(y, nr.slices)
|
||||
slice.method <- match.arg(slice.method)
|
||||
if (slice.method == "ecdf") {
|
||||
y <- cut(ecdf(y)(y), nr.slices)
|
||||
} else {
|
||||
y <- cut(y, nr.slices)
|
||||
# ensure there are no empty slices
|
||||
if (any(table(y) == 0)) {
|
||||
y <- as.factor(as.integer(y))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stopifnot(exprs = {
|
||||
|
|
|
@ -1,4 +1,32 @@
|
|||
#' Approximates kronecker product decomposition.
|
||||
#' @examples
|
||||
#' nrows <- c(3, 2, 5)
|
||||
#' ncols <- c(2, 4, 8)
|
||||
#'
|
||||
#' A <- Reduce(kronecker, Map(matrix, Map(rnorm, nrows * ncols), nrows))
|
||||
#'
|
||||
#' all.equal(A, Reduce(kronecker, approx.kron(A, nrows, ncols)))
|
||||
#'
|
||||
#' @export
|
||||
approx.kron <- function(A, nrows, ncols) {
|
||||
|
||||
# rearrange kronecker product `A` into outer product `R`
|
||||
dim(A) <- c(rev(nrows), rev(ncols))
|
||||
axis.perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2L))[, rev(seq_along(nrows))])
|
||||
R <- aperm(A, axis.perm, resize = FALSE)
|
||||
dim(R) <- nrows * ncols
|
||||
|
||||
# scaling factor for every product component
|
||||
s <- sum(A^2)^(1 / length(dim(A)))
|
||||
|
||||
# higher order SVD on `R` with one singular vector
|
||||
Map(function(mode, nr, nc) {
|
||||
s * `dim<-`(La.svd(mcrossprod(R, mode = mode), 1L, 0L)$u, c(nr, nc))
|
||||
}, seq_along(nrows), nrows, ncols)
|
||||
|
||||
}
|
||||
|
||||
|
||||
#' Approximates Kronecker Product decomposition.
|
||||
#'
|
||||
#' Approximates the matrices `A` and `B` such that
|
||||
#' C = A %x% B
|
||||
|
@ -21,7 +49,7 @@
|
|||
#' 123 (2000) 85-100 (pp. 93-95)
|
||||
#'
|
||||
#' @export
|
||||
approx.kronecker <- function(C, dimA, dimB) {
|
||||
approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
|
||||
|
||||
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
||||
R <- aperm(C, c(2L, 4L, 1L, 3L))
|
||||
|
@ -33,8 +61,115 @@ approx.kronecker <- function(C, dimA, dimB) {
|
|||
svdR <- svd(R, 1L, 1L)
|
||||
}
|
||||
|
||||
return(list(
|
||||
list(
|
||||
A = array(sqrt(svdR$d[1]) * svdR$u, dimA),
|
||||
B = array(sqrt(svdR$d[1]) * svdR$v, dimB)
|
||||
)
|
||||
}
|
||||
|
||||
#' Kronecker Product Decomposition.
|
||||
#'
|
||||
#' Computes the components summation components `A_i`, `B_i` of a sum of
|
||||
#' Kronecker products
|
||||
#' C = sum_i A_i %x% B_i
|
||||
#' with the minimal estimated number of summands.
|
||||
#'
|
||||
#' @param C desired kronecker product result.
|
||||
#' @param dimA length 2 vector of dimensions of \code{A}.
|
||||
#' @param dimB length 2 vector of dimensions of \code{B}.
|
||||
#' @param tol tolerance of singular values of \code{C} to determin the number of
|
||||
#' needed summands.
|
||||
#'
|
||||
#' @return list of lenghth with estimated number of summation components, each
|
||||
#' entry consists of a list with named entries \code{"A"} and \code{"B"} of
|
||||
#' dimensions \code{dimA} and \code{dimB}.
|
||||
#'
|
||||
#' @examples
|
||||
#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE)
|
||||
#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE)
|
||||
#' C <- Reduce(`+`, Map(kronecker, As, Bs))
|
||||
#'
|
||||
#' decomposition <- decompose.kronecker(C, c(2, 7))
|
||||
#'
|
||||
#' reconstruction <- Reduce(`+`, Map(function(summand) {
|
||||
#' kronecker(summand[[1]], summand[[2]])
|
||||
#' }, decomposition), array(0, dim(C)))
|
||||
#'
|
||||
#' stopifnot(all.equal(C, reconstruction))
|
||||
#'
|
||||
#' @export
|
||||
decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) {
|
||||
# convert the equivalent outer product
|
||||
dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
||||
C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE)
|
||||
dim(C) <- c(prod(dimA), prod(dimB))
|
||||
|
||||
# Singular Valued Decomposition
|
||||
svdC <- La.svd(C)
|
||||
|
||||
# Sum of Kronecker Components
|
||||
lapply(seq_len(sum(svdC$d > tol)), function(i) list(
|
||||
A = matrix(svdC$d[i] * svdC$u[, i], dimA),
|
||||
B = matrix(svdC$vt[i, ], dimB)
|
||||
))
|
||||
}
|
||||
|
||||
### Given that C is a Kronecker product this is a fast method but a bit
|
||||
### unreliable in full generality.
|
||||
# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) {
|
||||
# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L])
|
||||
# R <- aperm(C, c(2L, 4L, 1L, 3L))
|
||||
# dim(R) <- c(prod(dimA), prod(dimB))
|
||||
# max.index <- which.max(abs(R))
|
||||
# row.index <- ((max.index - 1L) %% nrow(R)) + 1L
|
||||
# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L
|
||||
# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1
|
||||
# list(
|
||||
# A = array(R[, col.index], dimA),
|
||||
# B = array(R[row.index, ] / max.elem, dimB)
|
||||
# )
|
||||
# }
|
||||
|
||||
|
||||
# kron <- function(A, B) {
|
||||
# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1]))
|
||||
# K <- aperm(outer(A, B), perm)
|
||||
# dim(K) <- dim(A) * dim(B)
|
||||
# K
|
||||
# }
|
||||
|
||||
# kronperm <- function(A) {
|
||||
# # force A to have even number of dimensions
|
||||
# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L))
|
||||
# # compute axis permutation
|
||||
# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1]))
|
||||
# # permute elements of A
|
||||
# K <- aperm(A, perm, resize = FALSE)
|
||||
# # collapse/set dimensions
|
||||
# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2)
|
||||
# K
|
||||
# }
|
||||
|
||||
# p <- c(2, 3, 5)
|
||||
# q <- c(3, 4, 7)
|
||||
# A <- array(rnorm(prod(p)), p)
|
||||
# B <- array(rnorm(prod(q)), q)
|
||||
# all.equal(kronperm(outer(A, B)), kronecker(A, B))
|
||||
# all.equal(kron(A, B), kronecker(A, B))
|
||||
|
||||
|
||||
# dA <- c(2, 3, 5)
|
||||
# dB <- c(3, 4, 7)
|
||||
# A <- array(rnorm(prod(dA)), dA)
|
||||
# B <- array(rnorm(prod(dB)), dB)
|
||||
# X <- outer(A, B)
|
||||
|
||||
# r <- length(dim(X)) / 2
|
||||
# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r))))
|
||||
# K <- apply(rbind(
|
||||
# (I - 1) %/% tail(dim(X), r) + 1,
|
||||
# (I - 1) %% tail(dim(X), r) + 1
|
||||
# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1])
|
||||
# dim(K) <- head(dim(X), r) * tail(dim(X), r)
|
||||
|
||||
# all.equal(kronecker(A, B), K)
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F
|
||||
#'
|
||||
#' This is equivalent to the expression
|
||||
#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster.
|
||||
#' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster.
|
||||
#'
|
||||
#' @examples
|
||||
#' A1 <- matrix(rnorm(5^2), 5)
|
||||
|
@ -16,21 +16,21 @@
|
|||
#' ))
|
||||
#'
|
||||
#' p <- c(3, 7, 5, 2)
|
||||
#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||
#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||
#' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||
#' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p)
|
||||
#' stopifnot(all.equal(
|
||||
#' dist.kron.norm(A, B),
|
||||
#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")
|
||||
#' dist.kron.norm(As, Bs),
|
||||
#' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")
|
||||
#' ))
|
||||
#'
|
||||
#' @export
|
||||
dist.kron.norm <- function(A, B, eps = .Machine$double.eps) {
|
||||
if (is.list(A) && is.list(B)) {
|
||||
norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) -
|
||||
2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) +
|
||||
prod(unlist(Map(function(x) sum(x^2), B)))
|
||||
} else if (is.matrix(A) && is.matrix(B)) {
|
||||
norm2 <- sum((A - B)^2)
|
||||
dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) {
|
||||
if (is.list(As) && is.list(Bs)) {
|
||||
norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) -
|
||||
2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) +
|
||||
prod(unlist(Map(function(x) sum(x^2), Bs)))
|
||||
} else if (is.matrix(As) && is.matrix(Bs)) {
|
||||
norm2 <- sum((As - Bs)^2)
|
||||
} else {
|
||||
stop("Unexpected input")
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#' Porjection Distance of two matrices
|
||||
#' Projection Distance of two matrices
|
||||
#'
|
||||
#' Defined as sine of the maximum principal angle between the column spaces
|
||||
#' of the matrices
|
||||
|
|
|
@ -0,0 +1,406 @@
|
|||
#' Specialized version of the GMLM for the Ising model (inverse Ising problem)
|
||||
#'
|
||||
#' @todo TODO: Add beta and Omega projections
|
||||
#'
|
||||
#' @export
|
||||
gmlm_ising <- function(X, F, sample.axis = length(dim(X)),
|
||||
# proj.betas = ..., proj.Omegas = ..., # TODO: this
|
||||
max.iter = 1000L,
|
||||
eps = sqrt(.Machine$double.eps),
|
||||
step.size = 1e-3,
|
||||
zig.zag.threashold = 20L,
|
||||
patience = 3L,
|
||||
nr.slices = 20L, # only for univariate `F(y) = y`
|
||||
slice.method = c("cut", "ecdf", "none"), # only for univariate `F(y) = y` and `y` is a factor or integer
|
||||
logger = function(...) { }
|
||||
) {
|
||||
# Get problem dimensions
|
||||
dimX <- dim(X)[-sample.axis]
|
||||
# threat scalar `F` as a tensor
|
||||
if (is.null(dim(F))) {
|
||||
dimF <- rep(1L, length(dimX))
|
||||
dim(F) <- ifelse(seq_along(dim(X)) == sample.axis, sample.size, 1L)
|
||||
} else {
|
||||
dimF <- dim(F)[-sample.axis]
|
||||
}
|
||||
sample.size <- dim(X)[sample.axis]
|
||||
|
||||
# rearrange `X`, `F` such that the last axis enumerates observations
|
||||
if (sample.axis != length(dim(X))) {
|
||||
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
|
||||
X <- aperm(X, axis.perm)
|
||||
F <- aperm(F, axis.perm)
|
||||
sample.axis <- length(dim(X))
|
||||
}
|
||||
modes <- seq_along(dimX)
|
||||
|
||||
# Special case for univariate response `vec F(y) = y`
|
||||
# Due to high computational costs we use slicing
|
||||
slice.method <- match.arg(slice.method)
|
||||
slices.ind <- if ((slice.method != "none") && (length(F) == prod(dim(F)))) {
|
||||
y <- as.vector(F)
|
||||
if (!(is.factor(y) || is.integer(y))) {
|
||||
slice.method <- match.arg(slice.method)
|
||||
if (slice.method == "ecdf") {
|
||||
y <- cut(ecdf(y)(y), nr.slices)
|
||||
} else {
|
||||
y <- cut(y, nr.slices)
|
||||
}
|
||||
}
|
||||
split(seq_len(sample.size), y, drop = TRUE)
|
||||
} else {
|
||||
seq_len(sample.size)
|
||||
}
|
||||
|
||||
|
||||
# initialize betas with tensor normal estimate (ignoring data being binary)
|
||||
fit_normal <- gmlm_tensor_normal(X, F, sample.axis = length(dim(X)))
|
||||
betas <- fit_normal$betas
|
||||
|
||||
Omegas <- Omegas.init <- Map(function(mode) {
|
||||
n <- prod(dim(X)[-mode])
|
||||
prob2 <- mcrossprod(X, mode = mode) / n
|
||||
prob2[prob2 == 0] <- 1 / n
|
||||
prob1 <- diag(prob2)
|
||||
`prob1^2` <- outer(prob1, prob1)
|
||||
|
||||
`diag<-`(log(((1 - `prob1^2`) / `prob1^2`) * prob2 / (1 - prob2)), 0)
|
||||
}, modes)
|
||||
|
||||
# Determin degenerate combinations, that are variables which are exclusive
|
||||
# in the data set
|
||||
matX <- mat(X, sample.axis)
|
||||
degen <- crossprod(matX) == 0
|
||||
degen.mask <- which(degen)
|
||||
# If there are degenerate combination, compute an (arbitrary) bound the
|
||||
# log odds parameters of those combinations
|
||||
if (any(degen.mask)) {
|
||||
degen.ind <- arrayInd(degen.mask, dim(degen))
|
||||
meanX <- colMeans(matX)
|
||||
prodX <- meanX[degen.ind[, 1]] * meanX[degen.ind[, 2]]
|
||||
degen.bounds <- log((1 - prodX) / (prodX * sample.size))
|
||||
# Component indices in Omegas of degenerate two-way interactions
|
||||
degen.ind <- arrayInd(degen.mask, rep(dimX, 2))
|
||||
degen.ind <- Map(function(d, m) {
|
||||
degen.ind[, m] + dimX[m] * (degen.ind[, m + length(dimX)] - 1L)
|
||||
}, dimX, seq_along(dimX))
|
||||
|
||||
## Enforce initial value degeneracy interaction param. constraints
|
||||
# Extract parameters corresponding to degenerate interactions
|
||||
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
|
||||
# Degeneracy Constrained Parameters (sign is dropped)
|
||||
DCP <- mapply(function(vals, bound) {
|
||||
logVals <- log(abs(vals))
|
||||
err <- max(0, sum(logVals) - log(abs(bound)))
|
||||
exp(logVals - (err / length(vals)))
|
||||
}, split(degen.params, col(degen.params)), degen.bounds)
|
||||
# Update values in Omegas such that all degeneracy constraints hold
|
||||
Omegas <- Map(function(Omega, cp, ind) {
|
||||
# Combine multiple constraints for every element into single
|
||||
# constraint value per element
|
||||
cp <- mapply(min, split(abs(cp), ind))
|
||||
ind <- as.integer(names(cp))
|
||||
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
|
||||
}, Omegas, split(DCP, row(DCP)), degen.ind)
|
||||
}
|
||||
|
||||
# Initialize mean squared gradients
|
||||
grad2_betas <- Map(array, 0, Map(dim, betas))
|
||||
grad2_Omegas <- Map(array, 0, Map(dim, Omegas))
|
||||
|
||||
# Keep track of the last loss to accumulate loss difference sign changes
|
||||
# indicating optimization instabilities as a sign to stop
|
||||
last_loss <- Inf
|
||||
accum_sign <- 1
|
||||
|
||||
# non improving iteration counter
|
||||
non_improving <- 0L
|
||||
|
||||
# technical access points to dynamicaly access a multi-dimensional array
|
||||
`X[..., i]` <- slice.expr(X, sample.axis, index = i, drop = FALSE)
|
||||
`F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE)
|
||||
|
||||
# Iterate till a break condition triggers or till max. nr. of iterations
|
||||
for (iter in seq_len(max.iter)) {
|
||||
|
||||
grad_betas <- Map(matrix, 0, dimX, dimF)
|
||||
Omega <- Reduce(kronecker, rev(Omegas))
|
||||
|
||||
# second order residuals accumulator
|
||||
# `sum_i (X_i o X_i - E[X o X | Y = y_i])`
|
||||
R2 <- array(0, dim = c(dimX, dimX))
|
||||
|
||||
# negative log-likelihood
|
||||
loss <- 0
|
||||
|
||||
for (i in slices.ind) {
|
||||
# slice size (nr. of objects in the slice)
|
||||
n_i <- length(i)
|
||||
|
||||
sumF_i <- rowSums(eval(`F[..., i]`), dims = length(dimF))
|
||||
|
||||
diag_params_i <- mlm(sumF_i / n_i, betas)
|
||||
params_i <- Omega + diag(as.vector(diag_params_i))
|
||||
m2_i <- ising_m2(params_i)
|
||||
|
||||
# accumulate loss
|
||||
matX_i <- mat(eval(`X[..., i]`), modes)
|
||||
loss <- loss - (
|
||||
sum(matX_i * (params_i %*% matX_i)) + n_i * log(attr(m2_i, "prob_0"))
|
||||
)
|
||||
|
||||
R2_i <- tcrossprod(matX_i) - n_i * m2_i
|
||||
R1_i <- diag(R2_i)
|
||||
dim(R1_i) <- dimX
|
||||
|
||||
for (j in modes) {
|
||||
grad_betas[[j]] <- grad_betas[[j]] +
|
||||
mcrossprod(R1_i, mlm(sumF_i, betas[-j], modes[-j]), j)
|
||||
}
|
||||
R2 <- R2 + as.vector(R2_i)
|
||||
}
|
||||
|
||||
grad_Omegas <- Map(function(j) {
|
||||
grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE)
|
||||
dim(grad) <- dim(Omegas[[j]])
|
||||
grad
|
||||
}, modes)
|
||||
|
||||
|
||||
# update optimization behavioral trackers
|
||||
accum_sign <- sign(last_loss - loss) - accum_sign
|
||||
non_improving <- max(0L, non_improving - 1L + 2L * (last_loss < loss))
|
||||
|
||||
# check break conditions
|
||||
if (abs(accum_sign) > zig.zag.threashold) { break }
|
||||
if (non_improving > patience) { break }
|
||||
if (abs(last_loss - loss) < eps * last_loss) { break }
|
||||
|
||||
# store current loss for the next iteration
|
||||
last_loss <- loss
|
||||
|
||||
# Accumulate root mean squared gradiends
|
||||
grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
|
||||
grad2_betas, grad_betas)
|
||||
grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g),
|
||||
grad2_Omegas, grad_Omegas)
|
||||
|
||||
# logging (before parameter update)
|
||||
logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas)
|
||||
|
||||
# Update Parameters
|
||||
betas <- Map(function(beta, grad, m2) {
|
||||
beta + (step.size / (sqrt(m2) + eps)) * grad
|
||||
}, betas, grad_betas, grad2_betas)
|
||||
Omegas <- Map(function(Omega, grad, m2) {
|
||||
Omega + (step.size / (sqrt(m2) + eps)) * grad
|
||||
}, Omegas, grad_Omegas, grad2_Omegas)
|
||||
|
||||
# Enforce degeneracy parameter constraints
|
||||
if (any(degen.mask)) {
|
||||
# Extract parameters corresponding to degenerate interactions
|
||||
degen.params <- do.call(rbind, Map(`[`, Omegas, degen.ind))
|
||||
# Degeneracy Constrained Parameters (sign is dropped)
|
||||
DCP <- mapply(function(vals, bound) {
|
||||
logVals <- log(abs(vals))
|
||||
err <- max(0, sum(logVals) - log(abs(bound)))
|
||||
exp(logVals - (err / length(vals)))
|
||||
}, split(degen.params, col(degen.params)), degen.bounds)
|
||||
# Update values in Omegas such that all degeneracy constraints hold
|
||||
Omegas <- Map(function(Omega, cp, ind) {
|
||||
# Combine multiple constraints for every element into single
|
||||
# constraint value per element
|
||||
cp <- mapply(min, split(abs(cp), ind))
|
||||
ind <- as.integer(names(cp))
|
||||
`[<-`(Omega, ind, sign(Omega[ind]) * cp)
|
||||
}, Omegas, split(DCP, row(DCP)), degen.ind)
|
||||
}
|
||||
}
|
||||
|
||||
structure(
|
||||
list(eta1 = array(0, dimX), betas = betas, Omegas = Omegas),
|
||||
tensor_normal = fit_normal,
|
||||
Omegas.init = Omegas.init,
|
||||
degen.mask = degen.mask
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
|
||||
################################################################################
|
||||
### Development Interactive Block (Delete / Make sim / TODO: ...) ###
|
||||
################################################################################
|
||||
if (FALSE) { # interactive()
|
||||
|
||||
par(bg = "#1d1d1d",
|
||||
fg = "lightgray",
|
||||
col = "#d5d5d5",
|
||||
col.axis = "#d5d5d5",
|
||||
col.lab = "#d5d5d5",
|
||||
col.main = "#d5d5d5",
|
||||
col.sub = "#d5d5d5", # col.sub = "#2467d0"
|
||||
pch = 16
|
||||
)
|
||||
cex <- 1.25
|
||||
col <- colorRampPalette(c("#f15050", "#1d1d1d", "#567DCA"))(256)
|
||||
|
||||
|
||||
.logger <- function() {
|
||||
iter <- 0L
|
||||
assign("log", data.frame(
|
||||
iter = rep(NA_integer_, 100000),
|
||||
loss = rep(NA_real_, 100000),
|
||||
dist.B = rep(NA_real_, 100000),
|
||||
dist.Omega = rep(NA_real_, 100000),
|
||||
norm.grad.B = rep(NA_real_, 100000),
|
||||
norm.grad.Omega = rep(NA_real_, 100000)
|
||||
), envir = .GlobalEnv)
|
||||
assign("B.gmlm", NULL, .GlobalEnv)
|
||||
assign("Omega.gmlm", NULL, .GlobalEnv)
|
||||
|
||||
function(it, loss, betas, Omegas, grad_betas, grad_Omegas) {
|
||||
# Store in global namespace (allows to stop and get the results)
|
||||
B.gmlm <- Reduce(kronecker, rev(betas))
|
||||
assign("B.gmlm", B.gmlm, .GlobalEnv)
|
||||
Omega.gmlm <- Reduce(kronecker, rev(Omegas))
|
||||
assign("Omega.gmlm", Omega.gmlm, .GlobalEnv)
|
||||
|
||||
dist.B <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
|
||||
dist.Omega <- norm(Omega.true - Omega.gmlm, "F")
|
||||
norm.grad.B <- sqrt(sum(mapply(norm, grad_betas, "F")^2))
|
||||
norm.grad.Omega <- sqrt(sum(mapply(norm, grad_Omegas, "F")^2))
|
||||
|
||||
log[iter <<- iter + 1L, ] <<- list(
|
||||
it, loss, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega
|
||||
)
|
||||
cat(sprintf("\r%3d - d(B): %.3f, d(O): %.3f, |g(B)|: %.3f, |g(O)|: %.3f, loss: %.3f\033[K",
|
||||
it, dist.B, dist.Omega, norm.grad.B, norm.grad.Omega, loss))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
sample.size <- 1000
|
||||
dimX <- c(2, 3) # predictor `X` dimension
|
||||
dimF <- rep(1, length(dimX)) # "function" `F(y)` of responce `y` dimension
|
||||
|
||||
betas <- Map(diag, 1, dimX, dimF)
|
||||
Omegas <- list(toeplitz(c(0, -2)), toeplitz(seq(1, 0, by = -0.5)))
|
||||
|
||||
B.true <- Reduce(kronecker, rev(betas))
|
||||
Omega.true <- Reduce(kronecker, rev(Omegas))
|
||||
|
||||
# data sampling routine
|
||||
c(X, F, y, sample.axis) %<-% (sample.data <- function(sample.size, betas, Omegas) {
|
||||
dimX <- mapply(nrow, betas)
|
||||
dimF <- mapply(ncol, betas)
|
||||
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(prod(sample.size, dimF), -2, 2)
|
||||
F <- array(y, dim = c(dimF, sample.size)) # ~ U[-1, 1]
|
||||
|
||||
Omega <- Reduce(kronecker, rev(Omegas))
|
||||
|
||||
X <- apply(F, length(dim(F)), function(Fi) {
|
||||
dim(Fi) <- dimF
|
||||
params <- diag(as.vector(mlm(Fi, betas))) + Omega
|
||||
tensorPredictors::ising_sample(1, params)
|
||||
})
|
||||
dim(X) <- c(dimX, sample.size)
|
||||
|
||||
list(X = X, F = F, y = y, sample.axis = length(dim(X)))
|
||||
})(sample.size, betas, Omegas)
|
||||
|
||||
local({
|
||||
X.proto <- array(seq_len(prod(dimX)), dimX)
|
||||
interactions <- crossprod(mat(X, sample.axis))
|
||||
dimnames(interactions) <- rep(list(
|
||||
do.call(paste0, c("X", Map(slice.index, list(X.proto), seq_along(dimX))))
|
||||
), 2)
|
||||
cat("Sample Size: ", sample.size, "\n")
|
||||
print.table(interactions, zero.print = ".")
|
||||
})
|
||||
|
||||
# system.time({
|
||||
# fit.gmlm <- gmlm_ising(X, y, logger = .logger())
|
||||
# })
|
||||
Rprof()
|
||||
gmlm_ising(X, y)
|
||||
Rprof(NULL)
|
||||
summaryRprof()
|
||||
|
||||
B.gmlm <- Reduce(kronecker, rev(fit.gmlm$betas))
|
||||
Omega.gmlm <- Reduce(kronecker, rev(fit.gmlm$Omegas))
|
||||
|
||||
B.normal <- Reduce(kronecker, rev(attr(fit.gmlm, "tensor_normal")$betas))
|
||||
Omega.init <- Reduce(kronecker, rev(attr(fit.gmlm, "Omegas.init")))
|
||||
degen.mask <- attr(fit.gmlm, "degen.mask")
|
||||
|
||||
local({
|
||||
layout(matrix(c(
|
||||
1, 2, 3, 3, 3,
|
||||
1, 4, 5, 6, 7
|
||||
), nrow = 2, byrow = TRUE), width = c(6, 3, 1, 1, 1))
|
||||
|
||||
with(na.omit(log), {
|
||||
plot(range(iter), c(0, 1), type = "n", bty = "n",
|
||||
xlab = "Iterations", ylab = "Distance")
|
||||
|
||||
lines(iter, dist.B, col = "red", lwd = 2)
|
||||
lines(iter, dist.Omega / max(dist.Omega), col = "blue", lwd = 2)
|
||||
lines(iter, (loss - min(loss)) / diff(range(loss)), col = "darkgreen", lwd = 2)
|
||||
|
||||
norm.grad <- sqrt(norm.grad.B^2 + norm.grad.Omega^2)
|
||||
# Scale all gradient norms
|
||||
norm.grad.B <- norm.grad.B / max(norm.grad)
|
||||
norm.grad.Omega <- norm.grad.Omega / max(norm.grad)
|
||||
norm.grad <- norm.grad / max(norm.grad)
|
||||
lines(iter, norm.grad.B, lty = 2, col = "red")
|
||||
lines(iter, norm.grad.Omega, lty = 2, col = "blue")
|
||||
lines(iter, norm.grad, lty = 2, col = "darkgreen")
|
||||
|
||||
axis(4, at = c(
|
||||
tail(dist.B, 1),
|
||||
min(dist.B)
|
||||
), labels = round(c(
|
||||
tail(dist.B, 1),
|
||||
min(dist.B)
|
||||
), 2), col = NA, col.ticks = "red", las = 1)
|
||||
axis(4, at = c(
|
||||
1,
|
||||
tail(dist.Omega, 1) / max(dist.Omega),
|
||||
min(dist.Omega) / max(dist.Omega)
|
||||
), labels = round(c(
|
||||
max(dist.Omega),
|
||||
tail(dist.Omega, 1),
|
||||
min(dist.Omega)
|
||||
), 2), col = NA, col.ticks = "blue", las = 1)
|
||||
|
||||
abline(h = c(tail(dist.B, 1), min(dist.B)),
|
||||
lty = "dotted", col = "red")
|
||||
abline(h = c(max(dist.Omega), tail(dist.Omega, 1), min(dist.Omega)) / max(dist.Omega),
|
||||
lty = "dotted", col = "blue")
|
||||
|
||||
})
|
||||
legend("topright", col = c("red", "blue", "darkgreen"), lty = 1, lwd = 2,
|
||||
legend = c("dist.B", "dist.Omega", "loss"), bty = "n")
|
||||
|
||||
zlim <- max(abs(range(Omega.true, Omega.init, Omega.gmlm))) * c(-1, 1)
|
||||
matrixImage(Omega.true, main = "true", zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
||||
matrixImage(round(Omega.init, 2), main = "init (cond. prob.)", zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
||||
mtext(round(norm(Omega.true - Omega.init, "F"), 3), 3)
|
||||
matrixImage(round(Omega.gmlm, 2), main = "gmlm (ising)", zlim = zlim, add.values = TRUE, col = col, cex = cex,
|
||||
col.values = c(par("col"), "red")[`[<-`(array(1, rep(prod(dim(X)[-sample.axis]), 2)), degen.mask, 2)])
|
||||
mtext(round(norm(Omega.true - Omega.gmlm, "F"), 3), 3)
|
||||
|
||||
zlim <- max(abs(range(B.true, B.normal, B.gmlm))) * c(-1, 1)
|
||||
matrixImage(B.true, main = "true",
|
||||
zlim = zlim, add.values = TRUE, col = col, cex = cex)
|
||||
matrixImage(round(B.normal, 2), main = "init (normal)",
|
||||
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
|
||||
mtext(round(dist.subspace(B.true, B.normal, normalize = TRUE), 3), 3)
|
||||
matrixImage(round(B.gmlm, 2), main = "gmlm (ising)",
|
||||
zlim = zlim, add.values = TRUE, axes = FALSE, col = col, cex = cex)
|
||||
mtext(round(dist.subspace(B.true, B.gmlm, normalize = TRUE), 3), 3)
|
||||
})
|
||||
|
||||
}
|
|
@ -0,0 +1,134 @@
|
|||
#' Specialized version of GMLM for the tensor normal model
|
||||
#'
|
||||
#' The underlying algorithm is an ``iterative (block) coordinate descent'' method
|
||||
#'
|
||||
#' @export
|
||||
gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)),
|
||||
max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL,
|
||||
cond.threshold = 25, eps = 1e-6
|
||||
) {
|
||||
# rearrange `X`, `F` such that the last axis enumerates observations
|
||||
if (!missing(sample.axis)) {
|
||||
axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis)
|
||||
X <- aperm(X, axis.perm)
|
||||
F <- aperm(F, axis.perm)
|
||||
sample.axis <- length(dim(X))
|
||||
}
|
||||
|
||||
# Get problem dimensions (observation dimensions)
|
||||
dimX <- head(dim(X), -1)
|
||||
dimF <- head(dim(F), -1)
|
||||
modes <- seq_along(dimX)
|
||||
|
||||
# Ensure the Omega and beta projections lists are lists
|
||||
if (!is.list(proj.Omegas)) {
|
||||
proj.Omegas <- rep(NULL, length(modes))
|
||||
}
|
||||
if (!is.list(proj.betas)) {
|
||||
proj.betas <- rep(NULL, length(modes))
|
||||
}
|
||||
|
||||
|
||||
### Phase 1: Computing initial values
|
||||
meanX <- rowMeans(X, dims = length(dimX))
|
||||
meanF <- rowMeans(F, dims = length(dimF))
|
||||
|
||||
# center X and F
|
||||
X <- X - as.vector(meanX)
|
||||
F <- F - as.vector(meanF)
|
||||
|
||||
# initialize Omega estimates as mode-wise, unconditional covariance estimates
|
||||
Sigmas <- Map(diag, dimX)
|
||||
Omegas <- Map(diag, dimX)
|
||||
|
||||
# Per mode covariance directions
|
||||
# Note: (the directions are transposed!)
|
||||
dirsX <- Map(function(Sigma) {
|
||||
SVD <- La.svd(Sigma, nu = 0)
|
||||
sqrt(SVD$d) * SVD$vt
|
||||
}, mcov(X, sample.axis, center = FALSE))
|
||||
dirsF <- Map(function(Sigma) {
|
||||
SVD <- La.svd(Sigma, nu = 0)
|
||||
sqrt(SVD$d) * SVD$vt
|
||||
}, mcov(F, sample.axis, center = FALSE))
|
||||
|
||||
# initialization of betas ``covariance direction mappings``
|
||||
betas <- betas.init <- Map(function(dX, dF) {
|
||||
s <- min(ncol(dX), nrow(dF))
|
||||
crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE])
|
||||
}, dirsX, dirsF)
|
||||
|
||||
# Residuals
|
||||
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
||||
|
||||
# Initial value of the log-likelihood (scaled and constants dropped)
|
||||
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
||||
|
||||
# invoke the logger
|
||||
if (is.function(logger)) do.call(logger, list(
|
||||
iter = 0L, betas = betas, Omegas = Omegas,
|
||||
resid = R, loss = loss
|
||||
))
|
||||
|
||||
|
||||
### Phase 2: (Block) Coordinate Descent
|
||||
for (iter in seq_len(max.iter)) {
|
||||
|
||||
# update every beta (in random order)
|
||||
for (j in sample.int(length(betas))) {
|
||||
FxB_j <- mlm(F, betas[-j], modes[-j])
|
||||
FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j])
|
||||
betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j)))
|
||||
# Project `betas` onto their manifold
|
||||
if (is.function(proj_j <- proj.betas[[j]])) {
|
||||
betas[[j]] <- proj_j(betas[[j]])
|
||||
}
|
||||
}
|
||||
|
||||
# Residuals
|
||||
R <- X - mlm(F, Map(`%*%`, Sigmas, betas))
|
||||
|
||||
# Covariance Estimates (moment based, TODO: implement MLE estimate!)
|
||||
Sigmas <- mcov(R, sample.axis, center = FALSE)
|
||||
|
||||
# Computing `Omega_j`s, the j'th mode presition matrices, in conjunction
|
||||
# with regularization of the j'th mode covariance estimate `Sigma_j`
|
||||
for (j in seq_along(Sigmas)) {
|
||||
# Compute min and max eigen values
|
||||
min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values)
|
||||
# The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold`
|
||||
if (min_max[2] > cond.threshold * min_max[1]) {
|
||||
Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]]))
|
||||
}
|
||||
# Compute (unconstraint but regularized) Omega_j as covariance inverse
|
||||
Omegas[[j]] <- solve(Sigmas[[j]])
|
||||
# Project Omega_j to the Omega_j's manifold
|
||||
if (is.function(proj_j <- proj.Omegas[[j]])) {
|
||||
Omegas[[j]] <- proj_j(Omegas[[j]])
|
||||
# Reverse computation of `Sigma_j` as inverse of `Omega_j`
|
||||
# Order of projecting Omega_j and then recomputing Sigma_j is importent
|
||||
Sigmas[[j]] <- solve(Omegas[[j]])
|
||||
}
|
||||
}
|
||||
|
||||
# store last loss and compute new value
|
||||
loss.last <- loss
|
||||
loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX)
|
||||
|
||||
# invoke the logger
|
||||
if (is.function(logger)) do.call(logger, list(
|
||||
iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss
|
||||
))
|
||||
|
||||
# check the break consition
|
||||
if (abs(loss.last - loss) < eps * loss.last) {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
structure(
|
||||
list(eta1 = mlm(meanX, Sigmas), betas = betas, Omegas = Omegas),
|
||||
betas.init = betas.init
|
||||
)
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
#' @export
|
||||
ising_m2 <- function(
|
||||
params, use_MC = NULL, nr_samples = 10000L,
|
||||
warmup = 15L, nr_threads = 1L
|
||||
) {
|
||||
if (missing(use_MC)) {
|
||||
use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params)
|
||||
}
|
||||
|
||||
m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads,
|
||||
PACKAGE = "tensorPredictors"
|
||||
)
|
||||
|
||||
M2 <- vech.pinv(m2)
|
||||
attr(M2, "prob_0") <- attr(m2, "prob_0")
|
||||
|
||||
M2
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
#' Sample from the Ising model
|
||||
#'
|
||||
#' @param nr number of samples
|
||||
#' @param params Ising model parameters (numeric vector of size `p (p + 1) / 2`
|
||||
#' for `p` dimensional random binary vectors)
|
||||
#' @param warmup Monte-Carlo chain length before retreaving a sample
|
||||
#'
|
||||
#' @export
|
||||
ising_sample <- function(nr, params, warmup = 15L) {
|
||||
.Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors")
|
||||
}
|
|
@ -124,6 +124,30 @@ make.gmlm.family <- function(name) {
|
|||
)
|
||||
}
|
||||
|
||||
# conditional covariance of the sufficient statistic
|
||||
# Cov(t(X) | Y = y)
|
||||
# Note: fy is a single observation!
|
||||
cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) {
|
||||
Deltas <- Map(solve, Omegas)
|
||||
E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas))
|
||||
|
||||
# H11 = Cov(vec X | Y = y)
|
||||
H11 <- Reduce(`%x%`, rev(Deltas))
|
||||
|
||||
# H21 = Cov(vec X %x% vec X, vec X | Y = y)
|
||||
H21 <- kronecker(E1, H11) + kronecker(H11, E1)
|
||||
|
||||
# H22 = Cov(vec X %x% vec X | Y = y)
|
||||
H22 <- local({
|
||||
e1e1 <- tcrossprod(E1, E1)
|
||||
h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1)
|
||||
aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2))
|
||||
})
|
||||
|
||||
# Combine into single covariance matrix
|
||||
cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2)))
|
||||
}
|
||||
|
||||
# mean conditional Fisher Information
|
||||
fisher.info <- function(Fy, eta1, alphas, Omegas) {
|
||||
# retrieve dimensions
|
||||
|
|
|
@ -9,6 +9,15 @@
|
|||
#' or tensor of dimensions \code{dims} iff \code{inv} is true.
|
||||
#'
|
||||
#' @examples
|
||||
#' stopifnot(all.equal(
|
||||
#' mat(1:12, 2, dims = c(2, 3, 2)),
|
||||
#' matrix(c(
|
||||
#' 1, 2, 7, 8,
|
||||
#' 3, 4, 9, 10,
|
||||
#' 5, 6, 11, 12
|
||||
#' ), 3, 4, byrow = TRUE)
|
||||
#' ))
|
||||
#'
|
||||
#' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5))
|
||||
#' stopifnot(exprs = {
|
||||
#' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE))
|
||||
|
@ -22,15 +31,6 @@
|
|||
#' all.equal(t(mat(A, 3)), mat(A, c(1, 2)))
|
||||
#' })
|
||||
#'
|
||||
#' stopifnot(all.equal(
|
||||
#' mat(1:12, 2, dims = c(2, 3, 2)),
|
||||
#' matrix(c(
|
||||
#' 1, 2, 7, 8,
|
||||
#' 3, 4, 9, 10,
|
||||
#' 5, 6, 11, 12
|
||||
#' ), 3, 4, byrow = TRUE)
|
||||
#' ))
|
||||
#'
|
||||
#' @export
|
||||
mat <- function(T, modes, dims = dim(T), inv = FALSE) {
|
||||
modes <- as.integer(modes)
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
#' @param sub sub-title of the plot
|
||||
#' @param interpolate a logical vector (or scalar) indicating whether to apply
|
||||
#' linear interpolation to the image when drawing.
|
||||
#' @param new.plot Recreating the plot area (clearing the plot device). can be
|
||||
#' used to update a plot but _not_ recreate it. Leads to smoother updating.
|
||||
#' @param ... further arguments passed to \code{\link{rasterImage}}
|
||||
#'
|
||||
#' @examples
|
||||
|
@ -16,13 +18,16 @@
|
|||
#' @export
|
||||
matrixImage <- function(A, add.values = FALSE,
|
||||
main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA,
|
||||
axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE),
|
||||
digits = getOption("digits")
|
||||
axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE),
|
||||
col.values = par("col"), cex = 1,
|
||||
digits = getOption("digits"), new.plot = TRUE
|
||||
) {
|
||||
# plot raster image
|
||||
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
|
||||
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
|
||||
asp = asp)
|
||||
if (new.plot) {
|
||||
plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black",
|
||||
xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub,
|
||||
asp = asp)
|
||||
}
|
||||
|
||||
# Scale values of `A` to [0, 1] with min mapped to 1 and max to 0.
|
||||
if (missing(zlim)) {
|
||||
|
@ -41,7 +46,7 @@ matrixImage <- function(A, add.values = FALSE,
|
|||
# X/Y axes index (matches coordinates to matrix indices)
|
||||
x <- seq(1, ncol(A), by = 1)
|
||||
y <- seq(1, nrow(A))
|
||||
if (axes) {
|
||||
if (axes && new.plot) {
|
||||
axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1)
|
||||
axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1)
|
||||
}
|
||||
|
@ -52,6 +57,7 @@ matrixImage <- function(A, add.values = FALSE,
|
|||
A[!add.values] <- NA
|
||||
A[add.values] <- format(A[add.values], digits = digits)
|
||||
}
|
||||
text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A, adj = 0.5)
|
||||
text(rep(x - 0.5, each = nrow(A)), rep(rev(y - 0.5), ncol(A)), A,
|
||||
adj = 0.5, cex = cex, col = col.values)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,9 +12,13 @@
|
|||
#'
|
||||
#' @param X multi-dimensional array
|
||||
#' @param sample.axis observation axis index
|
||||
#' @param center logical, if `TRUE` (the default) compute the centered second
|
||||
#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment
|
||||
#' of each mode is computed. This is specifically usefull in case `X` is
|
||||
#' already centered.
|
||||
#'
|
||||
#' @export
|
||||
mcov <- function(X, sample.axis = 1L) {
|
||||
mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) {
|
||||
# observation modes (axis indices)
|
||||
modes <- seq_along(dim(X))[-sample.axis]
|
||||
# observation dimensions
|
||||
|
@ -26,16 +30,18 @@ mcov <- function(X, sample.axis = 1L) {
|
|||
if (sample.axis != r + 1L) {
|
||||
X <- aperm(X, c(modes, sample.axis))
|
||||
}
|
||||
# centering: Z = X - E[X]
|
||||
Z <- X - c(rowMeans(X, dims = r))
|
||||
# centering: X <- X - E[X]
|
||||
if (center) {
|
||||
X <- X - c(rowMeans(X, dims = r))
|
||||
}
|
||||
|
||||
# estimes (unscaled) covariances for each mode
|
||||
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z))
|
||||
Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X))
|
||||
# scale by per mode "sample" size
|
||||
Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL)
|
||||
|
||||
# estimate trace of Kronecker product of covariances
|
||||
tr.est <- prod(p) * mean(Z^2)
|
||||
tr.est <- prod(p) * mean(X^2)
|
||||
|
||||
# as well as the current trace of the unscaled covariances
|
||||
tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL)))
|
||||
|
|
|
@ -44,16 +44,20 @@
|
|||
#' ))
|
||||
#'
|
||||
#' @export
|
||||
mcrossprod <- function(A, B, mode) {
|
||||
mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) {
|
||||
storage.mode(A) <- "double"
|
||||
if (is.null(dim(A))) {
|
||||
if (!missing(dimA)) {
|
||||
dim(A) <- dimA
|
||||
} else if (is.null(dim(A))) {
|
||||
dim(A) <- length(A)
|
||||
}
|
||||
if (missing(B)) {
|
||||
.Call("C_mcrossprod_sym", A, as.integer(mode))
|
||||
} else {
|
||||
storage.mode(B) <- "double"
|
||||
if (is.null(dim(B))) {
|
||||
if (!missing(dimB)) {
|
||||
dim(B) <- dimB
|
||||
} else if (is.null(dim(B))) {
|
||||
dim(B) <- length(B)
|
||||
}
|
||||
.Call("C_mcrossprod", A, B, as.integer(mode))
|
||||
|
|
|
@ -75,23 +75,63 @@
|
|||
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
|
||||
#' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As)))
|
||||
#'
|
||||
#' # Equivalent to
|
||||
#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
|
||||
#' # Collect all matrices in `B`
|
||||
#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs
|
||||
#'
|
||||
#' # replicate transposition if of length one only
|
||||
#' transposed <- if (length(transposed) == 1) {
|
||||
#' rep(as.logical(transposed), length(Bs))
|
||||
#' } else {
|
||||
#' as.logical(transposed)
|
||||
#' }
|
||||
#'
|
||||
#' # iteratively apply Tensor Times Matrix multiplication over modes
|
||||
#' for (i in seq_along(modes)) {
|
||||
#' A <- ttm(A, Bs[[i]], modes[i], transposed[i])
|
||||
#' }
|
||||
#'
|
||||
#' # return result tensor
|
||||
#' A
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) {
|
||||
# Collect all matrices in `B`
|
||||
Bs <- if (is.matrix(Bs)) list(Bs) else Bs
|
||||
Bs <- if (!is.list(Bs)) list(Bs) else Bs
|
||||
# ensure all `B`s are matrices
|
||||
Bs <- Map(as.matrix, Bs)
|
||||
|
||||
# replicate transposition if of length one only
|
||||
transposed <- if (length(transposed) == 1) {
|
||||
rep(as.logical(transposed), length(Bs))
|
||||
} else {
|
||||
} else if (length(transposed) == length(modes)) {
|
||||
as.logical(transposed)
|
||||
} else {
|
||||
stop("Dim missmatch of param. `transposed`")
|
||||
}
|
||||
|
||||
# iteratively apply Tensor Times Matrix multiplication over modes
|
||||
for (i in seq_along(modes)) {
|
||||
A <- ttm(A, Bs[[i]], modes[i], transposed[i])
|
||||
}
|
||||
|
||||
# return result tensor
|
||||
A
|
||||
.Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors")
|
||||
}
|
||||
|
||||
|
||||
# # general usage
|
||||
# dimA <- c(3, 17, 19, 2)
|
||||
# dimC <- c(7, 11, 13, 5)
|
||||
# A <- array(rnorm(prod(dimA)), dim = dimA)
|
||||
# trans <- c(TRUE, FALSE, TRUE, FALSE)
|
||||
# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA))
|
||||
|
||||
# C <- mlm(A, Bs, transposed = trans)
|
||||
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
|
||||
|
||||
# microbenchmark::microbenchmark(
|
||||
# mlm(A, Bs, transposed = trans),
|
||||
# mlm_reference(A, Bs, transposed = trans)
|
||||
# )
|
||||
|
||||
# microbenchmark::microbenchmark(
|
||||
# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]),
|
||||
# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)])
|
||||
# )
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#' Duplication Matrix
|
||||
#'
|
||||
#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric
|
||||
#' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric
|
||||
#' @examples
|
||||
#' p <- 8
|
||||
#' A <- matrix(rnorm(p^2), p, p)
|
||||
#' A <- A + t(A)
|
||||
#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A)))
|
||||
#' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A)))
|
||||
#'
|
||||
#' @export
|
||||
D <- function(p) {
|
||||
Dup <- function(p) {
|
||||
# setup `vec` and `vech` element indices (zero indexed)
|
||||
vec <- matrix(NA_integer_, p, p)
|
||||
vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2)
|
||||
|
@ -27,10 +27,10 @@ D <- function(p) {
|
|||
#'
|
||||
#' @examples
|
||||
#' p <- 5
|
||||
#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p)))
|
||||
#' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p)))
|
||||
#'
|
||||
#' @export
|
||||
D.pinv <- function(p) {
|
||||
Dup.pinv <- function(p) {
|
||||
Dp <- D(p)
|
||||
solve(crossprod(Dp), t(Dp))
|
||||
}
|
||||
|
@ -119,7 +119,7 @@ K <- function(dim, mode) {
|
|||
#'
|
||||
#' @examples
|
||||
#' p <- 7
|
||||
#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p)))
|
||||
#' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p)))
|
||||
#'
|
||||
#' @export
|
||||
N <- function(p) {
|
||||
|
|
|
@ -11,6 +11,34 @@
|
|||
#' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if
|
||||
#' \code{transposed} is true.
|
||||
#'
|
||||
#' @examples
|
||||
#' for (mode in 1:4) {
|
||||
#' dimA <- sample.int(10, 4, replace = TRUE)
|
||||
#' A <- array(rnorm(prod(dimA)), dim = dimA)
|
||||
#' nrowB <- sample.int(10, 1)
|
||||
#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB)
|
||||
#'
|
||||
#' C <- ttm(A, B, mode)
|
||||
#'
|
||||
#' dimC <- ifelse(seq_along(dims) != mode, dimA, nrowB)
|
||||
#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE)
|
||||
#'
|
||||
#' stopifnot(all.equal(C, C.ref))
|
||||
#' }
|
||||
#'
|
||||
#' for (mode in 1:4) {
|
||||
#' dimA <- sample.int(10, 4, replace = TRUE)
|
||||
#' A <- array(rnorm(prod(dimA)), dim = dimA)
|
||||
#' ncolB <- sample.int(10, 1)
|
||||
#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode])
|
||||
#'
|
||||
#' C <- ttm(A, B, mode, transposed = TRUE)
|
||||
#'
|
||||
#' C.ref <- ttm(A, t(B), mode)
|
||||
#'
|
||||
#' stopifnot(all.equal(C, C.ref))
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) {
|
||||
storage.mode(T) <- storage.mode(M) <- "double"
|
||||
|
|
|
@ -31,3 +31,52 @@ vech.pinv <- function(a) {
|
|||
# de-vectorized matrix
|
||||
matrix(a[vech.pinv.index(p)], p, p)
|
||||
}
|
||||
|
||||
|
||||
# vech <- function(A) {
|
||||
# stopifnot(all(nrow(A) == dim(A)))
|
||||
|
||||
# axis <- seq_len(nrow(A))
|
||||
# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
|
||||
|
||||
# A[rowSums(grid - 1) < nrow(A)]
|
||||
# }
|
||||
|
||||
# p <- 4
|
||||
# X <- matrix(rnorm(p^2), p)
|
||||
|
||||
# vech(outer(X, X))
|
||||
|
||||
# vech(outer(c(X), c(X)))
|
||||
|
||||
# sort(unique(c(sym(outer(X, X)))))
|
||||
# sort(vech(sym(outer(X, X))))
|
||||
|
||||
# # (I <- matrix(c(
|
||||
# # 1, 1, 1,
|
||||
# # 2, 1, 1,
|
||||
# # 3, 1, 1,
|
||||
# # 2, 2, 1,
|
||||
# # 3, 2, 1,
|
||||
# # 3, 3, 1,
|
||||
# # 2, 1, 2,
|
||||
# # 3, 1, 2,
|
||||
# # 3, 2, 2,
|
||||
# # 3, 1, 3
|
||||
# # ), ncol = 3, byrow = TRUE))
|
||||
|
||||
# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1)
|
||||
|
||||
# # p <- 3
|
||||
# # ord <- 4
|
||||
# # A <- array(seq_len(p^ord), rep(p, ord))
|
||||
|
||||
# # axis <- seq_len(nrow(A))
|
||||
# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1)))
|
||||
# # array(rowSums(grid - 1), dim(A))
|
||||
|
||||
# # A[rowSums(grid - 1) < nrow(A)]
|
||||
|
||||
|
||||
|
||||
# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i))))
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
|
||||
PKG_LIBS = $(BLAS_LIBS) $(FLIBS)
|
||||
PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS)
|
||||
# PKG_CFLAGS = -pg
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
#ifndef INCLUDE_GUARD_R_API_H
|
||||
#define INCLUDE_GUARD_R_API_H
|
||||
|
||||
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
||||
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
||||
#define USE_FC_LEN_T
|
||||
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
|
||||
#define R_NO_REMAP
|
||||
|
||||
#include <stdint.h> // uint32_t, uint64_t, ...
|
||||
|
||||
#include <R.h>
|
||||
#include <Rinternals.h>
|
||||
#include <R_ext/BLAS.h>
|
||||
#include <R_ext/Lapack.h>
|
||||
|
||||
#ifndef FCONE
|
||||
#define FCONE
|
||||
#endif
|
||||
|
||||
// NULL pointer (not defined memory, array, vector, ...)
|
||||
#ifndef NULL
|
||||
#define NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
// Truth values
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#endif
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
// Convenience convertion function similar to `asInteger`, `asReal`, ...
|
||||
#define NA_UNSIGNED (-((unsigned long)1))
|
||||
static inline unsigned long asUnsigned(SEXP _val) {
|
||||
int val = Rf_asInteger(_val);
|
||||
if (val == NA_INTEGER || val < 0) {
|
||||
return NA_UNSIGNED;
|
||||
}
|
||||
return (unsigned long)val;
|
||||
}
|
||||
|
||||
|
||||
// Remap BLAS and LAPACK bindings (being consistent with my own interface and I
|
||||
// don't like to pass scalars by reference (memory address))
|
||||
/** y <- a x + y */
|
||||
static inline void axpy(
|
||||
const int dim,
|
||||
const double a,
|
||||
const double* x, const int incx,
|
||||
double* y, const int incy
|
||||
) {
|
||||
F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy);
|
||||
}
|
||||
|
||||
/** Scale a 1d array `x <- a x` */
|
||||
static inline void scale(
|
||||
const int dim,
|
||||
const double a,
|
||||
double *x, const int incx
|
||||
) {
|
||||
F77_CALL(dscal)(&dim, &a, x, &incx);
|
||||
}
|
||||
|
||||
/** Dot product */
|
||||
static inline double dot(
|
||||
const int dim,
|
||||
const double* x, const int incx,
|
||||
const double* y, const int incy
|
||||
) {
|
||||
return F77_CALL(ddot)(&dim, x, &incx, y, &incy);
|
||||
}
|
||||
|
||||
/** 1d array linear combination `z <- a x + b y` */
|
||||
// TODO: optimize! (iff needed?, to optimize of at all?)
|
||||
static inline void lincomb(
|
||||
const int dim,
|
||||
const double a, const double* x, const int incx,
|
||||
const double b, const double* y, const int incy,
|
||||
double* z, const int incz
|
||||
) {
|
||||
for (int i = 0; i < dim; ++i) {
|
||||
z[i * incz] = a * x[i * incx] + b * y[i * incy];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
|
||||
*
|
||||
* With parameterized seed/state this custom PRGN can be used in a thread save!
|
||||
*
|
||||
* For single threaded operations the PRNG provided by `R` are prefered. But they
|
||||
* are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
|
||||
* application.
|
||||
*
|
||||
* See TODO: ...https://en.wikipedia.org/wiki/Xorshift
|
||||
* SchachHoernchen
|
||||
*/
|
||||
static inline uint64_t rot64(uint64_t val, int shift) {
|
||||
return (val << shift) | (val >> (64 - shift));
|
||||
}
|
||||
|
||||
// (internal) PRGN state/seed type
|
||||
typedef uint64_t rng_seed_t[4];
|
||||
|
||||
// Hookup the PRNG via its seed to R's random number generation utilities
|
||||
static inline void init_seed(rng_seed_t seed) {
|
||||
GetRNGstate();
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
seed[i] = 0;
|
||||
for (size_t j = 0; j < 64; ++j) {
|
||||
seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j;
|
||||
}
|
||||
}
|
||||
PutRNGstate();
|
||||
}
|
||||
|
||||
// PRNG of the Xorshift family
|
||||
// The least significant 32 bits are not reliable, use most significant 32 bits
|
||||
static inline uint64_t rand_u64(rng_seed_t seed) {
|
||||
uint64_t e = seed[0] - rot64(seed[1], 7);
|
||||
seed[0] = seed[1] ^ rot64(seed[1], 13);
|
||||
seed[1] = seed[2] + rot64(seed[3], 37);
|
||||
seed[2] = seed[3] + e;
|
||||
seed[3] = e + seed[0];
|
||||
return seed[3];
|
||||
}
|
||||
|
||||
// With external supplied seed, every thread can have its own seed and as such
|
||||
// we can use this as a thread save alternative to R's `unif_rand()`.
|
||||
static inline double unif_rand_thrd(rng_seed_t seed) {
|
||||
return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
|
||||
}
|
||||
|
||||
|
||||
#endif /* INCLUDE_GUARD_R_API_H */
|
|
@ -0,0 +1,185 @@
|
|||
#ifndef INCLUDE_GUARD_BIT_UTILS_H
|
||||
#define INCLUDE_GUARD_BIT_UTILS_H
|
||||
|
||||
#include <stdint.h> // uint32_t, uint64_t
|
||||
|
||||
#if (defined(__GNUC__) && defined(__BMI2__))
|
||||
#include <x86intrin.h>
|
||||
#include <bmi2intrin.h> // _pdep_u32
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h> // _BitScanReverse
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Computes the parity of a word (0 for even bit count and 1 otherwise)
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
static inline int bitParity32(uint32_t x) { return __builtin_parity(x); }
|
||||
static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); }
|
||||
#else
|
||||
static inline int bitParity32(uint32_t x) {
|
||||
int p = (x != 0);
|
||||
while (x &= x - 1) { p = !p; }
|
||||
return p;
|
||||
}
|
||||
static inline int bitParity64(uint64_t x) {
|
||||
int p = (x != 0);
|
||||
while (x &= x - 1) { p = !p; }
|
||||
return p;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Counts the number of set bits (`1`s in binary) in the number `x`
|
||||
*/
|
||||
#ifdef __GNUC__ /* POPulation COUNT */
|
||||
static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); }
|
||||
static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); }
|
||||
#else
|
||||
static inline int bitCount32(uint32_t x) {
|
||||
int count = 0; // counts set bits
|
||||
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
|
||||
return count;
|
||||
}
|
||||
static inline int bitCount64(uint64_t x) {
|
||||
int count = 0; // counts set bits
|
||||
for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x
|
||||
return count;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Gets the index of the LSB (least significant bit)
|
||||
*
|
||||
* @condition `x != 0`, for `x == 0` undefined behaviour
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros
|
||||
static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); }
|
||||
#elif _MSC_VER
|
||||
static inline int bitScanLS32(uint32_t x) {
|
||||
unsigned long bsr;
|
||||
_BitScanReverse(&bsr, x);
|
||||
return 31 - bsr;
|
||||
}
|
||||
static inline int bitScanLS64(uint64_t x) {
|
||||
unsigned long bsr;
|
||||
_BitScanReverse64(&bsr, x);
|
||||
return 63 - bsr;
|
||||
}
|
||||
#else
|
||||
static inline int bitScanLS32(uint32_t x) {
|
||||
int ctz = 0; // result storing the Count of Trailing Zeros
|
||||
bool empty; // boolean variable storing if a bit has not found (search area is empty)
|
||||
|
||||
// logarithmic search for LSB bit index (-1)
|
||||
ctz += (empty = !(x & (uint32_t)(65535))) << 4;
|
||||
x >>= 16 * empty;
|
||||
ctz += (empty = !(x & (uint32_t)( 255))) << 3;
|
||||
x >>= 8 * empty;
|
||||
ctz += (empty = !(x & (uint32_t)( 15))) << 2;
|
||||
x >>= 4 * empty;
|
||||
ctz += (empty = !(x & (uint32_t)( 3))) << 1;
|
||||
x >>= 2 * empty;
|
||||
ctz += (empty = !(x & (uint32_t)( 1)));
|
||||
|
||||
return ctz;
|
||||
}
|
||||
static inline int bitScanLS64(uint64_t x) {
|
||||
int ctz = 0;
|
||||
bool empty;
|
||||
|
||||
// logarithmic search for LSB bit index (-1)
|
||||
ctz += (empty = !(x & (uint64_t)(4294967295))) << 5;
|
||||
x >>= 32 * empty;
|
||||
ctz += (empty = !(x & (uint64_t)( 65535))) << 4;
|
||||
x >>= 16 * empty;
|
||||
ctz += (empty = !(x & (uint64_t)( 255))) << 3;
|
||||
x >>= 8 * empty;
|
||||
ctz += (empty = !(x & (uint64_t)( 15))) << 2;
|
||||
x >>= 4 * empty;
|
||||
ctz += (empty = !(x & (uint64_t)( 3))) << 1;
|
||||
x >>= 2 * empty;
|
||||
ctz += (empty = !(x & (uint64_t)( 1)));
|
||||
|
||||
return ctz;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Parallel DEPosit (aka PDEP)
|
||||
*
|
||||
* Writes the `val` bits into the positions of the set bits in `mask`.
|
||||
*
|
||||
* Example:
|
||||
* val: **** **** **** 1.1.
|
||||
* mask: 1... 1... 1... 1...
|
||||
* res: 1... .... 1... ....
|
||||
*/
|
||||
#if (defined(__GNUC__) && defined(__BMI2__))
|
||||
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
|
||||
return _pdep_u32(val, mask);
|
||||
}
|
||||
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
|
||||
return _pdep_u64(val, mask);
|
||||
}
|
||||
#else
|
||||
static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) {
|
||||
uint32_t res = 0;
|
||||
for (uint32_t pos = 1; mask; pos <<= 1) {
|
||||
if (val & pos) {
|
||||
res |= mask & -mask;
|
||||
}
|
||||
mask &= mask - 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) {
|
||||
uint64_t res = 0;
|
||||
for (uint64_t pos = 1; mask; pos <<= 1) {
|
||||
if (val & pos) {
|
||||
res |= mask & -mask;
|
||||
}
|
||||
mask &= mask - 1;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Gets the next lexicographically ordered permutation of an n-bit word.
|
||||
*
|
||||
* Let `val` be a bit-word with `n` bits set, then this procedire computes a
|
||||
* `n` bit word wich is the next element in the lexicographically ordered
|
||||
* sequence of `n` bit words. For example
|
||||
*
|
||||
* val -> bitNextPerm(val)
|
||||
* 00010011 -> 00010101
|
||||
* 00010101 -> 00010110
|
||||
* 00010110 -> 00011001
|
||||
* 00011001 -> 00011010
|
||||
* 00011010 -> 00011100
|
||||
* 00011100 -> 00100011
|
||||
*
|
||||
* @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS`
|
||||
*
|
||||
* see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
|
||||
*/
|
||||
static inline uint32_t bitNextPerm32(uint32_t val) {
|
||||
// Sets all least significant 0-bits of val to 1
|
||||
uint32_t t = val | (val - 1);
|
||||
// Next set to 1 the most significant bit to change,
|
||||
// set to 0 the least significant ones, and add the necessary 1 bits.
|
||||
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1));
|
||||
}
|
||||
static inline uint64_t bitNextPerm64(uint64_t val) {
|
||||
// Sets all least significant 0-bits of val to 1
|
||||
uint64_t t = val | (val - 1);
|
||||
// Next set to 1 the most significant bit to change,
|
||||
// set to 0 the least significant ones, and add the necessary 1 bits.
|
||||
return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1));
|
||||
}
|
||||
|
||||
#endif /* BIT_UTILS_INCLUDE_GUARD_H */
|
|
@ -7,27 +7,96 @@
|
|||
// );
|
||||
|
||||
/* Tensor Times Matrix a.k.a. Mode Product */
|
||||
extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
|
||||
extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op);
|
||||
|
||||
/* Multi Linear Multiplication (iterated mode products) */
|
||||
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops);
|
||||
|
||||
/* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */
|
||||
extern SEXP mtvk(SEXP A, SEXP Bs);
|
||||
|
||||
/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
|
||||
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode);
|
||||
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode);
|
||||
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
|
||||
extern SEXP mcrossprod_sym(SEXP A, SEXP mode);
|
||||
extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode);
|
||||
|
||||
// /* Higher Order PCA */
|
||||
// extern SEXP hopca(SEXP X);
|
||||
|
||||
/* Singular Value Decomposition */
|
||||
extern SEXP R_svd(SEXP A);
|
||||
|
||||
// /* Iterative Cyclic Updating for the Tensor Normal Distribution */
|
||||
// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter);
|
||||
|
||||
// /* Generalized tensor normal using NAGD */
|
||||
// extern SEXP R_gmlm_tensor_normal(
|
||||
// SEXP X, SEXP Fy,
|
||||
// SEXP eta_bar, SEXP alphas, SEXP Omegas,
|
||||
// SEXP max_iter, SEXP max_line_iter
|
||||
// );
|
||||
|
||||
/* Solve linear equation system A X = B */
|
||||
extern SEXP R_solve(SEXP A, SEXP B);
|
||||
|
||||
/* Determinant of a matrix */
|
||||
extern SEXP R_det(SEXP A);
|
||||
|
||||
// /* Unscaled PMF of the Ising model with natural parameters `_params` */
|
||||
// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params);
|
||||
|
||||
// /* Exact computation of the partition function of the Ising model with natural
|
||||
// parameters `_params` */
|
||||
// extern SEXP R_ising_partition_func_exact(SEXP _params);
|
||||
|
||||
// /* Estimated partition function of the Ising model with natural parameters `_params` */
|
||||
// extern SEXP R_ising_partition_func_MC(SEXP _params);
|
||||
|
||||
// /* Exact computation of the partition function of the Ising model with natural
|
||||
// parameters `_params` */
|
||||
// extern SEXP R_ising_m2_exact(SEXP _params);
|
||||
|
||||
// // extern SEXP R_ising_m2_MC64(SEXP _params);
|
||||
// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup);
|
||||
// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads);
|
||||
|
||||
/* Sample from the Ising model */
|
||||
extern SEXP R_ising_sample(SEXP, SEXP, SEXP);
|
||||
|
||||
// Interface to the Ising second moment function
|
||||
extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
|
||||
/* List of registered routines (a.k.a. C entry points) */
|
||||
static const R_CallMethodDef CallEntries[] = {
|
||||
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
|
||||
{"C_ttm", (DL_FUNC) &ttm, 4},
|
||||
{"C_mtvk", (DL_FUNC) &mtvk, 2},
|
||||
{"C_mcrossprod", (DL_FUNC) &mcrossprod, 3},
|
||||
{"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2},
|
||||
{"C_ttm", (DL_FUNC) &R_ttm, 4},
|
||||
{"C_mlm", (DL_FUNC) &R_mlm, 4},
|
||||
{"C_mtvk", (DL_FUNC) &mtvk, 2},
|
||||
{"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3},
|
||||
{"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2},
|
||||
{"C_svd", (DL_FUNC) &R_svd, 1},
|
||||
{"C_solve", (DL_FUNC) &R_solve, 2},
|
||||
{"C_det", (DL_FUNC) &R_det, 1},
|
||||
{"C_ising_sample", (DL_FUNC) &R_ising_sample, 3},
|
||||
{"C_ising_m2", (DL_FUNC) &R_ising_m2, 5},
|
||||
// {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2},
|
||||
// {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1},
|
||||
// {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1},
|
||||
// {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1},
|
||||
// {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3},
|
||||
// {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2},
|
||||
// {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1},
|
||||
// {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED
|
||||
// {"C_hopca", (DL_FUNC) &hopca, 1},
|
||||
// {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT
|
||||
// {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT
|
||||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
/* Restrict C entry points to registered routines. */
|
||||
/**
|
||||
* Restrict C entry points to registered routines.
|
||||
*
|
||||
* NOTE: Naming convention: `R_init_<PACKAGE-NAME>`
|
||||
*/
|
||||
void R_init_tensorPredictors(DllInfo *dll) {
|
||||
R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
|
||||
R_useDynamicSymbols(dll, FALSE);
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
#ifndef INCLUDE_GUARD_INT_UTILS_H
|
||||
#define INCLUDE_GUARD_INT_UTILS_H
|
||||
|
||||
#include <stdint.h> // uint32_t, uint64_t
|
||||
|
||||
|
||||
// /**
|
||||
// * Integer logarithm, the biggest power `p` such that `2^p <= x`.
|
||||
// */
|
||||
// static int ilog2(uint64_t x) {
|
||||
// int log = 0;
|
||||
// while (x >>= 1) {
|
||||
// log++;
|
||||
// }
|
||||
// return log;
|
||||
// }
|
||||
|
||||
|
||||
/**
|
||||
* Integer Square root of `y`, that is `ceiling(sqrt(y))`
|
||||
*/
|
||||
static uint64_t isqrt(uint64_t x) {
|
||||
// implements a binary search
|
||||
uint64_t root = 0;
|
||||
uint64_t left = 0; // left boundary
|
||||
uint64_t right = x + 1; // right boundary
|
||||
|
||||
while(left + 1UL < right) {
|
||||
root = (left + right) / 2UL;
|
||||
if (root * root <= x) {
|
||||
left = root;
|
||||
} else {
|
||||
right = root;
|
||||
}
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Inverse to the triangular numbers
|
||||
*
|
||||
* Given a positive number `x = p (p + 1) / 2` it computes `p` if possible.
|
||||
* In case there is no positive integer solution `0` is returned.
|
||||
*
|
||||
* Note: this follows immediately from the quadratic equation.
|
||||
*/
|
||||
static uint32_t invTriag(uint32_t x) {
|
||||
uint64_t root = isqrt(8UL * (uint64_t)x + 1UL);
|
||||
if (root * root != 8UL * x + 1UL) {
|
||||
return 0;
|
||||
}
|
||||
return (root - 1) / 2;
|
||||
}
|
||||
|
||||
|
||||
// /**
|
||||
// * Number of sub-sets (including empty set) of max-size
|
||||
// *
|
||||
// * It computes, with `binom` beeing the binomial coefficient, the following sum
|
||||
// *
|
||||
// * sum_{i = 0}^k binom(n, i)
|
||||
// */
|
||||
// static uint64_t nrSubSets(uint64_t n, uint64_t k) {
|
||||
// uint64_t sum = 1, binom = 1;
|
||||
|
||||
// for (uint64_t i = 1; i <= k; ++i) {
|
||||
// binom *= n--;
|
||||
// binom /= i;
|
||||
// sum += binom;
|
||||
// }
|
||||
|
||||
// return sum;
|
||||
// }
|
||||
|
||||
|
||||
#endif /* INCLUDE_GUARD_INT_UTILS_H */
|
|
@ -0,0 +1,181 @@
|
|||
#ifndef INCLUDE_GUARD_ISING_MCMC_H
|
||||
#define INCLUDE_GUARD_ISING_MCMC_H
|
||||
|
||||
#include "R_api.h"
|
||||
|
||||
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
|
||||
* method given the Ising model parameters in compact half-vectorized form.
|
||||
*
|
||||
* f(x) = p0(params) exp(vech(x x')' params)
|
||||
*
|
||||
* @important requires to be wrapped between `GetRNGstate()` and `PutRNGState()`
|
||||
* calls. For example
|
||||
*
|
||||
* GetRNGstate()
|
||||
* for (size_t sample = 0; sample < nr_samples; ++sample) {
|
||||
* ising_mcmc_vech(warmup, dim, dim, params, &X[sample * dim]);
|
||||
* }
|
||||
* PutRNGState()
|
||||
*/
|
||||
static inline void ising_mcmc_vech(
|
||||
const size_t warmup,
|
||||
const size_t dim, const double* params,
|
||||
int* X
|
||||
) {
|
||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
||||
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
|
||||
double invProb = 1.0 + exp(-params[I]);
|
||||
X[i] = unif_rand() * invProb < 1.0;
|
||||
}
|
||||
|
||||
// Skip the first samples of the Markov-Chain (warmup)
|
||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
||||
// For every component
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
||||
double log_odds = 0.0;
|
||||
// Tracks position in half-vectorized storage
|
||||
size_t J = i;
|
||||
// Sum all log-odds for `j < i` (two way interactions)
|
||||
for (size_t j = 0; j < i; J += dim - ++j) {
|
||||
log_odds += X[j] ? params[J] : 0.0;
|
||||
}
|
||||
// Allways add log-odds for `i = j` (self-interaction)
|
||||
log_odds += params[J];
|
||||
// Continue adding log-odds for `j > i` (two way interactions)
|
||||
J++;
|
||||
for (size_t j = i + 1; j < dim; ++j, ++J) {
|
||||
log_odds += X[j] ? params[J] : 0.0;
|
||||
}
|
||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Thread save version of `ising_mcmc` (using thread save PRNG)
|
||||
static inline void ising_mcmc_vech_thrd(
|
||||
const size_t warmup,
|
||||
const size_t dim, const double* params,
|
||||
int* X, rng_seed_t seed
|
||||
) {
|
||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
||||
for (size_t i = 0, I = 0; i < dim; I += dim - i++) {
|
||||
double invProb = 1.0 + exp(-params[I]);
|
||||
X[i] = unif_rand_thrd(seed) * invProb < 1.0;
|
||||
}
|
||||
|
||||
// Skip the first samples of the Markov-Chain (warmup)
|
||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
||||
// For every component
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
||||
double log_odds = 0.0;
|
||||
// Tracks position in half-vectorized storage
|
||||
size_t J = i;
|
||||
// Sum all log-odds for `j < i` (two way interactions)
|
||||
for (size_t j = 0; j < i; J += dim - ++j) {
|
||||
log_odds += X[j] ? params[J] : 0.0;
|
||||
}
|
||||
// Allways add log-odds for `i = j` (self-interaction)
|
||||
log_odds += params[J];
|
||||
// Continue adding log-odds for `j > i` (two way interactions)
|
||||
J++;
|
||||
for (size_t j = i + 1; j < dim; ++j, ++J) {
|
||||
log_odds += X[j] ? params[J] : 0.0;
|
||||
}
|
||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain
|
||||
* method given the Ising model parameters in symmetric matrix form `A`.
|
||||
*
|
||||
* f(x) = prob_0(A) exp(x' A x)
|
||||
*
|
||||
* The relation to the natural parameters `params` as used in the half-vectorized
|
||||
* version `ising_mcmc_vech()`
|
||||
*
|
||||
* f(x) = prob_0(params) exp(vech(x x')' params)
|
||||
*
|
||||
* is illustrated via an example with `dim = 5`;
|
||||
*
|
||||
* p0 [ p0 p1/2 p2/2 p3/2 p4/2 ]
|
||||
* p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ]
|
||||
* params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ]
|
||||
* p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ]
|
||||
* p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ]
|
||||
*
|
||||
* or the other way arroung given the symmetric matrix form `A` converted to
|
||||
* the natural parameters
|
||||
*
|
||||
* [ a00 a10 a20 a30 a40 ] a00
|
||||
* [ a10 a11 a21 a31 a41 ] 2*a10 a11
|
||||
* A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22
|
||||
* [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33
|
||||
* [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44
|
||||
*
|
||||
*/
|
||||
static inline void ising_mcmc_mat(
|
||||
const size_t warmup,
|
||||
const size_t dim, const size_t ldA, const double* A,
|
||||
int* X
|
||||
) {
|
||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
|
||||
}
|
||||
|
||||
// Skip the first samples of the Markov-Chain (warmup)
|
||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
||||
// For every component
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
||||
double log_odds = 0.0;
|
||||
for (size_t j = 0; j < i; ++j) {
|
||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
||||
}
|
||||
log_odds += A[i * (ldA + 1)];
|
||||
for (size_t j = i + 1; j < dim; ++j) {
|
||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
||||
}
|
||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
||||
X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// thread save version of `ising_mcmc_mat()` (using thread save PRNG)
|
||||
static inline void ising_mcmc_mat_thrd(
|
||||
const size_t warmup,
|
||||
const size_t dim, const size_t ldA, const double* A,
|
||||
int* X, rng_seed_t seed
|
||||
) {
|
||||
// Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))`
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0;
|
||||
}
|
||||
|
||||
// Skip the first samples of the Markov-Chain (warmup)
|
||||
for (size_t skip = 0; skip < warmup; ++skip) {
|
||||
// For every component
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
// Compute conditional probability `P(X_i = 1 | X_-i = x_-i)`
|
||||
double log_odds = 0.0;
|
||||
for (size_t j = 0; j < i; ++j) {
|
||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
||||
}
|
||||
log_odds += A[i * (ldA + 1)];
|
||||
for (size_t j = i + 1; j < dim; ++j) {
|
||||
log_odds += (2 * X[j]) * A[i * ldA + j];
|
||||
}
|
||||
// Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))`
|
||||
X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif /* INCLUDE_GUARD_ISING_MCMC_H */
|
|
@ -0,0 +1,401 @@
|
|||
/** Methods computing or estimating the second moment of the Ising model given
|
||||
* the natural parameters of the p.m.f. in exponential family form. That is
|
||||
* f(x) = p0(params) exp(vech(x x')' params)
|
||||
* where `params` are the natural parameters. The scaling constant `p0(params)`
|
||||
* is also the zero event `x = (0, ..., 0)` probability.
|
||||
*
|
||||
* Three different version are provided.
|
||||
* - exact: Exact method computing the true second moment `E[X X']` given the
|
||||
* parameters `params`. This method has a runtime of `O(2^dim)` where
|
||||
* `dim` is the dimension of the random variable `X`. This means it is
|
||||
* infeasible for bigger (like 25 and above) values if `dim`
|
||||
* - MC: Monte-Carlo method used in case the exact method is not feasible.
|
||||
* - MC Thrd: Multi-Threaded Monte-Carlo method.
|
||||
*
|
||||
* The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2`
|
||||
* dimensional numeric vector containing the log-odds. The indexing schema into
|
||||
* the parameters corresponding to the log-odds of single or two way interactions
|
||||
* with indicex `i, j` are according to the half-vectorization schema
|
||||
*
|
||||
* i = 4'th row => Symmetry => Half-Vectorized Storage
|
||||
*
|
||||
* [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ]
|
||||
* [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ]
|
||||
* [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ]
|
||||
* [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ]
|
||||
* [ * * * * * * ] [ * * * 16 * * ] [ * * ]
|
||||
* [ * * * * * * ] [ * * * 17 * * ] [ * ]
|
||||
*
|
||||
*/
|
||||
#include "R_api.h"
|
||||
#include "bit_utils.h"
|
||||
#include "int_utils.h"
|
||||
#include "ising_MCMC.h"
|
||||
|
||||
#ifndef __STDC_NO_THREADS__
|
||||
#include <threads.h>
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// TODO: read //
|
||||
// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/
|
||||
// and
|
||||
// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html
|
||||
// as well as do the following PROPERLLY
|
||||
// TODO: make specialized version using the parameters in given sym mat form //
|
||||
// similar to `ising_sample()` //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
// void ising_m2_exact_mat(
|
||||
// const size_t dim, const size_t ldA, const double* A,
|
||||
// double* M2
|
||||
// ) {
|
||||
// double sum_0 = 1.0;
|
||||
|
||||
// const uint32_t max_event = (uint32_t)1 << dim;
|
||||
|
||||
// (void)memset(M2, 0, dim * dim * sizeof(double));
|
||||
|
||||
// for (uint32_t X = 1; X < max_event; ++X) {
|
||||
// // Evaluate quadratic form `X' A X` using symmetry of `A`
|
||||
// double XtAX = 0.0;
|
||||
// for (uint32_t Y = X; Y; Y &= Y - 1) {
|
||||
// const int i = bitScanLS32(Y);
|
||||
// XtAX += A[i * (ldA + 1)];
|
||||
// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) {
|
||||
// XtAX += 2 * A[i * ldA + bitScanLS32(Z)];
|
||||
// }
|
||||
// }
|
||||
|
||||
// const double prob_X = exp(XtAX);
|
||||
// sum_0 += prob_X;
|
||||
|
||||
// for (uint32_t Y = X; Y; Y &= Y - 1) {
|
||||
// const int i = bitScanLS32(Y);
|
||||
// for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
||||
// M2[i * dim + bitScanLS32(Z)] += prob_X;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// const double prob_0 = 1.0 / sum_0;
|
||||
// for (size_t j = 0; j < dim; ++j) {
|
||||
// for (size_t i = 0; i < j; ++i) {
|
||||
// M2[j * dim + i] = M2[i * dim + j];
|
||||
// }
|
||||
// for (size_t i = j; i < dim; ++i) {
|
||||
// M2[i * dim + j] *= prob_0;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
double ising_m2_exact(const size_t dim, const double* params, double* M2) {
|
||||
// Accumulator of sum of all (unscaled) probabilities
|
||||
double sum_0 = 1.0;
|
||||
|
||||
// max event (actually upper bound)
|
||||
const uint32_t max_event = (uint32_t)1 << dim;
|
||||
|
||||
// Length of parameters
|
||||
const size_t len = dim * (dim + 1) / 2;
|
||||
|
||||
// Initialize M2 to zero
|
||||
(void)memset(M2, 0, len * sizeof(double));
|
||||
|
||||
// Iterate all `2^dim` binary vectors `X`
|
||||
for (uint32_t X = 1; X < max_event; ++X) {
|
||||
// Dot product `<vech(X X'), params>`
|
||||
double dot_X = 0.0;
|
||||
// all bits in `X`
|
||||
for (uint32_t Y = X; Y; Y &= Y - 1) {
|
||||
const int i = bitScanLS32(Y);
|
||||
const int I = (i * (2 * dim - 1 - i)) / 2;
|
||||
// add single and two way interaction log odds
|
||||
for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
||||
dot_X += params[I + bitScanLS32(Z)];
|
||||
}
|
||||
}
|
||||
|
||||
// (unscaled) probability of current event `X`
|
||||
const double prob_X = exp(dot_X);
|
||||
sum_0 += prob_X;
|
||||
|
||||
// Accumulate set bits probability for the first end second moment `E[X X']`
|
||||
for (uint32_t Y = X; Y; Y &= Y - 1) {
|
||||
const int i = bitScanLS32(Y);
|
||||
const int I = (i * (2 * dim - 1 - i)) / 2;
|
||||
for (uint32_t Z = Y; Z; Z &= Z - 1) {
|
||||
M2[I + bitScanLS32(Z)] += prob_X;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finish by scaling with zero event probability (Ising p.m.f. scaling constant)
|
||||
const double prob_0 = 1.0 / sum_0;
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
M2[i] *= prob_0;
|
||||
}
|
||||
|
||||
return prob_0;
|
||||
}
|
||||
|
||||
|
||||
// Monte-Carlo method using Gibbs sampling for the second moment
|
||||
double ising_m2_MC(
|
||||
/* options */ const size_t nr_samples, const size_t warmup,
|
||||
/* dimension */ const size_t dim,
|
||||
/* vectors */ const double* params, double* M2
|
||||
) {
|
||||
|
||||
// Length of `params` and `M2`
|
||||
const size_t len = (dim * (dim + 1)) / 2;
|
||||
|
||||
// Dirty trick (reuse output memory for precise counting)
|
||||
_Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!");
|
||||
uint64_t* counts = (uint64_t*)M2;
|
||||
|
||||
// Initialize counts to zero
|
||||
(void)memset(counts, 0, len * sizeof(uint64_t));
|
||||
|
||||
// Allocate memory to store Markov-Chain value
|
||||
int* X = (int*)R_alloc(dim, sizeof(int));
|
||||
|
||||
// Create/Update R's internal PRGN state
|
||||
GetRNGstate();
|
||||
|
||||
// Spawn chains
|
||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
||||
// Draw random sample `X ~ Ising(params)`
|
||||
ising_mcmc_vech(warmup, dim, params, X);
|
||||
|
||||
// Accumulate component counts
|
||||
uint64_t* count = counts;
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
for (size_t j = i; j < dim; ++j) {
|
||||
*(count++) += X[i] & X[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write R's internal PRNG state back (if needed)
|
||||
PutRNGstate();
|
||||
|
||||
// Compute means from counts
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
M2[i] = (double)counts[i] / (double)(nr_samples);
|
||||
}
|
||||
|
||||
return -1.0; // TODO: also compute prob_0
|
||||
}
|
||||
|
||||
|
||||
// in case the compile supports standard C threads
|
||||
#ifndef __STDC_NO_THREADS__
|
||||
|
||||
// Worker thread to calling thread communication struct
|
||||
typedef struct thrd_data {
|
||||
rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value
|
||||
size_t nr_samples; // Nr. of samples this worker handles
|
||||
size_t warmup; // Monte-Carlo Chain burne-in length
|
||||
size_t dim; // Random variable dimension
|
||||
const double* params; // Ising model parameters
|
||||
int* X; // Working memory to store current binary sample
|
||||
uint32_t* counts; // (output) count of single and two way interactions
|
||||
double prob_0; // (output) zero event probability estimate
|
||||
} thrd_data_t;
|
||||
|
||||
// Worker thread function
|
||||
int thrd_worker(thrd_data_t* data) {
|
||||
// Extract data as thread local variables (for convenience)
|
||||
const size_t dim = data->dim;
|
||||
int* X = data->X;
|
||||
|
||||
// Initialize counts to zero
|
||||
(void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t));
|
||||
|
||||
// Spawn Monte-Carlo Chains (one foe every sample)
|
||||
for (size_t sample = 0; sample < data->nr_samples; ++sample) {
|
||||
// Draw random sample `X ~ Ising(params)`
|
||||
ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed);
|
||||
|
||||
// Accumulate component counts
|
||||
uint32_t* count = data->counts;
|
||||
for (size_t i = 0; i < dim; ++i) {
|
||||
for (size_t j = i; j < dim; ++j) {
|
||||
*(count++) += X[i] & X[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
double ising_m2_MC_thrd(
|
||||
/* options */ const size_t nr_samples, const size_t warmup,
|
||||
const size_t nr_threads,
|
||||
/* dimension */ const size_t dim,
|
||||
/* vectors */ const double* params, double* M2
|
||||
) {
|
||||
// Length of `params` and `M2`
|
||||
const size_t len = (dim * (dim + 1)) / 2;
|
||||
|
||||
// Allocate working and self memory for worker threads
|
||||
int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int));
|
||||
uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t));
|
||||
thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
|
||||
thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t));
|
||||
|
||||
// Provide instruction wor worker and dispatch them
|
||||
for (size_t tid = 0; tid < nr_threads; ++tid) {
|
||||
// Every thread needs its own PRNG seed!
|
||||
init_seed(threads_data[tid].seed);
|
||||
// divide work among workers (more or less) equaly with the first worker
|
||||
// (tid = 0) having the additional remainder of divided work (`nr_samples`)
|
||||
threads_data[tid].nr_samples = nr_samples / nr_threads + (
|
||||
tid ? 0 : nr_samples % nr_threads
|
||||
);
|
||||
threads_data[tid].warmup = warmup;
|
||||
threads_data[tid].dim = dim;
|
||||
threads_data[tid].params = params;
|
||||
// Every worker gets its disjint working memory
|
||||
threads_data[tid].X = &Xs[dim * tid];
|
||||
threads_data[tid].counts = &counts[len * tid];
|
||||
// dispatch the worker
|
||||
thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]);
|
||||
}
|
||||
|
||||
// Join (Wait for) all worker
|
||||
for (size_t tid = 0; tid < nr_threads; ++tid) {
|
||||
thrd_join(threads[tid], NULL);
|
||||
}
|
||||
|
||||
// Accumulate worker results into first (tid = 0) worker counts result
|
||||
for (size_t tid = 1; tid < nr_threads; ++tid) {
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
counts[i] += counts[tid * len + i];
|
||||
}
|
||||
}
|
||||
|
||||
// convert discreat counts into means
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
M2[i] = (double)counts[i] / (double)nr_samples;
|
||||
}
|
||||
|
||||
return -2.0; // TODO: also compute prob_0
|
||||
}
|
||||
|
||||
#endif /* !__STDC_NO_THREADS__ */
|
||||
|
||||
|
||||
|
||||
|
||||
/* Implementation of `.Call` entry point */
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// TODO: make specialized version using the parameters in given sym mat form //
|
||||
// similar to `ising_sample()` //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
extern SEXP R_ising_m2(
|
||||
SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads
|
||||
) {
|
||||
// Extract and validate arguments
|
||||
size_t protect_count = 0;
|
||||
if (!Rf_isReal(_params)) {
|
||||
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
|
||||
protect_count++;
|
||||
}
|
||||
|
||||
// Depending on parameters given in symmetric matrix form or natural
|
||||
// or natrual parameters in the half-vectorized memory layout
|
||||
size_t dim;
|
||||
double* params;
|
||||
if (Rf_isMatrix(_params)) {
|
||||
if (Rf_nrows(_params) != Rf_ncols(_params)) {
|
||||
Rf_error("Invalid 'params' value, exected square matrix");
|
||||
}
|
||||
// Convert to natural parameters
|
||||
dim = Rf_nrows(_params);
|
||||
params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double));
|
||||
|
||||
double* A = REAL(_params);
|
||||
for (size_t j = 0, I = 0; j < dim; ++j) {
|
||||
for (size_t i = j; i < dim; ++i, ++I) {
|
||||
params[I] = (1 + (i != j)) * A[i + j * dim];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Get Ising random variable dimension `dim` from "half-vectorized" parameters
|
||||
// vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`.
|
||||
dim = invTriag(Rf_length(_params));
|
||||
if (!dim) {
|
||||
Rf_error("Expected parameter vector of length `p (p + 1) / 2` where"
|
||||
" `p` is the dimension of the random variable");
|
||||
}
|
||||
params = REAL(_params);
|
||||
}
|
||||
|
||||
// Determin the method to use and validate if possible
|
||||
const int use_MC = Rf_asLogical(_use_MC);
|
||||
if (use_MC == NA_LOGICAL) {
|
||||
Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE");
|
||||
}
|
||||
|
||||
// Allocate result vector
|
||||
SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2));
|
||||
++protect_count;
|
||||
|
||||
// asside computed zero event probability (inverse partition function), the
|
||||
// scaling factor for the Ising model p.m.f.
|
||||
double prob_0 = -1.0;
|
||||
|
||||
if (use_MC) {
|
||||
// Convert and validate arguments for the Monte-Carlo methods
|
||||
const size_t nr_samples = asUnsigned(_nr_samples);
|
||||
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
|
||||
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
|
||||
}
|
||||
const size_t warmup = asUnsigned(_warmup);
|
||||
if (warmup == NA_UNSIGNED) {
|
||||
Rf_error("Invalid 'warmup' value, expected non-negative integer");
|
||||
}
|
||||
const size_t nr_threads = asUnsigned(_nr_threads);
|
||||
if (nr_threads == 0 || nr_threads > 256) {
|
||||
Rf_error("Invalid 'nr_thread' value");
|
||||
}
|
||||
|
||||
if (nr_threads == 1) {
|
||||
// Single threaded Monte-Carlo method
|
||||
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
|
||||
} else {
|
||||
// Multi-Threaded Monte-Carlo method if provided, otherwise use
|
||||
// the single threaded version with a warning
|
||||
#ifdef __STDC_NO_THREADS__
|
||||
Rf_warning("Multi-Threading NOT supported, using fallback.");
|
||||
prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2));
|
||||
#else
|
||||
prob_0 = ising_m2_MC_thrd(
|
||||
nr_samples, warmup, nr_threads,
|
||||
dim, params, REAL(_M2)
|
||||
);
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
// Exact method (ignore other arguments), only validate dimension
|
||||
if (25 < dim) {
|
||||
Rf_error("Dimension '%d' too big for exact method (max 24)", dim);
|
||||
}
|
||||
|
||||
// and call the exact method
|
||||
prob_0 = ising_m2_exact(dim, params, REAL(_M2));
|
||||
}
|
||||
|
||||
// Set log-lokelihood as an attribute to the computed second moment
|
||||
SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0));
|
||||
++protect_count;
|
||||
Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0);
|
||||
|
||||
// release SEPXs to the garbage collector
|
||||
UNPROTECT(protect_count);
|
||||
|
||||
return _M2;
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
#ifndef INCLUDE_GUARD_ISING_SAMPLE_H
|
||||
#define INCLUDE_GUARD_ISING_SAMPLE_H
|
||||
|
||||
#include "R_api.h"
|
||||
#include "int_utils.h"
|
||||
#include "ising_MCMC.h"
|
||||
|
||||
|
||||
// .Call interface to draw from sample from the Ising model
|
||||
extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) {
|
||||
// Counts number of protected SEXP's to give them back to the garbage collector
|
||||
size_t protect_count = 0;
|
||||
|
||||
// Parse and validate arguments
|
||||
const size_t nr_samples = asUnsigned(_nr_samples);
|
||||
if (nr_samples == 0 || nr_samples == NA_UNSIGNED) {
|
||||
Rf_error("Invalid 'nr_samples' value, expected pos. integer");
|
||||
}
|
||||
const size_t warmup = asUnsigned(_warmup);
|
||||
if (warmup == NA_UNSIGNED) {
|
||||
Rf_error("Invalid 'warmup' value, expected non-negative integer");
|
||||
}
|
||||
|
||||
// Determin parameter mode (natural parameter vector or symmetric matrix)
|
||||
// Ether `m` for "Matrix" or `v` for "Vector"
|
||||
const char param_type = Rf_isMatrix(_params) ? 'm' : 'v';
|
||||
|
||||
// In case of matrix parameters check for square matrix
|
||||
if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) {
|
||||
Rf_error("Invalid 'params' value, exected square matrix");
|
||||
}
|
||||
|
||||
// Get problem dimension from parameter size
|
||||
const size_t dim = (param_type == 'm')
|
||||
? Rf_nrows(_params)
|
||||
: invTriag(Rf_length(_params));
|
||||
if (!dim) {
|
||||
Rf_error("Error determining dimension.");
|
||||
}
|
||||
|
||||
// Ensure parameters are numeric
|
||||
if (!Rf_isReal(_params)) {
|
||||
_params = PROTECT(Rf_coerceVector(_params, REALSXP));
|
||||
++protect_count;
|
||||
}
|
||||
double* params = REAL(_params);
|
||||
|
||||
// Allocate result sample
|
||||
SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples));
|
||||
++protect_count;
|
||||
int* X = INTEGER(_X);
|
||||
|
||||
// Call appropriate sampling routine for every sample to generate
|
||||
GetRNGstate();
|
||||
if (param_type == 'm') {
|
||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
||||
ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]);
|
||||
}
|
||||
} else {
|
||||
for (size_t sample = 0; sample < nr_samples; ++sample) {
|
||||
ising_mcmc_vech(warmup, dim, params, &X[sample * dim]);
|
||||
}
|
||||
}
|
||||
PutRNGstate();
|
||||
|
||||
// Release protected SEXPs to the garbage collector
|
||||
UNPROTECT(protect_count);
|
||||
|
||||
return _X;
|
||||
}
|
||||
|
||||
#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */
|
|
@ -8,6 +8,51 @@
|
|||
#define FCONE
|
||||
#endif
|
||||
|
||||
void mcrossprod(
|
||||
const int rank,
|
||||
const double* A, const int* dimA,
|
||||
const double* B, const int* dimB,
|
||||
const int mode,
|
||||
double* C
|
||||
) {
|
||||
// the strides
|
||||
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
||||
// `stride[1] <- dim(A)[mode]`
|
||||
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
||||
// Note: Middle stride is ignored (to be consistent with sym version)
|
||||
int stride[3] = {1, 0, 1};
|
||||
for (int i = 0; i < rank; ++i) {
|
||||
int size = dimA[i];
|
||||
stride[0] *= (i < mode) ? size : 1;
|
||||
stride[2] *= (i > mode) ? size : 1;
|
||||
}
|
||||
|
||||
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
|
||||
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
|
||||
const double zero = 0.0;
|
||||
const double one = 1.0;
|
||||
if (mode == 0) {
|
||||
// mode 1: special case C = A_(1) B_(1)^T
|
||||
// C = 1 A B^T + 0 C
|
||||
F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2],
|
||||
&one, A, &dimA[mode], B, &dimB[mode],
|
||||
&zero, C, &dimA[mode] FCONE FCONE);
|
||||
} else {
|
||||
// Other modes writen as accumulated sum of matrix products
|
||||
// initialize C to zero
|
||||
memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double));
|
||||
|
||||
// Sum over all modes > mode
|
||||
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
||||
// C = 1 A^T B + 1 C
|
||||
F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0],
|
||||
&one, &A[i2 * stride[0] * dimA[mode]], &stride[0],
|
||||
&B[i2 * stride[0] * dimB[mode]], &stride[0],
|
||||
&one, C, &dimA[mode] FCONE FCONE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tensor Mode Crossproduct
|
||||
*
|
||||
|
@ -21,72 +66,58 @@
|
|||
* @param B multi-dimensional array
|
||||
* @param m mode index (1-indexed)
|
||||
*/
|
||||
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
|
||||
extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) {
|
||||
// get zero indexed mode
|
||||
int mode = asInteger(m) - 1;
|
||||
|
||||
// get dimension attributes
|
||||
SEXP dimA = getAttrib(A, R_DimSymbol);
|
||||
SEXP dimB = getAttrib(B, R_DimSymbol);
|
||||
// Check if both `A` and `B` are real-valued
|
||||
if (!isReal(A) || !isReal(B)) {
|
||||
error("Type missmatch, both `A` and `B` must be real-valued");
|
||||
}
|
||||
|
||||
// get dimension attributes
|
||||
SEXP dimA_sexp = getAttrib(A, R_DimSymbol);
|
||||
SEXP dimB_sexp = getAttrib(B, R_DimSymbol);
|
||||
|
||||
// dimension type validation
|
||||
if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) {
|
||||
error("Dimensions must be integer vectors");
|
||||
}
|
||||
|
||||
// validate dimensions
|
||||
int rank = length(dimA_sexp);
|
||||
if (rank != length(dimB_sexp)) {
|
||||
error("Dimension mismatch");
|
||||
}
|
||||
// validate mode (0-indexed, must be smaller than the tensor order)
|
||||
if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) {
|
||||
if (mode < 0 || rank <= mode) {
|
||||
error("Illegal mode");
|
||||
}
|
||||
|
||||
// the strides
|
||||
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
||||
// `stride[1] <- dim(A)[mode]`
|
||||
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
||||
// Note: Middle stride is ignored (to be consistent with sym version)
|
||||
int stride[3] = {1, 0, 1};
|
||||
for (int i = 0; i < length(dimA); ++i) {
|
||||
int size = INTEGER(dimA)[i];
|
||||
stride[0] *= (i < mode) ? size : 1;
|
||||
stride[2] *= (i > mode) ? size : 1;
|
||||
// check margin matching of `A` and `B`
|
||||
if (i != mode && size != INTEGER(dimB)[i]) {
|
||||
error("Dimension missmatch");
|
||||
// get raw pointers to dimensions
|
||||
int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP));
|
||||
int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP));
|
||||
|
||||
// finaly, check for `A` and `B` dimensions to match
|
||||
for (int i = 0; i < rank; ++i) {
|
||||
if (i != mode && dimA[i] != dimB[i]) {
|
||||
error("Dimension mismatch");
|
||||
}
|
||||
}
|
||||
// Result dimensions
|
||||
int nrowC = INTEGER(dimA)[mode];
|
||||
int ncolC = INTEGER(dimB)[mode];
|
||||
|
||||
// create response matrix C
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC));
|
||||
SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode]));
|
||||
|
||||
// raw data access pointers
|
||||
double* a = REAL(A);
|
||||
double* b = REAL(B);
|
||||
double* c = REAL(C);
|
||||
// Call C mode crossprod subroutine
|
||||
mcrossprod(
|
||||
rank, // tensor rank of both `A` and `B`
|
||||
REAL(A), dimA, // mem. addr. of A, dim(A)
|
||||
REAL(B), dimB, // mem. addr. of B, dim(B)
|
||||
mode, // the crossproduct mode to compute
|
||||
REAL(C) // return value memory addr.
|
||||
);
|
||||
|
||||
// employ BLAS dgemm (Double GEneralized Matrix Matrix) operation
|
||||
// (C = alpha op(A) op(A) + beta C, op is the identity of transposition)
|
||||
const double zero = 0.0;
|
||||
const double one = 1.0;
|
||||
if (mode == 0) {
|
||||
// mode 1: special case C = A_(1) B_(1)^T
|
||||
// C = 1 A B^T + 0 C
|
||||
F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2],
|
||||
&one, a, &nrowC, b, &ncolC,
|
||||
&zero, c, &nrowC FCONE FCONE);
|
||||
} else {
|
||||
// Other modes writen as accumulated sum of matrix products
|
||||
// initialize C to zero
|
||||
memset(c, 0, nrowC * ncolC * sizeof(double));
|
||||
|
||||
// Sum over all modes > mode
|
||||
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
||||
// C = 1 A^T B + 1 C
|
||||
F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0],
|
||||
&one, &a[i2 * stride[0] * nrowC], &stride[0],
|
||||
&b[i2 * stride[0] * ncolC], &stride[0],
|
||||
&one, c, &nrowC FCONE FCONE);
|
||||
}
|
||||
}
|
||||
|
||||
// release C to grabage collector
|
||||
// release C to garbage collector
|
||||
UNPROTECT(1);
|
||||
|
||||
return C;
|
||||
|
@ -104,7 +135,7 @@ extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
|
|||
* @param A multi-dimensional array
|
||||
* @param m mode index (1-indexed)
|
||||
*/
|
||||
extern SEXP mcrossprod_sym(SEXP A, SEXP m) {
|
||||
extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) {
|
||||
// get zero indexed mode
|
||||
int mode = asInteger(m) - 1;
|
||||
|
||||
|
|
|
@ -0,0 +1,160 @@
|
|||
#include "R_api.h"
|
||||
#include "ttm.h"
|
||||
|
||||
int mlm(
|
||||
/* options */ const int* trans, const int* modes, const int nrhs,
|
||||
/* dims */ const int* dimA, const int* dimC, const int ord,
|
||||
/* scalars */ const double alpha,
|
||||
/* tensor */ const double* A,
|
||||
/* matrices */ const double** Bs, const int* ldBs,
|
||||
/* scalar */ const double beta,
|
||||
/* tensor */ double* C,
|
||||
double* work_mem
|
||||
) {
|
||||
// Compute total size of `A`, `C` and required working memory
|
||||
int sizeA = 1; // `prod(dim(A))`
|
||||
int sizeC = 1; // `prod(dim(C))`
|
||||
int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included)
|
||||
for (int i = 0; i < ord; ++i) {
|
||||
sizeA *= dimA[i];
|
||||
sizeC *= dimC[i];
|
||||
work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i];
|
||||
}
|
||||
|
||||
// In requesting working memory size stop here and return size
|
||||
if (work_mem == NULL) {
|
||||
return work_size + ord;
|
||||
}
|
||||
|
||||
// Copy dimensions of A to intermediate temp. dim
|
||||
int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer
|
||||
memcpy(dimT, dimA, ord * sizeof(int));
|
||||
|
||||
// Setup work memory hooks (two swapable blocks)
|
||||
double* tmp1 = work_mem;
|
||||
double* tmp2 = work_mem + (work_size >> 1);
|
||||
|
||||
// Multi Linear Multiplication is an iterated application of TTM
|
||||
for (int i = 0; i < nrhs; ++i) {
|
||||
// Get current `B` dimensions (only implicitly given)
|
||||
int nrowB = dimC[modes[i]];
|
||||
int ncolB = dimA[modes[i]];
|
||||
if (trans && trans[i]) {
|
||||
nrowB = dimA[modes[i]];
|
||||
ncolB = dimC[modes[i]];
|
||||
}
|
||||
|
||||
// Tensor Times Matrix (`modes[i]` mode products)
|
||||
ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB,
|
||||
1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0,
|
||||
tmp1);
|
||||
|
||||
// Update intermediate temp dim
|
||||
dimT[modes[i]] = dimC[modes[i]];
|
||||
|
||||
// Swap tmp1 <-> tmp2
|
||||
double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3;
|
||||
}
|
||||
|
||||
// dAXPY (a x + y) with `x = tmp2` and `y = beta C`
|
||||
if (beta != 0.0) {
|
||||
for (int i = 0; i < sizeC; ++i) {
|
||||
C[i] *= beta;
|
||||
}
|
||||
} else {
|
||||
memset(C, 0, sizeC * sizeof(double));
|
||||
}
|
||||
axpy(sizeC, alpha, tmp2, 1, C, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Multi Linear Multiplication (`R` binding)
|
||||
*/
|
||||
extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) {
|
||||
|
||||
// get dimension attribute of A
|
||||
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
|
||||
int ord = Rf_length(dimA);
|
||||
|
||||
// Check if `A` is a real valued tensor
|
||||
if (!Rf_isReal(A) || Rf_isNull(dimA)) {
|
||||
Rf_error("Param. `A` need to be a real valued array");
|
||||
}
|
||||
|
||||
// Validate that `Bs` is a list
|
||||
if (!Rf_isNewList(Bs)) {
|
||||
Rf_error("Param. `Bs` need to be a list of matrices");
|
||||
}
|
||||
if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) {
|
||||
Rf_error("Param. type missmatch, expected modes as ints and ops logical");
|
||||
}
|
||||
|
||||
// Number of `B` matrices (Nr of Right Hand Side objects)
|
||||
int nrhs = Rf_length(Bs);
|
||||
|
||||
// further parameter validations
|
||||
if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) {
|
||||
Rf_error("Dimension missmatch (Params length differ)");
|
||||
}
|
||||
|
||||
// Get modes and operations for Bs (transposed or not)
|
||||
int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms`
|
||||
int* trans = INTEGER(ops);
|
||||
|
||||
// Compute result dimensions `dim(C)` while extracting `B` data pointers
|
||||
SEXP dimC = PROTECT(Rf_duplicate(dimA));
|
||||
const double** bs = (const double**)R_alloc(nrhs, sizeof(double*));
|
||||
for (int i = 0; i < nrhs; ++i) {
|
||||
// Extract right hand side matrices (`B` matrices from list)
|
||||
SEXP B = VECTOR_ELT(Bs, i);
|
||||
if (!(Rf_isMatrix(B) && Rf_isReal(B))) {
|
||||
UNPROTECT(1);
|
||||
Rf_error("Param. `Bs` need to be a list of real matrices");
|
||||
}
|
||||
int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol));
|
||||
bs[i] = REAL(B);
|
||||
|
||||
// Convert 1-indexed modes to be 0-indexed
|
||||
modes[i] = INTEGER(ms)[i] - 1;
|
||||
|
||||
// Check if mode is out or range (indexing a non-existing mode of `A`)
|
||||
if (!((0 <= modes[i]) && (modes[i] < ord))) {
|
||||
UNPROTECT(1);
|
||||
Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1);
|
||||
}
|
||||
|
||||
// Check if `i`th mode of `A` matches corresponding `B` dimension
|
||||
if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) {
|
||||
UNPROTECT(1);
|
||||
Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1);
|
||||
}
|
||||
|
||||
INTEGER(dimC)[modes[i]] = dimB[!!trans[i]];
|
||||
}
|
||||
|
||||
// Now, compute `C`s size `prod(dim(C))`
|
||||
int sizeC = 1;
|
||||
for (int i = 0; i < ord; ++i) {
|
||||
sizeC *= INTEGER(dimC)[i];
|
||||
}
|
||||
|
||||
// Allocate response `C`
|
||||
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
||||
Rf_setAttrib(C, R_DimSymbol, dimC);
|
||||
|
||||
// allocate working memory size
|
||||
int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
|
||||
1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL);
|
||||
double* work_memory = (double*)R_alloc(work_size, sizeof(double));
|
||||
|
||||
// Compute Multi-Linear Multiplication (call subroutine)
|
||||
(void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord,
|
||||
1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory);
|
||||
|
||||
// release C to the hands of the garbage collector
|
||||
UNPROTECT(2);
|
||||
|
||||
return C;
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
#ifndef INCLUDE_GUARD_MLM_H
|
||||
#define INCLUDE_GUARD_MLM_H
|
||||
|
||||
/**
|
||||
* Multi Linear Multiplication
|
||||
*
|
||||
* C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs] op(Bs[nrhs]) + beta C
|
||||
*
|
||||
* @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix
|
||||
* is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff
|
||||
* `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is
|
||||
* always the identity.
|
||||
* @param modes integer vector of length `nrhs` specifying the product modes.
|
||||
*
|
||||
* @todo TODO: continue doc. !!!
|
||||
*
|
||||
* @param alpha scaling factor
|
||||
* @param beta scaling factor
|
||||
* @param C output memory addr.
|
||||
*
|
||||
* @param work_mem NULL or temporary working memory of size
|
||||
* `2 * prod(pmax(dim(A), dim(C))) + ord`
|
||||
*/
|
||||
int mlm(
|
||||
/* options */ const int* trans, const int* modes, const int nrhs,
|
||||
/* dims */ const int* dimA, const int* dimC, const int ord,
|
||||
/* scalars */ const double alpha,
|
||||
/* tensor */ const double* A,
|
||||
/* matrices */ const double** Bs, const int* ldBs,
|
||||
/* scalar */ const double beta,
|
||||
/* tensor */ double* C,
|
||||
double* work_mem
|
||||
);
|
||||
|
||||
#endif /* INCLUDE_GUARD_MLM_H */
|
|
@ -0,0 +1,37 @@
|
|||
// /**
|
||||
// * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family
|
||||
// *
|
||||
// * For single threaded operations the PRNG provided by `R` are prefered. But they
|
||||
// * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded
|
||||
// * application.
|
||||
// *
|
||||
// * See TODO: ...https://en.wikipedia.org/wiki/Xorshift
|
||||
// * SchachHoernchen
|
||||
// */
|
||||
|
||||
// #ifndef INCLUDE_GUARD_RANDOM_H
|
||||
// #define INCLUDE_GUARD_RANDOM_H
|
||||
|
||||
// #include <stdint.h> // uint32_t, uint64_t
|
||||
|
||||
|
||||
// static inline uint64_t rot64(uint64_t val, int shift) {
|
||||
// return (val << shift) | (val >> (64 - shift));
|
||||
// }
|
||||
|
||||
// // PRNG of the Xorshift family
|
||||
// // @note the least significant 32 bits are not reliable, use most significant 32 bits
|
||||
// static inline uint64_t rand_u64(uint64_t seed[4]) {
|
||||
// uint64_t e = seed[0] - rot64(seed[1], 7);
|
||||
// seed[0] = seed[1] ^ rot64(seed[1], 13);
|
||||
// seed[1] = seed[2] + rot64(seed[3], 37);
|
||||
// seed[2] = seed[3] + e;
|
||||
// seed[3] = e + seed[0];
|
||||
// return seed[3];
|
||||
// }
|
||||
|
||||
// static inline double unif_rand_u64(uint64_t seed[4]) {
|
||||
// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1);
|
||||
// }
|
||||
|
||||
// #endif
|
|
@ -0,0 +1,112 @@
|
|||
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
||||
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
||||
#define USE_FC_LEN_T
|
||||
#include <R.h>
|
||||
#include <Rinternals.h>
|
||||
#include <R_ext/BLAS.h>
|
||||
#include <R_ext/Lapack.h>
|
||||
#ifndef FCONE
|
||||
#define FCONE
|
||||
#endif
|
||||
|
||||
// Singular Value Decomposition
|
||||
// @note assumes passed arguments to be "clean", "properly checked"
|
||||
int svd(
|
||||
const int p, const int q,
|
||||
double* A, const int ldA,
|
||||
double* d,
|
||||
double* U, const int ldU,
|
||||
double* Vt, const int ldVt,
|
||||
double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0
|
||||
) {
|
||||
// LAPACK information return variable (in case of an error, LAPACK sets this
|
||||
// to a non-zero value)
|
||||
int info = 0;
|
||||
|
||||
// check if work memory is supplied, if _not_ query for appropriate size and
|
||||
// allocate the memory
|
||||
if (!work_mem || work_size < 1 || !i_work) {
|
||||
// create (declare) work memory
|
||||
i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int));
|
||||
double tmp; // variable to store work memory size query result
|
||||
work_mem = &tmp; // point to `tmp` as it will contain the result of
|
||||
// the work memory size query
|
||||
|
||||
// request appropriate work memory size
|
||||
F77_CALL(dgesdd)(
|
||||
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
|
||||
work_mem, &work_size, i_work, &info
|
||||
FCONE);
|
||||
|
||||
// allocate work memory
|
||||
work_size = (int)tmp; // "read" work memory size query result
|
||||
work_mem = (double*)R_alloc(work_size, sizeof(double));
|
||||
|
||||
// in case of an error, return error code stored in `info`
|
||||
if (info) { return info; }
|
||||
}
|
||||
|
||||
// actual SVD computation
|
||||
F77_CALL(dgesdd)(
|
||||
"S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt,
|
||||
work_mem, &work_size, i_work, &info
|
||||
FCONE);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
// Singular Valued Decomposition (R binding)
|
||||
// @note educational purpose, same as `La.svd`
|
||||
extern SEXP R_svd(SEXP A) {
|
||||
// Check if we got a real-valued matrix
|
||||
if (!isMatrix(A) || !isReal(A)) {
|
||||
error("Require a real-valued matrix");
|
||||
}
|
||||
|
||||
// extract matrix dimensions
|
||||
int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP));
|
||||
int p = dims[0]; // = nrow(A)
|
||||
int q = dims[1]; // = ncol(A)
|
||||
int m = p < q ? p : q; // = min(p, q) = min(dim(A))
|
||||
|
||||
// Check if dimensions are not degenerate
|
||||
if (p < 1 || q < 1) {
|
||||
error("Expected positive matrix dimensions");
|
||||
}
|
||||
|
||||
// create R objects to store the SVD result `A = U diag(d) V^T`
|
||||
SEXP U = PROTECT(allocMatrix(REALSXP, p, m));
|
||||
SEXP d = PROTECT(allocVector(REALSXP, m));
|
||||
SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q));
|
||||
|
||||
// Call C SVD routine
|
||||
int info = svd(
|
||||
p, q, // nrow(A), ncol(A)
|
||||
REAL(A), p, // mem. addr. of A, ldA (leading dimension of A)
|
||||
REAL(d), // mem. addr. of d
|
||||
REAL(U), p, // mem. addr. of U, ldU (leading dimension of U)
|
||||
REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T)
|
||||
0, -1, 0 // work mem. pointer, work mem. size and int work mem
|
||||
); // set to `0, -1, 0` to indicate "unknown"
|
||||
|
||||
// Check LAPACK info
|
||||
if (info) {
|
||||
error("error code %d from LAPACK routine 'dgesdd'", info);
|
||||
}
|
||||
|
||||
// Create R list containint SVD components
|
||||
SEXP result = PROTECT(allocVector(VECSXP, 3));
|
||||
SEXP names = PROTECT(allocVector(STRSXP, 3));
|
||||
SET_VECTOR_ELT(result, 0, d);
|
||||
SET_VECTOR_ELT(result, 1, U);
|
||||
SET_VECTOR_ELT(result, 2, Vt);
|
||||
SET_STRING_ELT(names, 0, mkChar("d"));
|
||||
SET_STRING_ELT(names, 1, mkChar("u"));
|
||||
SET_STRING_ELT(names, 2, mkChar("vt"));
|
||||
setAttrib(result, R_NamesSymbol, names);
|
||||
|
||||
// Release created objects to the garbage collector
|
||||
UNPROTECT(5);
|
||||
|
||||
return result;
|
||||
}
|
|
@ -1,104 +1,48 @@
|
|||
// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string
|
||||
// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings
|
||||
#define USE_FC_LEN_T
|
||||
// Disables remapping of R API functions from `Rf_<name>` or `R_<name>`
|
||||
#define R_NO_REMAP
|
||||
#include <R.h>
|
||||
#include <Rinternals.h>
|
||||
#include <R_ext/BLAS.h>
|
||||
#ifndef FCONE
|
||||
#define FCONE
|
||||
#endif
|
||||
#include "ttm.h"
|
||||
|
||||
/**
|
||||
* Tensor Times Matrix a.k.a. Mode Product
|
||||
*
|
||||
* @param A multi-dimensional array
|
||||
* @param B matrix
|
||||
* @param m mode index (1-indexed)
|
||||
* @param op boolean if `B` is transposed
|
||||
*/
|
||||
extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
|
||||
void ttm(
|
||||
const int transB, const int mode,
|
||||
const int* dimA, const int ordA, const int nrowB, const int ncolB,
|
||||
const double alpha,
|
||||
const double* A,
|
||||
const double* B, const int ldB, // TODO: ldB is IGNORED!!!
|
||||
const double beta,
|
||||
double* C
|
||||
) {
|
||||
|
||||
// get zero indexed mode
|
||||
const int mode = Rf_asInteger(m) - 1;
|
||||
|
||||
// get dimension attribute of A
|
||||
SEXP dim = Rf_getAttrib(A, R_DimSymbol);
|
||||
|
||||
// operation on `B` (transposed or not)
|
||||
const int trans = Rf_asLogical(op);
|
||||
|
||||
// as well as `B`s dimensions
|
||||
const int nrow = Rf_nrows(B);
|
||||
const int ncol = Rf_ncols(B);
|
||||
|
||||
// validate mode (mode must be smaller than the nr of dimensions)
|
||||
if (mode < 0 || Rf_length(dim) <= mode) {
|
||||
Rf_error("Illegal mode");
|
||||
}
|
||||
|
||||
// and check if B is a matrix of non degenetate size
|
||||
if (!Rf_isMatrix(B)) {
|
||||
Rf_error("Expected a matrix as second argument");
|
||||
}
|
||||
if (!Rf_nrows(B) || !Rf_ncols(B)) {
|
||||
Rf_error("Zero dimension detected");
|
||||
}
|
||||
|
||||
// check matching of dimensions
|
||||
if (INTEGER(dim)[mode] != (trans ? nrow : ncol)) {
|
||||
Rf_error("Dimension missmatch");
|
||||
}
|
||||
|
||||
// calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C),
|
||||
int sizeC = 1;
|
||||
// and the strides
|
||||
// Strides are the "leading" and "trailing" dimensions of the matricized
|
||||
// tensor `A` in the following matrix-matrix multiplications
|
||||
// `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
|
||||
// `stride[1] <- dim(A)[mode]`
|
||||
// `stride[2] <- prod(dim(A)[-seq_len(mode)])`
|
||||
int stride[3] = {1, INTEGER(dim)[mode], 1};
|
||||
for (int i = 0; i < Rf_length(dim); ++i) {
|
||||
int size = INTEGER(dim)[i];
|
||||
// check for non-degenetate dimensions
|
||||
if (!size) {
|
||||
Rf_error("Zero dimension detected");
|
||||
}
|
||||
sizeC *= (i == mode) ? (trans ? ncol : nrow) : size;
|
||||
stride[0] *= (i < mode) ? size : 1;
|
||||
stride[2] *= (i > mode) ? size : 1;
|
||||
int stride[3] = {1, dimA[mode], 1};
|
||||
for (int i = 0; i < ordA; ++i) {
|
||||
stride[0] *= (i < mode) ? dimA[i] : 1;
|
||||
stride[2] *= (i > mode) ? dimA[i] : 1;
|
||||
}
|
||||
|
||||
// create response object C
|
||||
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
||||
|
||||
// raw data access pointers
|
||||
double* a = REAL(A);
|
||||
double* b = REAL(B);
|
||||
double* c = REAL(C);
|
||||
|
||||
// Tensor Times Matrix / Mode Product
|
||||
const double zero = 0.0;
|
||||
const double one = 1.0;
|
||||
if (mode == 0) {
|
||||
// mode 1: (A x_1 op(B))_(1) = op(B) A_(1) as a single Matrix-Matrix
|
||||
// multiplication
|
||||
F77_CALL(dgemm)(trans ? "T" : "N", "N",
|
||||
(trans ? &ncol : &nrow), &stride[2], &stride[1], &one,
|
||||
b, &nrow, a, &stride[1],
|
||||
&zero, c, (trans ? &ncol : &nrow)
|
||||
// mode 1: C = alpha (A x_1 op(B))_(1) + beta C
|
||||
// = alpha op(B) A_(1) + beta C
|
||||
// as a single Matrix-Matrix multiplication
|
||||
F77_CALL(dgemm)(transB ? "T" : "N", "N",
|
||||
(transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha,
|
||||
B, &nrowB, A, &stride[1],
|
||||
&beta, C, (transB ? &ncolB : &nrowB)
|
||||
FCONE FCONE);
|
||||
} else {
|
||||
// Other modes can be written as blocks of matrix multiplications
|
||||
// (A x_m op(B))_(m)' = A_(m)' op(B)'
|
||||
// C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2
|
||||
// = alpha A_(m)' op(B)' + beta C_:,:,i2
|
||||
for (int i2 = 0; i2 < stride[2]; ++i2) {
|
||||
F77_CALL(dgemm)("N", trans ? "N" : "T",
|
||||
&stride[0], (trans ? &ncol : &nrow), &stride[1], &one,
|
||||
&a[i2 * stride[0] * stride[1]], &stride[0], b, &nrow,
|
||||
&zero, &c[i2 * stride[0] * (trans ? ncol : nrow)], &stride[0]
|
||||
F77_CALL(dgemm)("N", transB ? "N" : "T",
|
||||
&stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha,
|
||||
&A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB,
|
||||
&beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0]
|
||||
FCONE FCONE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
// (reference implementation)
|
||||
// Tensor Times Matrix / Mode Product for `op(B) == B`
|
||||
|
@ -114,13 +58,76 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
|
|||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Tensor Times Matrix a.k.a. Mode Product
|
||||
*
|
||||
* @param A multi-dimensional array
|
||||
* @param B matrix
|
||||
* @param m mode index (1-indexed)
|
||||
* @param op boolean if `B` is transposed
|
||||
*/
|
||||
extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) {
|
||||
|
||||
// get zero indexed mode
|
||||
const int mode = Rf_asInteger(m) - 1;
|
||||
|
||||
// get dimension attribute of A
|
||||
SEXP dimA = Rf_getAttrib(A, R_DimSymbol);
|
||||
|
||||
// operation on `B` (transposed or not)
|
||||
const int transB = Rf_asLogical(op);
|
||||
|
||||
// as well as `B`s dimensions
|
||||
const int nrowB = Rf_nrows(B);
|
||||
const int ncolB = Rf_ncols(B);
|
||||
|
||||
// validate mode (mode must be smaller than the nr of dimensions)
|
||||
if (mode < 0 || Rf_length(dimA) <= mode) {
|
||||
Rf_error("Illegal mode");
|
||||
}
|
||||
|
||||
// and check if B is a matrix of non degenetate size
|
||||
if (!Rf_isMatrix(B)) {
|
||||
Rf_error("Expected a matrix as second argument");
|
||||
}
|
||||
if (!Rf_nrows(B) || !Rf_ncols(B)) {
|
||||
Rf_error("Zero dimension detected");
|
||||
}
|
||||
|
||||
// check matching of dimensions
|
||||
if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) {
|
||||
Rf_error("Dimension missmatch");
|
||||
}
|
||||
|
||||
// calc nr of response elements (size of C)
|
||||
// `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)`
|
||||
int sizeC = 1;
|
||||
for (int i = 0; i < Rf_length(dimA); ++i) {
|
||||
int size = INTEGER(dimA)[i];
|
||||
// check for non-degenetate dimensions
|
||||
if (!size) {
|
||||
Rf_error("Zero dimension detected");
|
||||
}
|
||||
sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size;
|
||||
}
|
||||
|
||||
// create response object C
|
||||
SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC));
|
||||
|
||||
// Tensor Times Matrix / Mode Product
|
||||
ttm(transB, mode,
|
||||
INTEGER(dimA), Rf_length(dimA), nrowB, ncolB,
|
||||
1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C));
|
||||
|
||||
// finally, set result dimensions
|
||||
SEXP newdim = PROTECT(Rf_allocVector(INTSXP, Rf_length(dim)));
|
||||
for (int i = 0; i < Rf_length(dim); ++i) {
|
||||
INTEGER(newdim)[i] = (i == mode) ? (trans ? ncol : nrow) : INTEGER(dim)[i];
|
||||
SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA)));
|
||||
for (int i = 0; i < Rf_length(dimA); ++i) {
|
||||
INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i];
|
||||
}
|
||||
Rf_setAttrib(C, R_DimSymbol, newdim);
|
||||
Rf_setAttrib(C, R_DimSymbol, dimC);
|
||||
|
||||
// release C to the hands of the garbage collector
|
||||
UNPROTECT(2);
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef INCLUDE_GUARD_TTM_H
|
||||
#define INCLUDE_GUARD_TTM_H
|
||||
|
||||
#include "R_api.h"
|
||||
|
||||
/**
|
||||
* Tensor Times Matrix Subroutine
|
||||
*
|
||||
* @attention Assumes all parameters to be correct!
|
||||
*/
|
||||
void ttm(
|
||||
/* options */ const int transB, const int mode,
|
||||
/* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB,
|
||||
/* scalar */ const double alpha,
|
||||
/* tensor */ const double* A,
|
||||
/* matrix */ const double* B, const int ldB,
|
||||
/* scalar */ const double beta,
|
||||
/* tensor */ double* C
|
||||
);
|
||||
|
||||
#endif /* INCLUDE_GUARD_TTM_H */
|
Loading…
Reference in New Issue