From 90cd46e2097b822134078d20f6428e77c99cf829 Mon Sep 17 00:00:00 2001 From: daniel Date: Tue, 14 Nov 2023 14:35:43 +0100 Subject: [PATCH] dev --- LaTeX/GMLM.tex | 218 +++++++------ LaTeX/main.bib | 153 +++++++++ mvbernoulli/src/ising_model.cpp | 2 +- sim/ising.R | 204 ------------ sim/ising_2.R | 134 -------- sim/ising_small.R | 207 ------------ sim/ising_small_2.R | 131 -------- sim/normal.R | 171 ---------- sim/normal_2.R | 96 ------ sim/plots.R | 100 ------ tensorPredictors/NAMESPACE | 24 ++ tensorPredictors/R/GMLM.R | 4 +- tensorPredictors/R/HOPCA.R | 2 +- tensorPredictors/R/HOPIR.R | 4 +- tensorPredictors/R/HOSVD.R | 4 +- tensorPredictors/R/NAGD.R | 8 +- tensorPredictors/R/TSIR.R | 14 +- tensorPredictors/R/approx_kronecker.R | 113 ++++++- tensorPredictors/R/dist_kron_norm.R | 24 +- tensorPredictors/R/dist_projection.R | 2 +- tensorPredictors/R/gmlm_ising.R | 138 ++++++++ tensorPredictors/R/gmlm_tensor_normal.R | 357 +++++++++++++++++++++ tensorPredictors/R/ising_m2.R | 18 ++ tensorPredictors/R/ising_sample.R | 11 + tensorPredictors/R/make_gmlm_family.R | 24 ++ tensorPredictors/R/matricize.R | 18 +- tensorPredictors/R/matrixImage.R | 16 +- tensorPredictors/R/mcov.R | 16 +- tensorPredictors/R/mcrossprod.R | 10 +- tensorPredictors/R/mlm.R | 58 +++- tensorPredictors/R/patternMatrices.R | 12 +- tensorPredictors/R/ttm.R | 28 ++ tensorPredictors/R/vech.R | 49 +++ tensorPredictors/src/Makevars | 3 +- tensorPredictors/src/R_api.h | 138 ++++++++ tensorPredictors/src/bit_utils.h | 185 +++++++++++ tensorPredictors/src/init.c | 87 ++++- tensorPredictors/src/int_utils.h | 77 +++++ tensorPredictors/src/ising_MCMC.h | 181 +++++++++++ tensorPredictors/src/ising_m2.c | 401 ++++++++++++++++++++++++ tensorPredictors/src/ising_sample.c | 72 +++++ tensorPredictors/src/mcrossprod.c | 139 ++++---- tensorPredictors/src/mlm.c | 160 ++++++++++ tensorPredictors/src/mlm.h | 35 +++ tensorPredictors/src/random.h | 37 +++ tensorPredictors/src/svd.c | 112 +++++++ tensorPredictors/src/ttm.c | 187 +++++------ tensorPredictors/src/ttm.h | 21 ++ 48 files changed, 2852 insertions(+), 1353 deletions(-) delete mode 100644 sim/ising.R delete mode 100644 sim/ising_2.R delete mode 100644 sim/ising_small.R delete mode 100644 sim/ising_small_2.R delete mode 100644 sim/normal.R delete mode 100644 sim/normal_2.R delete mode 100644 sim/plots.R create mode 100644 tensorPredictors/R/gmlm_ising.R create mode 100644 tensorPredictors/R/gmlm_tensor_normal.R create mode 100644 tensorPredictors/R/ising_m2.R create mode 100644 tensorPredictors/R/ising_sample.R create mode 100644 tensorPredictors/src/R_api.h create mode 100644 tensorPredictors/src/bit_utils.h create mode 100644 tensorPredictors/src/int_utils.h create mode 100644 tensorPredictors/src/ising_MCMC.h create mode 100644 tensorPredictors/src/ising_m2.c create mode 100644 tensorPredictors/src/ising_sample.c create mode 100644 tensorPredictors/src/mlm.c create mode 100644 tensorPredictors/src/mlm.h create mode 100644 tensorPredictors/src/random.h create mode 100644 tensorPredictors/src/svd.c create mode 100644 tensorPredictors/src/ttm.h diff --git a/LaTeX/GMLM.tex b/LaTeX/GMLM.tex index a7502e8..01d9e6b 100644 --- a/LaTeX/GMLM.tex +++ b/LaTeX/GMLM.tex @@ -90,10 +90,11 @@ %%% Custom operators with ether one or two arguments (limits) \makeatletter %%% Multi-Linear Multiplication +% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!) % Save first argument as \arg@one -\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i} +\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i} % Check for second argument -\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi} +\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi} % specialization for one or two arguments, both versions use saved first argument \def\mlm@one{\mathchoice% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}% @@ -101,14 +102,31 @@ {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}% } -% this commands single argument is the second argument of \mlm -\def\mlm@two#1{\mathchoice% +% this commands single argument is the second argument of \mlm, it gobbles the `^` +\def\mlm@two^#1{\mathchoice% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}% } +%%% Big Circle (Iterated Outer Product) +\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i} +\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi} +\def\outer@one{\mathchoice% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}% +} +\def\outer@two#1{\mathchoice% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}% + {\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}% +} + + %%% Big Kronecker Product (with overflowing limits) % Save first argument as \arg@one \def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i} @@ -134,17 +152,15 @@ \newcommand{\algorithmicbreak}{\textbf{break}} \newcommand{\Break}{\State \algorithmicbreak} - \begin{document} \maketitle - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{abstract} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons. - This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided. + We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response. + This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided. \end{abstract} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -256,23 +272,23 @@ A straight forward idea for parameter estimation is to use Gradient Descent. For \begin{algorithm}[ht] \caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent} \begin{algorithmic}[1] - \State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$ + \State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$ \State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$ - \State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$ + \State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$ \\ \State $t \leftarrow 1$ \Comment{step counter} - \State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$ + \State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$ \Comment{artificial first step} \State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$ \Comment{momentum extrapolation weights} \\ \Repeat \Comment{repeat untill convergence} - \State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation} + \State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation} \For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search} - \State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$ - \If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition} - \State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$ + \State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$ + \If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition} + \State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$ \State $\delta^{(t + 1)} \leftarrow \delta$ \Break \EndIf @@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist \begin{figure}[!ht] \centering - \includegraphics[width = \textwidth]{sim-normal-20221012.png} + \includegraphics[width = \textwidth]{images/sim-normal-20221012.png} \caption{\label{fig:sim-normal}Simulation Normal} \end{figure} @@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist \begin{figure}[!ht] \centering - \includegraphics[width = \textwidth]{sim-ising-small-20221012.png} + \includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png} \caption{\label{fig:sim-ising-small}Simulation Ising Small} \end{figure} @@ -433,7 +449,7 @@ where each individual block is given by For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$. We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ given by \begin{displaymath} - l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y}) + l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y}) \end{displaymath} with \begin{align*} @@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t % \d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\ &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ - &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\ - &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\ - &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\ - &\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}} + &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\ + &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\ + &= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\ + &\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}} \end{align*}}% The next step is to identify the Hessians from the second differentials in a similar manner as befor. {\allowdisplaybreaks\begin{align*} @@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a sim \qquad{\color{gray} (p \times p)} \\ &\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\ - &= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ - &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\ - &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} + &= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ + &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\ + &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \qquad{\color{gray} (p_j q_j \times p)} \\ &\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\ &= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\ &= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\ - &= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\ - &= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\ - &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} + &= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\ + &= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\ + &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \qquad{\color{gray} (p_j^2 \times p)} \\ &\d^2 l(\mat{\alpha}_j) \\ - &= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\ - &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\ - &= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ - &= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\ - &\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))} + &= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\ + &= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\ + &= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ + &= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\ + &\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))} \qquad{\color{gray} (p_j q_j \times p_j q_j)} \\ &\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\ - &\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\ - &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\ - &= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\ - &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\ - &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ - &\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ - &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ - &\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\ + &\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\ + &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\ + &= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\ + &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\ + &\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ + &\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\ + &= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\ + &\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\ &\qquad \begin{aligned} \Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &= - -c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ - &\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])} + -c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ + &\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])} \qquad{\color{gray} (p_j q_j \times p_l q_l)} \end{aligned} \\ &\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\ - &= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ - &= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ - &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\ - &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\ - &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\ - &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}} + &= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ + &= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ + &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\ + &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\ + &= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\ + &\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}} % \qquad {\color{gray} (p_j q_j \times p_l^2)} \\ &\d^2 l(\mat{\Omega}_j) \\ &= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\ - &= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\ - &= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\ - &\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}} + &= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\ + &= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\ + &\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}} %\qquad {\color{gray} (p_j^2 \times p_j^2)} \\ &\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\ @@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a sim &\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\ &\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\ - &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ - &\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ - &= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ - &\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ + &= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ + &\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\ + &= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ + &\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\ &\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &= - \mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ - &\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}} + \mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ + &\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}} % \qquad {\color{gray} (p_j^2 \times p_l^2)} \end{aligned} \end{align*}}% @@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind \begin{align*} \mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\ \mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y - &= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\ + &= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\ \mathcal{I}_{1,j+r+1} - &= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ + &= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\ \mathcal{I}_{j+1,l+1} - &= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ + &= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\ \mathcal{I}_{j+1,l+r+1} - &= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\ + &= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\ \mathcal{I}_{j+r+1,l+r+1} - &= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)} + &= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)} \end{align*} @@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati \begin{theorem}\label{thm:mlm_mat} Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds \begin{displaymath} - \Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})} + \Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})} = \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big). \end{displaymath} \end{theorem} A well known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product \begin{displaymath} - \vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}. + \vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}. \end{displaymath} Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention. @@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$ Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then \begin{displaymath} \t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k - \equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}. + \equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}. \end{displaymath} \end{theorem} +A special case of above Theorem is given for tensors represented as a Kronecker product. Therefore, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to +\begin{displaymath} + \t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big) + = + \prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k) + = + \Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}. +\end{displaymath} +In case of $r = 2$ this means +\begin{align*} + \t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2) + &= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\ + &= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}. +\end{align*} + +Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order +\begin{displaymath} + \t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k} + = (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})} +\end{displaymath} +which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$ . + \todo{continue} + + % Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as % \begin{displaymath} % \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))}) @@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$ \end{tikzpicture} \end{center} -\newcommand{\somedrawing} { - \coordinate (a) at (-2,-2,-2); - \coordinate (b) at (-2,-2,2); - \coordinate (c) at (-2,2,-2); - \coordinate (d) at (-2,2,2); - \coordinate (e) at (2,-2,-2); - \coordinate (f) at (2,-2,2); - \coordinate (g) at (2,2,-2); - \coordinate (h) at (2,2,2); - \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h); - \fill (a) circle (0.1cm); - \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm); -} +% \newcommand{\somedrawing}{ +% \coordinate (a) at (-2,-2,-2); +% \coordinate (b) at (-2,-2,2); +% \coordinate (c) at (-2,2,-2); +% \coordinate (d) at (-2,2,2); +% \coordinate (e) at (2,-2,-2); +% \coordinate (f) at (2,-2,2); +% \coordinate (g) at (2,2,-2); +% \coordinate (h) at (2,2,2); +% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h); +% \fill (a) circle (0.1cm); +% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm); +% } % \begin{figure}[ht!] % \centering @@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example considure two matrices $\mat{A}, %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Let $f$ be an $r$ times differentiable function, then \begin{displaymath} - \d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}} + \d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}} \qquad\Leftrightarrow\qquad \D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)} \end{displaymath} @@ -1372,12 +1412,12 @@ The differentials up to the 4'th are \begin{align*} \d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\ \d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\ - \d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\ - \d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm{k = 1}{4} \d{t} + \d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\ + \d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t} \end{align*} Using the differentials to derivative identification identity \begin{displaymath} - \d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t} + \d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t} \qquad\Leftrightarrow\qquad \D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)} \end{displaymath} @@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\ M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)} \end{align*} -which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$) +which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$) \begin{align*} H_{1,1} &= \cov(t_1(X)\mid Y = y) \\ &= \Sigma \\ diff --git a/LaTeX/main.bib b/LaTeX/main.bib index 55d7d2f..8e15903 100644 --- a/LaTeX/main.bib +++ b/LaTeX/main.bib @@ -9,6 +9,17 @@ publisher = {[Royal Statistical Society, Wiley]} } +@inproceedings{Nesterov1983, + title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$}, + author = {Nesterov, Yurii Evgen'evich}, + booktitle = {Doklady Akademii Nauk}, + volume = {269}, + number = {3}, + pages = {543--547}, + year = {1983}, + organization= {Russian Academy of Sciences} +} + @book{StatInf-CasellaBerger2002, title = {{Statistical Inference}}, author = {Casella, George and Berger, Roger L.}, @@ -27,6 +38,20 @@ isbn = {0-471-98632-1} } +@article{SymMatandJacobians-MagnusNeudecker1986, + title = {Symmetry, 0-1 Matrices and Jacobians: A Review}, + author = {Magnus, Jan R. and Neudecker, Heinz}, + ISSN = {02664666, 14694360}, + URL = {http://www.jstor.org/stable/3532421}, + journal = {Econometric Theory}, + number = {2}, + pages = {157--190}, + publisher = {Cambridge University Press}, + urldate = {2023-10-03}, + volume = {2}, + year = {1986} +} + @book{MatrixAlgebra-AbadirMagnus2005, title = {Matrix Algebra}, author = {Abadir, Karim M. and Magnus, Jan R.}, @@ -83,6 +108,31 @@ doi = {10.1080/01621459.2015.1093944} } +@article{FisherLectures-Cook2007, + author = {Cook, R. Dennis}, + journal = {Statistical Science}, + month = {02}, + number = {1}, + pages = {1--26}, + publisher = {The Institute of Mathematical Statistics}, + title = {{Fisher Lecture: Dimension Reduction in Regression}}, + volume = {22}, + year = {2007}, + doi = {10.1214/088342306000000682} +} + +@article{asymptoticMLE-BuraEtAl2018, + author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued}, + title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models}, + journal = {Statistics}, + volume = {52}, + number = {5}, + pages = {1005-1024}, + year = {2018}, + publisher = {Taylor \& Francis}, + doi = {10.1080/02331888.2018.1467420}, +} + @article{tsir-DingCook2015, author = {Shanshan Ding and R. Dennis Cook}, title = {Tensor sliced inverse regression}, @@ -117,3 +167,106 @@ isbn = {978-94-015-8196-7}, doi = {10.1007/978-94-015-8196-7_17} } + +@book{asymStats-van_der_Vaart1998, + title = {Asymptotic Statistics}, + author = {{van der Vaart}, A.W.}, + series = {Asymptotic Statistics}, + year = {1998}, + publisher = {Cambridge University Press}, + series = {Cambridge Series in Statistical and Probabilistic Mathematics}, + isbn = {0-521-49603-9} +} + +@book{measureTheory-Kusolitsch2011, + title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie}, + subtitle = {{E}ine {E}inf{\"u}hrung}, + author = {Kusolitsch, Norbert}, + series = {Springer-Lehrbuch}, + year = {2011}, + publisher = {Springer Vienna}, + isbn = {978-3-7091-0684-6}, + doi = {10.1007/978-3-7091-0685-3} +} + +@book{optimMatrixMani-AbsilEtAl2007, + title = {{Optimization Algorithms on Matrix Manifolds}}, + author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.}, + year = {2007}, + publisher = {Princeton University Press}, + isbn = {9780691132983}, + note = {Full Online Text \url{https://press.princeton.edu/absil}} +} + +@Inbook{geomMethodsOnLowRankMat-Uschmajew2020, + author = {Uschmajew, Andr{\'e} and Vandereycken, Bart}, + editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas}, + title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds}, + bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data}, + year = {2020}, + publisher = {Springer International Publishing}, + address = {Cham}, + pages = {261--313}, + isbn = {978-3-030-31351-7}, + doi = {10.1007/978-3-030-31351-7_9} +} + +@book{introToSmoothMani-Lee2012, + title = {Introduction to Smooth Manifolds}, + author = {Lee, John M.}, + year = {2012}, + journal = {Graduate Texts in Mathematics}, + publisher = {Springer New York}, + doi = {10.1007/978-1-4419-9982-5} +} + +@book{introToRiemannianMani-Lee2018, + title = {Introduction to Riemannian Manifolds}, + author = {Lee, John M.}, + year = {2018}, + journal = {Graduate Texts in Mathematics}, + publisher = {Springer International Publishing}, + doi = {10.1007/978-3-319-91755-9} +} + +@misc{MLEonManifolds-HajriEtAl2017, + title = {Maximum Likelihood Estimators on Manifolds}, + author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick}, + year = {2017}, + journal = {Lecture Notes in Computer Science}, + publisher = {Springer International Publishing}, + pages = {692-700}, + doi = {10.1007/978-3-319-68445-1_80} +} + +@article{relativity-Einstain1916, + author = {Einstein, Albert}, + title = {Die Grundlage der allgemeinen Relativitätstheorie}, + year = {1916}, + journal = {Annalen der Physik}, + volume = {354}, + number = {7}, + pages = {769-822}, + doi = {10.1002/andp.19163540702} +} + +@article{MultilinearOperators-Kolda2006, + title = {Multilinear operators for higher-order decompositions.}, + author = {Kolda, Tamara Gibson}, + doi = {10.2172/923081}, + url = {https://www.osti.gov/biblio/923081}, + place = {United States}, + year = {2006}, + month = {4}, + type = {Technical Report} +} + +@book{aufbauAnalysis-kaltenbaeck2021, + title = {Aufbau Analysis}, + author = {Kaltenb\"ack, Michael}, + isbn = {978-3-88538-127-3}, + series = {Berliner Studienreihe zur Mathematik}, + edition = {27}, + year = {2021}, + publisher = {Heldermann Verlag} +} diff --git a/mvbernoulli/src/ising_model.cpp b/mvbernoulli/src/ising_model.cpp index 329d22d..097b168 100644 --- a/mvbernoulli/src/ising_model.cpp +++ b/mvbernoulli/src/ising_model.cpp @@ -23,7 +23,7 @@ * * with the parameter vector `theta` and a statistic `T` of `y`. The real valued * parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic - * `T` has the same dimensions as a binary vector given by + * `T` has the same dimensions as the parameter vector given by * * T(y) = vech(y y'). * diff --git a/sim/ising.R b/sim/ising.R deleted file mode 100644 index 80e4ee5..0000000 --- a/sim/ising.R +++ /dev/null @@ -1,204 +0,0 @@ -library(tensorPredictors) -library(mvbernoulli) - -set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -file.prefix <- "sim-ising" -reps <- 100 # number of simulation replications -max.iter <- 100 # maximum number of iterations for GMLM -sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` -N <- 2000 # validation set size -p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!) -q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!) -r <- length(p) -# parameter configuration -rho <- -0.55 -c1 <- 1 -c2 <- 1 - -# initial consistency checks -stopifnot(exprs = { - r == 2 - all.equal(p, c(4, 4)) - all.equal(q, c(2, 2)) -}) - -### small helpers -# 270 deg matrix layout rotation (90 deg clockwise) -rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE] -# Auto-Regression Covariance Matrix -AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`)) -# Inverse of the AR matrix -AR.inv <- function(rho, dim) { - A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1)) - A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho - A / (1 - rho^2) -} -# projection matrix `P_A` as a projection onto the span of `A` -proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A))) - -### setup Ising parameters (to get reasonable parameters) -eta1 <- 0 -alphas <- Map(function(pj, qj) { # qj ignored, its 2 - linspace <- seq(-1, 1, length.out = pj) - matrix(c(linspace, linspace^2), pj, 2) -}, p, q) -Omegas <- Map(AR, dim = p, MoreArgs = list(rho)) - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) { - # generate response (sample axis is last axis) - y <- runif(n, -1, 1) # Y ~ U[-1, 1] - Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y)) - dim(Fy) <- c(2, 2, n) - - # natural exponential family parameters - eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1)) - eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas)) - - # conditional Ising model parameters - theta_y <- matrix(rep(vech(eta_y2), n), ncol = n) - ltri <- which(lower.tri(eta_y2, diag = TRUE)) - diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri]) - theta_y[diagonal, ] <- eta_y1 - - # Sample X from conditional distribution - X <- apply(theta_y, 2, ising_sample, n = 1) - # convert (from compressed integer vector) to array data - attr(X, "p") <- prod(p) - X <- t(as.mvbmatrix(X)) - dim(X) <- c(p, n) - storage.mode(X) <- "double" - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - -### Logging Errors and Warnings -# Register a global warning and error handler for logging warnings/errors with -# current simulation repetition session informatin allowing to reproduce problems -exceptionLogger <- function(ex) { - # retrieve current simulation repetition information - rep.info <- get("rep.info", envir = .GlobalEnv) - # setup an error log file with the same name as `file` - log <- paste0(rep.info$file, ".log") - # Write (append) condition message with reproduction info to the log - cat("\n\n------------------------------------------------------------\n", - sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n", - rep.info$file, rep.info$n, rep.info$rep, - paste(rep.info$.Random.seed, collapse = ","), - as.character.error(ex) - ), sep = "", file = log, append = TRUE) - # add Traceback (see: `traceback()` which the following is addapted from) - n <- length(x <- .traceback(NULL, max.lines = -1L)) - if (n == 0L) { - cat("No traceback available", "\n", file = log, append = TRUE) - } else { - for (i in 1L:n) { - xi <- x[[i]] - label <- paste0(n - i + 1L, ": ") - m <- length(xi) - srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) { - srcfile <- attr(srcref, "srcfile") - paste0(" at ", basename(srcfile$filename), "#", srcref[1L]) - } - if (isTRUE(attr(xi, "truncated"))) { - xi <- c(xi, " ...") - m <- length(xi) - } - if (!is.null(srcloc)) { - xi[m] <- paste0(xi[m], srcloc) - } - if (m > 1) { - label <- c(label, rep(substr(" ", 1L, - nchar(label, type = "w")), m - 1L)) - } - cat(paste0(label, xi), sep = "\n", file = log, append = TRUE) - } - } -} -globalCallingHandlers(list( - message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger -)) - - -### for every sample size -start <- format(Sys.time(), "%Y%m%dT%H%M") -for (n in sample.sizes) { - ### write new simulation result file - file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv") - # CSV header, used to ensure correct value/column mapping when writing to file - header <- outer( - c("dist.subspace", "dist.projection", "error.pred"), # measures - c("gmlm", "pca", "hopca", "tsir"), # methods - paste, sep = ".") - cat(paste0(header, collapse = ","), "\n", sep = "", file = file) - - ### repeated simulation - for (rep in seq_len(reps)) { - ### Repetition session state info - # Stores specific session variables before starting the current - # simulation replication. This allows to log state information which - # can be used to replicate a specific simulation repetition in case of - # errors/warnings from the logs - rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed) - - ### sample (training) data - c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - - ### Fit data using different methods - fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, - max.iter = max.iter, family = "ising") - fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis) - fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q)) - fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis) - - ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces - B.true <- Reduce(`%x%`, rev(alphas)) - B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas))) - B.hopca <- Reduce(`%x%`, rev(fit.hopca)) - B.pca <- fit.pca$rotation - B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir)) - - # Subspace Distances: Normalized `|| P_A - P_B ||_F` where - # `P_A = A (A' A)^-1/2 A'` and the normalization means that with - # respect to the dimensions of `A, B` the subspace distance is in the - # range `[0, 1]`. - dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) - dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE) - dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE) - dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE) - - # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`. - dist.projection.gmlm <- dist.projection(B.true, B.gmlm) - dist.projection.hopca <- dist.projection(B.true, B.hopca) - dist.projection.pca <- dist.projection(B.true, B.pca) - dist.projection.tsir <- NA # dist.projection(B.true, B.tsir) - - ### Prediction Errors: (using new independend sample of size `N`) - c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas) - # centered model matrix of vectorized `X`s - vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE) - P.true <- proj(B.true) - error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2") - error.pred.hopca <- norm(P.true - proj(B.hopca), "2") - error.pred.pca <- norm(P.true - proj(B.pca), "2") - error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2") - - # format estimation/prediction errors and write to file and console - line <- paste0(Map(get, header), collapse = ",") - cat(line, "\n", sep = "", file = file, append = TRUE) - # report progress - cat(sprintf("sample size: %d/%d - rep: %d/%d\n", - which(n == sample.sizes), length(sample.sizes), rep, reps)) - } -} diff --git a/sim/ising_2.R b/sim/ising_2.R deleted file mode 100644 index 58be1e0..0000000 --- a/sim/ising_2.R +++ /dev/null @@ -1,134 +0,0 @@ -library(tensorPredictors) -library(mvbernoulli) - -set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -reps <- 100 # number of simulation replications -max.iter <- 1000 # maximum number of iterations for GMLM -n <- 100 # sample sizes `n` -N <- 2000 # validation set size -p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!) -q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!) -r <- length(p) -# parameter configuration -rho <- -0.55 -c1 <- 1 -c2 <- 1 - -# initial consistency checks -stopifnot(exprs = { - r == 2 - all.equal(p, c(4, 4)) - all.equal(q, c(2, 2)) -}) - -### small helpers -# 270 deg matrix layout rotation (90 deg clockwise) -rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE] -# Auto-Regression Covariance Matrix -AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`)) -# Inverse of the AR matrix -AR.inv <- function(rho, dim) { - A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1)) - A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho - A / (1 - rho^2) -} -# projection matrix `P_A` as a projection onto the span of `A` -proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A))) - -### setup Ising parameters (to get reasonable parameters) -eta1 <- 0 -# alphas <- Map(function(pj, qj) { # qj ignored, its 2 -# linspace <- seq(-1, 1, length.out = pj) -# matrix(c(linspace, rev(linspace)), pj, 2) -# }, p, q) -alphas <- Map(function(pj, qj) { # qj ignored, its 2 - linspace <- seq(-1, 1, length.out = pj) - matrix(c(linspace, linspace^2), pj, 2) -}, p, q) -# alphas <- Map(function(pj, qj) { -# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj))) -# }, p, q) -Omegas <- Map(AR, dim = p, MoreArgs = list(rho)) - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) { - # generate response (sample axis is last axis) - y <- runif(n, -1, 1) # Y ~ U[-1, 1] - Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y)) - dim(Fy) <- c(2, 2, n) - - # natural exponential family parameters - eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1)) - eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas)) - - # conditional Ising model parameters - theta_y <- matrix(rep(vech(eta_y2), n), ncol = n) - ltri <- which(lower.tri(eta_y2, diag = TRUE)) - diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri]) - theta_y[diagonal, ] <- eta_y1 - - # Sample X from conditional distribution - X <- apply(theta_y, 2, ising_sample, n = 1) - # convert (from compressed integer vector) to array data - attr(X, "p") <- prod(p) - X <- t(as.mvbmatrix(X)) - dim(X) <- c(p, n) - storage.mode(X) <- "double" - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - -### sample (training) data -c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - -### Fit data using GMLM with logging - -# logger to log iterative change in the estimation process of GMLM -# log <- data.frame() -log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood -B.true <- Reduce(`%x%`, rev(alphas)) -logger <- function(iter, eta1.est, alphas.est, Omegas.est) { - B.est <- Reduce(`%x%`, rev(alphas.est)) - - err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE)) - err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F")) - - if (iter > 0) { cat("\033[9A") } - cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f", - iter, - log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est), - dist.subspace(B.true, B.est, normalize = TRUE) - ), - "\033[2mMSE eta1\033[0m", - mean((eta1 - eta1.est)^2), - "\033[2msubspace distances of alphas\033[0m", - do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))), - "\033[2mFrob. norm of Omega differences\033[0m", - do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))), - sep = "\n " - ) -} - -# now call the GMLM fitting routine with performance profiling -tryCatch({ - system.time( # profvis::profvis( - fit.gmlm <- GMLM.default( - X, Fy, sample.axis = sample.axis, max.iter = max.iter, - family = "ising", logger = logger - ) - ) -}, error = function(ex) { - print(ex) - traceback() -}) diff --git a/sim/ising_small.R b/sim/ising_small.R deleted file mode 100644 index d954512..0000000 --- a/sim/ising_small.R +++ /dev/null @@ -1,207 +0,0 @@ -library(tensorPredictors) -library(mvbernoulli) - -# seed = first 8 digits Euler's constant gamma = 0.57721 56649 01532 86060 -set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -file.prefix <- "sim-ising-small" -reps <- 100 # number of simulation replications -max.iter <- 1000 # maximum number of iterations for GMLM -sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` -N <- 2000 # validation set size -p <- c(2, 3) # preditor dimensions -q <- c(1, 1) # response dimensions -r <- length(p) -# parameter configuration -rho <- -0.55 -c1 <- 1 -c2 <- 1 - -# initial consistency checks -stopifnot(exprs = { - r == 2 - length(p) == r - all(q == 1) -}) - -### small helpers -# 270 deg matrix layout rotation (90 deg clockwise) -rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE] -# Auto-Regression Covariance Matrix -AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`)) -# Inverse of the AR matrix -AR.inv <- function(rho, dim) { - A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1)) - A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho - A / (1 - rho^2) -} -# projection matrix `P_A` as a projection onto the span of `A` -proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A))) - -### setup Ising parameters (to get reasonable parameters) -eta1 <- 0 -alphas <- Map(function(pj, qj) { - data <- linspace <- seq(-1, 1, len = pj) - for (k in (seq_len(qj - 1) + 1)) { - data <- c(data, linspace^k) - } - matrix(data, nrow = pj) -}, p, q) -Omegas <- Map(AR, dim = p, MoreArgs = list(rho)) - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) { - # generate response (sample axis is last axis) - y <- runif(n, -1, 1) # Y ~ U[-1, 1] - Fy <- array(sin(pi * y), dim = c(q, n)) - - # natural exponential family parameters - eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1)) - eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas)) - - # conditional Ising model parameters - theta_y <- matrix(rep(vech(eta_y2), n), ncol = n) - ltri <- which(lower.tri(eta_y2, diag = TRUE)) - diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri]) - theta_y[diagonal, ] <- eta_y1 - - # Sample X from conditional distribution - X <- apply(theta_y, 2, ising_sample, n = 1) - # convert (from compressed integer vector) to array data - attr(X, "p") <- prod(p) - X <- t(as.mvbmatrix(X)) - dim(X) <- c(p, n) - storage.mode(X) <- "double" - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - -### Logging Errors and Warnings -# Register a global warning and error handler for logging warnings/errors with -# current simulation repetition session informatin allowing to reproduce problems -exceptionLogger <- function(ex) { - # retrieve current simulation repetition information - rep.info <- get("rep.info", envir = .GlobalEnv) - # setup an error log file with the same name as `file` - log <- paste0(rep.info$file, ".log") - # Write (append) condition message with reproduction info to the log - cat("\n\n------------------------------------------------------------\n", - sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n", - rep.info$file, rep.info$n, rep.info$rep, - paste(rep.info$.Random.seed, collapse = ","), - as.character.error(ex) - ), sep = "", file = log, append = TRUE) - # add Traceback (see: `traceback()` which the following is addapted from) - n <- length(x <- .traceback(NULL, max.lines = -1L)) - if (n == 0L) { - cat("No traceback available", "\n", file = log, append = TRUE) - } else { - for (i in 1L:n) { - xi <- x[[i]] - label <- paste0(n - i + 1L, ": ") - m <- length(xi) - srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) { - srcfile <- attr(srcref, "srcfile") - paste0(" at ", basename(srcfile$filename), "#", srcref[1L]) - } - if (isTRUE(attr(xi, "truncated"))) { - xi <- c(xi, " ...") - m <- length(xi) - } - if (!is.null(srcloc)) { - xi[m] <- paste0(xi[m], srcloc) - } - if (m > 1) { - label <- c(label, rep(substr(" ", 1L, - nchar(label, type = "w")), m - 1L)) - } - cat(paste0(label, xi), sep = "\n", file = log, append = TRUE) - } - } -} -globalCallingHandlers(list( - message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger -)) - - -### for every sample size -start <- format(Sys.time(), "%Y%m%dT%H%M") -for (n in sample.sizes) { - ### write new simulation result file - file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv") - # CSV header, used to ensure correct value/column mapping when writing to file - header <- outer( - c("dist.subspace", "dist.projection", "error.pred"), # measures - c("gmlm", "pca", "hopca", "tsir"), # methods - paste, sep = ".") - cat(paste0(header, collapse = ","), "\n", sep = "", file = file) - - ### repeated simulation - for (rep in seq_len(reps)) { - ### Repetition session state info - # Stores specific session variables before starting the current - # simulation replication. This allows to log state information which - # can be used to replicate a specific simulation repetition in case of - # errors/warnings from the logs - rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed) - - ### sample (training) data - c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - - ### Fit data using different methods - fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, - max.iter = max.iter, family = "ising") - fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis) - fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q)) - fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis) - - ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces - B.true <- Reduce(`%x%`, rev(alphas)) - B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas))) - B.hopca <- Reduce(`%x%`, rev(fit.hopca)) - B.pca <- fit.pca$rotation - B.tsir <- Reduce(`%x%`, rev(fit.tsir)) - - # Subspace Distances: Normalized `|| P_A - P_B ||_F` where - # `P_A = A (A' A)^-1/2 A'` and the normalization means that with - # respect to the dimensions of `A, B` the subspace distance is in the - # range `[0, 1]`. - dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) - dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE) - dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE) - dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE) - - # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`. - dist.projection.gmlm <- dist.projection(B.true, B.gmlm) - dist.projection.hopca <- dist.projection(B.true, B.hopca) - dist.projection.pca <- dist.projection(B.true, B.pca) - dist.projection.tsir <- dist.projection(B.true, B.tsir) - - ### Prediction Errors: (using new independend sample of size `N`) - c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas) - # centered model matrix of vectorized `X`s - vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE) - P.true <- proj(B.true) - error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2") - error.pred.hopca <- norm(P.true - proj(B.hopca), "2") - error.pred.pca <- norm(P.true - proj(B.pca), "2") - error.pred.tsir <- norm(P.true - proj(B.tsir), "2") - - # format estimation/prediction errors and write to file and console - line <- paste0(Map(get, header), collapse = ",") - cat(line, "\n", sep = "", file = file, append = TRUE) - # report progress - cat(sprintf("sample size: %d/%d - rep: %d/%d\n", - which(n == sample.sizes), length(sample.sizes), rep, reps)) - } -} diff --git a/sim/ising_small_2.R b/sim/ising_small_2.R deleted file mode 100644 index 0f611f7..0000000 --- a/sim/ising_small_2.R +++ /dev/null @@ -1,131 +0,0 @@ -library(tensorPredictors) -library(mvbernoulli) - -# seed = leaf node count of a full chess search tree of depth 6 from the start pos -# > position startpos -# > go perft 6 -set.seed(119060324, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -reps <- 100 # number of simulation replications -max.iter <- 1000 # maximum number of iterations for GMLM -n <- 100 # sample sizes `n` -N <- 2000 # validation set size -p <- c(2, 3) # preditor dimensions -q <- c(1, 1) # response dimensions -r <- length(p) -# parameter configuration -rho <- -0.55 -c1 <- 1 -c2 <- 1 - -# initial consistency checks -stopifnot(exprs = { - r == 2 - length(p) == r - all(q == 1) -}) - -### small helpers -# 270 deg matrix layout rotation (90 deg clockwise) -rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE] -# Auto-Regression Covariance Matrix -AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`)) -# Inverse of the AR matrix -AR.inv <- function(rho, dim) { - A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1)) - A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho - A / (1 - rho^2) -} -# projection matrix `P_A` as a projection onto the span of `A` -proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A))) - -### setup Ising parameters (to get reasonable parameters) -eta1 <- 0 -alphas <- Map(function(pj, qj) { - data <- linspace <- seq(-1, 1, len = pj) - for (k in (seq_len(qj - 1) + 1)) { - data <- c(data, linspace^k) - } - matrix(data, nrow = pj) -}, p, q) -Omegas <- Map(AR, dim = p, MoreArgs = list(rho)) - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) { - # generate response (sample axis is last axis) - y <- runif(n, -1, 1) # Y ~ U[-1, 1] - Fy <- array(sin(pi * y), dim = c(q, n)) - - # natural exponential family parameters - eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1)) - eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas)) - - # conditional Ising model parameters - theta_y <- matrix(rep(vech(eta_y2), n), ncol = n) - ltri <- which(lower.tri(eta_y2, diag = TRUE)) - diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri]) - theta_y[diagonal, ] <- eta_y1 - - # Sample X from conditional distribution - X <- apply(theta_y, 2, ising_sample, n = 1) - # convert (from compressed integer vector) to array data - attr(X, "p") <- prod(p) - X <- t(as.mvbmatrix(X)) - dim(X) <- c(p, n) - storage.mode(X) <- "double" - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - - -# logger to log iterative change in the estimation process of GMLM -# log <- data.frame() -log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood -B.true <- Reduce(`%x%`, rev(alphas)) -logger <- function(iter, eta1.est, alphas.est, Omegas.est) { - B.est <- Reduce(`%x%`, rev(alphas.est)) - - err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE)) - err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F")) - - if (iter > 0) { cat("\033[9A") } - cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f", - iter, - log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est), - dist.subspace(B.true, B.est, normalize = TRUE) - ), - "\033[2mMSE eta1\033[0m", - mean((eta1 - eta1.est)^2), - "\033[2msubspace distances of alphas\033[0m", - do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))), - "\033[2mFrob. norm of Omega differences\033[0m", - do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))), - sep = "\n " - ) -} - -### sample (training) data -c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - -# now call the GMLM fitting routine with performance profiling -tryCatch({ - system.time( # profvis::profvis( - fit.gmlm <- GMLM.default( - X, Fy, sample.axis = sample.axis, max.iter = max.iter, - family = "ising", logger = logger - ) - ) -}, error = function(ex) { - print(ex) - traceback() -}) diff --git a/sim/normal.R b/sim/normal.R deleted file mode 100644 index 7722243..0000000 --- a/sim/normal.R +++ /dev/null @@ -1,171 +0,0 @@ -library(tensorPredictors) - -set.seed(314159265, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -file.prefix <- "sim-normal" -reps <- 100 # number of simulation replications -max.iter <- 10000 # maximum number of iterations for GMLM -sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n` -N <- 2000 # validation set size -p <- c(2, 3, 5) # preditor dimensions -q <- c(1, 2, 3) # functions of y dimensions (response dimensions) -r <- length(p) - -# initial consistency checks -stopifnot(exprs = { - r == length(p) - r == length(q) - all(outer(p, sample.sizes, `<`)) -}) - -# projection matrix `P_A` as a projection onto the span of `A` -proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A))) - -# setup model parameters -alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices -Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter -eta1 <- 0 # intercept - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) { - # generate response (sample axis is last axis) - y <- sample.int(prod(q), n, replace = TRUE) # uniform samples - Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n)) - Fy <- Fy - c(rowMeans(Fy, dims = r)) - - # sample predictors as X | Y = y (sample axis is last axis) - Deltas <- Map(solve, Omegas) # normal covariances - mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean - X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - -### Logging Errors and Warnings -# Register a global warning and error handler for logging warnings/errors with -# current simulation repetition session informatin allowing to reproduce problems -exceptionLogger <- function(ex) { - # retrieve current simulation repetition information - rep.info <- get("rep.info", envir = .GlobalEnv) - # setup an error log file with the same name as `file` - log <- paste0(rep.info$file, ".log") - # Write (append) condition message with reproduction info to the log - cat("\n\n------------------------------------------------------------\n", - sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n", - rep.info$file, rep.info$n, rep.info$rep, - paste(rep.info$.Random.seed, collapse = ","), - as.character.error(ex) - ), sep = "", file = log, append = TRUE) - # add Traceback (see: `traceback()` which the following is addapted from) - n <- length(x <- .traceback(NULL, max.lines = -1L)) - if (n == 0L) { - cat("No traceback available", "\n", file = log, append = TRUE) - } else { - for (i in 1L:n) { - xi <- x[[i]] - label <- paste0(n - i + 1L, ": ") - m <- length(xi) - srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) { - srcfile <- attr(srcref, "srcfile") - paste0(" at ", basename(srcfile$filename), "#", srcref[1L]) - } - if (isTRUE(attr(xi, "truncated"))) { - xi <- c(xi, " ...") - m <- length(xi) - } - if (!is.null(srcloc)) { - xi[m] <- paste0(xi[m], srcloc) - } - if (m > 1) { - label <- c(label, rep(substr(" ", 1L, - nchar(label, type = "w")), m - 1L)) - } - cat(paste0(label, xi), sep = "\n", file = log, append = TRUE) - } - } -} -globalCallingHandlers(list( - message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger -)) - - -### for every sample size -start <- format(Sys.time(), "%Y%m%dT%H%M") -for (n in sample.sizes) { - ### write new simulation result file - file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv") - # CSV header, used to ensure correct value/column mapping when writing to file - header <- outer( - c("dist.subspace", "dist.projection", "error.pred"), # measures - c("gmlm", "pca", "hopca", "tsir"), # methods - paste, sep = ".") - cat(paste0(header, collapse = ","), "\n", sep = "", file = file) - - ### repeated simulation - for (rep in seq_len(reps)) { - ### Repetition session state info - # Stores specific session variables before starting the current - # simulation replication. This allows to log state information which - # can be used to replicate a specific simulation repetition in case of - # errors/warnings from the logs - rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed) - - ### sample (training) data - c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - - ### Fit data using different methods - fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis, max.iter = max.iter) - fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis) - fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q)) - fit.tsir <- TSIR(X, y, q, sample.axis = sample.axis) - - ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces - B.true <- Reduce(`%x%`, rev(alphas)) - B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas))) - B.hopca <- Reduce(`%x%`, rev(fit.hopca)) - B.pca <- fit.pca$rotation - B.tsir <- Reduce(`%x%`, rev(fit.tsir)) - - # Subspace Distances: Normalized `|| P_A - P_B ||_F` where - # `P_A = A (A' A)^-1/2 A'` and the normalization means that with - # respect to the dimensions of `A, B` the subspace distance is in the - # range `[0, 1]`. - dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE) - dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE) - dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE) - dist.subspace.tsir <- dist.subspace(B.true, B.tsir, normalize = TRUE) - - # Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`. - dist.projection.gmlm <- dist.projection(B.true, B.gmlm) - dist.projection.hopca <- dist.projection(B.true, B.hopca) - dist.projection.pca <- dist.projection(B.true, B.pca) - dist.projection.tsir <- dist.projection(B.true, B.tsir) - - ### Prediction Errors: (using new independend sample of size `N`) - c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas) - # centered model matrix of vectorized `X`s - vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE) - P.true <- proj(B.true) - error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2") - error.pred.hopca <- norm(P.true - proj(B.hopca), "2") - error.pred.pca <- norm(P.true - proj(B.pca), "2") - error.pred.tsir <- norm(P.true - proj(B.tsir), "2") - - # format estimation/prediction errors and write to file and console - line <- paste0(Map(get, header), collapse = ",") - cat(line, "\n", sep = "", file = file, append = TRUE) - # report progress - cat(sprintf("sample size: %d/%d - rep: %d/%d\n", - which(n == sample.sizes), length(sample.sizes), rep, reps)) - } -} diff --git a/sim/normal_2.R b/sim/normal_2.R deleted file mode 100644 index 36c7c1d..0000000 --- a/sim/normal_2.R +++ /dev/null @@ -1,96 +0,0 @@ -library(tensorPredictors) - -set.seed(271828183, "Mersenne-Twister", "Inversion", "Rejection") - -### simulation configuration -reps <- 100 # number of simulation replications -n <- 100 # sample sizes `n` -N <- 2000 # validation set size -p <- c(2, 3, 5) # preditor dimensions -q <- c(1, 2, 3) # functions of y dimensions (response dimensions) - -# initial consistency checks -stopifnot(exprs = { - length(p) == length(q) -}) - -# setup model parameters -alphas <- Map(matrix, Map(rnorm, p * q), p) # reduction matrices -Omegas <- Map(function(pj) 0.5^abs(outer(1:pj, 1:pj, `-`)), p) # mode scatter -eta1 <- 0 # intercept - -# data sampling routine -sample.data <- function(n, eta1, alphas, Omegas, sample.axis = length(alphas) + 1L) { - r <- length(alphas) # tensor order - - # generate response (sample axis is last axis) - y <- sample.int(prod(q), n, replace = TRUE) # uniform samples - Fy <- array(outer(seq_len(prod(q)), y, `==`), dim = c(q, n)) - Fy <- Fy - c(rowMeans(Fy, dims = r)) - - # sample predictors as X | Y = y (sample axis is last axis) - Deltas <- Map(solve, Omegas) # normal covariances - mu_y <- mlm(mlm(Fy, alphas) + c(eta1), Deltas) # conditional mean - X <- mu_y + rtensornorm(n, 0, Deltas, r + 1L) # responses X - - # permute axis to requested get the sample axis - if (sample.axis != r + 1L) { - perm <- integer(r + 1L) - perm[sample.axis] <- r + 1L - perm[-sample.axis] <- seq_len(r) - X <- aperm(X, perm) - Fy <- aperm(Fy, perm) - } - - list(X = X, Fy = Fy, y = y, sample.axis = sample.axis) -} - -### sample (training) data -c(X, Fy, y = y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas) - -### Fit data using GMLM with logging - -# logger to log iterative change in the estimation process of GMLM -# log <- data.frame() -log.likelihood <- tensorPredictors:::make.gmlm.family("normal")$log.likelihood -B.true <- Reduce(`%x%`, rev(alphas)) -logger <- function(iter, eta1.est, alphas.est, Omegas.est) { - B.est <- Reduce(`%x%`, rev(alphas.est)) - - err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE)) - err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F")) - - if (iter > 1) { cat("\033[9A") } - cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f", - iter, - log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est), - dist.subspace(B.true, B.est, normalize = TRUE) - ), - "\033[2mMSE eta1\033[0m", - mean((eta1 - eta1.est)^2), - "\033[2msubspace distances of alphas\033[0m", - do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))), - "\033[2mFrob. norm of Omega differences\033[0m", - do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))), - sep = "\n " - ) -} - -# now call the GMLM fitting routine with performance profiling -system.time( # profvis::profvis( - fit.gmlm <- GMLM.default( - X, Fy, sample.axis = sample.axis, max.iter = 10000L, logger = logger - ) -) - - -# Iter: loss - dist -# 7190: 50.583 - 0.057 -# MSE eta1 -# 0.02694658 -# subspace distances of alphas -# 0.043 0.035 0.034 -# Frob. norm of Omega differences -# 0.815 1.777 12.756 -# time user system elapsed -# 342.279 555.630 183.653 diff --git a/sim/plots.R b/sim/plots.R deleted file mode 100644 index d657356..0000000 --- a/sim/plots.R +++ /dev/null @@ -1,100 +0,0 @@ - -if (!endsWith(getwd(), "/sim")) { - setwd("sim") -} - -sim.plot <- function(file.prefix, date, to.file = FALSE) { - - # file.prefix <- "sim-ising-small" - # # file.prefix <- "sim-normal" - # date <- "20221012" # yyyymmdd, to match all "[0-9]{6}" - time <- "[0-9]{4}" # HHMM, to match all "[0-9]{4}" - colors <- c( - PCA = "#a11414", - HOPCA = "#2a62b6", - TSIR = "#9313b9", - GMLM = "#247407" - ) - line.width <- 1.75 - margins <- c(5.1, 4.1, 4.1, 0.1) - - sim <- Reduce(rbind, Map(function(path) { - df <- read.csv(path) - df$n <- as.integer(tail(head(strsplit(path, "[-.]")[[1]], -1), 1)) - df - }, list.files(".", pattern = paste0( - "^", file.prefix, "-", date, "T", time, "-[0-9]+[.]csv$", collapse = "" - )))) - - stats <- aggregate(. ~ n, sim, mean) - q75 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.75)) - q25 <- aggregate(. ~ n, sim, function(x) quantile(x, 0.25)) - - if (to.file) { - width <- 720 - png(paste(file.prefix, "-", date, ".png", sep = ""), - width = width, height = round((6 / 11) * width, -1), - pointsize = 12) - } - - layout(mat = matrix(c( - 1, 2, - 3, 3 - ), 2, 2, byrow = TRUE), - widths = c(1, 1), - heights = c(12, 1), respect = FALSE) - # layout.show(3) - - with(stats, { - par(mar = margins) - plot(range(n), 0:1, - type = "n", bty = "n", main = "Subspace Distance", - xlab = "Sample Size", ylab = "Error") - lines(n, dist.subspace.gmlm, col = colors["GMLM"], lwd = line.width) - lines(n, dist.subspace.hopca, col = colors["HOPCA"], lwd = line.width) - lines(n, dist.subspace.pca, col = colors["PCA"], lwd = line.width) - lines(n, dist.subspace.tsir, col = colors["TSIR"], lwd = line.width) - - xn <- c(q75$n, rev(q25$n)) - polygon(x = xn, y = c(q75$dist.subspace.gmlm, rev(q25$dist.subspace.gmlm)), - col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$dist.subspace.hopca, rev(q25$dist.subspace.hopca)), - col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$dist.subspace.pca, rev(q25$dist.subspace.pca)), - col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$dist.subspace.tsir, rev(q25$dist.subspace.tsir)), - col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA) - }) - - with(stats, { - par(mar = margins) - plot(range(n), 0:1, - type = "n", bty = "n", main = "RMSE (Prediction Error)", - xlab = "Sample Size", ylab = "Error") - xn <- c(q75$n, rev(q25$n)) - polygon(x = xn, y = c(q75$error.pred.gmlm, rev(q25$error.pred.gmlm)), - col = adjustcolor(colors["GMLM"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$error.pred.hopca, rev(q25$error.pred.hopca)), - col = adjustcolor(colors["HOPCA"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$error.pred.pca, rev(q25$error.pred.pca)), - col = adjustcolor(colors["PCA"], alpha.f = 0.3), border = NA) - polygon(x = xn, y = c(q75$error.pred.tsir, rev(q25$error.pred.tsir)), - col = adjustcolor(colors["TSIR"], alpha.f = 0.3), border = NA) - lines(n, error.pred.gmlm, col = colors["GMLM"], lwd = line.width) - lines(n, error.pred.hopca, col = colors["HOPCA"], lwd = line.width) - lines(n, error.pred.pca, col = colors["PCA"], lwd = line.width) - lines(n, error.pred.tsir, col = colors["TSIR"], lwd = line.width) - }) - - par(mar = rep(0, 4)) - plot(1:2, 1:2, type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "") - legend("center", legend = names(colors), col = colors, lwd = line.width, - lty = 1, bty = "n", horiz = TRUE) - - if (to.file) { - dev.off() - } -} - -sim.plot("sim-ising-small", "20221012", TRUE) -sim.plot("sim-normal", "20221012", TRUE) diff --git a/tensorPredictors/NAMESPACE b/tensorPredictors/NAMESPACE index b97d6ab..fe7a243 100644 --- a/tensorPredictors/NAMESPACE +++ b/tensorPredictors/NAMESPACE @@ -5,6 +5,11 @@ export("%x_1%") export("%x_2%") export("%x_3%") export("%x_4%") +export(.projBand) +export(.projPSD) +export(.projRank) +export(.projSymBand) +export(.projSymRank) export(CISE) export(D) export(D.pinv) @@ -17,6 +22,8 @@ export(ICU) export(K) export(K.perm) export(LSIR) +export(La.det) +export(La.solve) export(N) export(NAGD) export(PCA2d) @@ -27,18 +34,26 @@ export(S) export(TSIR) export(approx.kronecker) export(colKronecker) +export(decompose.kronecker) export(dist.kron.norm) export(dist.kron.tr) export(dist.projection) export(dist.subspace) export(exprs.all.equal) +export(gmlm_ising) +export(gmlm_tensor_normal) +export(icu_tensor_normal) +export(ising_m2) +export(ising_sample) export(kpir.approx) export(kpir.base) export(kpir.ls) export(kpir.mle) export(kpir.momentum) export(kpir.new) +export(kronperm) export(mat) +export(matProj) export(matpow) export(matrixImage) export(mcov) @@ -49,10 +64,19 @@ export(mtvk) export(num.deriv) export(num.deriv.function) export(num.deriv2) +export(pinv) +export(projDiag) +export(projStiefel) +export(projSym) export(reduce) +export(riccati) export(rowKronecker) export(rtensornorm) +export(slice.expr) +export(slice.select) +export(sylvester) export(tensor_predictor) +export(tsym) export(ttm) export(ttt) export(vech) diff --git a/tensorPredictors/R/GMLM.R b/tensorPredictors/R/GMLM.R index 060c398..247e4f1 100644 --- a/tensorPredictors/R/GMLM.R +++ b/tensorPredictors/R/GMLM.R @@ -36,12 +36,12 @@ GMLM.default <- function(X, Fy, sample.axis = 1L, # optimize likelihood using Nesterov Excelerated Gradient Descent params.fit <- NAGD( fun.loss = function(params) { - # scaled negative lig-likelihood + # scaled negative log-likelihood # eta1 alphas Omegas family$log.likelihood(X, Fy, params[[1]], params[[2]], params[[3]]) }, fun.grad = function(params) { - # gradient of the scaled negative lig-likelihood + # gradient of the scaled negative log-likelihood # eta1 alphas Omegas family$grad(X, Fy, params[[1]], params[[2]], params[[3]]) }, diff --git a/tensorPredictors/R/HOPCA.R b/tensorPredictors/R/HOPCA.R index e6b217d..510dae4 100644 --- a/tensorPredictors/R/HOPCA.R +++ b/tensorPredictors/R/HOPCA.R @@ -10,7 +10,7 @@ #' `i`th axis excluding the sample axis. #' #' @export -HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L) { +HOPCA <- function(X, npc = dim(X)[-sample.axis], sample.axis = 1L, use.C = FALSE) { # observation index numbers (all axis except the sample axis) modes <- seq_along(dim(X))[-sample.axis] diff --git a/tensorPredictors/R/HOPIR.R b/tensorPredictors/R/HOPIR.R index 5d21183..fed4894 100644 --- a/tensorPredictors/R/HOPIR.R +++ b/tensorPredictors/R/HOPIR.R @@ -76,8 +76,8 @@ HOPIR.ls <- function(X, Fy, alphas, sample.axis, algorithm, ..., logger) { list(alphas = alphas, Deltas = fun.Deltas(alphas)) } -#' HPOIR subroutine for the MLE estimation given proprocessed data and initial -#' alphas, Deltas paramters +#' HPOIR subroutine for the MLE estimation given preprocessed data and initial +#' alpha, Delta parameters #' #' @keywords internal HOPIR.mle <- function(X, Fy, alphas, Deltas, sample.axis, algorithm, ..., logger) { diff --git a/tensorPredictors/R/HOSVD.R b/tensorPredictors/R/HOSVD.R index 68392fa..0039a0e 100644 --- a/tensorPredictors/R/HOSVD.R +++ b/tensorPredictors/R/HOSVD.R @@ -7,7 +7,7 @@ #' #' @export HOSVD <- function(X, nu = NULL, eps = 1e-07) { - if (!missing(nu)) { + if (!is.null(nu)) { stopifnot(all(nu <= dim(X))) } @@ -21,3 +21,5 @@ HOSVD <- function(X, nu = NULL, eps = 1e-07) { list(C = C, Us = Us) } + +SVD <- function(A) .Call("C_svd", A) diff --git a/tensorPredictors/R/NAGD.R b/tensorPredictors/R/NAGD.R index b59d59f..a41cb99 100644 --- a/tensorPredictors/R/NAGD.R +++ b/tensorPredictors/R/NAGD.R @@ -60,7 +60,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL, stop("Initial loss is non-finite (", loss, ")") } # initialize "previous" iterate parameters - params.last <- params + prev.params <- params # Gradient Descent Loop line.search.tag <- FALSE # init line search state as "failure" @@ -73,9 +73,9 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL, } # Extrapolation form previous position (momentum) - # `params.moment <- (1 + moment) * params - moment * param.last` + # `params.moment <- (1 + moment) * params - moment * prev.params` moment <- (m[1] - 1) / m[2] - params.moment <- fun.lincomb(1 + moment, params, -moment, params.last) + params.moment <- fun.lincomb(1 + moment, params, -moment, prev.params) # Compute gradient at extrapolated position if (missing(more.params)) { @@ -114,7 +114,7 @@ NAGD <- function(fun.loss, fun.grad, params, more.params = NULL, } # keep track of previous parameters - params.last <- params + prev.params <- params # check line search outcome if (is.na(line.search.tag)) { diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R index 874bb56..fcb78a5 100644 --- a/tensorPredictors/R/TSIR.R +++ b/tensorPredictors/R/TSIR.R @@ -4,11 +4,21 @@ TSIR <- function(X, y, d, sample.axis = 1L, nr.slices = 10L, # default slices, ignored if y is a factor or integer max.iter = 50L, - eps = sqrt(.Machine$double.eps) + eps = sqrt(.Machine$double.eps), + slice.method = c("cut", "ecdf") # ignored if y is a factor or integer ) { if (!(is.factor(y) || is.integer(y))) { - y <- cut(y, nr.slices) + slice.method <- match.arg(slice.method) + if (slice.method == "ecdf") { + y <- cut(ecdf(y)(y), nr.slices) + } else { + y <- cut(y, nr.slices) + # ensure there are no empty slices + if (any(table(y) == 0)) { + y <- as.factor(as.integer(y)) + } + } } stopifnot(exprs = { diff --git a/tensorPredictors/R/approx_kronecker.R b/tensorPredictors/R/approx_kronecker.R index bc583ee..5cc20b2 100644 --- a/tensorPredictors/R/approx_kronecker.R +++ b/tensorPredictors/R/approx_kronecker.R @@ -1,4 +1,4 @@ -#' Approximates kronecker product decomposition. +#' Approximates Kronecker Product decomposition. #' #' Approximates the matrices `A` and `B` such that #' C = A %x% B @@ -21,7 +21,7 @@ #' 123 (2000) 85-100 (pp. 93-95) #' #' @export -approx.kronecker <- function(C, dimA, dimB) { +approx.kronecker <- function(C, dimA, dimB = dim(C) / dimA) { dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L]) R <- aperm(C, c(2L, 4L, 1L, 3L)) @@ -33,8 +33,115 @@ approx.kronecker <- function(C, dimA, dimB) { svdR <- svd(R, 1L, 1L) } - return(list( + list( A = array(sqrt(svdR$d[1]) * svdR$u, dimA), B = array(sqrt(svdR$d[1]) * svdR$v, dimB) + ) +} + +#' Kronecker Product Decomposition. +#' +#' Computes the components summation components `A_i`, `B_i` of a sum of +#' Kronecker products +#' C = sum_i A_i %x% B_i +#' with the minimal estimated number of summands. +#' +#' @param C desired kronecker product result. +#' @param dimA length 2 vector of dimensions of \code{A}. +#' @param dimB length 2 vector of dimensions of \code{B}. +#' @param tol tolerance of singular values of \code{C} to determin the number of +#' needed summands. +#' +#' @return list of lenghth with estimated number of summation components, each +#' entry consists of a list with named entries \code{"A"} and \code{"B"} of +#' dimensions \code{dimA} and \code{dimB}. +#' +#' @examples +#' As <- replicate(3, matrix(rnorm(2 * 7), 2), simplify = FALSE) +#' Bs <- replicate(3, matrix(rnorm(5 * 3), 5), simplify = FALSE) +#' C <- Reduce(`+`, Map(kronecker, As, Bs)) +#' +#' decomposition <- decompose.kronecker(C, c(2, 7)) +#' +#' reconstruction <- Reduce(`+`, Map(function(summand) { +#' kronecker(summand[[1]], summand[[2]]) +#' }, decomposition), array(0, dim(C))) +#' +#' stopifnot(all.equal(C, reconstruction)) +#' +#' @export +decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA, tol = 1e-7) { + # convert the equivalent outer product + dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L]) + C <- aperm(C, c(2L, 4L, 1L, 3L), resize = FALSE) + dim(C) <- c(prod(dimA), prod(dimB)) + + # Singular Valued Decomposition + svdC <- La.svd(C) + + # Sum of Kronecker Components + lapply(seq_len(sum(svdC$d > tol)), function(i) list( + A = matrix(svdC$d[i] * svdC$u[, i], dimA), + B = matrix(svdC$vt[i, ], dimB) )) } + +### Given that C is a Kronecker product this is a fast method but a bit +### unreliable in full generality. +# decompose.kronecker <- function(C, dimA, dimB = dim(C) / dimA) { +# dim(C) <- c(dimB[1L], dimA[1L], dimB[2L], dimA[2L]) +# R <- aperm(C, c(2L, 4L, 1L, 3L)) +# dim(R) <- c(prod(dimA), prod(dimB)) +# max.index <- which.max(abs(R)) +# row.index <- ((max.index - 1L) %% nrow(R)) + 1L +# col.index <- ((max.index - 1L) %/% nrow(R)) + 1L +# max.elem <- if (abs(R[max.index]) > .Machine$double.eps) R[max.index] else 1 +# list( +# A = array(R[, col.index], dimA), +# B = array(R[row.index, ] / max.elem, dimB) +# ) +# } + + +# kron <- function(A, B) { +# perm <- as.vector(t(matrix(seq_len(2 * length(dim(A))), ncol = 2)[, 2:1])) +# K <- aperm(outer(A, B), perm) +# dim(K) <- dim(A) * dim(B) +# K +# } + +# kronperm <- function(A) { +# # force A to have even number of dimensions +# dim(A) <- c(dim(A), rep(1L, length(dim(A)) %% 2L)) +# # compute axis permutation +# perm <- as.vector(t(matrix(seq_along(dim(A)), ncol = 2)[, 2:1])) +# # permute elements of A +# K <- aperm(A, perm, resize = FALSE) +# # collapse/set dimensions +# dim(K) <- head(dim(A), length(dim(A)) / 2) * tail(dim(A), length(dim(A)) / 2) +# K +# } + +# p <- c(2, 3, 5) +# q <- c(3, 4, 7) +# A <- array(rnorm(prod(p)), p) +# B <- array(rnorm(prod(q)), q) +# all.equal(kronperm(outer(A, B)), kronecker(A, B)) +# all.equal(kron(A, B), kronecker(A, B)) + + +# dA <- c(2, 3, 5) +# dB <- c(3, 4, 7) +# A <- array(rnorm(prod(dA)), dA) +# B <- array(rnorm(prod(dB)), dB) +# X <- outer(A, B) + +# r <- length(dim(X)) / 2 +# I <- t(do.call(expand.grid, Map(seq_len, head(dim(X), r) * tail(dim(X), r)))) +# K <- apply(rbind( +# (I - 1) %/% tail(dim(X), r) + 1, +# (I - 1) %% tail(dim(X), r) + 1 +# ), 2, function(i) X[sum(c(1, cumprod(head(dim(X), -1))) * (i - 1)) + 1]) +# dim(K) <- head(dim(X), r) * tail(dim(X), r) + +# all.equal(kronecker(A, B), K) diff --git a/tensorPredictors/R/dist_kron_norm.R b/tensorPredictors/R/dist_kron_norm.R index 46bf337..edecf4a 100644 --- a/tensorPredictors/R/dist_kron_norm.R +++ b/tensorPredictors/R/dist_kron_norm.R @@ -3,7 +3,7 @@ #' \|(A1 %x% ... %x% Ar - B1 %x% ... %x% Br\|_F #' #' This is equivalent to the expression -#' \code{norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F")} but faster. +#' \code{norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F")}, but faster. #' #' @examples #' A1 <- matrix(rnorm(5^2), 5) @@ -16,21 +16,21 @@ #' )) #' #' p <- c(3, 7, 5, 2) -#' A <- Map(function(pj) matrix(rnorm(pj^2), pj), p) -#' B <- Map(function(pj) matrix(rnorm(pj^2), pj), p) +#' As <- Map(function(pj) matrix(rnorm(pj^2), pj), p) +#' Bs <- Map(function(pj) matrix(rnorm(pj^2), pj), p) #' stopifnot(all.equal( -#' dist.kron.norm(A, B), -#' norm(Reduce(kronecker, A) - Reduce(kronecker, B), "F") +#' dist.kron.norm(As, Bs), +#' norm(Reduce(kronecker, As) - Reduce(kronecker, Bs), "F") #' )) #' #' @export -dist.kron.norm <- function(A, B, eps = .Machine$double.eps) { - if (is.list(A) && is.list(B)) { - norm2 <- prod(unlist(Map(function(x) sum(x^2), A))) - - 2 * prod(unlist(Map(function(a, b) sum(a * b), A, B))) + - prod(unlist(Map(function(x) sum(x^2), B))) - } else if (is.matrix(A) && is.matrix(B)) { - norm2 <- sum((A - B)^2) +dist.kron.norm <- function(As, Bs, eps = .Machine$double.eps) { + if (is.list(As) && is.list(Bs)) { + norm2 <- prod(unlist(Map(function(x) sum(x^2), As))) - + 2 * prod(unlist(Map(function(a, b) sum(a * b), As, Bs))) + + prod(unlist(Map(function(x) sum(x^2), Bs))) + } else if (is.matrix(As) && is.matrix(Bs)) { + norm2 <- sum((As - Bs)^2) } else { stop("Unexpected input") } diff --git a/tensorPredictors/R/dist_projection.R b/tensorPredictors/R/dist_projection.R index 3c66e50..6d21008 100644 --- a/tensorPredictors/R/dist_projection.R +++ b/tensorPredictors/R/dist_projection.R @@ -1,4 +1,4 @@ -#' Porjection Distance of two matrices +#' Projection Distance of two matrices #' #' Defined as sine of the maximum principal angle between the column spaces #' of the matrices diff --git a/tensorPredictors/R/gmlm_ising.R b/tensorPredictors/R/gmlm_ising.R new file mode 100644 index 0000000..c06c0d2 --- /dev/null +++ b/tensorPredictors/R/gmlm_ising.R @@ -0,0 +1,138 @@ +#' Specialized version of the GMLM for the Ising model (inverse Ising problem) +#' +#' @export +gmlm_ising <- function(X, F, sample.axis = length(dim(X)), + max.iter = 1000L, + eps = sqrt(.Machine$double.eps), + step.size = function(iter) 1e-2 * (1000 / (iter + 1000)), + zig.zag.threashold = 5L, + patience = 5L, + nr.slices = 10L, # only for univariate `F(y) = y` + slice.method = c("cut", "ecdf"), # only for univariate `F(y) = y` and `y` is a factor or integer + logger = function(...) { } +) { + # # Special case for univariate response `vec F(y) = y` + # # Due to high computational costs we use slicing + # if (length(F) == prod(dim(F))) { + # y <- as.vector(F) + # if (!(is.factor(y) || is.integer(y))) { + # slice.method <- match.arg(slice.method) + # if (slice.method == "ecdf") { + # y <- cut(ecdf(y)(y), nr.slices) + # } else { + # y <- cut(y, nr.slices) + # } + # } + # slices <- split(seq_len(sample.size), y, drop = TRUE) + # } else { + # slices <- seq_len(sample.size) + # } + + dimX <- head(dim(X), -1) + dimF <- head(dim(F), -1) + sample.axis <- length(dim(X)) + modes <- seq_len(length(dim(X)) - 1) + sample.size <- tail(dim(X), 1) + + betas <- Map(matrix, Map(rnorm, dimX * dimF), dimX) + Omegas <- Map(diag, dimX) + + grad2_betas <- Map(array, 0, Map(dim, betas)) + grad2_Omegas <- Map(array, 0, Map(dim, Omegas)) + + # Keep track of the last loss to accumulate loss difference sign changes + # indicating optimization instabilities as a sign to stop + last_loss <- Inf + accum_sign <- 1 + + # non improving iteration counter + non_improving <- 0L + + # technical access points to dynamicaly access a multi-dimensional array + # with its last index + `X[..., i]` <- slice.expr(X, sample.axis, index = i) + `F[..., i]` <- slice.expr(F, sample.axis, index = i, drop = FALSE) + # the next expression if accessing the precomputed `mlm(F, betas)` + `BF[..., i]` <- slice.expr(BF, sample.axis, nr.axis = sample.axis, drop = FALSE) # BF[..., i] + + + # Iterate till a break condition triggers or till max. nr. of iterations + for (iter in seq_len(max.iter)) { + + grad_betas <- Map(matrix, 0, dimX, dimF) + Omega <- Reduce(kronecker, rev(Omegas)) + + R2 <- array(0, dim = c(dimX, dimX)) + + # negative log-likelihood + loss <- 0 + + BF <- mlm(F, betas) + + for (i in seq_len(sample.size)) { + params_i <- Omega + diag(as.vector(eval(`BF[..., i]`))) + m2_i <- ising_m2(params_i) + + # accumulate loss + x_i <- as.vector(eval(`X[..., i]`)) + loss <- loss - (sum(x_i * (params_i %*% x_i)) + log(attr(m2_i, "prob_0"))) + + R2_i <- tcrossprod(x_i) - m2_i + R1_i <- diag(R2_i) + dim(R1_i) <- dimX + + for (j in modes) { + grad_betas[[j]] <- grad_betas[[j]] + + mcrossprod( + R1_i, mlm(eval(`F[..., i]`), betas[-j], modes[-j]), j, + dimB = ifelse(j != modes, dimX, dimF) + ) + } + R2 <- R2 + as.vector(R2_i) + } + + grad_Omegas <- Map(function(j) { + grad <- mlm(kronperm(R2), Map(as.vector, Omegas[-j]), modes[-j], transposed = TRUE) + dim(grad) <- dim(Omegas[[j]]) + grad + }, modes) + + + # update and accumulate alternating loss + accum_sign <- sign(last_loss - loss) - accum_sign + # check if accumulated alternating signs exceed stopping threshold + if (abs(accum_sign) > zig.zag.threashold) { break } + # increment non improving counter if thats the case + if (!(loss < last_loss)) { + non_improving <- non_improving + 1L + } else { + non_improving <- 0L + } + if (non_improving > patience) { break } + + # store current loss for the next iteration + last_loss <- loss + + # Accumulate root mean squared gradiends + grad2_betas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g), + grad2_betas, grad_betas) + grad2_Omegas <- Map(function(g2, g) 0.9 * g2 + 0.1 * (g * g), + grad2_Omegas, grad_Omegas) + + # logging (before parameter update) + logger(iter, loss, betas, Omegas, grad_betas, grad_Omegas) + + # gradualy decrease the step size + step <- if (is.function(step.size)) step.size(iter) else step.size + + # Update Parameters + betas <- Map(function(beta, grad, m2) { + beta + (step / (sqrt(m2) + eps)) * grad + }, betas, grad_betas, grad2_betas) + Omegas <- Map(function(Omega, grad, m2) { + Omega + (step / (sqrt(m2) + eps)) * grad + }, Omegas, grad_Omegas, grad2_Omegas) + } + + list(betas = betas, Omegas = Omegas) +} diff --git a/tensorPredictors/R/gmlm_tensor_normal.R b/tensorPredictors/R/gmlm_tensor_normal.R new file mode 100644 index 0000000..9d0e55c --- /dev/null +++ b/tensorPredictors/R/gmlm_tensor_normal.R @@ -0,0 +1,357 @@ + +# p <- 5 +# A <- matrix(rnorm(p^2), p) + +# mat.proj("TriDiag", p)(A) +# mat.proj("SymTriDiag", p)(A) + +# A +# (AA <- mat.proj("PSD", p)(A)) +# mat.proj("PSD", p)(AA) + + +# p <- 5 +# A <- matrix(rnorm(p^2), p) + +# projection <- function(T2) { +# P <- pinv(T2) %*% T2 +# function(A) { +# matrix(P %*% as.vector(A), nrow(A)) +# } +# } + + +# # All equal diagonal matrix, A = a * I_p +# T2 <- t(as.vector(diag(p))) +# proj <- projection(T2) + +# print.table( diag(mean(diag(A)), nrow(A), ncol(A)) , zero.print = ".") +# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".") +# print.table( proj(A) , zero.print = ".") + + +# # Diagonal matrix, A = diag(a_1, ..., a_p) +# T2 <- matrix(seq_len(p^2), p) +# T2 <- outer(diag(T2), as.vector(T2), `==`) +# storage.mode(T2) <- "double" +# proj <- projection(T2) + +# print.table( T2 , zero.print = ".") + +# print.table( diag(diag(A)) , zero.print = ".") +# print.table( proj(A) , zero.print = ".") + + +# # Tri-Diagonal Matrix +# T2 <- matrix(seq_len(p^2), p) +# T2 <- outer(T2[abs(row(A) - col(A)) <= 1], as.vector(T2), `==`) +# storage.mode(T2) <- "double" + +# triDiag.mask <- (abs(row(A) - col(A)) <= 1) +# storage.mode(triDiag.mask) <- "double" + +# print.table( T2 , zero.print = ".") +# print.table( triDiag.mask , zero.print = ".") + +# print.table( A * triDiag.mask , zero.print = ".") +# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".") + + + +# # All equal main and off diagonals +# T2 <- Reduce(rbind, Map(function(i) { +# as.double(row(A) == i + col(A)) +# }, (1 - p):(p - 1))) + +# print.table( T2 , zero.print = ".") + +# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".") + + +# # Symmetric all equal main and off diagonals +# T2 <- Reduce(rbind, Map(function(i) { +# as.double(abs(row(A) - col(A)) == i) +# }, 0:(p - 1))) + +# print.table( T2 , zero.print = ".") + +# print.table( matrix((pinv(T2) %*% T2) %*% as.vector(A), p) , zero.print = ".") + + +# # Symetric Matrix +# index <- matrix(seq_len(p^2), p) +# T2 <- Reduce(rbind, Map(function(i) { +# e_i <- index == i +# as.double(e_i | t(e_i)) +# }, index[lower.tri(index, diag = TRUE)])) +# proj <- projection(T2) + +# print.table( T2 , zero.print = ".") + +# print.table( 0.5 * (A + t(A)) , zero.print = ".") +# print.table( proj(A) , zero.print = ".") + +# proj(solve(A)) +# solve(proj(A)) + + + + +#' Specialized version of GMLM for the tensor normal model +#' +#' The underlying algorithm is an ``iterative (block) coordinate descent'' method +#' +#' @export +gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), + max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL, # TODO: proj.betas NOT USED!!! + cond.threshold = 25, eps = 1e-6 +) { + # rearrange `X`, `F` such that the last axis enumerates observations + if (!missing(sample.axis)) { + axis.perm <- c(seq_along(dim(X))[-sample.axis], sample.axis) + X <- aperm(X, axis.perm) + F <- aperm(F, axis.perm) + sample.axis <- length(dim(X)) + } + + # Get problem dimensions (observation dimensions) + dimX <- head(dim(X), -1) + dimF <- head(dim(F), -1) + modes <- seq_along(dimX) + + # Ensure the Omega and beta projections lists are lists + if (!is.list(proj.Omegas)) { + proj.Omegas <- rep(NULL, length(modes)) + } + if (!is.list(proj.betas)) { + proj.betas <- rep(NULL, length(modes)) + } + + + ### Phase 1: Computing initial values + meanX <- rowMeans(X, dims = length(dimX)) + meanF <- rowMeans(F, dims = length(dimF)) + + # center X and F + X <- X - as.vector(meanX) + F <- F - as.vector(meanF) + + # initialize Omega estimates as mode-wise, unconditional covariance estimates + Sigmas <- Map(diag, dimX) + Omegas <- Map(diag, dimX) + + # Per mode covariance directions + # Note: (the directions are transposed!) + dirsX <- Map(function(Sigma) { + SVD <- La.svd(Sigma, nu = 0) + sqrt(SVD$d) * SVD$vt + }, mcov(X, sample.axis, center = FALSE)) + dirsF <- Map(function(Sigma) { + SVD <- La.svd(Sigma, nu = 0) + sqrt(SVD$d) * SVD$vt + }, mcov(F, sample.axis, center = FALSE)) + + # initialization of betas ``covariance direction mappings`` + betas <- Map(function(dX, dF) { + s <- min(ncol(dX), nrow(dF)) + crossprod(dX[1:s, , drop = FALSE], dF[1:s, , drop = FALSE]) + }, dirsX, dirsF) + + # Residuals + R <- X - mlm(F, Map(`%*%`, Sigmas, betas)) + + # Initial value of the log-likelihood (scaled and constants dropped) + loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX) + + # invoke the logger + if (is.function(logger)) do.call(logger, list( + iter = 0L, betas = betas, Omegas = Omegas, + resid = R, loss = loss + )) + + + ### Phase 2: (Block) Coordinate Descent + for (iter in seq_len(max.iter)) { + + # update every beta (in random order) + for (j in sample.int(length(betas))) { + FxB_j <- mlm(F, betas[-j], modes[-j]) + FxSB_j <- mlm(FxB_j, Sigmas[-j], modes[-j]) + betas[[j]] <- Omegas[[j]] %*% t(solve(mcrossprod(FxSB_j, FxB_j, j), mcrossprod(FxB_j, X, j))) + # Project `betas` onto their manifold + if (is.function(proj_j <- proj.betas[[j]])) { + betas[[j]] <- proj_j(betas[[j]]) + } + } + + # Residuals + R <- X - mlm(F, Map(`%*%`, Sigmas, betas)) + + # Covariance Estimates (moment based, TODO: implement MLE estimate!) + Sigmas <- mcov(R, sample.axis, center = FALSE) + + # Computing `Omega_j`s, the j'th mode presition matrices, in conjunction + # with regularization of the j'th mode covariance estimate `Sigma_j` + for (j in seq_along(Sigmas)) { + # Compute min and max eigen values + min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values) + # The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold` + if (min_max[2] > cond.threshold * min_max[1]) { + Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]])) + } + # Compute (unconstraint but regularized) Omega_j as covariance inverse + Omegas[[j]] <- solve(Sigmas[[j]]) + # Project Omega_j to the Omega_j's manifold + if (is.function(proj_j <- proj.Omegas[[j]])) { + Omegas[[j]] <- proj_j(Omegas[[j]]) + # Reverse computation of `Sigma_j` as inverse of `Omega_j` + # Order of projecting Omega_j and then recomputing Sigma_j is importent + Sigmas[[j]] <- solve(Omegas[[j]]) + } + } + + # store last loss and compute new value + loss.last <- loss + loss <- mean(R * mlm(R, Omegas)) - sum(log(mapply(det, Omegas)) / dimX) + + # invoke the logger + if (is.function(logger)) do.call(logger, list( + iter = iter, betas = betas, Omegas = Omegas, resid = R, loss = loss + )) + + # check the break consition + if (abs(loss.last - loss) < eps * loss.last) { + break + } + } + + + list(eta1 = meanX, betas = betas, Omegas = Omegas) +} + + + +# #### DEBUGGING +# library(tensorPredictors) + +# # setup dimensions +# n <- 1e3 +# p <- c(5, 7, 3) +# q <- c(2, 5, 3) + +# max.iter <- 10L + +# # create "true" GLM parameters +# eta1 <- array(rnorm(prod(p)), dim = p) +# betas <- Map(matrix, Map(rnorm, p * q), p) +# Omegas <- Map(function(p_j) { +# solve(0.5^abs(outer(seq_len(p_j), seq_len(p_j), `-`))) +# }, p) +# true.params <- list(eta1 = eta1, betas = betas, Omegas = Omegas) + +# # compute tensor normal parameters from GMLM parameters +# Sigmas <- Map(solve, Omegas) +# mu <- mlm(eta1, Sigmas) + +# # sample some test data +# sample.axis <- length(p) + 1L +# F <- array(rnorm(n * prod(q)), dim = c(q, n)) +# X <- mlm(F, Map(`%*%`, Sigmas, betas)) + rtensornorm(n, mu, Sigmas, sample.axis) + +# # setup a logging callback +# format <- sprintf("iter: %%3d, %s, Omega: %%8.2f, resid: %%8.3f\n", +# paste0("beta.", seq_along(betas), ": %.3f", collapse = ", ") +# ) +# hist <- data.frame( +# iter = seq(0, max.iter), +# dist.B = numeric(max.iter + 1), +# dist.Omega = numeric(max.iter + 1), +# resid = numeric(max.iter + 1) +# ) +# logger <- function(iter, betas, Omegas, resid) { +# dist.B <- dist.kron.norm(true.params$betas, betas) +# dist.Omega <- dist.kron.norm(true.params$Omegas, Omegas) +# resid <- mean(resid^2) + +# hist[iter + 1, -1] <<- c(dist.B = dist.B, dist.Omega = dist.Omega, resid = resid) +# cat(do.call(sprintf, c( +# list(format, iter), +# Map(dist.subspace, betas, true.params$betas), +# dist.Omega, resid +# ))) +# } + +# # run the model +# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger) + +# # run model with Omegas in sub-manifolds of SPD matrices +# est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter, logger = logger, +# proj.Omegas = list( +# NULL, +# (function(nrow, ncol) { +# triDiag.mask <- (abs(.row(c(nrow, ncol)) - .col(c(nrow, ncol))) <= 1) +# storage.mode(triDiag.mask) <- "double" +# function(A) { A * triDiag.mask } +# })(p[2], p[2]), +# function(A) diag(diag(A)) +# )) + + + +# # # Profile the fitting routine without logging +# # profvis::profvis({ +# # est.params <- gmlm_tensor_normal(X, F, max.iter = max.iter) +# # }) + +# with(hist, { +# plot(range(iter), range(hist[, -1]), type = "n", log = "y") +# lines(iter, resid, col = "red") +# lines(iter, dist.B, col = "green") +# lines(iter, dist.Omega, col = "blue") +# legend("topright", legend = c("resid", "dist.B", "dist.Omega"), col = c("red", "green", "blue"), lty = 1) +# }) + + +# # par(mfrow = c(1, 2)) + +# # matrixImage(Reduce(kronecker, rev(est.params$Omegas))) +# # matrixImage(Reduce(kronecker, rev(true.params$Omegas))) +# # # matrixImage(Reduce(kronecker, rev(est.params$Omegas)) - Reduce(kronecker, rev(true.params$Omegas))) + +# # par(mfrow = c(1, 2)) + +# # matrixImage(Reduce(kronecker, rev(true.params$betas)), main = "True", col = hcl.colors(48, palette = "Blue-Red 3")) +# # matrixImage(Reduce(kronecker, rev(est.params$betas)), main = "Est", col = hcl.colors(48, palette = "Blue-Red 3")) + + +# # # unlist(Map(kappa, mcov(X))) +# # # unlist(Map(kappa, Omegas)) +# # # unlist(Map(kappa, Sigmas)) + + +# # # max(unlist(Map(function(C) max(eigen(C)$values), mcov(X)))) +# # # min(unlist(Map(function(C) min(eigen(C)$values), mcov(X)))) +# # # Map(function(C) eigen(C)$values, mcov(X)) + +# # # kappa(Reduce(kronecker, mcov(X))) + +# # kappa1.fun <- function(Omegas) Map(kappa, Omegas) +# # kappa2.fun <- function(Omegas) Map(kappa, Omegas, exact = TRUE) +# # eigen1.fun <- function(Omegas) { +# # Map(function(Omega) { +# # min_max <- range(eigen(Omega)$values) +# # min_max[2] / min_max[1] +# # }, Omegas) +# # } +# # eigen2.fun <- function(Omegas) { +# # Map(function(Omega) { +# # min_max <- range(eigen(Omega, TRUE, TRUE)$values) +# # min_max[2] / min_max[1] +# # }, Omegas) +# # } +# # microbenchmark::microbenchmark( +# # kappa1.fun(Omegas), +# # kappa2.fun(Omegas), +# # eigen1.fun(Omegas), +# # eigen2.fun(Omegas) +# # ) diff --git a/tensorPredictors/R/ising_m2.R b/tensorPredictors/R/ising_m2.R new file mode 100644 index 0000000..c572cb9 --- /dev/null +++ b/tensorPredictors/R/ising_m2.R @@ -0,0 +1,18 @@ +#' @export +ising_m2 <- function( + params, use_MC = NULL, nr_samples = 10000L, + warmup = 15L, nr_threads = 1L +) { + if (missing(use_MC)) { + use_MC <- if (is.matrix(params)) 19 < nrow(params) else 190 < length(params) + } + + m2 <- .Call("C_ising_m2", params, use_MC, nr_samples, warmup, nr_threads, + PACKAGE = "tensorPredictors" + ) + + M2 <- vech.pinv(m2) + attr(M2, "prob_0") <- attr(m2, "prob_0") + + M2 +} diff --git a/tensorPredictors/R/ising_sample.R b/tensorPredictors/R/ising_sample.R new file mode 100644 index 0000000..1c0bdb1 --- /dev/null +++ b/tensorPredictors/R/ising_sample.R @@ -0,0 +1,11 @@ +#' Sample from the Ising model +#' +#' @param nr number of samples +#' @param params Ising model parameters (numeric vector of size `p (p + 1) / 2` +#' for `p` dimensional random binary vectors) +#' @param warmup Monte-Carlo chain length before retreaving a sample +#' +#' @export +ising_sample <- function(nr, params, warmup = 15L) { + .Call("C_ising_sample", nr, params, warmup, PACKAGE = "tensorPredictors") +} diff --git a/tensorPredictors/R/make_gmlm_family.R b/tensorPredictors/R/make_gmlm_family.R index 4027680..109bd75 100644 --- a/tensorPredictors/R/make_gmlm_family.R +++ b/tensorPredictors/R/make_gmlm_family.R @@ -124,6 +124,30 @@ make.gmlm.family <- function(name) { ) } + # conditional covariance of the sufficient statistic + # Cov(t(X) | Y = y) + # Note: fy is a single observation! + cov.sufficient.stat <- function(fy, eta1, alphas, Omegas) { + Deltas <- Map(solve, Omegas) + E1 <- c(mlm(mlm(fy, alphas) + eta1, Deltas)) + + # H11 = Cov(vec X | Y = y) + H11 <- Reduce(`%x%`, rev(Deltas)) + + # H21 = Cov(vec X %x% vec X, vec X | Y = y) + H21 <- kronecker(E1, H11) + kronecker(H11, E1) + + # H22 = Cov(vec X %x% vec X | Y = y) + H22 <- local({ + e1e1 <- tcrossprod(E1, E1) + h22 <- outer(e1e1 + H11, H11) + outer(H11, e1e1) + aperm(h22, c(1, 3, 2, 4)) + aperm(h22, c(1, 3, 4, 2)) + }) + + # Combine into single covariance matrix + cbind(rbind(H11, H21), rbind(t(H21), mat(H22, 1:2))) + } + # mean conditional Fisher Information fisher.info <- function(Fy, eta1, alphas, Omegas) { # retrieve dimensions diff --git a/tensorPredictors/R/matricize.R b/tensorPredictors/R/matricize.R index 1be504b..da5d7c8 100644 --- a/tensorPredictors/R/matricize.R +++ b/tensorPredictors/R/matricize.R @@ -9,6 +9,15 @@ #' or tensor of dimensions \code{dims} iff \code{inv} is true. #' #' @examples +#' stopifnot(all.equal( +#' mat(1:12, 2, dims = c(2, 3, 2)), +#' matrix(c( +#' 1, 2, 7, 8, +#' 3, 4, 9, 10, +#' 5, 6, 11, 12 +#' ), 3, 4, byrow = TRUE) +#' )) +#' #' A <- array(rnorm(2 * 3 * 5), dim = c(2, 3, 5)) #' stopifnot(exprs = { #' all.equal(A, mat(mat(A, 1), 1, dim(A), TRUE)) @@ -22,15 +31,6 @@ #' all.equal(t(mat(A, 3)), mat(A, c(1, 2))) #' }) #' -#' stopifnot(all.equal( -#' mat(1:12, 2, dims = c(2, 3, 2)), -#' matrix(c( -#' 1, 2, 7, 8, -#' 3, 4, 9, 10, -#' 5, 6, 11, 12 -#' ), 3, 4, byrow = TRUE) -#' )) -#' #' @export mat <- function(T, modes, dims = dim(T), inv = FALSE) { modes <- as.integer(modes) diff --git a/tensorPredictors/R/matrixImage.R b/tensorPredictors/R/matrixImage.R index aeb3d9d..1f53bc0 100644 --- a/tensorPredictors/R/matrixImage.R +++ b/tensorPredictors/R/matrixImage.R @@ -7,6 +7,8 @@ #' @param sub sub-title of the plot #' @param interpolate a logical vector (or scalar) indicating whether to apply #' linear interpolation to the image when drawing. +#' @param new.plot Recreating the plot area (clearing the plot device). can be +#' used to update a plot but _not_ recreate it. Leads to smoother updating. #' @param ... further arguments passed to \code{\link{rasterImage}} #' #' @examples @@ -16,13 +18,15 @@ #' @export matrixImage <- function(A, add.values = FALSE, main = NULL, sub = NULL, interpolate = FALSE, ..., zlim = NA, - axes = TRUE, asp = 1, col = hcl.colors(24, "YlOrRd", rev = FALSE), - digits = getOption("digits") + axes = TRUE, asp = 1, col = hcl.colors(24, "Blue-Red 3", rev = FALSE), + digits = getOption("digits"), new.plot = TRUE ) { # plot raster image - plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black", - xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub, - asp = asp) + if (new.plot) { + plot(c(0, ncol(A)), c(0, nrow(A)), type = "n", bty = "n", col = "black", + xlab = "", ylab = "", xaxt = "n", yaxt = "n", main = main, sub = sub, + asp = asp) + } # Scale values of `A` to [0, 1] with min mapped to 1 and max to 0. if (missing(zlim)) { @@ -41,7 +45,7 @@ matrixImage <- function(A, add.values = FALSE, # X/Y axes index (matches coordinates to matrix indices) x <- seq(1, ncol(A), by = 1) y <- seq(1, nrow(A)) - if (axes) { + if (axes && new.plot) { axis(1, at = x - 0.5, labels = x, lwd = 0, lwd.ticks = 1) axis(2, at = y - 0.5, labels = rev(y), lwd = 0, lwd.ticks = 1, las = 1) } diff --git a/tensorPredictors/R/mcov.R b/tensorPredictors/R/mcov.R index 3e37222..e278526 100644 --- a/tensorPredictors/R/mcov.R +++ b/tensorPredictors/R/mcov.R @@ -12,9 +12,13 @@ #' #' @param X multi-dimensional array #' @param sample.axis observation axis index +#' @param center logical, if `TRUE` (the default) compute the centered second +#' moment, that is, the mode-wise covariances. Otherwise, the raw second moment +#' of each mode is computed. This is specifically usefull in case `X` is +#' already centered. #' #' @export -mcov <- function(X, sample.axis = 1L) { +mcov <- function(X, sample.axis = length(dim(X)), center = TRUE) { # observation modes (axis indices) modes <- seq_along(dim(X))[-sample.axis] # observation dimensions @@ -26,16 +30,18 @@ mcov <- function(X, sample.axis = 1L) { if (sample.axis != r + 1L) { X <- aperm(X, c(modes, sample.axis)) } - # centering: Z = X - E[X] - Z <- X - c(rowMeans(X, dims = r)) + # centering: X <- X - E[X] + if (center) { + X <- X - c(rowMeans(X, dims = r)) + } # estimes (unscaled) covariances for each mode - Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(Z)) + Sigmas <- .mapply(mcrossprod, list(mode = seq_len(r)), MoreArgs = list(X)) # scale by per mode "sample" size Sigmas <- .mapply(`*`, list(Sigmas, p / prod(dim(X))), NULL) # estimate trace of Kronecker product of covariances - tr.est <- prod(p) * mean(Z^2) + tr.est <- prod(p) * mean(X^2) # as well as the current trace of the unscaled covariances tr.Sigmas <- prod(unlist(.mapply(function(S) sum(diag(S)), list(Sigmas), NULL))) diff --git a/tensorPredictors/R/mcrossprod.R b/tensorPredictors/R/mcrossprod.R index 67282de..2f36622 100644 --- a/tensorPredictors/R/mcrossprod.R +++ b/tensorPredictors/R/mcrossprod.R @@ -44,16 +44,20 @@ #' )) #' #' @export -mcrossprod <- function(A, B, mode) { +mcrossprod <- function(A, B, mode, dimA = dim(A), dimB = dim(B)) { storage.mode(A) <- "double" - if (is.null(dim(A))) { + if (!missing(dimA)) { + dim(A) <- dimA + } else if (is.null(dim(A))) { dim(A) <- length(A) } if (missing(B)) { .Call("C_mcrossprod_sym", A, as.integer(mode)) } else { storage.mode(B) <- "double" - if (is.null(dim(B))) { + if (!missing(dimB)) { + dim(B) <- dimB + } else if (is.null(dim(B))) { dim(B) <- length(B) } .Call("C_mcrossprod", A, B, as.integer(mode)) diff --git a/tensorPredictors/R/mlm.R b/tensorPredictors/R/mlm.R index 1f8cee4..71dcf13 100644 --- a/tensorPredictors/R/mlm.R +++ b/tensorPredictors/R/mlm.R @@ -75,23 +75,63 @@ #' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4} #' all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As))) #' +#' # Equivalent to +#' mlm_reference <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) { +#' # Collect all matrices in `B` +#' Bs <- if (is.matrix(Bs)) list(Bs) else Bs +#' +#' # replicate transposition if of length one only +#' transposed <- if (length(transposed) == 1) { +#' rep(as.logical(transposed), length(Bs)) +#' } else { +#' as.logical(transposed) +#' } +#' +#' # iteratively apply Tensor Times Matrix multiplication over modes +#' for (i in seq_along(modes)) { +#' A <- ttm(A, Bs[[i]], modes[i], transposed[i]) +#' } +#' +#' # return result tensor +#' A +#' } +#' #' @export mlm <- function(A, Bs, modes = seq_along(Bs), transposed = FALSE) { # Collect all matrices in `B` - Bs <- if (is.matrix(Bs)) list(Bs) else Bs + Bs <- if (!is.list(Bs)) list(Bs) else Bs + # ensure all `B`s are matrices + Bs <- Map(as.matrix, Bs) # replicate transposition if of length one only transposed <- if (length(transposed) == 1) { rep(as.logical(transposed), length(Bs)) - } else { + } else if (length(transposed) == length(modes)) { as.logical(transposed) + } else { + stop("Dim missmatch of param. `transposed`") } - # iteratively apply Tensor Times Matrix multiplication over modes - for (i in seq_along(modes)) { - A <- ttm(A, Bs[[i]], modes[i], transposed[i]) - } - - # return result tensor - A + .Call("C_mlm", A, Bs, as.integer(modes), transposed, PACKAGE = "tensorPredictors") } + + +# # general usage +# dimA <- c(3, 17, 19, 2) +# dimC <- c(7, 11, 13, 5) +# A <- array(rnorm(prod(dimA)), dim = dimA) +# trans <- c(TRUE, FALSE, TRUE, FALSE) +# Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), ifelse(trans, dimA, dimC), ifelse(trans, dimC, dimA)) + +# C <- mlm(A, Bs, transposed = trans) +# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]) + +# microbenchmark::microbenchmark( +# mlm(A, Bs, transposed = trans), +# mlm_reference(A, Bs, transposed = trans) +# ) + +# microbenchmark::microbenchmark( +# mlm(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]), +# mlm_reference(A, Bs[c(3, 2)], modes = c(3, 2), transposed = trans[c(3, 2)]) +# ) diff --git a/tensorPredictors/R/patternMatrices.R b/tensorPredictors/R/patternMatrices.R index 69ca1e1..925b037 100644 --- a/tensorPredictors/R/patternMatrices.R +++ b/tensorPredictors/R/patternMatrices.R @@ -1,14 +1,14 @@ #' Duplication Matrix #' -#' Matrix such that `vec(A) = D vech(A)` for `A` symmetric +#' Matrix `D` such that `vec(A) = D vech(A)` for `A` symmetric #' @examples #' p <- 8 #' A <- matrix(rnorm(p^2), p, p) #' A <- A + t(A) -#' stopifnot(all.equal(c(D(nrow(A)) %*% vech(A)), c(A))) +#' stopifnot(all.equal(c(Dup(nrow(A)) %*% vech(A)), c(A))) #' #' @export -D <- function(p) { +Dup <- function(p) { # setup `vec` and `vech` element indices (zero indexed) vec <- matrix(NA_integer_, p, p) vec[lower.tri(vec, diag = TRUE)] <- seq_len(p * (p + 1) / 2) @@ -27,10 +27,10 @@ D <- function(p) { #' #' @examples #' p <- 5 -#' stopifnot(all.equal(D(p) %*% D.pinv(p), N(p))) +#' stopifnot(all.equal(Dup(p) %*% Dup.pinv(p), N(p))) #' #' @export -D.pinv <- function(p) { +Dup.pinv <- function(p) { Dp <- D(p) solve(crossprod(Dp), t(Dp)) } @@ -119,7 +119,7 @@ K <- function(dim, mode) { #' #' @examples #' p <- 7 -#' stopifnot(all.equal(N(p), D(p) %*% D.pinv(p))) +#' stopifnot(all.equal(N(p), Dup(p) %*% Dup.pinv(p))) #' #' @export N <- function(p) { diff --git a/tensorPredictors/R/ttm.R b/tensorPredictors/R/ttm.R index a2c570d..7b85d6b 100644 --- a/tensorPredictors/R/ttm.R +++ b/tensorPredictors/R/ttm.R @@ -11,6 +11,34 @@ #' \code{mode} dimension equal to \code{nrow(M)} or \code{ncol(M)} if #' \code{transposed} is true. #' +#' @examples +#' for (mode in 1:4) { +#' dimA <- sample.int(10, 4, replace = TRUE) +#' A <- array(rnorm(prod(dimA)), dim = dimA) +#' nrowB <- sample.int(10, 1) +#' B <- matrix(rnorm(nrowB * dimA[mode]), nrowB) +#' +#' C <- ttm(A, B, mode) +#' +#' dimC <- ifelse(seq_along(dims) != mode, dimA, nrowB) +#' C.ref <- mat(B %*% mat(A, mode), mode, dims = dimC, inv = TRUE) +#' +#' stopifnot(all.equal(C, C.ref)) +#' } +#' +#' for (mode in 1:4) { +#' dimA <- sample.int(10, 4, replace = TRUE) +#' A <- array(rnorm(prod(dimA)), dim = dimA) +#' ncolB <- sample.int(10, 1) +#' B <- matrix(rnorm(dimA[mode] * ncolB), dimA[mode]) +#' +#' C <- ttm(A, B, mode, transposed = TRUE) +#' +#' C.ref <- ttm(A, t(B), mode) +#' +#' stopifnot(all.equal(C, C.ref)) +#' } +#' #' @export ttm <- function(T, M, mode = length(dim(T)), transposed = FALSE) { storage.mode(T) <- storage.mode(M) <- "double" diff --git a/tensorPredictors/R/vech.R b/tensorPredictors/R/vech.R index 7899944..37680e6 100644 --- a/tensorPredictors/R/vech.R +++ b/tensorPredictors/R/vech.R @@ -31,3 +31,52 @@ vech.pinv <- function(a) { # de-vectorized matrix matrix(a[vech.pinv.index(p)], p, p) } + + +# vech <- function(A) { +# stopifnot(all(nrow(A) == dim(A))) + +# axis <- seq_len(nrow(A)) +# grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1))) + +# A[rowSums(grid - 1) < nrow(A)] +# } + +# p <- 4 +# X <- matrix(rnorm(p^2), p) + +# vech(outer(X, X)) + +# vech(outer(c(X), c(X))) + +# sort(unique(c(sym(outer(X, X))))) +# sort(vech(sym(outer(X, X)))) + +# # (I <- matrix(c( +# # 1, 1, 1, +# # 2, 1, 1, +# # 3, 1, 1, +# # 2, 2, 1, +# # 3, 2, 1, +# # 3, 3, 1, +# # 2, 1, 2, +# # 3, 1, 2, +# # 3, 2, 2, +# # 3, 1, 3 +# # ), ncol = 3, byrow = TRUE)) + +# # ((I - 1) %*% nrow(A)^(seq_along(dim(A)) - 1) + 1) + +# # p <- 3 +# # ord <- 4 +# # A <- array(seq_len(p^ord), rep(p, ord)) + +# # axis <- seq_len(nrow(A)) +# # grid <- expand.grid(c(list(rev(axis)), rep(list(axis), length(dim(A)) - 1))) +# # array(rowSums(grid - 1), dim(A)) + +# # A[rowSums(grid - 1) < nrow(A)] + + + +# # apply(indices, 1, function(i) do.call(`[`, c(list(A), as.list(i)))) diff --git a/tensorPredictors/src/Makevars b/tensorPredictors/src/Makevars index 1971f9a..f340afa 100644 --- a/tensorPredictors/src/Makevars +++ b/tensorPredictors/src/Makevars @@ -1,2 +1,3 @@ -PKG_LIBS = $(BLAS_LIBS) $(FLIBS) +PKG_LIBS = $(BLAS_LIBS) $(LAPACK_LIBS) $(FLIBS) +# PKG_CFLAGS = -pg diff --git a/tensorPredictors/src/R_api.h b/tensorPredictors/src/R_api.h new file mode 100644 index 0000000..7896f4b --- /dev/null +++ b/tensorPredictors/src/R_api.h @@ -0,0 +1,138 @@ +#ifndef INCLUDE_GUARD_R_API_H +#define INCLUDE_GUARD_R_API_H + +// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string +// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings +#define USE_FC_LEN_T +// Disables remapping of R API functions from `Rf_` or `R_` +#define R_NO_REMAP + +#include // uint32_t, uint64_t, ... + +#include +#include +#include +#include + +#ifndef FCONE +#define FCONE +#endif + +// NULL pointer (not defined memory, array, vector, ...) +#ifndef NULL +#define NULL ((void*)0) +#endif + +// Truth values +#ifndef FALSE +#define FALSE 0 +#endif +#ifndef TRUE +#define TRUE 1 +#endif + +// Convenience convertion function similar to `asInteger`, `asReal`, ... +#define NA_UNSIGNED (-((unsigned long)1)) +static inline unsigned long asUnsigned(SEXP _val) { + int val = Rf_asInteger(_val); + if (val == NA_INTEGER || val < 0) { + return NA_UNSIGNED; + } + return (unsigned long)val; +} + + +// Remap BLAS and LAPACK bindings (being consistent with my own interface and I +// don't like to pass scalars by reference (memory address)) +/** y <- a x + y */ +static inline void axpy( + const int dim, + const double a, + const double* x, const int incx, + double* y, const int incy +) { + F77_CALL(daxpy)(&dim, &a, x, &incx, y, &incy); +} + +/** Scale a 1d array `x <- a x` */ +static inline void scale( + const int dim, + const double a, + double *x, const int incx +) { + F77_CALL(dscal)(&dim, &a, x, &incx); +} + +/** Dot product */ +static inline double dot( + const int dim, + const double* x, const int incx, + const double* y, const int incy +) { + return F77_CALL(ddot)(&dim, x, &incx, y, &incy); +} + +/** 1d array linear combination `z <- a x + b y` */ +// TODO: optimize! (iff needed?, to optimize of at all?) +static inline void lincomb( + const int dim, + const double a, const double* x, const int incx, + const double b, const double* y, const int incy, + double* z, const int incz +) { + for (int i = 0; i < dim; ++i) { + z[i * incz] = a * x[i * incx] + b * y[i * incy]; + } +} + + +/** + * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family + * + * With parameterized seed/state this custom PRGN can be used in a thread save! + * + * For single threaded operations the PRNG provided by `R` are prefered. But they + * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded + * application. + * + * See TODO: ...https://en.wikipedia.org/wiki/Xorshift + * SchachHoernchen + */ +static inline uint64_t rot64(uint64_t val, int shift) { + return (val << shift) | (val >> (64 - shift)); +} + +// (internal) PRGN state/seed type +typedef uint64_t rng_seed_t[4]; + +// Hookup the PRNG via its seed to R's random number generation utilities +static inline void init_seed(rng_seed_t seed) { + GetRNGstate(); + for (size_t i = 0; i < 4; ++i) { + seed[i] = 0; + for (size_t j = 0; j < 64; ++j) { + seed[i] |= ((uint64_t)(unif_rand() < 0.5)) << j; + } + } + PutRNGstate(); +} + +// PRNG of the Xorshift family +// The least significant 32 bits are not reliable, use most significant 32 bits +static inline uint64_t rand_u64(rng_seed_t seed) { + uint64_t e = seed[0] - rot64(seed[1], 7); + seed[0] = seed[1] ^ rot64(seed[1], 13); + seed[1] = seed[2] + rot64(seed[3], 37); + seed[2] = seed[3] + e; + seed[3] = e + seed[0]; + return seed[3]; +} + +// With external supplied seed, every thread can have its own seed and as such +// we can use this as a thread save alternative to R's `unif_rand()`. +static inline double unif_rand_thrd(rng_seed_t seed) { + return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1); +} + + +#endif /* INCLUDE_GUARD_R_API_H */ diff --git a/tensorPredictors/src/bit_utils.h b/tensorPredictors/src/bit_utils.h new file mode 100644 index 0000000..35f7056 --- /dev/null +++ b/tensorPredictors/src/bit_utils.h @@ -0,0 +1,185 @@ +#ifndef INCLUDE_GUARD_BIT_UTILS_H +#define INCLUDE_GUARD_BIT_UTILS_H + +#include // uint32_t, uint64_t + +#if (defined(__GNUC__) && defined(__BMI2__)) + #include + #include // _pdep_u32 +#endif + +#ifdef _MSC_VER + #include // _BitScanReverse +#endif + +/** + * Computes the parity of a word (0 for even bit count and 1 otherwise) + */ +#ifdef __GNUC__ +static inline int bitParity32(uint32_t x) { return __builtin_parity(x); } +static inline int bitParity64(uint64_t x) { return __builtin_parityll(x); } +#else +static inline int bitParity32(uint32_t x) { + int p = (x != 0); + while (x &= x - 1) { p = !p; } + return p; +} +static inline int bitParity64(uint64_t x) { + int p = (x != 0); + while (x &= x - 1) { p = !p; } + return p; +} +#endif + +/** + * Counts the number of set bits (`1`s in binary) in the number `x` + */ +#ifdef __GNUC__ /* POPulation COUNT */ +static inline int bitCount32(uint32_t x) { return __builtin_popcount(x); } +static inline int bitCount64(uint64_t x) { return __builtin_popcountll(x); } +#else +static inline int bitCount32(uint32_t x) { + int count = 0; // counts set bits + for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x + return count; +} +static inline int bitCount64(uint64_t x) { + int count = 0; // counts set bits + for (; x; count++) { x &= x - 1; } // increment count until there are no bits set in x + return count; +} +#endif + +/** + * Gets the index of the LSB (least significant bit) + * + * @condition `x != 0`, for `x == 0` undefined behaviour + */ +#ifdef __GNUC__ +static inline int bitScanLS32(uint32_t x) { return __builtin_ctz(x); } // Count Trailing Zeros +static inline int bitScanLS64(uint64_t x) { return __builtin_ctzll(x); } +#elif _MSC_VER +static inline int bitScanLS32(uint32_t x) { + unsigned long bsr; + _BitScanReverse(&bsr, x); + return 31 - bsr; +} +static inline int bitScanLS64(uint64_t x) { + unsigned long bsr; + _BitScanReverse64(&bsr, x); + return 63 - bsr; +} +#else +static inline int bitScanLS32(uint32_t x) { + int ctz = 0; // result storing the Count of Trailing Zeros + bool empty; // boolean variable storing if a bit has not found (search area is empty) + + // logarithmic search for LSB bit index (-1) + ctz += (empty = !(x & (uint32_t)(65535))) << 4; + x >>= 16 * empty; + ctz += (empty = !(x & (uint32_t)( 255))) << 3; + x >>= 8 * empty; + ctz += (empty = !(x & (uint32_t)( 15))) << 2; + x >>= 4 * empty; + ctz += (empty = !(x & (uint32_t)( 3))) << 1; + x >>= 2 * empty; + ctz += (empty = !(x & (uint32_t)( 1))); + + return ctz; +} +static inline int bitScanLS64(uint64_t x) { + int ctz = 0; + bool empty; + + // logarithmic search for LSB bit index (-1) + ctz += (empty = !(x & (uint64_t)(4294967295))) << 5; + x >>= 32 * empty; + ctz += (empty = !(x & (uint64_t)( 65535))) << 4; + x >>= 16 * empty; + ctz += (empty = !(x & (uint64_t)( 255))) << 3; + x >>= 8 * empty; + ctz += (empty = !(x & (uint64_t)( 15))) << 2; + x >>= 4 * empty; + ctz += (empty = !(x & (uint64_t)( 3))) << 1; + x >>= 2 * empty; + ctz += (empty = !(x & (uint64_t)( 1))); + + return ctz; +} +#endif + +/** + * Parallel DEPosit (aka PDEP) + * + * Writes the `val` bits into the positions of the set bits in `mask`. + * + * Example: + * val: **** **** **** 1.1. + * mask: 1... 1... 1... 1... + * res: 1... .... 1... .... + */ +#if (defined(__GNUC__) && defined(__BMI2__)) +static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) { + return _pdep_u32(val, mask); +} +static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) { + return _pdep_u64(val, mask); +} +#else +static inline uint32_t bitDeposit32(uint32_t val, uint32_t mask) { + uint32_t res = 0; + for (uint32_t pos = 1; mask; pos <<= 1) { + if (val & pos) { + res |= mask & -mask; + } + mask &= mask - 1; + } + return res; +} +static inline uint64_t bitDeposit64(uint64_t val, uint64_t mask) { + uint64_t res = 0; + for (uint64_t pos = 1; mask; pos <<= 1) { + if (val & pos) { + res |= mask & -mask; + } + mask &= mask - 1; + } + return res; +} +#endif + +/** + * Gets the next lexicographically ordered permutation of an n-bit word. + * + * Let `val` be a bit-word with `n` bits set, then this procedire computes a + * `n` bit word wich is the next element in the lexicographically ordered + * sequence of `n` bit words. For example + * + * val -> bitNextPerm(val) + * 00010011 -> 00010101 + * 00010101 -> 00010110 + * 00010110 -> 00011001 + * 00011001 -> 00011010 + * 00011010 -> 00011100 + * 00011100 -> 00100011 + * + * @condition `x != 0`, for `x == 0` undefined behaviour due to `bitScanLS` + * + * see: https://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation + */ +static inline uint32_t bitNextPerm32(uint32_t val) { + // Sets all least significant 0-bits of val to 1 + uint32_t t = val | (val - 1); + // Next set to 1 the most significant bit to change, + // set to 0 the least significant ones, and add the necessary 1 bits. + return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS32(val) + 1)); +} +static inline uint64_t bitNextPerm64(uint64_t val) { + // Sets all least significant 0-bits of val to 1 + uint64_t t = val | (val - 1); + // Next set to 1 the most significant bit to change, + // set to 0 the least significant ones, and add the necessary 1 bits. + return (t + 1) | (((~t & -~t) - 1) >> (bitScanLS64(val) + 1)); +} + +#endif /* BIT_UTILS_INCLUDE_GUARD_H */ diff --git a/tensorPredictors/src/init.c b/tensorPredictors/src/init.c index 54fa54e..59f794f 100644 --- a/tensorPredictors/src/init.c +++ b/tensorPredictors/src/init.c @@ -7,27 +7,96 @@ // ); /* Tensor Times Matrix a.k.a. Mode Product */ -extern SEXP ttm(SEXP A, SEXP X, SEXP mode, SEXP op); +extern SEXP R_ttm(SEXP A, SEXP X, SEXP mode, SEXP op); + +/* Multi Linear Multiplication (iterated mode products) */ +extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP modes, SEXP ops); /* Matrix Times Vectorized Kronecker product `A vec(B_1 %x% ... %x% B_r)` */ extern SEXP mtvk(SEXP A, SEXP Bs); /* Tensor Mode Crossproduct `A_(m) B_(m)^T` */ -extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode); +extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP mode); /* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */ -extern SEXP mcrossprod_sym(SEXP A, SEXP mode); +extern SEXP R_mcrossprod_sym(SEXP A, SEXP mode); + +// /* Higher Order PCA */ +// extern SEXP hopca(SEXP X); + +/* Singular Value Decomposition */ +extern SEXP R_svd(SEXP A); + +// /* Iterative Cyclic Updating for the Tensor Normal Distribution */ +// extern SEXP R_icu_tensor_normal(SEXP X, SEXP Fy, SEXP max_iter); + +// /* Generalized tensor normal using NAGD */ +// extern SEXP R_gmlm_tensor_normal( +// SEXP X, SEXP Fy, +// SEXP eta_bar, SEXP alphas, SEXP Omegas, +// SEXP max_iter, SEXP max_line_iter +// ); + +/* Solve linear equation system A X = B */ +extern SEXP R_solve(SEXP A, SEXP B); + +/* Determinant of a matrix */ +extern SEXP R_det(SEXP A); + +// /* Unscaled PMF of the Ising model with natural parameters `_params` */ +// extern SEXP R_unscaled_prob(SEXP _y, SEXP _params); + +// /* Exact computation of the partition function of the Ising model with natural +// parameters `_params` */ +// extern SEXP R_ising_partition_func_exact(SEXP _params); + +// /* Estimated partition function of the Ising model with natural parameters `_params` */ +// extern SEXP R_ising_partition_func_MC(SEXP _params); + +// /* Exact computation of the partition function of the Ising model with natural +// parameters `_params` */ +// extern SEXP R_ising_m2_exact(SEXP _params); + +// // extern SEXP R_ising_m2_MC64(SEXP _params); +// extern SEXP R_ising_m2_MC(SEXP _params, SEXP _nr_samples, SEXP _warmup); +// extern SEXP R_ising_m2_MC_thrd(SEXP _params, SEXP _nr_threads); + +/* Sample from the Ising model */ +extern SEXP R_ising_sample(SEXP, SEXP, SEXP); + +// Interface to the Ising second moment function +extern SEXP R_ising_m2(SEXP, SEXP, SEXP, SEXP, SEXP); /* List of registered routines (a.k.a. C entry points) */ static const R_CallMethodDef CallEntries[] = { - // {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED - {"C_ttm", (DL_FUNC) &ttm, 4}, - {"C_mtvk", (DL_FUNC) &mtvk, 2}, - {"C_mcrossprod", (DL_FUNC) &mcrossprod, 3}, - {"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2}, + {"C_ttm", (DL_FUNC) &R_ttm, 4}, + {"C_mlm", (DL_FUNC) &R_mlm, 4}, + {"C_mtvk", (DL_FUNC) &mtvk, 2}, + {"C_mcrossprod", (DL_FUNC) &R_mcrossprod, 3}, + {"C_mcrossprod_sym", (DL_FUNC) &R_mcrossprod_sym, 2}, + {"C_svd", (DL_FUNC) &R_svd, 1}, + {"C_solve", (DL_FUNC) &R_solve, 2}, + {"C_det", (DL_FUNC) &R_det, 1}, + {"C_ising_sample", (DL_FUNC) &R_ising_sample, 3}, + {"C_ising_m2", (DL_FUNC) &R_ising_m2, 5}, + // {"C_unscaled_prob", (DL_FUNC) &R_unscaled_prob, 2}, + // {"C_ising_partition_func_MC", (DL_FUNC) &R_ising_partition_func_MC, 1}, + // {"C_ising_partition_func_exact", (DL_FUNC) &R_ising_partition_func_exact, 1}, + // {"C_ising_m2_exact", (DL_FUNC) &R_ising_m2_exact, 1}, + // {"C_ising_m2_MC", (DL_FUNC) &R_ising_m2_MC, 3}, + // {"C_ising_m2_MC_thrd", (DL_FUNC) &R_ising_m2_MC_thrd, 2}, + // {"C_ising_m2_MC64", (DL_FUNC) &R_ising_m2_MC64, 1}, + // {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5}, // NOT USED + // {"C_hopca", (DL_FUNC) &hopca, 1}, + // {"C_icu_tensor_normal", (DL_FUNC) &R_icu_tensor_normal, 3}, // NOT USED / IN DEVELOPMENT + // {"C_gmlm_tensor_normal", (DL_FUNC) &R_gmlm_tensor_normal, 7}, // NOT USED / IN DEVELOPMENT {NULL, NULL, 0} }; -/* Restrict C entry points to registered routines. */ +/** + * Restrict C entry points to registered routines. + * + * NOTE: Naming convention: `R_init_` + */ void R_init_tensorPredictors(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); diff --git a/tensorPredictors/src/int_utils.h b/tensorPredictors/src/int_utils.h new file mode 100644 index 0000000..68c9c6f --- /dev/null +++ b/tensorPredictors/src/int_utils.h @@ -0,0 +1,77 @@ +#ifndef INCLUDE_GUARD_INT_UTILS_H +#define INCLUDE_GUARD_INT_UTILS_H + +#include // uint32_t, uint64_t + + +// /** +// * Integer logarithm, the biggest power `p` such that `2^p <= x`. +// */ +// static int ilog2(uint64_t x) { +// int log = 0; +// while (x >>= 1) { +// log++; +// } +// return log; +// } + + +/** + * Integer Square root of `y`, that is `ceiling(sqrt(y))` + */ +static uint64_t isqrt(uint64_t x) { + // implements a binary search + uint64_t root = 0; + uint64_t left = 0; // left boundary + uint64_t right = x + 1; // right boundary + + while(left + 1UL < right) { + root = (left + right) / 2UL; + if (root * root <= x) { + left = root; + } else { + right = root; + } + } + return left; +} + + +/** + * Inverse to the triangular numbers + * + * Given a positive number `x = p (p + 1) / 2` it computes `p` if possible. + * In case there is no positive integer solution `0` is returned. + * + * Note: this follows immediately from the quadratic equation. + */ +static uint32_t invTriag(uint32_t x) { + uint64_t root = isqrt(8UL * (uint64_t)x + 1UL); + if (root * root != 8UL * x + 1UL) { + return 0; + } + return (root - 1) / 2; +} + + +// /** +// * Number of sub-sets (including empty set) of max-size +// * +// * It computes, with `binom` beeing the binomial coefficient, the following sum +// * +// * sum_{i = 0}^k binom(n, i) +// */ +// static uint64_t nrSubSets(uint64_t n, uint64_t k) { +// uint64_t sum = 1, binom = 1; + +// for (uint64_t i = 1; i <= k; ++i) { +// binom *= n--; +// binom /= i; +// sum += binom; +// } + +// return sum; +// } + + +#endif /* INCLUDE_GUARD_INT_UTILS_H */ diff --git a/tensorPredictors/src/ising_MCMC.h b/tensorPredictors/src/ising_MCMC.h new file mode 100644 index 0000000..027b18a --- /dev/null +++ b/tensorPredictors/src/ising_MCMC.h @@ -0,0 +1,181 @@ +#ifndef INCLUDE_GUARD_ISING_MCMC_H +#define INCLUDE_GUARD_ISING_MCMC_H + +#include "R_api.h" + +/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain + * method given the Ising model parameters in compact half-vectorized form. + * + * f(x) = p0(params) exp(vech(x x')' params) + * + * @important requires to be wrapped between `GetRNGstate()` and `PutRNGState()` + * calls. For example + * + * GetRNGstate() + * for (size_t sample = 0; sample < nr_samples; ++sample) { + * ising_mcmc_vech(warmup, dim, dim, params, &X[sample * dim]); + * } + * PutRNGState() + */ +static inline void ising_mcmc_vech( + const size_t warmup, + const size_t dim, const double* params, + int* X +) { + // Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))` + for (size_t i = 0, I = 0; i < dim; I += dim - i++) { + double invProb = 1.0 + exp(-params[I]); + X[i] = unif_rand() * invProb < 1.0; + } + + // Skip the first samples of the Markov-Chain (warmup) + for (size_t skip = 0; skip < warmup; ++skip) { + // For every component + for (size_t i = 0; i < dim; ++i) { + // Compute conditional probability `P(X_i = 1 | X_-i = x_-i)` + double log_odds = 0.0; + // Tracks position in half-vectorized storage + size_t J = i; + // Sum all log-odds for `j < i` (two way interactions) + for (size_t j = 0; j < i; J += dim - ++j) { + log_odds += X[j] ? params[J] : 0.0; + } + // Allways add log-odds for `i = j` (self-interaction) + log_odds += params[J]; + // Continue adding log-odds for `j > i` (two way interactions) + J++; + for (size_t j = i + 1; j < dim; ++j, ++J) { + log_odds += X[j] ? params[J] : 0.0; + } + // Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))` + X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0; + } + } +} + +// Thread save version of `ising_mcmc` (using thread save PRNG) +static inline void ising_mcmc_vech_thrd( + const size_t warmup, + const size_t dim, const double* params, + int* X, rng_seed_t seed +) { + // Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))` + for (size_t i = 0, I = 0; i < dim; I += dim - i++) { + double invProb = 1.0 + exp(-params[I]); + X[i] = unif_rand_thrd(seed) * invProb < 1.0; + } + + // Skip the first samples of the Markov-Chain (warmup) + for (size_t skip = 0; skip < warmup; ++skip) { + // For every component + for (size_t i = 0; i < dim; ++i) { + // Compute conditional probability `P(X_i = 1 | X_-i = x_-i)` + double log_odds = 0.0; + // Tracks position in half-vectorized storage + size_t J = i; + // Sum all log-odds for `j < i` (two way interactions) + for (size_t j = 0; j < i; J += dim - ++j) { + log_odds += X[j] ? params[J] : 0.0; + } + // Allways add log-odds for `i = j` (self-interaction) + log_odds += params[J]; + // Continue adding log-odds for `j > i` (two way interactions) + J++; + for (size_t j = i + 1; j < dim; ++j, ++J) { + log_odds += X[j] ? params[J] : 0.0; + } + // Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))` + X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0; + } + } +} + +/** Sample a single value from the Ising model via a Monte-Carlo Markov-Chain + * method given the Ising model parameters in symmetric matrix form `A`. + * + * f(x) = prob_0(A) exp(x' A x) + * + * The relation to the natural parameters `params` as used in the half-vectorized + * version `ising_mcmc_vech()` + * + * f(x) = prob_0(params) exp(vech(x x')' params) + * + * is illustrated via an example with `dim = 5`; + * + * p0 [ p0 p1/2 p2/2 p3/2 p4/2 ] + * p1 p5 [ p1/2 p5 p6/2 p7/2 p8/2 ] + * params = p2 p6 p9 => A = [ p2/2 p6/2 p9 p10/2 p11/2 ] + * p3 p7 p10 p12 [ p3/2 p7/2 p10/2 p12 p13/2 ] + * p4 p8 p11 p13 p14 [ p4/2 p8/2 p11/2 p13/2 p14 ] + * + * or the other way arroung given the symmetric matrix form `A` converted to + * the natural parameters + * + * [ a00 a10 a20 a30 a40 ] a00 + * [ a10 a11 a21 a31 a41 ] 2*a10 a11 + * A = [ a20 a21 a22 a32 a42 ] => params = 2*a20 2*a21 a22 + * [ a30 a31 a32 a33 a43 ] 2*a30 2*a31 2*a32 a33 + * [ a40 a41 a42 a43 a44 ] 2*a40 2*a41 2*a42 2*a43 a44 + * + */ +static inline void ising_mcmc_mat( + const size_t warmup, + const size_t dim, const size_t ldA, const double* A, + int* X +) { + // Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))` + for (size_t i = 0; i < dim; ++i) { + X[i] = unif_rand() * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0; + } + + // Skip the first samples of the Markov-Chain (warmup) + for (size_t skip = 0; skip < warmup; ++skip) { + // For every component + for (size_t i = 0; i < dim; ++i) { + // Compute conditional probability `P(X_i = 1 | X_-i = x_-i)` + double log_odds = 0.0; + for (size_t j = 0; j < i; ++j) { + log_odds += (2 * X[j]) * A[i * ldA + j]; + } + log_odds += A[i * (ldA + 1)]; + for (size_t j = i + 1; j < dim; ++j) { + log_odds += (2 * X[j]) * A[i * ldA + j]; + } + // Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))` + X[i] = unif_rand() * (1.0 + exp(-log_odds)) < 1.0; + } + } +} + +// thread save version of `ising_mcmc_mat()` (using thread save PRNG) +static inline void ising_mcmc_mat_thrd( + const size_t warmup, + const size_t dim, const size_t ldA, const double* A, + int* X, rng_seed_t seed +) { + // Initialize elements `X_i ~ Bernoulli(P(X_i = 1 | X_-i = 0))` + for (size_t i = 0; i < dim; ++i) { + X[i] = unif_rand_thrd(seed) * (1.0 + exp(-A[i * (ldA + 1)])) < 1.0; + } + + // Skip the first samples of the Markov-Chain (warmup) + for (size_t skip = 0; skip < warmup; ++skip) { + // For every component + for (size_t i = 0; i < dim; ++i) { + // Compute conditional probability `P(X_i = 1 | X_-i = x_-i)` + double log_odds = 0.0; + for (size_t j = 0; j < i; ++j) { + log_odds += (2 * X[j]) * A[i * ldA + j]; + } + log_odds += A[i * (ldA + 1)]; + for (size_t j = i + 1; j < dim; ++j) { + log_odds += (2 * X[j]) * A[i * ldA + j]; + } + // Update `i`th element `X_i ~ Bernoulli(P(X_i = 1 | X_-i = x_-i))` + X[i] = unif_rand_thrd(seed) * (1.0 + exp(-log_odds)) < 1.0; + } + } +} + + +#endif /* INCLUDE_GUARD_ISING_MCMC_H */ diff --git a/tensorPredictors/src/ising_m2.c b/tensorPredictors/src/ising_m2.c new file mode 100644 index 0000000..f75ca18 --- /dev/null +++ b/tensorPredictors/src/ising_m2.c @@ -0,0 +1,401 @@ +/** Methods computing or estimating the second moment of the Ising model given + * the natural parameters of the p.m.f. in exponential family form. That is + * f(x) = p0(params) exp(vech(x x')' params) + * where `params` are the natural parameters. The scaling constant `p0(params)` + * is also the zero event `x = (0, ..., 0)` probability. + * + * Three different version are provided. + * - exact: Exact method computing the true second moment `E[X X']` given the + * parameters `params`. This method has a runtime of `O(2^dim)` where + * `dim` is the dimension of the random variable `X`. This means it is + * infeasible for bigger (like 25 and above) values if `dim` + * - MC: Monte-Carlo method used in case the exact method is not feasible. + * - MC Thrd: Multi-Threaded Monte-Carlo method. + * + * The natural parameters `params` of the Ising model is a `dim (dim + 1) / 2` + * dimensional numeric vector containing the log-odds. The indexing schema into + * the parameters corresponding to the log-odds of single or two way interactions + * with indicex `i, j` are according to the half-vectorization schema + * + * i = 4'th row => Symmetry => Half-Vectorized Storage + * + * [ * * * * * * ] [ * * * * * * ] [ * * * 15 * * ] + * [ * * * * * * ] [ * * * * * * ] [ * * 12 16 * ] + * [ * * * * * * ] [ * * * * * * ] [ * 8 * 17 ] + * [ 3 8 12 15 16 17 ] [ 3 8 12 15 * * ] [ 3 * * ] + * [ * * * * * * ] [ * * * 16 * * ] [ * * ] + * [ * * * * * * ] [ * * * 17 * * ] [ * ] + * + */ +#include "R_api.h" +#include "bit_utils.h" +#include "int_utils.h" +#include "ising_MCMC.h" + +#ifndef __STDC_NO_THREADS__ + #include +#endif + +//////////////////////////////////////////////////////////////////////////////// +// TODO: read // +// https://developer.r-project.org/Blog/public/2019/04/18/common-protect-errors/ +// and +// https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html +// as well as do the following PROPERLLY +// TODO: make specialized version using the parameters in given sym mat form // +// similar to `ising_sample()` // +//////////////////////////////////////////////////////////////////////////////// + + +// void ising_m2_exact_mat( +// const size_t dim, const size_t ldA, const double* A, +// double* M2 +// ) { +// double sum_0 = 1.0; + +// const uint32_t max_event = (uint32_t)1 << dim; + +// (void)memset(M2, 0, dim * dim * sizeof(double)); + +// for (uint32_t X = 1; X < max_event; ++X) { +// // Evaluate quadratic form `X' A X` using symmetry of `A` +// double XtAX = 0.0; +// for (uint32_t Y = X; Y; Y &= Y - 1) { +// const int i = bitScanLS32(Y); +// XtAX += A[i * (ldA + 1)]; +// for (uint32_t Z = Y & (Y - 1); Z; Z &= Z - 1) { +// XtAX += 2 * A[i * ldA + bitScanLS32(Z)]; +// } +// } + +// const double prob_X = exp(XtAX); +// sum_0 += prob_X; + +// for (uint32_t Y = X; Y; Y &= Y - 1) { +// const int i = bitScanLS32(Y); +// for (uint32_t Z = Y; Z; Z &= Z - 1) { +// M2[i * dim + bitScanLS32(Z)] += prob_X; +// } +// } +// } + +// const double prob_0 = 1.0 / sum_0; +// for (size_t j = 0; j < dim; ++j) { +// for (size_t i = 0; i < j; ++i) { +// M2[j * dim + i] = M2[i * dim + j]; +// } +// for (size_t i = j; i < dim; ++i) { +// M2[i * dim + j] *= prob_0; +// } +// } +// } + +double ising_m2_exact(const size_t dim, const double* params, double* M2) { + // Accumulator of sum of all (unscaled) probabilities + double sum_0 = 1.0; + + // max event (actually upper bound) + const uint32_t max_event = (uint32_t)1 << dim; + + // Length of parameters + const size_t len = dim * (dim + 1) / 2; + + // Initialize M2 to zero + (void)memset(M2, 0, len * sizeof(double)); + + // Iterate all `2^dim` binary vectors `X` + for (uint32_t X = 1; X < max_event; ++X) { + // Dot product `` + double dot_X = 0.0; + // all bits in `X` + for (uint32_t Y = X; Y; Y &= Y - 1) { + const int i = bitScanLS32(Y); + const int I = (i * (2 * dim - 1 - i)) / 2; + // add single and two way interaction log odds + for (uint32_t Z = Y; Z; Z &= Z - 1) { + dot_X += params[I + bitScanLS32(Z)]; + } + } + + // (unscaled) probability of current event `X` + const double prob_X = exp(dot_X); + sum_0 += prob_X; + + // Accumulate set bits probability for the first end second moment `E[X X']` + for (uint32_t Y = X; Y; Y &= Y - 1) { + const int i = bitScanLS32(Y); + const int I = (i * (2 * dim - 1 - i)) / 2; + for (uint32_t Z = Y; Z; Z &= Z - 1) { + M2[I + bitScanLS32(Z)] += prob_X; + } + } + } + + // Finish by scaling with zero event probability (Ising p.m.f. scaling constant) + const double prob_0 = 1.0 / sum_0; + for (size_t i = 0; i < len; ++i) { + M2[i] *= prob_0; + } + + return prob_0; +} + + +// Monte-Carlo method using Gibbs sampling for the second moment +double ising_m2_MC( + /* options */ const size_t nr_samples, const size_t warmup, + /* dimension */ const size_t dim, + /* vectors */ const double* params, double* M2 +) { + + // Length of `params` and `M2` + const size_t len = (dim * (dim + 1)) / 2; + + // Dirty trick (reuse output memory for precise counting) + _Static_assert(sizeof(uint64_t) == sizeof(double), "Dirty trick fails!"); + uint64_t* counts = (uint64_t*)M2; + + // Initialize counts to zero + (void)memset(counts, 0, len * sizeof(uint64_t)); + + // Allocate memory to store Markov-Chain value + int* X = (int*)R_alloc(dim, sizeof(int)); + + // Create/Update R's internal PRGN state + GetRNGstate(); + + // Spawn chains + for (size_t sample = 0; sample < nr_samples; ++sample) { + // Draw random sample `X ~ Ising(params)` + ising_mcmc_vech(warmup, dim, params, X); + + // Accumulate component counts + uint64_t* count = counts; + for (size_t i = 0; i < dim; ++i) { + for (size_t j = i; j < dim; ++j) { + *(count++) += X[i] & X[j]; + } + } + } + + // Write R's internal PRNG state back (if needed) + PutRNGstate(); + + // Compute means from counts + for (size_t i = 0; i < len; ++i) { + M2[i] = (double)counts[i] / (double)(nr_samples); + } + + return -1.0; // TODO: also compute prob_0 +} + + +// in case the compile supports standard C threads +#ifndef __STDC_NO_THREADS__ + +// Worker thread to calling thread communication struct +typedef struct thrd_data { + rng_seed_t seed; // Pseudo-Random-Number-Generator seed/state value + size_t nr_samples; // Nr. of samples this worker handles + size_t warmup; // Monte-Carlo Chain burne-in length + size_t dim; // Random variable dimension + const double* params; // Ising model parameters + int* X; // Working memory to store current binary sample + uint32_t* counts; // (output) count of single and two way interactions + double prob_0; // (output) zero event probability estimate +} thrd_data_t; + +// Worker thread function +int thrd_worker(thrd_data_t* data) { + // Extract data as thread local variables (for convenience) + const size_t dim = data->dim; + int* X = data->X; + + // Initialize counts to zero + (void)memset(data->counts, 0, (dim * (dim + 1) / 2) * sizeof(uint32_t)); + + // Spawn Monte-Carlo Chains (one foe every sample) + for (size_t sample = 0; sample < data->nr_samples; ++sample) { + // Draw random sample `X ~ Ising(params)` + ising_mcmc_vech_thrd(data->warmup, dim, data->params, X, data->seed); + + // Accumulate component counts + uint32_t* count = data->counts; + for (size_t i = 0; i < dim; ++i) { + for (size_t j = i; j < dim; ++j) { + *(count++) += X[i] & X[j]; + } + } + } + + return 0; +} + +double ising_m2_MC_thrd( + /* options */ const size_t nr_samples, const size_t warmup, + const size_t nr_threads, + /* dimension */ const size_t dim, + /* vectors */ const double* params, double* M2 +) { + // Length of `params` and `M2` + const size_t len = (dim * (dim + 1)) / 2; + + // Allocate working and self memory for worker threads + int* Xs = (int*)R_alloc(dim * nr_threads, sizeof(int)); + uint32_t* counts = (uint32_t*)R_alloc(len * nr_threads, sizeof(uint32_t)); + thrd_t* threads = (thrd_t*)R_alloc(nr_threads, sizeof(thrd_data_t)); + thrd_data_t* threads_data = (thrd_data_t*)R_alloc(nr_threads, sizeof(thrd_data_t)); + + // Provide instruction wor worker and dispatch them + for (size_t tid = 0; tid < nr_threads; ++tid) { + // Every thread needs its own PRNG seed! + init_seed(threads_data[tid].seed); + // divide work among workers (more or less) equaly with the first worker + // (tid = 0) having the additional remainder of divided work (`nr_samples`) + threads_data[tid].nr_samples = nr_samples / nr_threads + ( + tid ? 0 : nr_samples % nr_threads + ); + threads_data[tid].warmup = warmup; + threads_data[tid].dim = dim; + threads_data[tid].params = params; + // Every worker gets its disjint working memory + threads_data[tid].X = &Xs[dim * tid]; + threads_data[tid].counts = &counts[len * tid]; + // dispatch the worker + thrd_create(&threads[tid], (thrd_start_t)thrd_worker, (void*)&threads_data[tid]); + } + + // Join (Wait for) all worker + for (size_t tid = 0; tid < nr_threads; ++tid) { + thrd_join(threads[tid], NULL); + } + + // Accumulate worker results into first (tid = 0) worker counts result + for (size_t tid = 1; tid < nr_threads; ++tid) { + for (size_t i = 0; i < len; ++i) { + counts[i] += counts[tid * len + i]; + } + } + + // convert discreat counts into means + for (size_t i = 0; i < len; ++i) { + M2[i] = (double)counts[i] / (double)nr_samples; + } + + return -2.0; // TODO: also compute prob_0 +} + +#endif /* !__STDC_NO_THREADS__ */ + + + + +/* Implementation of `.Call` entry point */ +//////////////////////////////////////////////////////////////////////////////// +// TODO: make specialized version using the parameters in given sym mat form // +// similar to `ising_sample()` // +//////////////////////////////////////////////////////////////////////////////// +extern SEXP R_ising_m2( + SEXP _params, SEXP _use_MC, SEXP _nr_samples, SEXP _warmup, SEXP _nr_threads +) { + // Extract and validate arguments + size_t protect_count = 0; + if (!Rf_isReal(_params)) { + _params = PROTECT(Rf_coerceVector(_params, REALSXP)); + protect_count++; + } + + // Depending on parameters given in symmetric matrix form or natural + // or natrual parameters in the half-vectorized memory layout + size_t dim; + double* params; + if (Rf_isMatrix(_params)) { + if (Rf_nrows(_params) != Rf_ncols(_params)) { + Rf_error("Invalid 'params' value, exected square matrix"); + } + // Convert to natural parameters + dim = Rf_nrows(_params); + params = (double*)R_alloc(dim * (dim + 1) / 2, sizeof(double)); + + double* A = REAL(_params); + for (size_t j = 0, I = 0; j < dim; ++j) { + for (size_t i = j; i < dim; ++i, ++I) { + params[I] = (1 + (i != j)) * A[i + j * dim]; + } + } + } else { + // Get Ising random variable dimension `dim` from "half-vectorized" parameters + // vector. That is, compute `dim` from `length(params) = dim (dim + 1) / 2`. + dim = invTriag(Rf_length(_params)); + if (!dim) { + Rf_error("Expected parameter vector of length `p (p + 1) / 2` where" + " `p` is the dimension of the random variable"); + } + params = REAL(_params); + } + + // Determin the method to use and validate if possible + const int use_MC = Rf_asLogical(_use_MC); + if (use_MC == NA_LOGICAL) { + Rf_error("Invalid 'use_MC' value, expected ether TRUE or FALSE"); + } + + // Allocate result vector + SEXP _M2 = PROTECT(Rf_allocVector(REALSXP, dim * (dim + 1) / 2)); + ++protect_count; + + // asside computed zero event probability (inverse partition function), the + // scaling factor for the Ising model p.m.f. + double prob_0 = -1.0; + + if (use_MC) { + // Convert and validate arguments for the Monte-Carlo methods + const size_t nr_samples = asUnsigned(_nr_samples); + if (nr_samples == 0 || nr_samples == NA_UNSIGNED) { + Rf_error("Invalid 'nr_samples' value, expected pos. integer"); + } + const size_t warmup = asUnsigned(_warmup); + if (warmup == NA_UNSIGNED) { + Rf_error("Invalid 'warmup' value, expected non-negative integer"); + } + const size_t nr_threads = asUnsigned(_nr_threads); + if (nr_threads == 0 || nr_threads > 256) { + Rf_error("Invalid 'nr_thread' value"); + } + + if (nr_threads == 1) { + // Single threaded Monte-Carlo method + prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2)); + } else { + // Multi-Threaded Monte-Carlo method if provided, otherwise use + // the single threaded version with a warning +#ifdef __STDC_NO_THREADS__ + Rf_warning("Multi-Threading NOT supported, using fallback."); + prob_0 = ising_m2_MC(nr_samples, warmup, dim, params, REAL(_M2)); +#else + prob_0 = ising_m2_MC_thrd( + nr_samples, warmup, nr_threads, + dim, params, REAL(_M2) + ); +#endif + } + } else { + // Exact method (ignore other arguments), only validate dimension + if (25 < dim) { + Rf_error("Dimension '%d' too big for exact method (max 24)", dim); + } + + // and call the exact method + prob_0 = ising_m2_exact(dim, params, REAL(_M2)); + } + + // Set log-lokelihood as an attribute to the computed second moment + SEXP _prob_0 = PROTECT(Rf_ScalarReal(prob_0)); + ++protect_count; + Rf_setAttrib(_M2, Rf_install("prob_0"), _prob_0); + + // release SEPXs to the garbage collector + UNPROTECT(protect_count); + + return _M2; +} diff --git a/tensorPredictors/src/ising_sample.c b/tensorPredictors/src/ising_sample.c new file mode 100644 index 0000000..8246a96 --- /dev/null +++ b/tensorPredictors/src/ising_sample.c @@ -0,0 +1,72 @@ +#ifndef INCLUDE_GUARD_ISING_SAMPLE_H +#define INCLUDE_GUARD_ISING_SAMPLE_H + +#include "R_api.h" +#include "int_utils.h" +#include "ising_MCMC.h" + + +// .Call interface to draw from sample from the Ising model +extern SEXP R_ising_sample(SEXP _nr_samples, SEXP _params, SEXP _warmup) { + // Counts number of protected SEXP's to give them back to the garbage collector + size_t protect_count = 0; + + // Parse and validate arguments + const size_t nr_samples = asUnsigned(_nr_samples); + if (nr_samples == 0 || nr_samples == NA_UNSIGNED) { + Rf_error("Invalid 'nr_samples' value, expected pos. integer"); + } + const size_t warmup = asUnsigned(_warmup); + if (warmup == NA_UNSIGNED) { + Rf_error("Invalid 'warmup' value, expected non-negative integer"); + } + + // Determin parameter mode (natural parameter vector or symmetric matrix) + // Ether `m` for "Matrix" or `v` for "Vector" + const char param_type = Rf_isMatrix(_params) ? 'm' : 'v'; + + // In case of matrix parameters check for square matrix + if (param_type == 'm' && (Rf_nrows(_params) != Rf_ncols(_params))) { + Rf_error("Invalid 'params' value, exected square matrix"); + } + + // Get problem dimension from parameter size + const size_t dim = (param_type == 'm') + ? Rf_nrows(_params) + : invTriag(Rf_length(_params)); + if (!dim) { + Rf_error("Error determining dimension."); + } + + // Ensure parameters are numeric + if (!Rf_isReal(_params)) { + _params = PROTECT(Rf_coerceVector(_params, REALSXP)); + ++protect_count; + } + double* params = REAL(_params); + + // Allocate result sample + SEXP _X = PROTECT(Rf_allocMatrix(INTSXP, dim, nr_samples)); + ++protect_count; + int* X = INTEGER(_X); + + // Call appropriate sampling routine for every sample to generate + GetRNGstate(); + if (param_type == 'm') { + for (size_t sample = 0; sample < nr_samples; ++sample) { + ising_mcmc_mat(warmup, dim, dim, params, &X[sample * dim]); + } + } else { + for (size_t sample = 0; sample < nr_samples; ++sample) { + ising_mcmc_vech(warmup, dim, params, &X[sample * dim]); + } + } + PutRNGstate(); + + // Release protected SEXPs to the garbage collector + UNPROTECT(protect_count); + + return _X; +} + +#endif /* INCLUDE_GUARD_ISING_SAMPLE_H */ diff --git a/tensorPredictors/src/mcrossprod.c b/tensorPredictors/src/mcrossprod.c index 0763c53..fc764af 100644 --- a/tensorPredictors/src/mcrossprod.c +++ b/tensorPredictors/src/mcrossprod.c @@ -8,6 +8,51 @@ #define FCONE #endif +void mcrossprod( + const int rank, + const double* A, const int* dimA, + const double* B, const int* dimB, + const int mode, + double* C +) { + // the strides + // `stride[0] <- prod(dim(A)[seq_len(mode - 1)])` + // `stride[1] <- dim(A)[mode]` + // `stride[2] <- prod(dim(A)[-seq_len(mode)])` + // Note: Middle stride is ignored (to be consistent with sym version) + int stride[3] = {1, 0, 1}; + for (int i = 0; i < rank; ++i) { + int size = dimA[i]; + stride[0] *= (i < mode) ? size : 1; + stride[2] *= (i > mode) ? size : 1; + } + + // employ BLAS dgemm (Double GEneralized Matrix Matrix) operation + // (C = alpha op(A) op(A) + beta C, op is the identity of transposition) + const double zero = 0.0; + const double one = 1.0; + if (mode == 0) { + // mode 1: special case C = A_(1) B_(1)^T + // C = 1 A B^T + 0 C + F77_CALL(dgemm)("N", "T", &dimA[mode], &dimB[mode], &stride[2], + &one, A, &dimA[mode], B, &dimB[mode], + &zero, C, &dimA[mode] FCONE FCONE); + } else { + // Other modes writen as accumulated sum of matrix products + // initialize C to zero + memset(C, 0, dimA[mode] * dimB[mode] * sizeof(double)); + + // Sum over all modes > mode + for (int i2 = 0; i2 < stride[2]; ++i2) { + // C = 1 A^T B + 1 C + F77_CALL(dgemm)("T", "N", &dimA[mode], &dimB[mode], &stride[0], + &one, &A[i2 * stride[0] * dimA[mode]], &stride[0], + &B[i2 * stride[0] * dimB[mode]], &stride[0], + &one, C, &dimA[mode] FCONE FCONE); + } + } +} + /** * Tensor Mode Crossproduct * @@ -21,72 +66,58 @@ * @param B multi-dimensional array * @param m mode index (1-indexed) */ -extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) { +extern SEXP R_mcrossprod(SEXP A, SEXP B, SEXP m) { // get zero indexed mode int mode = asInteger(m) - 1; - // get dimension attributes - SEXP dimA = getAttrib(A, R_DimSymbol); - SEXP dimB = getAttrib(B, R_DimSymbol); + // Check if both `A` and `B` are real-valued + if (!isReal(A) || !isReal(B)) { + error("Type missmatch, both `A` and `B` must be real-valued"); + } + // get dimension attributes + SEXP dimA_sexp = getAttrib(A, R_DimSymbol); + SEXP dimB_sexp = getAttrib(B, R_DimSymbol); + + // dimension type validation + if (!isInteger(dimA_sexp) || !isInteger(dimB_sexp)) { + error("Dimensions must be integer vectors"); + } + + // validate dimensions + int rank = length(dimA_sexp); + if (rank != length(dimB_sexp)) { + error("Dimension mismatch"); + } // validate mode (0-indexed, must be smaller than the tensor order) - if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) { + if (mode < 0 || rank <= mode) { error("Illegal mode"); } - // the strides - // `stride[0] <- prod(dim(A)[seq_len(mode - 1)])` - // `stride[1] <- dim(A)[mode]` - // `stride[2] <- prod(dim(A)[-seq_len(mode)])` - // Note: Middle stride is ignored (to be consistent with sym version) - int stride[3] = {1, 0, 1}; - for (int i = 0; i < length(dimA); ++i) { - int size = INTEGER(dimA)[i]; - stride[0] *= (i < mode) ? size : 1; - stride[2] *= (i > mode) ? size : 1; - // check margin matching of `A` and `B` - if (i != mode && size != INTEGER(dimB)[i]) { - error("Dimension missmatch"); - } + // get raw pointers to dimensions + int* dimA = INTEGER(coerceVector(dimA_sexp, INTSXP)); + int* dimB = INTEGER(coerceVector(dimB_sexp, INTSXP)); + + // finaly, check for `A` and `B` dimensions to match + for (int i = 0; i < rank; ++i) { + if (i != mode && dimA[i] != dimB[i]) { + error("Dimension mismatch"); + } } - // Result dimensions - int nrowC = INTEGER(dimA)[mode]; - int ncolC = INTEGER(dimB)[mode]; // create response matrix C - SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC)); + SEXP C = PROTECT(allocMatrix(REALSXP, dimA[mode], dimB[mode])); - // raw data access pointers - double* a = REAL(A); - double* b = REAL(B); - double* c = REAL(C); + // Call C mode crossprod subroutine + mcrossprod( + rank, // tensor rank of both `A` and `B` + REAL(A), dimA, // mem. addr. of A, dim(A) + REAL(B), dimB, // mem. addr. of B, dim(B) + mode, // the crossproduct mode to compute + REAL(C) // return value memory addr. + ); - // employ BLAS dgemm (Double GEneralized Matrix Matrix) operation - // (C = alpha op(A) op(A) + beta C, op is the identity of transposition) - const double zero = 0.0; - const double one = 1.0; - if (mode == 0) { - // mode 1: special case C = A_(1) B_(1)^T - // C = 1 A B^T + 0 C - F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2], - &one, a, &nrowC, b, &ncolC, - &zero, c, &nrowC FCONE FCONE); - } else { - // Other modes writen as accumulated sum of matrix products - // initialize C to zero - memset(c, 0, nrowC * ncolC * sizeof(double)); - - // Sum over all modes > mode - for (int i2 = 0; i2 < stride[2]; ++i2) { - // C = 1 A^T B + 1 C - F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0], - &one, &a[i2 * stride[0] * nrowC], &stride[0], - &b[i2 * stride[0] * ncolC], &stride[0], - &one, c, &nrowC FCONE FCONE); - } - } - - // release C to grabage collector + // release C to garbage collector UNPROTECT(1); return C; @@ -104,7 +135,7 @@ extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) { * @param A multi-dimensional array * @param m mode index (1-indexed) */ -extern SEXP mcrossprod_sym(SEXP A, SEXP m) { +extern SEXP R_mcrossprod_sym(SEXP A, SEXP m) { // get zero indexed mode int mode = asInteger(m) - 1; diff --git a/tensorPredictors/src/mlm.c b/tensorPredictors/src/mlm.c new file mode 100644 index 0000000..c33a2f3 --- /dev/null +++ b/tensorPredictors/src/mlm.c @@ -0,0 +1,160 @@ +#include "R_api.h" +#include "ttm.h" + +int mlm( + /* options */ const int* trans, const int* modes, const int nrhs, + /* dims */ const int* dimA, const int* dimC, const int ord, + /* scalars */ const double alpha, + /* tensor */ const double* A, + /* matrices */ const double** Bs, const int* ldBs, + /* scalar */ const double beta, + /* tensor */ double* C, + double* work_mem +) { + // Compute total size of `A`, `C` and required working memory + int sizeA = 1; // `prod(dim(A))` + int sizeC = 1; // `prod(dim(C))` + int work_size = 2; // `2 * prod(pmax(dim(A), dim(C)))` (`+ ord` NOT included) + for (int i = 0; i < ord; ++i) { + sizeA *= dimA[i]; + sizeC *= dimC[i]; + work_size *= dimA[i] < dimC[i] ? dimC[i] : dimA[i]; + } + + // In requesting working memory size stop here and return size + if (work_mem == NULL) { + return work_size + ord; + } + + // Copy dimensions of A to intermediate temp. dim + int* dimT = (int*)(work_mem + work_size); // `work_mem` is `ord` longer + memcpy(dimT, dimA, ord * sizeof(int)); + + // Setup work memory hooks (two swapable blocks) + double* tmp1 = work_mem; + double* tmp2 = work_mem + (work_size >> 1); + + // Multi Linear Multiplication is an iterated application of TTM + for (int i = 0; i < nrhs; ++i) { + // Get current `B` dimensions (only implicitly given) + int nrowB = dimC[modes[i]]; + int ncolB = dimA[modes[i]]; + if (trans && trans[i]) { + nrowB = dimA[modes[i]]; + ncolB = dimC[modes[i]]; + } + + // Tensor Times Matrix (`modes[i]` mode products) + ttm(trans ? trans[i] : 0, modes[i], dimT, ord, nrowB, ncolB, + 1.0, i ? tmp2 : A, Bs[i], ldBs ? ldBs[i] : dimC[modes[i]], 0.0, + tmp1); + + // Update intermediate temp dim + dimT[modes[i]] = dimC[modes[i]]; + + // Swap tmp1 <-> tmp2 + double* tmp3 = tmp1; tmp1 = tmp2; tmp2 = tmp3; + } + + // dAXPY (a x + y) with `x = tmp2` and `y = beta C` + if (beta != 0.0) { + for (int i = 0; i < sizeC; ++i) { + C[i] *= beta; + } + } else { + memset(C, 0, sizeC * sizeof(double)); + } + axpy(sizeC, alpha, tmp2, 1, C, 1); + + return 0; +} + +/** + * Multi Linear Multiplication (`R` binding) + */ +extern SEXP R_mlm(SEXP A, SEXP Bs, SEXP ms, SEXP ops) { + + // get dimension attribute of A + SEXP dimA = Rf_getAttrib(A, R_DimSymbol); + int ord = Rf_length(dimA); + + // Check if `A` is a real valued tensor + if (!Rf_isReal(A) || Rf_isNull(dimA)) { + Rf_error("Param. `A` need to be a real valued array"); + } + + // Validate that `Bs` is a list + if (!Rf_isNewList(Bs)) { + Rf_error("Param. `Bs` need to be a list of matrices"); + } + if (!Rf_isInteger(ms) || !Rf_isLogical(ops)) { + Rf_error("Param. type missmatch, expected modes as ints and ops logical"); + } + + // Number of `B` matrices (Nr of Right Hand Side objects) + int nrhs = Rf_length(Bs); + + // further parameter validations + if ((nrhs != Rf_length(ms)) || (nrhs != Rf_length(ops))) { + Rf_error("Dimension missmatch (Params length differ)"); + } + + // Get modes and operations for Bs (transposed or not) + int* modes = (int*)R_alloc(nrhs, sizeof(int)); // 0-indexed version of `ms` + int* trans = INTEGER(ops); + + // Compute result dimensions `dim(C)` while extracting `B` data pointers + SEXP dimC = PROTECT(Rf_duplicate(dimA)); + const double** bs = (const double**)R_alloc(nrhs, sizeof(double*)); + for (int i = 0; i < nrhs; ++i) { + // Extract right hand side matrices (`B` matrices from list) + SEXP B = VECTOR_ELT(Bs, i); + if (!(Rf_isMatrix(B) && Rf_isReal(B))) { + UNPROTECT(1); + Rf_error("Param. `Bs` need to be a list of real matrices"); + } + int* dimB = INTEGER(Rf_getAttrib(B, R_DimSymbol)); + bs[i] = REAL(B); + + // Convert 1-indexed modes to be 0-indexed + modes[i] = INTEGER(ms)[i] - 1; + + // Check if mode is out or range (indexing a non-existing mode of `A`) + if (!((0 <= modes[i]) && (modes[i] < ord))) { + UNPROTECT(1); + Rf_error("%d'th mode (%d) out of range", i + 1, modes[i] + 1); + } + + // Check if `i`th mode of `A` matches corresponding `B` dimension + if (INTEGER(dimA)[modes[i]] != dimB[!trans[i]]) { + UNPROTECT(1); + Rf_error("%d'th mode (%d) dimension missmatch", i + 1, modes[i] + 1); + } + + INTEGER(dimC)[modes[i]] = dimB[!!trans[i]]; + } + + // Now, compute `C`s size `prod(dim(C))` + int sizeC = 1; + for (int i = 0; i < ord; ++i) { + sizeC *= INTEGER(dimC)[i]; + } + + // Allocate response `C` + SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC)); + Rf_setAttrib(C, R_DimSymbol, dimC); + + // allocate working memory size + int work_size = mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord, + 1.0, REAL(A), bs, NULL, 0.0, REAL(C), NULL); + double* work_memory = (double*)R_alloc(work_size, sizeof(double)); + + // Compute Multi-Linear Multiplication (call subroutine) + (void)mlm(trans, modes, nrhs, INTEGER(dimA), INTEGER(dimC), ord, + 1.0, REAL(A), bs, NULL, 0.0, REAL(C), work_memory); + + // release C to the hands of the garbage collector + UNPROTECT(2); + + return C; +} diff --git a/tensorPredictors/src/mlm.h b/tensorPredictors/src/mlm.h new file mode 100644 index 0000000..9a07c8b --- /dev/null +++ b/tensorPredictors/src/mlm.h @@ -0,0 +1,35 @@ +#ifndef INCLUDE_GUARD_MLM_H +#define INCLUDE_GUARD_MLM_H + +/** + * Multi Linear Multiplication + * + * C = alpha A x_modes[0] op(Bs[0]) ... x_modes[nrhs] op(Bs[nrhs]) + beta C + * + * @param trans boolean vector of length `nrhs` indicating if `i`th RHS matrix + * is to be transposed. That is `op(Bs[i])` is the transposed of `Bs[i]` iff + * `trans[i]` is true, otherwise no-op on `Bs[i]`. Can be `NULL`, then `op` is + * always the identity. + * @param modes integer vector of length `nrhs` specifying the product modes. + * + * @todo TODO: continue doc. !!! + * + * @param alpha scaling factor + * @param beta scaling factor + * @param C output memory addr. + * + * @param work_mem NULL or temporary working memory of size + * `2 * prod(pmax(dim(A), dim(C))) + ord` + */ +int mlm( + /* options */ const int* trans, const int* modes, const int nrhs, + /* dims */ const int* dimA, const int* dimC, const int ord, + /* scalars */ const double alpha, + /* tensor */ const double* A, + /* matrices */ const double** Bs, const int* ldBs, + /* scalar */ const double beta, + /* tensor */ double* C, + double* work_mem +); + +#endif /* INCLUDE_GUARD_MLM_H */ diff --git a/tensorPredictors/src/random.h b/tensorPredictors/src/random.h new file mode 100644 index 0000000..d5aa375 --- /dev/null +++ b/tensorPredictors/src/random.h @@ -0,0 +1,37 @@ +// /** +// * A sufficient Pseudo-Random-Number-Generators (PRNG) of the Xorshift family +// * +// * For single threaded operations the PRNG provided by `R` are prefered. But they +// * are _not_ thread save. The following is a simple PRNG usable in a multi-threaded +// * application. +// * +// * See TODO: ...https://en.wikipedia.org/wiki/Xorshift +// * SchachHoernchen +// */ + +// #ifndef INCLUDE_GUARD_RANDOM_H +// #define INCLUDE_GUARD_RANDOM_H + +// #include // uint32_t, uint64_t + + +// static inline uint64_t rot64(uint64_t val, int shift) { +// return (val << shift) | (val >> (64 - shift)); +// } + +// // PRNG of the Xorshift family +// // @note the least significant 32 bits are not reliable, use most significant 32 bits +// static inline uint64_t rand_u64(uint64_t seed[4]) { +// uint64_t e = seed[0] - rot64(seed[1], 7); +// seed[0] = seed[1] ^ rot64(seed[1], 13); +// seed[1] = seed[2] + rot64(seed[3], 37); +// seed[2] = seed[3] + e; +// seed[3] = e + seed[0]; +// return seed[3]; +// } + +// static inline double unif_rand_u64(uint64_t seed[4]) { +// return ((double)(rand_u64(seed) >> 32)) / (double)(-(uint32_t)1); +// } + +// #endif diff --git a/tensorPredictors/src/svd.c b/tensorPredictors/src/svd.c new file mode 100644 index 0000000..ac73151 --- /dev/null +++ b/tensorPredictors/src/svd.c @@ -0,0 +1,112 @@ +// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string +// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings +#define USE_FC_LEN_T +#include +#include +#include +#include +#ifndef FCONE +#define FCONE +#endif + +// Singular Value Decomposition +// @note assumes passed arguments to be "clean", "properly checked" +int svd( + const int p, const int q, + double* A, const int ldA, + double* d, + double* U, const int ldU, + double* Vt, const int ldVt, + double* work_mem, int work_size, int* i_work // if unknown set to 0, -1, 0 +) { + // LAPACK information return variable (in case of an error, LAPACK sets this + // to a non-zero value) + int info = 0; + + // check if work memory is supplied, if _not_ query for appropriate size and + // allocate the memory + if (!work_mem || work_size < 1 || !i_work) { + // create (declare) work memory + i_work = (int*)R_alloc(8 * (p < q ? p : q), sizeof(int)); + double tmp; // variable to store work memory size query result + work_mem = &tmp; // point to `tmp` as it will contain the result of + // the work memory size query + + // request appropriate work memory size + F77_CALL(dgesdd)( + "S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt, + work_mem, &work_size, i_work, &info + FCONE); + + // allocate work memory + work_size = (int)tmp; // "read" work memory size query result + work_mem = (double*)R_alloc(work_size, sizeof(double)); + + // in case of an error, return error code stored in `info` + if (info) { return info; } + } + + // actual SVD computation + F77_CALL(dgesdd)( + "S", &p, &q, A, &ldA, d, U, &ldU, Vt, &ldVt, + work_mem, &work_size, i_work, &info + FCONE); + + return info; +} + +// Singular Valued Decomposition (R binding) +// @note educational purpose, same as `La.svd` +extern SEXP R_svd(SEXP A) { + // Check if we got a real-valued matrix + if (!isMatrix(A) || !isReal(A)) { + error("Require a real-valued matrix"); + } + + // extract matrix dimensions + int* dims = INTEGER(coerceVector(getAttrib(A, R_DimSymbol), INTSXP)); + int p = dims[0]; // = nrow(A) + int q = dims[1]; // = ncol(A) + int m = p < q ? p : q; // = min(p, q) = min(dim(A)) + + // Check if dimensions are not degenerate + if (p < 1 || q < 1) { + error("Expected positive matrix dimensions"); + } + + // create R objects to store the SVD result `A = U diag(d) V^T` + SEXP U = PROTECT(allocMatrix(REALSXP, p, m)); + SEXP d = PROTECT(allocVector(REALSXP, m)); + SEXP Vt = PROTECT(allocMatrix(REALSXP, m, q)); + + // Call C SVD routine + int info = svd( + p, q, // nrow(A), ncol(A) + REAL(A), p, // mem. addr. of A, ldA (leading dimension of A) + REAL(d), // mem. addr. of d + REAL(U), p, // mem. addr. of U, ldU (leading dimension of U) + REAL(Vt), m, // mem. addr. of V^T, ldVt (leading dimension of V^T) + 0, -1, 0 // work mem. pointer, work mem. size and int work mem + ); // set to `0, -1, 0` to indicate "unknown" + + // Check LAPACK info + if (info) { + error("error code %d from LAPACK routine 'dgesdd'", info); + } + + // Create R list containint SVD components + SEXP result = PROTECT(allocVector(VECSXP, 3)); + SEXP names = PROTECT(allocVector(STRSXP, 3)); + SET_VECTOR_ELT(result, 0, d); + SET_VECTOR_ELT(result, 1, U); + SET_VECTOR_ELT(result, 2, Vt); + SET_STRING_ELT(names, 0, mkChar("d")); + SET_STRING_ELT(names, 1, mkChar("u")); + SET_STRING_ELT(names, 2, mkChar("vt")); + setAttrib(result, R_NamesSymbol, names); + + // Release created objects to the garbage collector + UNPROTECT(5); + + return result; +} diff --git a/tensorPredictors/src/ttm.c b/tensorPredictors/src/ttm.c index f913764..1b28c8f 100644 --- a/tensorPredictors/src/ttm.c +++ b/tensorPredictors/src/ttm.c @@ -1,104 +1,48 @@ -// The need for `USE_FC_LEN_T` and `FCONE` is due to a Fortran character string -// to C incompatibility. See: Writing R Extentions: 6.6.1 Fortran character strings -#define USE_FC_LEN_T -// Disables remapping of R API functions from `Rf_` or `R_` -#define R_NO_REMAP -#include -#include -#include -#ifndef FCONE -#define FCONE -#endif +#include "ttm.h" -/** - * Tensor Times Matrix a.k.a. Mode Product - * - * @param A multi-dimensional array - * @param B matrix - * @param m mode index (1-indexed) - * @param op boolean if `B` is transposed - */ -extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) { +void ttm( + const int transB, const int mode, + const int* dimA, const int ordA, const int nrowB, const int ncolB, + const double alpha, + const double* A, + const double* B, const int ldB, // TODO: ldB is IGNORED!!! + const double beta, + double* C +) { - // get zero indexed mode - const int mode = Rf_asInteger(m) - 1; - - // get dimension attribute of A - SEXP dim = Rf_getAttrib(A, R_DimSymbol); - - // operation on `B` (transposed or not) - const int trans = Rf_asLogical(op); - - // as well as `B`s dimensions - const int nrow = Rf_nrows(B); - const int ncol = Rf_ncols(B); - - // validate mode (mode must be smaller than the nr of dimensions) - if (mode < 0 || Rf_length(dim) <= mode) { - Rf_error("Illegal mode"); - } - - // and check if B is a matrix of non degenetate size - if (!Rf_isMatrix(B)) { - Rf_error("Expected a matrix as second argument"); - } - if (!Rf_nrows(B) || !Rf_ncols(B)) { - Rf_error("Zero dimension detected"); - } - - // check matching of dimensions - if (INTEGER(dim)[mode] != (trans ? nrow : ncol)) { - Rf_error("Dimension missmatch"); - } - - // calc nr of response elements `prod(dim(A)[-mode]) * ncol(A)` (size of C), - int sizeC = 1; - // and the strides + // Strides are the "leading" and "trailing" dimensions of the matricized + // tensor `A` in the following matrix-matrix multiplications // `stride[0] <- prod(dim(A)[seq_len(mode - 1)])` // `stride[1] <- dim(A)[mode]` // `stride[2] <- prod(dim(A)[-seq_len(mode)])` - int stride[3] = {1, INTEGER(dim)[mode], 1}; - for (int i = 0; i < Rf_length(dim); ++i) { - int size = INTEGER(dim)[i]; - // check for non-degenetate dimensions - if (!size) { - Rf_error("Zero dimension detected"); - } - sizeC *= (i == mode) ? (trans ? ncol : nrow) : size; - stride[0] *= (i < mode) ? size : 1; - stride[2] *= (i > mode) ? size : 1; + int stride[3] = {1, dimA[mode], 1}; + for (int i = 0; i < ordA; ++i) { + stride[0] *= (i < mode) ? dimA[i] : 1; + stride[2] *= (i > mode) ? dimA[i] : 1; } - // create response object C - SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC)); - - // raw data access pointers - double* a = REAL(A); - double* b = REAL(B); - double* c = REAL(C); - - // Tensor Times Matrix / Mode Product - const double zero = 0.0; - const double one = 1.0; if (mode == 0) { - // mode 1: (A x_1 op(B))_(1) = op(B) A_(1) as a single Matrix-Matrix - // multiplication - F77_CALL(dgemm)(trans ? "T" : "N", "N", - (trans ? &ncol : &nrow), &stride[2], &stride[1], &one, - b, &nrow, a, &stride[1], - &zero, c, (trans ? &ncol : &nrow) + // mode 1: C = alpha (A x_1 op(B))_(1) + beta C + // = alpha op(B) A_(1) + beta C + // as a single Matrix-Matrix multiplication + F77_CALL(dgemm)(transB ? "T" : "N", "N", + (transB ? &ncolB : &nrowB), &stride[2], &stride[1], &alpha, + B, &nrowB, A, &stride[1], + &beta, C, (transB ? &ncolB : &nrowB) FCONE FCONE); } else { // Other modes can be written as blocks of matrix multiplications - // (A x_m op(B))_(m)' = A_(m)' op(B)' + // C_:,:,i2 = alpha (A x_m op(B))_(m)' + beta C_:,:,i2 + // = alpha A_(m)' op(B)' + beta C_:,:,i2 for (int i2 = 0; i2 < stride[2]; ++i2) { - F77_CALL(dgemm)("N", trans ? "N" : "T", - &stride[0], (trans ? &ncol : &nrow), &stride[1], &one, - &a[i2 * stride[0] * stride[1]], &stride[0], b, &nrow, - &zero, &c[i2 * stride[0] * (trans ? ncol : nrow)], &stride[0] + F77_CALL(dgemm)("N", transB ? "N" : "T", + &stride[0], (transB ? &ncolB : &nrowB), &stride[1], &alpha, + &A[i2 * stride[0] * stride[1]], &stride[0], B, &nrowB, + &beta, &C[i2 * stride[0] * (transB ? ncolB : nrowB)], &stride[0] FCONE FCONE); } } + /* // (reference implementation) // Tensor Times Matrix / Mode Product for `op(B) == B` @@ -114,13 +58,76 @@ extern SEXP ttm(SEXP A, SEXP B, SEXP m, SEXP op) { } } */ +} + + +/** + * Tensor Times Matrix a.k.a. Mode Product + * + * @param A multi-dimensional array + * @param B matrix + * @param m mode index (1-indexed) + * @param op boolean if `B` is transposed + */ +extern SEXP R_ttm(SEXP A, SEXP B, SEXP m, SEXP op) { + + // get zero indexed mode + const int mode = Rf_asInteger(m) - 1; + + // get dimension attribute of A + SEXP dimA = Rf_getAttrib(A, R_DimSymbol); + + // operation on `B` (transposed or not) + const int transB = Rf_asLogical(op); + + // as well as `B`s dimensions + const int nrowB = Rf_nrows(B); + const int ncolB = Rf_ncols(B); + + // validate mode (mode must be smaller than the nr of dimensions) + if (mode < 0 || Rf_length(dimA) <= mode) { + Rf_error("Illegal mode"); + } + + // and check if B is a matrix of non degenetate size + if (!Rf_isMatrix(B)) { + Rf_error("Expected a matrix as second argument"); + } + if (!Rf_nrows(B) || !Rf_ncols(B)) { + Rf_error("Zero dimension detected"); + } + + // check matching of dimensions + if (INTEGER(dimA)[mode] != (transB ? nrowB : ncolB)) { + Rf_error("Dimension missmatch"); + } + + // calc nr of response elements (size of C) + // `prod(dim(C)) = prod(dim(A)[-mode]) * nrow(if(transB) t(B) else B)` + int sizeC = 1; + for (int i = 0; i < Rf_length(dimA); ++i) { + int size = INTEGER(dimA)[i]; + // check for non-degenetate dimensions + if (!size) { + Rf_error("Zero dimension detected"); + } + sizeC *= (i == mode) ? (transB ? ncolB : nrowB) : size; + } + + // create response object C + SEXP C = PROTECT(Rf_allocVector(REALSXP, sizeC)); + + // Tensor Times Matrix / Mode Product + ttm(transB, mode, + INTEGER(dimA), Rf_length(dimA), nrowB, ncolB, + 1.0, REAL(A), REAL(B), nrowB, 0.0, REAL(C)); // finally, set result dimensions - SEXP newdim = PROTECT(Rf_allocVector(INTSXP, Rf_length(dim))); - for (int i = 0; i < Rf_length(dim); ++i) { - INTEGER(newdim)[i] = (i == mode) ? (trans ? ncol : nrow) : INTEGER(dim)[i]; + SEXP dimC = PROTECT(Rf_allocVector(INTSXP, Rf_length(dimA))); + for (int i = 0; i < Rf_length(dimA); ++i) { + INTEGER(dimC)[i] = (i == mode) ? (transB ? ncolB : nrowB) : INTEGER(dimA)[i]; } - Rf_setAttrib(C, R_DimSymbol, newdim); + Rf_setAttrib(C, R_DimSymbol, dimC); // release C to the hands of the garbage collector UNPROTECT(2); diff --git a/tensorPredictors/src/ttm.h b/tensorPredictors/src/ttm.h new file mode 100644 index 0000000..93fd74e --- /dev/null +++ b/tensorPredictors/src/ttm.h @@ -0,0 +1,21 @@ +#ifndef INCLUDE_GUARD_TTM_H +#define INCLUDE_GUARD_TTM_H + +#include "R_api.h" + +/** + * Tensor Times Matrix Subroutine + * + * @attention Assumes all parameters to be correct! + */ +void ttm( + /* options */ const int transB, const int mode, + /* dims */ const int* dimA, const int ordA, const int nrowB, const int ncolB, + /* scalar */ const double alpha, + /* tensor */ const double* A, + /* matrix */ const double* B, const int ldB, + /* scalar */ const double beta, + /* tensor */ double* C +); + +#endif /* INCLUDE_GUARD_TTM_H */