This commit is contained in:
Daniel Kapla 2023-11-14 14:35:43 +01:00
parent 40132c2565
commit 90cd46e209
48 changed files with 2852 additions and 1353 deletions

View File

@ -90,10 +90,11 @@
%%% Custom operators with either one or two arguments (limits)
\makeatletter
%%% Multi-Linear Multiplication
% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
% Save first argument as \arg@one
\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
% Check for second argument
\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
% specialization for one or two arguments, both versions use saved first argument
\def\mlm@one{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
@ -101,14 +102,31 @@
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
}
% this command's single argument is the second argument of \mlm; it gobbles the `^`
\def\mlm@two^#1{\mathchoice%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
}
%%% Big Circle (Iterated Outer Product)
\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
\def\outer@one{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
}
\def\outer@two#1{\mathchoice%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
}
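% Usage sketch (illustrative only): both operators accept a single lower limit
% or a lower and an upper limit, e.g.
%   $\ten{F}_y \mlm_{k\in[r]} \mat{\alpha}_k$,  $\ten{X} \mlm_{k = 1}^{r} \mat{B}_k$,
%   $\outer{k\in[r]} \vec{\mat{A}_k}$,  $\outer{k = 1}{r} \vec{\mat{A}_k}$.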
%%% Big Kronecker Product (with overflowing limits)
% Save first argument as \arg@one
\def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
@ -134,17 +152,15 @@
\newcommand{\algorithmicbreak}{\textbf{break}}
\newcommand{\Break}{\State \algorithmicbreak}
\begin{document}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider regression and classification for \textit{general} response and tensor-valued predictors (multi-dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
This allows per-axis reductions that drastically reduce the total number of parameters for higher-order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations, and real-world examples are provided.
\end{abstract}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@ -256,23 +272,23 @@ A straightforward idea for parameter estimation is to use Gradient Descent. For
\begin{algorithm}[ht]
\caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
\begin{algorithmic}[1]
\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
\State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
\\
\State $t \leftarrow 1$
\Comment{step counter}
\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
\Comment{artificial first step}
\State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
\Comment{momentum extrapolation weights}
\\
\Repeat \Comment{repeat until convergence}
\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
\For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
\State $\delta^{(t + 1)} \leftarrow \delta$
\Break
\EndIf
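As a concrete reading of the NAGD algorithm above, the following is a minimal R sketch for a generic differentiable objective. All names are illustrative and not part of any package interface; the update step and Armijo condition are transcribed as displayed, and the momentum weight update is the standard Nesterov sequence, which is an assumption since the displayed hunk does not show that step.

# minimal sketch of Nesterov accelerated gradient with Armijo line search
nagd <- function(l, grad, theta0, c = 1e-4, delta = 1, gamma = 0.5, max.iter = 100L) {
    theta.prev <- theta0                  # artificial first step: theta^(1) = theta^(0)
    theta <- theta0
    m.prev <- 0; m.curr <- 1              # momentum extrapolation weights
    for (iter in seq_len(max.iter)) {
        # momentum extrapolation
        M <- theta + ((m.prev - 1) / m.curr) * (theta - theta.prev)
        G <- grad(M)
        # line search over delta / gamma, delta, gamma * delta, gamma^2 * delta, ...
        for (d in delta * gamma^(-1:20)) {
            theta.temp <- M + d * G       # step exactly as displayed in the pseudocode
            # Armijo condition; the squared Frobenius norm of the gradient is sum(G^2)
            if (l(theta.temp) <= l(theta.prev) - c * d * sum(G^2)) {
                delta <- d
                break
            }
        }
        theta.prev <- theta
        theta <- theta.temp
        # standard Nesterov momentum sequence (assumed; not shown in the hunk)
        m.prev <- m.curr
        m.curr <- (1 + sqrt(1 + 4 * m.curr^2)) / 2
    }
    theta
}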
@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
\caption{\label{fig:sim-normal}Simulation Normal}
\end{figure}
@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly distributed
\begin{figure}[!ht]
\centering
\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
\caption{\label{fig:sim-ising-small}Simulation Ising Small}
\end{figure}
@ -433,7 +449,7 @@ where each individual block is given by
For example, $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$, where $\ten{F}_y$ is a known tensor-valued function of the response:
\begin{displaymath}
l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
\end{displaymath}
with
\begin{align*}
@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a time
%
\d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
\end{align*}}%
The next step is to identify the Hessians from the second differentials in a similar manner as before.
{\allowdisplaybreaks\begin{align*}
@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
\qquad{\color{gray} (p \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
\qquad{\color{gray} (p_j q_j \times p)}
\\
&\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
&= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
\qquad{\color{gray} (p_j^2 \times p)}
\\
&\d^2 l(\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
\qquad{\color{gray} (p_j q_j \times p_j q_j)}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
&\qquad \begin{aligned}
\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
\qquad{\color{gray} (p_j q_j \times p_l q_l)}
\end{aligned}
\\
&\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j q_j \times p_l^2)}
\\
&\d^2 l(\mat{\Omega}_j) \\
&= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigkron{k=j-1}{1}\mat{\Omega}_k\Bigr) \\
&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
%\qquad {\color{gray} (p_j^2 \times p_j^2)}
\\
&\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a similar manner as before.
&\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
&\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
&\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
% \qquad {\color{gray} (p_j^2 \times p_l^2)}
\end{aligned}
\end{align*}}%
@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
\begin{align*}
\mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
\mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
\mathcal{I}_{1,j+r+1}
&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
\mathcal{I}_{j+1,l+1}
&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
\mathcal{I}_{j+1,l+r+1}
&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
\mathcal{I}_{j+r+1,l+r+1}
&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
\end{align*}
@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operation
\begin{theorem}\label{thm:mlm_mat}
Let $\ten{A}$ be a tensor of order $r$ with dimensions $q_1\times ... \times q_r$. Furthermore, for $k = 1, ..., r$, let $\mat{B}_k$ be matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$, it holds that
\begin{displaymath}
\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
= \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
\end{displaymath}
\end{theorem}
A well-known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
\begin{displaymath}
\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
\end{displaymath}
Here $\mat{A}$ is a matrix, that is, a tensor of order 2, and the vectorization arises as the special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
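In base R this identity is easy to check numerically; `%x%` is the Kronecker product and `c()` performs column-major vectorization:

B1 <- matrix(rnorm(2 * 3), 2, 3)
B2 <- matrix(rnorm(4 * 5), 4, 5)
A  <- matrix(rnorm(3 * 5), 3, 5)
# vec(B1 A B2') equals (B2 kron B1) vec(A)
all.equal(c(B1 %*% A %*% t(B2)), c((B2 %x% B1) %*% c(A)))  # TRUE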
@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
Let $\ten{A}$ be a tensor of order $2 r + s$, where $r > 0$ and $s \geq 0$, of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$, and for $k = 1, ..., r$ let $\mat{B}_k$ denote matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$; then
\begin{displaymath}
\t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
\end{displaymath}
\end{theorem}
A special case of the above theorem arises for tensors represented as a Kronecker product. To this end, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
\begin{displaymath}
\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
=
\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
=
\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
\end{displaymath}
For $r = 2$ this means
\begin{align*}
\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
\end{align*}
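This $r = 2$ case can also be verified numerically in base R:

A1 <- matrix(rnorm(4), 2, 2); B1 <- matrix(rnorm(4), 2, 2)
A2 <- matrix(rnorm(9), 3, 3); B2 <- matrix(rnorm(9), 3, 3)
lhs <- sum(c(A1 %x% A2) * c(B1 %x% B2))  # t(vec(A1 kron A2)) vec(B1 kron B2)
rhs <- sum(diag(crossprod(A1, B1))) * sum(diag(crossprod(A2, B2)))  # tr(A1'B1) tr(A2'B2)
all.equal(lhs, rhs)  # TRUE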
Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
\begin{displaymath}
\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
\end{displaymath}
which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$.
\todo{continue}
% Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
% \begin{displaymath}
% \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
\end{tikzpicture}
\end{center}
% \newcommand{\somedrawing}{
% \coordinate (a) at (-2,-2,-2);
% \coordinate (b) at (-2,-2,2);
% \coordinate (c) at (-2,2,-2);
% \coordinate (d) at (-2,2,2);
% \coordinate (e) at (2,-2,-2);
% \coordinate (f) at (2,-2,2);
% \coordinate (g) at (2,2,-2);
% \coordinate (h) at (2,2,2);
% \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
% \fill (a) circle (0.1cm);
% \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
% }
% \begin{figure}[ht!]
% \centering
@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example, consider two matrices $\mat{A},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Let $f$ be an $r$-times differentiable function; then
\begin{displaymath}
\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
\qquad\Leftrightarrow\qquad
\D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
\end{displaymath}
@ -1372,12 +1412,12 @@ The differentials up to the 4th are
\begin{align*}
\d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
\d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm_{k = 1}^{4} \d{t}
\end{align*}
Using the differential-to-derivative identification identity
\begin{displaymath}
\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
\qquad\Leftrightarrow\qquad
\D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
\end{displaymath}
@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluating
M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
\end{align*}
which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
\begin{align*}
H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
&= \Sigma \\

View File

@ -9,6 +9,17 @@
publisher = {[Royal Statistical Society, Wiley]}
}
@inproceedings{Nesterov1983,
title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
author = {Nesterov, Yurii Evgen'evich},
booktitle = {Doklady Akademii Nauk},
volume = {269},
number = {3},
pages = {543--547},
year = {1983},
organization= {Russian Academy of Sciences}
}
@book{StatInf-CasellaBerger2002,
title = {{Statistical Inference}},
author = {Casella, George and Berger, Roger L.},
@ -27,6 +38,20 @@
isbn = {0-471-98632-1}
}
@article{SymMatandJacobians-MagnusNeudecker1986,
title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
author = {Magnus, Jan R. and Neudecker, Heinz},
ISSN = {02664666, 14694360},
URL = {http://www.jstor.org/stable/3532421},
journal = {Econometric Theory},
number = {2},
pages = {157--190},
publisher = {Cambridge University Press},
urldate = {2023-10-03},
volume = {2},
year = {1986}
}
@book{MatrixAlgebra-AbadirMagnus2005,
title = {Matrix Algebra},
author = {Abadir, Karim M. and Magnus, Jan R.},
@ -83,6 +108,31 @@
doi = {10.1080/01621459.2015.1093944}
}
@article{FisherLectures-Cook2007,
author = {Cook, R. Dennis},
journal = {Statistical Science},
month = {02},
number = {1},
pages = {1--26},
publisher = {The Institute of Mathematical Statistics},
title = {{Fisher Lecture: Dimension Reduction in Regression}},
volume = {22},
year = {2007},
doi = {10.1214/088342306000000682}
}
@article{asymptoticMLE-BuraEtAl2018,
author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and Smucler, E. and Sued, M.},
title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
journal = {Statistics},
volume = {52},
number = {5},
pages = {1005--1024},
year = {2018},
publisher = {Taylor \& Francis},
doi = {10.1080/02331888.2018.1467420},
}
@article{tsir-DingCook2015,
author = {Shanshan Ding and R. Dennis Cook},
title = {Tensor sliced inverse regression},
@ -117,3 +167,106 @@
isbn = {978-94-015-8196-7},
doi = {10.1007/978-94-015-8196-7_17}
}
@book{asymStats-van_der_Vaart1998,
title = {Asymptotic Statistics},
author = {{van der Vaart}, A.W.},
year = {1998},
publisher = {Cambridge University Press},
series = {Cambridge Series in Statistical and Probabilistic Mathematics},
isbn = {0-521-49603-9}
}
@book{measureTheory-Kusolitsch2011,
title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
subtitle = {{E}ine {E}inf{\"u}hrung},
author = {Kusolitsch, Norbert},
series = {Springer-Lehrbuch},
year = {2011},
publisher = {Springer Vienna},
isbn = {978-3-7091-0684-6},
doi = {10.1007/978-3-7091-0685-3}
}
@book{optimMatrixMani-AbsilEtAl2007,
title = {{Optimization Algorithms on Matrix Manifolds}},
author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
year = {2007},
publisher = {Princeton University Press},
isbn = {9780691132983},
note = {Full Online Text \url{https://press.princeton.edu/absil}}
}
@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
year = {2020},
publisher = {Springer International Publishing},
address = {Cham},
pages = {261--313},
isbn = {978-3-030-31351-7},
doi = {10.1007/978-3-030-31351-7_9}
}
@book{introToSmoothMani-Lee2012,
title = {Introduction to Smooth Manifolds},
author = {Lee, John M.},
year = {2012},
series = {Graduate Texts in Mathematics},
publisher = {Springer New York},
doi = {10.1007/978-1-4419-9982-5}
}
@book{introToRiemannianMani-Lee2018,
title = {Introduction to Riemannian Manifolds},
author = {Lee, John M.},
year = {2018},
series = {Graduate Texts in Mathematics},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-319-91755-9}
}
@misc{MLEonManifolds-HajriEtAl2017,
title = {Maximum Likelihood Estimators on Manifolds},
author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
year = {2017},
series = {Lecture Notes in Computer Science},
publisher = {Springer International Publishing},
pages = {692--700},
doi = {10.1007/978-3-319-68445-1_80}
}
@article{relativity-Einstain1916,
author = {Einstein, Albert},
title = {Die Grundlage der allgemeinen Relativitätstheorie},
year = {1916},
journal = {Annalen der Physik},
volume = {354},
number = {7},
pages = {769--822},
doi = {10.1002/andp.19163540702}
}
@article{MultilinearOperators-Kolda2006,
title = {Multilinear operators for higher-order decompositions.},
author = {Kolda, Tamara Gibson},
doi = {10.2172/923081},
url = {https://www.osti.gov/biblio/923081},
place = {United States},
year = {2006},
month = {4},
type = {Technical Report}
}
@book{aufbauAnalysis-kaltenbaeck2021,
title = {Aufbau Analysis},
author = {Kaltenb\"ack, Michael},
isbn = {978-3-88538-127-3},
series = {Berliner Studienreihe zur Mathematik},
edition = {27},
year = {2021},
publisher = {Heldermann Verlag}
}

View File

@ -23,7 +23,7 @@
*
* with the parameter vector `theta` and a statistic `T` of `y`. The real valued
* parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
* `T` has the same dimensions as the parameter vector given by
*
* T(y) = vech(y y').
*
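To make the statistic concrete, here is a small R sketch with a hand-rolled `vech` (an illustration, not the package's implementation): for a binary vector `y` of length `p`, the statistic `T(y)` has `p (p + 1) / 2` entries.

vech <- function(M) M[lower.tri(M, diag = TRUE)]  # half-vectorization, column-wise
y <- c(1, 0, 1)                                   # binary vector, p = 3
vech(tcrossprod(y))                               # T(y) = vech(y y'), length 6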

View File

@ -1,204 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising"
reps <- 100 # number of simulation replications
max.iter <- 100 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
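# illustrative sanity checks (exposition only, not part of the original script):
# stopifnot(all.equal(AR(rho, 5) %*% AR.inv(rho, 5), diag(5)))  # AR.inv inverts AR
# A <- cbind(1, 1:4); stopifnot(all.equal(proj(A) %*% A, A))    # P_A A = A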
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
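# (exposition) every column packs the quadratic parameters vech(eta_y2); the
# vech positions of diagonal elements are then overwritten below with the
# observation-specific linear parameters eta_y1, the layout that
# `ising_sample` appears to expect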
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing one to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
srcfile <- attr(srcref, "srcfile")
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
}
if (isTRUE(attr(xi, "truncated"))) {
xi <- c(xi, " ...")
m <- length(xi)
}
if (!is.null(srcloc)) {
xi[m] <- paste0(xi[m], srcloc)
}
if (m > 1) {
label <- c(label, rep(substr(" ", 1L,
nchar(label, type = "w")), m - 1L))
}
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
}
}
}
globalCallingHandlers(list(
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
))
### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
### write new simulation result file
file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
# CSV header, used to ensure correct value/column mapping when writing to file
header <- outer(
c("dist.subspace", "dist.projection", "error.pred"), # measures
c("gmlm", "pca", "hopca", "tsir"), # methods
paste, sep = ".")
cat(paste0(header, collapse = ","), "\n", sep = "", file = file)
### repeated simulation
for (rep in seq_len(reps)) {
### Repetition session state info
# Stores specific session variables before starting the current
# simulation replication. This allows to log state information which
# can be used to replicate a specific simulation repetition in case of
# errors/warnings from the logs
rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using different methods
fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
max.iter = max.iter, family = "ising")
fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)
### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
B.true <- Reduce(`%x%`, rev(alphas))
B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
B.hopca <- Reduce(`%x%`, rev(fit.hopca))
B.pca <- fit.pca$rotation
B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))
# Subspace Distances: Normalized `|| P_A - P_B ||_F` where
# `P_A = A (A' A)^-1 A'` and the normalization means that with
# respect to the dimensions of `A, B` the subspace distance is in the
# range `[0, 1]`.
dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)
# Projection Distances: Spectral norm (2-norm) `|| P_A - P_B ||_2`.
dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
dist.projection.hopca <- dist.projection(B.true, B.hopca)
dist.projection.pca <- dist.projection(B.true, B.pca)
dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
### Prediction Errors: (using a new independent sample of size `N`)
c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
# centered model matrix of vectorized `X`s
vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
P.true <- proj(B.true)
error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
error.pred.pca <- norm(P.true - proj(B.pca), "2")
error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")
# format estimation/prediction errors and write to file and console
line <- paste0(Map(get, header), collapse = ",")
cat(line, "\n", sep = "", file = file, append = TRUE)
# report progress
cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
which(n == sample.sizes), length(sample.sizes), rep, reps))
}
}

View File

@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
n <- 100 # sample sizes `n`
N <- 2000 # validation set size
p <- c(4, 4) # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
all.equal(p, c(4, 4))
all.equal(q, c(2, 2))
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
# linspace <- seq(-1, 1, length.out = pj)
# matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
linspace <- seq(-1, 1, length.out = pj)
matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
# qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
dim(Fy) <- c(2, 2, n)
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)
### Fit data using GMLM with logging
# logger to log iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
B.est <- Reduce(`%x%`, rev(alphas.est))
err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))
if (iter > 0) { cat("\033[9A") }
cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
iter,
log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
dist.subspace(B.true, B.est, normalize = TRUE)
),
"\033[2mMSE eta1\033[0m",
mean((eta1 - eta1.est)^2),
"\033[2msubspace distances of alphas\033[0m",
do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
"\033[2mFrob. norm of Omega differences\033[0m",
do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
sep = "\n "
)
}
# now call the GMLM fitting routine with performance profiling
tryCatch({
system.time( # profvis::profvis(
fit.gmlm <- GMLM.default(
X, Fy, sample.axis = sample.axis, max.iter = max.iter,
family = "ising", logger = logger
)
)
}, error = function(ex) {
print(ex)
traceback()
})

View File

@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)
# seed = first 8 digits of Euler's constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")
### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100 # number of simulation replications
max.iter <- 1000 # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
N <- 2000 # validation set size
p <- c(2, 3) # predictor dimensions
q <- c(1, 1) # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1
# initial consistency checks
stopifnot(exprs = {
r == 2
length(p) == r
all(q == 1)
})
### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
A / (1 - rho^2)
}
# projection matrix `P_A` as a projection onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
data <- linspace <- seq(-1, 1, len = pj)
for (k in (seq_len(qj - 1) + 1)) {
data <- c(data, linspace^k)
}
matrix(data, nrow = pj)
}, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
# generate response (sample axis is last axis)
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
Fy <- array(sin(pi * y), dim = c(q, n))
# natural exponential family parameters
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
# conditional Ising model parameters
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
ltri <- which(lower.tri(eta_y2, diag = TRUE))
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
theta_y[diagonal, ] <- eta_y1
# Sample X from conditional distribution
X <- apply(theta_y, 2, ising_sample, n = 1)
# convert (from compressed integer vector) to array data
attr(X, "p") <- prod(p)
X <- t(as.mvbmatrix(X))
dim(X) <- c(p, n)
storage.mode(X) <- "double"
# permute axes to move the sample axis to the requested position
if (sample.axis != r + 1L) {
perm <- integer(r + 1L)
perm[sample.axis] <- r + 1L
perm[-sample.axis] <- seq_len(r)
X <- aperm(X, perm)
Fy <- aperm(Fy, perm)
}
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}
### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# current simulation repetition session information, allowing one to reproduce problems
exceptionLogger <- function(ex) {
# retrieve current simulation repetition information
rep.info <- get("rep.info", envir = .GlobalEnv)
# setup an error log file with the same name as `file`
log <- paste0(rep.info$file, ".log")
# Write (append) condition message with reproduction info to the log
cat("\n\n------------------------------------------------------------\n",
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
rep.info$file, rep.info$n, rep.info$rep,
paste(rep.info$.Random.seed, collapse = ","),
as.character.error(ex)
), sep = "", file = log, append = TRUE)
# add Traceback (see: `traceback()`, which the following is adapted from)
n <- length(x <- .traceback(NULL, max.lines = -1L))
if (n == 0L) {
cat("No traceback available", "\n", file = log, append = TRUE)
} else {
for (i in 1L:n) {
xi <- x[[i]]
label <- paste0(n - i + 1L, ": ")
m <- length(xi)