commit 90cd46e209 (parent 40132c2565), contained in branch: dev
LaTeX/GMLM.tex (218 lines changed)
@@ -90,10 +90,11 @@
 %%% Custom operators with either one or two arguments (limits)
 \makeatletter
 %%% Multi-Linear Multiplication
+% $\mlm_{k \in [r]}$ or $\mlm_{k = 1}^{r}$ (lower limit MUST be the first!)
 % Save first argument as \arg@one
-\def\mlm#1{\def\arg@one{#1}\futurelet\next\mlm@i}
+\def\mlm_#1{\def\arg@one{#1}\futurelet\next\mlm@i}
 % Check for second argument
-\def\mlm@i{\ifx\next\bgroup\expandafter\mlm@two\else\expandafter\mlm@one\fi}
+\def\mlm@i{\ifx\next^\expandafter\mlm@two\else\expandafter\mlm@one\fi}
 % specialization for one or two arguments, both versions use saved first argument
 \def\mlm@one{\mathchoice%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
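The patched \mlm hooks into TeX's native limit syntax instead of a brace-delimited second argument. A minimal usage sketch (illustrative only; it assumes the \scalerel-based definitions above and the manuscript's \ten and \mat macros):

    $\ten{F}_y \mlm_{k \in [r]} \mat{\alpha}_k$    % single lower limit
    $\ten{F}_y \mlm_{k = 1}^{r} \mat{\alpha}_k$    % lower and upper limit (lower MUST come first)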
@@ -101,14 +102,31 @@
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}}%
 }
-% this commands single argument is the second argument of \mlm
-\def\mlm@two#1{\mathchoice%
+% this command's single argument is the second argument of \mlm, it gobbles the `^`
+\def\mlm@two^#1{\mathchoice%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
 {\operatorname*{\scalerel*{\times}{\bigotimes}}_{\arg@one}^{#1}}%
 }
+
+%%% Big Circle (Iterated Outer Product)
+\def\outer#1{\def\arg@one{#1}\futurelet\next\outer@i}
+\def\outer@i{\ifx\next\bgroup\expandafter\outer@two\else\expandafter\outer@one\fi}
+\def\outer@one{\mathchoice%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}}%
+}
+\def\outer@two#1{\mathchoice%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\makebox[0pt][c]{$\scriptstyle \arg@one$}}^{\makebox[0pt][c]{$\scriptstyle #1$}}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
+{\operatorname*{\scalerel*{\circ}{\bigotimes}}_{\arg@one}^{#1}}%
+}
+
 
 %%% Big Kronecker Product (with overflowing limits)
 % Save first argument as \arg@one
 \def\bigkron#1{\def\arg@one{#1}\futurelet\next\bigkron@i}
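Note the asymmetry this commit introduces: unlike the patched \mlm, the \outer and \bigkron operators keep brace-delimited limit arguments, e.g. (illustrative)

    $\outer{k = 1}{r} \vec\mat{A}_k$      % iterated outer product over k = 1, ..., r
    $\bigkron{k = r}{1} \mat{\Omega}_k$   % Kronecker product with overflowing limits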
@@ -134,17 +152,15 @@
 \newcommand{\algorithmicbreak}{\textbf{break}}
 \newcommand{\Break}{\State \algorithmicbreak}
 
 
 \begin{document}
 
 \maketitle
-
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \begin{abstract}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the respons.
-This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leveraes the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
+We consider regression and classification for \textit{general} response and tensor-valued predictors (multi dimensional arrays) and propose a \textit{novel formulation} for sufficient dimension reduction. Assuming the distribution of the tensor-valued predictors given the response is in the quadratic exponential family, we model the natural parameter as a multi-linear function of the response.
+This allows per-axis reductions that drastically reduce the total number of parameters for higher order tensor-valued predictors. We derive maximum likelihood estimates for the sufficient dimension reduction and a computationally efficient estimation algorithm which leverages the tensor structure. The performance of the method is illustrated via simulations and real world examples are provided.
 \end{abstract}
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -256,23 +272,23 @@ A straight forward idea for parameter estimation is to use Gradient Descent. For
 \begin{algorithm}[ht]
 \caption{\label{alg:NAGD}Nesterov Accelerated Gradient Descent}
 \begin{algorithmic}[1]
-\State Objective: $l(\Theta \mid \ten{X}, \ten{F}_y)$
+\State Objective: $l(\mat{\theta} \mid \ten{X}, \ten{F}_y)$
 \State Arguments: Order $r + 1$ tensors $\ten{X}$, $\ten{F}$
-\State Initialize: Parameters $\Theta^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
+\State Initialize: Parameters $\mat{\theta}^{(0)}$, $0 < c, \delta^{(1)}$ and $0 < \gamma < 1$
 \\
 \State $t \leftarrow 1$
 \Comment{step counter}
-\State $\mat{\Theta}^{(1)} \leftarrow \mat{\Theta}^{(0)}$
+\State $\mat{\theta}^{(1)} \leftarrow \mat{\theta}^{(0)}$
 \Comment{artificial first step}
 \State $(m^{(0)}, m^{(1)}) \leftarrow (0, 1)$
 \Comment{momentum extrapolation weights}
 \\
 \Repeat \Comment{repeat until convergence}
-\State $\mat{M} \leftarrow \mat{\Theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\Theta}^{(t)} - \mat{\Theta}^{(t - 1)})$ \Comment{momentum extrapolation}
+\State $\mat{M} \leftarrow \mat{\theta}^{(t)} + \frac{m^{(t - 1)} - 1}{m^{(t)}}(\mat{\theta}^{(t)} - \mat{\theta}^{(t - 1)})$ \Comment{momentum extrapolation}
 \For{$\delta = \gamma^{-1}\delta^{(t)}, \delta^{(t)}, \gamma\delta^{(t)}, \gamma^2\delta^{(t)}, ...$} \Comment{Line Search}
-\State $\mat{\Theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\Theta}} l(\mat{M})$
-\If{$l(\mat{\Theta}_{\text{temp}}) \leq l(\mat{\Theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\Theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
-\State $\mat{\Theta}^{(t + 1)} \leftarrow \mat{\Theta}_{\text{temp}}$
+\State $\mat{\theta}_{\text{temp}} \leftarrow \mat{M} + \delta \nabla_{\mat{\theta}} l(\mat{M})$
+\If{$l(\mat{\theta}_{\text{temp}}) \leq l(\mat{\theta}^{(t - 1)}) - c \delta \|\nabla_{\mat{\theta}} l(\mat{M})\|_F^2$} \Comment{Armijo Condition}
+\State $\mat{\theta}^{(t + 1)} \leftarrow \mat{\theta}_{\text{temp}}$
 \State $\delta^{(t + 1)} \leftarrow \delta$
 \Break
 \EndIf
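As a rough executable companion to the algorithm in this hunk, the following R sketch implements the same momentum extrapolation and Armijo backtracking line search for *minimizing* a generic smooth objective f. This is illustrative only: the function name `nagd`, the minimization sign convention, and the Nesterov weight update are our assumptions, since the hunk itself does not show the paper's weight update or full stopping rule.

    # Nesterov accelerated gradient with Armijo backtracking (minimization sketch)
    nagd <- function(f, grad, theta0, delta = 1, gamma = 0.5, c = 1e-4, max.iter = 100L) {
        theta.old <- theta0
        theta <- theta0                   # artificial first step
        m.old <- 0; m <- 1                # momentum extrapolation weights
        for (t in seq_len(max.iter)) {
            # momentum extrapolation point
            M <- theta + ((m.old - 1) / m) * (theta - theta.old)
            g <- grad(M)
            # line search: gamma^-1 delta, delta, gamma delta, gamma^2 delta, ...
            for (d in (delta / gamma) * gamma^(0:20)) {
                theta.tmp <- M - d * g
                if (f(theta.tmp) <= f(theta) - c * d * sum(g^2)) {  # Armijo condition
                    theta.old <- theta; theta <- theta.tmp; delta <- d
                    break
                }
            }
            m.old <- m; m <- (1 + sqrt(1 + 4 * m^2)) / 2  # standard Nesterov weights
        }
        theta
    }
    # usage example: converges to the minimizer c(1, 1)
    nagd(function(x) sum((x - 1)^2), function(x) 2 * (x - 1), c(0, 0))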
@@ -399,7 +415,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
 
 \begin{figure}[!ht]
 \centering
-\includegraphics[width = \textwidth]{sim-normal-20221012.png}
+\includegraphics[width = \textwidth]{images/sim-normal-20221012.png}
 \caption{\label{fig:sim-normal}Simulation Normal}
 \end{figure}
 
@@ -407,7 +423,7 @@ $\ten{X}$ is a $2\times 3\times 5$ tensor, $y\in\{1, 2, ..., 6\}$ uniformly dist
 
 \begin{figure}[!ht]
 \centering
-\includegraphics[width = \textwidth]{sim-ising-small-20221012.png}
+\includegraphics[width = \textwidth]{images/sim-ising-small-20221012.png}
 \caption{\label{fig:sim-ising-small}Simulation Ising Small}
 \end{figure}
 
@@ -433,7 +449,7 @@ where each individual block is given by
 For example $\mathcal{J}_{1,2} = -\frac{\partial l(\Theta)}{\partial\t{(\vec{\overline{\ten{\eta}}_1})}\partial(\vec{\mat{\alpha}_1})}$ and $\mathcal{J}_{2r + 1, 2r + 1} = -\H l(\mat{\Omega}_r)$.
 We start by restating the log-likelihood for a given single observation $(\ten{X}, \ten{Y})$ where $\ten{F}_y$ is given by
 \begin{displaymath}
-l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
+l(\mat{\Theta}) = \log h(\ten{X}) + c_1\big\langle\overline{\ten{\eta}}_1 + \ten{F}_{y}\mlm_{k\in[r]}\mat{\alpha}_k, \ten{X}\big\rangle + c_2\big\langle\ten{X}\mlm_{k\in[r]}\mat{\Omega}_k, \ten{X}\big\rangle - b(\mat{\eta}_{y})
 \end{displaymath}
 with
 \begin{align*}
@@ -496,10 +512,10 @@ Now we rewrite all the above differentials to extract the derivatives one at a t
 %
 \d l(\mat{\Omega}_j) &= c_2\Big(\langle\ten{X}\times_{k\in[r]\backslash j}\mat{\Omega}_k\times_j\d\mat{\Omega}_j, \ten{X}\rangle - \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)\Big) \\
 &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
-&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
-&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
-&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
-&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
+&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} \\
+&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\vec{\d\mat{\Omega}_j} \\
+&= c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}\vec{\d\mat{\Omega}_j} \\
+&\qquad\Rightarrow \D l(\mat{\Omega}_j) = c_2 \t{\vec\Bigl((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2))\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\Bigr)}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
 \end{align*}}%
 The next step is to identify the Hessians from the second differentials in a similar manner as before.
 {\allowdisplaybreaks\begin{align*}
@@ -509,62 +525,62 @@ The next step is to identify the Hessians from the second differentials in a sim
 \qquad{\color{gray} (p \times p)}
 \\
 &\d^2 l(\overline{\ten{\eta}}_1, \mat{\alpha}_j) \\
-&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
-&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
-&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
+&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
+&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{p,(j)}\mat{H}_{1,1}\vec{\d\overline{\ten{\eta}}_1} \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})(\ten{H}_{1,1})_{((j, [r]\backslash j))}\vec{\d\overline{\ten{\eta}}_1} \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))} \vec{\d\overline{\ten{\eta}}_1} \\
+&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1^2 ( (\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) \ttt_{[r]\backslash j} \ten{H}_{1,1})_{((2, 1))}
 \qquad{\color{gray} (p_j q_j \times p)}
 \\
 &\d^2 l(\overline{\ten{\eta}}_1, \mat{\Omega}_j) \\
 &= -c_1 c_2 \t{\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big)}\mat{H}_{2,1}\vec{\d\overline{\ten{\eta}}_1} \\
 &= -c_1 c_2 \t{\Big[ \t{(\ten{H}_{2,1})_{([2r])}} \vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \Big]} \vec{\d\overline{\ten{\eta}}_1} \\
-&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
-&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
-&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
+&= -c_1 c_2 \t{\vec( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})} )} \vec{\d\overline{\ten{\eta}}_1} \\
+&= -c_1 c_2 \t{(\vec{\d\mat{\Omega}_j})} ( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)} \vec{\d\overline{\ten{\eta}}_1} \\
+&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\overline{\ten{\eta}}_1)}}} = -c_1 c_2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} )_{(j)}
 \qquad{\color{gray} (p_j^2 \times p)}
 \\
 &\d^2 l(\mat{\alpha}_j) \\
-&= -c_1^2 \t{\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
-&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
-&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
-&= -c_1^2 \t{[((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
-&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
+&= -c_1^2 \t{\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j)}\mat{H}_{1,1}\vec(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j) \\
+&= -c_1^2 \t{\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)})}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}\vec(\d\mat{\alpha}_j(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}) \\
+&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}\mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(j)}}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
+&= -c_1^2 \t{[((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j}]}(\ten{H}_{1,1})_{((j,[r]\backslash j),(j,[r]\backslash j))}((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)_{(j)}\otimes\mat{I}_{p_j})\vec{\d\mat{\alpha}_j} \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})}[ ((\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k)\ttt_{[r]\backslash j}\ten{H}_{1,1})\ttt_{[r]\backslash j + 2,[r]\backslash j}(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k) ]_{((2,1))}\vec{\d\mat{\alpha}_j} \\
+&\qquad\Rightarrow \H l(\mat{\alpha}_j) = -c_1^2 \Big[ \left(\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)\ttt_{[r]\backslash j}\ten{H}_{1,1}\right)\ttt_{[r]\backslash j + 2}^{[r]\backslash j}\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \Big]_{((2,1))}
 \qquad{\color{gray} (p_j q_j \times p_j q_j)}
 \\
 &\d^2 l(\mat{\alpha}_j, \mat{\alpha}_l) \\
-&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
-&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
-&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
-&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
-&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
-&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
-&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
-&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
+&\overset{\makebox[0pt]{\scriptsize $j < l$}}{=} -c_1^2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)}\mat{H}_{1,1}\vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\times_l\d\mat{\alpha}_l\Bigr) \\
+&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \vec\Bigl(\ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\times_l\d\mat{\alpha}_l\Bigr) \\
+&= -c_1^2 \t{\vec\biggl( \d\mat{\alpha}_j \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)} \biggr)} \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \vec\biggl( \d\mat{\alpha}_l \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)} \biggr) \\
+&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \vec\biggl( (\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j) \Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))} \biggr) \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big)_{(j)}\otimes\mat{I}_{p_j} \biggr) \mat{K}_{\mat{p},(j)}\mat{H}_{1,1}\t{\mat{K}_{\mat{p},(l)}} \biggl( \t{\Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big)_{(l)}}\otimes\mat{I}_{p_l} \biggr)\vec{\d\mat{\alpha}_l} \\
+&\qquad + c_1 (\t{(\vec{\ten{X}})} - \D b(\mat{\eta}_{y,1})) \t{\mat{K}_{\mat{p},((j,l))}} \biggl( \t{\Big( \ten{F}_y\mlm_{k\in[r]\backslash\{j,l\}}\mat{\alpha}_k \Big)_{((j,l))}}\otimes\mat{I}_{p_j p_l} \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
+&\qquad + c_1 \vec\biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr) \vec{(\d\mat{\alpha}_l\otimes\d\mat{\alpha}_j)} \\
+&= -c_1^2 \t{(\vec{\d\mat{\alpha}_j})} \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \vec{\d\mat{\alpha}_l} \\
+&\qquad + c_1 \t{(\vec{\d\mat{\alpha}_j})} \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3))} \vec{\d\mat{\alpha}_l} \\
 &\qquad \begin{aligned}
 \Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\alpha}_l})}} &=
--c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
-&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
+-c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
+&\qquad + c_1 \biggl( (\ten{X} - \ten{D}_1) \ttt_{[r]\backslash\{j,l\}} \Big( \ten{F}_y\mlm_{k\neq j,l}\mat{\alpha}_k \Big) \biggr)_{((1,3) + [[j > l]])}
 \qquad{\color{gray} (p_j q_j \times p_l q_l)}
 \end{aligned}
 \\
 &\d^2 l(\mat{\alpha}_j, \mat{\Omega}_l) \\
-&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
-&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
-&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
-&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
-&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
-&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
+&= -c_1 c_2 \t{\vec\Bigl(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\times_j\d\mat{\alpha}_j\Bigr)} \mat{H}_{1,2} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
+&= -c_1 c_2 \t{\vec\biggl(\d\mat{\alpha}_j\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}\biggr)}\mat{K}_{\mat{p},(j)} \t{(\ten{H}_{2,1})_{([2r])}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
+&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \vec\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\times_l\t{(\vec{\d\mat{\Omega}_l})}\Bigr) \\
+&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl(\t{\Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big)_{(j)}}\otimes\mat{I}_{p_j}\biggr) \mat{K}_{\mat{p},(j)} \t{\Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr)_{([r])}}\vec{\d\mat{\Omega}_l} \\
+&= -c_1 c_2 \t{(\vec{\d\mat{\alpha}_j})}\biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)} \vec{\d\mat{\Omega}_l} \\
+&\qquad\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\alpha}_j})\t{\partial(\vec{\mat{\Omega}_l})}} = -c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{(r + 2, 1)}\mat{D}_{p_l}\t{\mat{D}_{p_l}}
 % \qquad {\color{gray} (p_j q_j \times p_l^2)}
 \\
 &\d^2 l(\mat{\Omega}_j) \\
 &= -c_2^2 \t{\vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr)} \t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}} \vec\Bigl(\bigkron{k = r}{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigkron{k=l-1}{1}\mat{\Omega}_k\Bigr) \\
-&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
-&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
-&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
+&= -c_2^2 \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})}\times_j\t{(\vec{\d\mat{\Omega}_j})}\times_{j + r}\t{(\vec{\d\mat{\Omega}_j})} \\
+&= -c_2^2 \t{(\vec{\d\mat{\Omega}_j})} \biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])} \vec{\d\mat{\Omega}_j} \\
+&\qquad\Rightarrow \H l(\mat{\Omega}_j) = -c_2^2 \mat{D}_{p_j}\t{\mat{D}_{p_j}}\biggl( \ten{R}_{[2r],[2r]+2r}(\ten{H}_{2,2})\mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})}\mlm_{\substack{k + r\\k\in[r]\backslash j}}\t{(\vec{\mat{\Omega}_k})} \biggr)_{([r])}\mat{D}_{p_j}\t{\mat{D}_{p_j}}
 %\qquad {\color{gray} (p_j^2 \times p_j^2)}
 \\
 &\d^2 l(\mat{\Omega}_j, \mat{\Omega}_l) \\
@@ -573,13 +589,13 @@ The next step is to identify the Hessians from the second differentials in a sim
 &\qquad\qquad - c_2 \D b(\mat{\eta}_{y,2})\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
 &= c_2 \t{(\vec{\ten{X}}\otimes\vec{\ten{X}} - (\ten{D}_2)_{([2r])})} \vec\Bigl(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{l}\otimes\bigotimes_{k = l - 1}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_{j}\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Bigr) \\
 &\qquad - c_2^2 \t{\vec\!\Big(\bigotimes_{k = r}^{l + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_l\otimes\bigotimes_{k=l-1}^{1}\mat{\Omega}_k\Big)}\t{(\ten{H}_{2,2})_{([2r],[2r]+2r)}}\vec\!\Big(\bigotimes_{k = r}^{j + 1}\mat{\Omega}_k\otimes\d\mat{\Omega}_j\otimes\bigotimes_{k=j-1}^{1}\mat{\Omega}_k\Big) \\
-&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
-&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
-&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
-&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
+&= c_2 (\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
+&\qquad - c_2^2 \ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})} \times_j \t{(\vec{\d\mat{\Omega}_j})} \times_l \t{(\vec{\d\mat{\Omega}_l})} \\
+&= c_2 \t{(\vec{\d\mat{\Omega}_j})}\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
+&\qquad - c_2^2 \t{(\vec{\d\mat{\Omega}_j})}\Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\vec{\d\mat{\Omega}_l} \\
 &\qquad \begin{aligned}\Rightarrow \frac{\partial l}{\partial(\vec{\mat{\Omega}_j})\t{\partial(\vec{\mat{\Omega}_l})}} &=
-\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
-&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
+\mat{D}_{p_j}\t{\mat{D}_{p_j}}\Big[c_2\Big((\ten{X}\otimes\ten{X} - \ten{R}_{[2r]}(\ten{D}_2)) \mlm_{k\neq j,l}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
+&\qquad -c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}\Big]\mat{D}_{p_l}\t{\mat{D}_{p_l}}
 % \qquad {\color{gray} (p_j^2 \times p_l^2)}
 \end{aligned}
 \end{align*}}%
@@ -612,15 +628,15 @@ and for every block holds $\mathcal{I}_{j, l} = \t{\mathcal{I}_{l, j}}$. The ind
 \begin{align*}
 \mathcal{I}_{1,1} &= c_1^2 (\ten{H}_{1,1})_{([r])} \\
 \mathcal{I}_{1,j+1} % = \E\partial_{\vec{\overline{\ten{\eta}}_1}}\partial_{\t{(\vec{\mat{\alpha}_j})}} l(\mat{\Theta})\mid \ten{Y} = y
-&= c_1^2 \Big[\Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
+&= c_1^2 \Big[\Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1}\Big]_{((2, 1))} \\
 \mathcal{I}_{1,j+r+1}
-&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
+&= c_1 c_2 \Big( \ten{R}_{[2r]}(\ten{H}_{2,1}) \mlm_{k\in[r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \Big)_{(j)} \\
 \mathcal{I}_{j+1,l+1}
-&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
+&= c_1^2 \biggl( \Big[ \Big(\ten{F}_y\mlm_{k\in[r]\backslash j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j} \ten{H}_{1,1} \Big] \ttt_{[r]\backslash l + 2}^{[r]\backslash l} \Big(\ten{F}_y\mlm_{k\in[r]\backslash l}\mat{\alpha}_k\Big) \biggr)_{((2,1))} \\
 \mathcal{I}_{j+1,l+r+1}
-&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
+&= c_1 c_2 \biggl( \Big(\ten{F}_y\mlm_{k\neq j}\mat{\alpha}_k\Big) \ttt_{[r]\backslash j}^{[r]\backslash j + r} \Bigl(\ten{R}_{[2r]}(\ten{H}_{2,1})\mlm_{k\neq l}\t{(\vec{\mat{\Omega}_k})}\Bigr) \biggr)_{((r + 2, 1))} \\
 \mathcal{I}_{j+r+1,l+r+1}
-&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
+&= c_2^2 \Big(\ten{R}_{([2r],[2r]+2r)}(\ten{H}_{2,2}) \mlm_{k\in [r]\backslash j}\t{(\vec{\mat{\Omega}_k})} \mlm_{\substack{k + r \\ k\in [r]\backslash l}}\t{(\vec{\mat{\Omega}_k})}\Big)_{(j)}
 \end{align*}
 
 
@@ -633,14 +649,14 @@ The \emph{matricization} is a generalization of the \emph{vectorization} operati
 \begin{theorem}\label{thm:mlm_mat}
 Let $\ten{A}$ be a tensor of order $r$ with the dimensions $q_1\times ... \times q_r$. Furthermore, let for $k = 1, ..., r$ be $\mat{B}_k$ matrices of dimensions $p_k\times q_k$. Then, for any $(\mat{i}, \mat{j})\in\perm{r}$ holds
 \begin{displaymath}
-\Big(\ten{A}\mlm{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
+\Big(\ten{A}\mlm_{k\in[r]}\mat{B}_k\Big)_{(\mat{i}, \mat{j})}
 = \Big(\bigotimes_{k = \len{\mat{i}}}^{1}\mat{B}_{\mat{i}_k}\Big) \ten{A}_{(\mat{i}, \mat{j})} \Big(\bigotimes_{k = \len{\mat{j}}}^{1}\t{\mat{B}_{\mat{j}_k}}\Big).
 \end{displaymath}
 \end{theorem}
 
 A well-known special case of Theorem~\ref{thm:mlm_mat} is the relation between vectorization and the Kronecker product
 \begin{displaymath}
-\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{A}.
+\vec(\mat{B}_1\mat{A}\t{\mat{B}_2}) = (\mat{B}_2\otimes\mat{B}_1)\vec{\mat{A}}.
 \end{displaymath}
 Here we have a matrix, a.k.a. an order 2 tensor, and the vectorization as a special case of the matricization $\vec{\mat{A}} = \mat{A}_{((1, 2))}$ with $(\mat{i}, \mat{j}) = ((1, 2), ())\in\perm{2}$. Note that the empty Kronecker product is $1$ by convention.
 
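The vec-Kronecker special case is easy to spot-check numerically; a minimal R sanity check (illustrative, not part of the manuscript):

    set.seed(1)
    A  <- matrix(rnorm(2 * 3), 2, 3)
    B1 <- matrix(rnorm(4 * 2), 4, 2)   # p1 x q1
    B2 <- matrix(rnorm(5 * 3), 5, 3)   # p2 x q2
    # vec(B1 A B2') == (B2 kron B1) vec(A)
    all.equal(c(B1 %*% A %*% t(B2)), c((B2 %x% B1) %*% c(A)))  # TRUE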
@@ -713,13 +729,37 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
 Let $\ten{A}$ be a $2 r + s$ tensor where $r > 0$ and $s \geq 0$ of dimensions $q_1\times ... \times q_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$ and for $k = 1, ..., r$ denote with $\mat{B}_k$ matrices of dimensions $q_{\mat{i}_{k}}\times q_{\mat{i}_{r + k}}$, then
 \begin{displaymath}
 \t{\ten{A}_{(\mat{i})}}\vec{\bigotimes_{k = r}^{1}}\mat{B}_k
-\equiv \ten{R}_{\mat{i}}(\ten{A})\times_{k\in[r]}\t{(\vec{\mat{B}_k})}.
+\equiv \ten{R}_{\mat{i}}(\ten{A})\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}.
 \end{displaymath}
 \end{theorem}
 
+A special case of the above theorem is given for tensors represented as a Kronecker product. For this, let $\mat{A}_k, \mat{B}_k$ be arbitrary matrices of size $p_k\times q_k$ for $k = 1, ..., r$ and $\ten{A} = \reshape{(\mat{p}, \mat{q})}\bigotimes_{k = r}^{1}\mat{A}_k$. Then Theorem~\ref{thm:mtvk_rearrange} specializes to
+\begin{displaymath}
+\t{\Big( \vec\bigotimes_{k = r}^{1}\mat{A}_k \Big)}\Big( \vec\bigotimes_{k = r}^{1}\mat{B}_k \Big)
+=
+\prod_{k = 1}^{r}\tr(\t{\mat{A}_k}\mat{B}_k)
+=
+\Big( \outer{k = 1}{r}\vec\mat{A}_k \Big)\mlm_{k = 1}^r \t{(\vec\mat{B}_k)}.
+\end{displaymath}
+In the case of $r = 2$ this means
+\begin{align*}
+\t{\vec(\mat{A}_1\otimes \mat{A}_2)}\vec(\mat{B}_1\otimes \mat{B}_2)
+&= \t{(\vec{\mat{B}_1})}(\vec{\mat{A}_1})\t{(\vec{\mat{A}_2})}(\vec{\mat{B}_2}) \\
+&= [(\vec{\mat{A}_1})\circ(\vec{\mat{A}_2})]\times_1\t{(\vec{\mat{B}_1})}\times_2\t{(\vec{\mat{B}_2})}.
+\end{align*}
+
+Another interesting special case is for two tensors $\ten{A}_1, \ten{A}_2$ of the same order
+\begin{displaymath}
+\t{(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2})}\vec{\bigotimes_{k = r}^{1}\mat{B}_k}
+= (\ten{A}_1\otimes\ten{A}_2)\mlm_{k = 1}^r\t{(\vec{\mat{B}_k})}
+\end{displaymath}
+which uses the relation $\ten{R}_{[2r]}^{(\mat{p}, \mat{q})}(\vec{\ten{A}_1}\otimes\vec{\ten{A}_2}) = \ten{A}_1\otimes\ten{A}_2$.
+
+\todo{continue}
 
 
 
 % Next we define a specific axis permutation and reshaping operation on tensors which will be helpful in expressing some derivatives. Let $\ten{A}$ be a $2 r + s$ tensor with $r > 0$ and $s\geq 0$ of dimensions $p_1\times ... \times p_{2 r + s}$. Furthermore, let $(\mat{i}, \mat{j})\in\perm{2 r + s}$ such that $\len{\mat{i}} = 2 r$. The operation $\ten{R}_{\mat{i}}$ is defined as
 % \begin{displaymath}
 % \ten{R}_{\mat{i}} = \reshape{(p_1 p_{r + 1}, ..., p_r p_{2 r}, p_{2 r + 1}, ..., p_{r + s})}(\ten{A}_{(\pi(\mat{i}))})
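The $r = 2$ trace identity added in this hunk can likewise be checked numerically in R (an illustrative sketch; it relies on the fact that the elementwise product of Kronecker products factors):

    set.seed(1)
    A1 <- matrix(rnorm(6), 2, 3); B1 <- matrix(rnorm(6), 2, 3)
    A2 <- matrix(rnorm(4), 2, 2); B2 <- matrix(rnorm(4), 2, 2)
    # <vec(A1 kron A2), vec(B1 kron B2)> == tr(A1' B1) * tr(A2' B2)
    lhs <- sum((A1 %x% A2) * (B1 %x% B2))
    rhs <- sum(A1 * B1) * sum(A2 * B2)   # tr(A' B) = sum(A * B)
    all.equal(lhs, rhs)  # TRUE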
@@ -880,19 +920,19 @@ The operation $\ten{R}_{\mat{i}}(\ten{A})$ results in a tensor of order $r + s$
 \end{tikzpicture}
 \end{center}
 
-\newcommand{\somedrawing} {
-\coordinate (a) at (-2,-2,-2);
-\coordinate (b) at (-2,-2,2);
-\coordinate (c) at (-2,2,-2);
-\coordinate (d) at (-2,2,2);
-\coordinate (e) at (2,-2,-2);
-\coordinate (f) at (2,-2,2);
-\coordinate (g) at (2,2,-2);
-\coordinate (h) at (2,2,2);
-\draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
-\fill (a) circle (0.1cm);
-\fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
-}
+% \newcommand{\somedrawing}{
+%     \coordinate (a) at (-2,-2,-2);
+%     \coordinate (b) at (-2,-2,2);
+%     \coordinate (c) at (-2,2,-2);
+%     \coordinate (d) at (-2,2,2);
+%     \coordinate (e) at (2,-2,-2);
+%     \coordinate (f) at (2,-2,2);
+%     \coordinate (g) at (2,2,-2);
+%     \coordinate (h) at (2,2,2);
+%     \draw (a)--(b) (a)--(c) (a)--(e) (b)--(d) (b)--(f) (c)--(d) (c)--(g) (d)--(h) (e)--(f) (e)--(g) (f)--(h) (g)--(h);
+%     \fill (a) circle (0.1cm);
+%     \fill (d) ++(0.1cm,0.1cm) rectangle ++(-0.2cm,-0.2cm);
+% }
 
 % \begin{figure}[ht!]
 %     \centering
@@ -1153,7 +1193,7 @@ where $\circ$ is the outer product. For example considure two matrices $\mat{A},
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 Let $f$ be an $r$ times differentiable function, then
 \begin{displaymath}
-\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm{k = 1}{r} \vec{\d\mat{X}}
+\d^r f(\mat{X}) = \ten{F}(\mat{X})\mlm_{k = 1}^{r} \vec{\d\mat{X}}
 \qquad\Leftrightarrow\qquad
 \D^r f(\mat{X}) \equiv \frac{1}{r!}\sum_{\sigma\in\perm{r}}\ten{F}(\mat{X})_{(\sigma)}
 \end{displaymath}
@@ -1372,12 +1412,12 @@ The differentials up to the 4'th are
 \begin{align*}
 \d M(t) &= M(t) \t{(\mu + \Sigma t)} \d{t} \\
 \d^2 M(t) &= \t{\d{t}} M(t) (\mu + \Sigma t)\t{(\mu + \Sigma t)} \d{t} \\
-\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm{k = 1}{3} \d{t} \\
-\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma)]\mlm{k = 1}{4} \d{t}
+\d^3 M(t) &= M(t) (\mu + \Sigma t)\circ [(\mu + \Sigma t)\circ (\mu + \Sigma t) + 3\Sigma]\mlm_{k = 1}^{3} \d{t} \\
+\d^4 M(t) &= M(t) (\mu + \Sigma t)\circ(\mu + \Sigma t)\circ[(\mu + \Sigma t)\circ(\mu + \Sigma t) + 6\Sigma]\mlm_{k = 1}^{4} \d{t}
 \end{align*}
 Using the differential-to-derivative identification identity
 \begin{displaymath}
-\d^m f(t) = \ten{F}(t)\mlm{k = 1}{m}\d{t}
+\d^m f(t) = \ten{F}(t)\mlm_{k = 1}^{m}\d{t}
 \qquad\Leftrightarrow\qquad
 \D^m f(t) \equiv \frac{1}{m!}\sum_{\sigma\in\mathfrak{S}_m}\ten{F}(t)_{(\sigma)}
 \end{displaymath}
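The first differential of the normal moment generating function $M(t) = \exp(\t{t}\mu + \frac{1}{2}\t{t}\Sigma t)$ is easy to verify with a finite difference; an illustrative R spot check (the values of mu, Sigma, t0 are arbitrary choices, not from the manuscript):

    mu <- c(1, -1); Sigma <- matrix(c(2, 0.5, 0.5, 1), 2)
    M  <- function(t) exp(sum(t * mu) + 0.5 * sum(t * (Sigma %*% t)))  # normal MGF
    t0 <- c(0.3, 0.2); h <- 1e-6; e1 <- c(1, 0)
    (M(t0 + h * e1) - M(t0)) / h             # numeric directional derivative
    M(t0) * sum((mu + Sigma %*% t0) * e1)    # closed form: d M(t) = M(t) (mu + Sigma t)' dt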
@@ -1388,7 +1428,7 @@ in conjunction with simplifications gives the first four raw moments by evaluati
 M_3 = \D^3 M(t)|_{t = 0} &= \mu\circ\mu\circ\mu + \mu\circ\Sigma + (\mu\circ\Sigma)_{((2), (1), (3))} + \Sigma\circ\mu \\
 M_4 = \D^4 M(t)|_{t = 0} &\equiv \frac{1}{4!}\sum_{\sigma\in\mathfrak{S}_4} (\mu\circ\mu\circ\Sigma + \Sigma\circ\Sigma + \Sigma\circ\mu\circ\mu)_{(\sigma)}
 \end{align*}
-which leads to the centered moments (which are also the covariances of the sufficient statistic $t(X)$)
+which leads to the centered moments (which are also the covariance of the sufficient statistic $t(X)$)
 \begin{align*}
 H_{1,1} &= \cov(t_1(X)\mid Y = y) \\
 &= \Sigma \\
LaTeX/main.bib (153 lines changed)
@@ -9,6 +9,17 @@
     publisher = {[Royal Statistical Society, Wiley]}
 }
 
+@inproceedings{Nesterov1983,
+    title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$},
+    author = {Nesterov, Yurii Evgen'evich},
+    booktitle = {Doklady Akademii Nauk},
+    volume = {269},
+    number = {3},
+    pages = {543--547},
+    year = {1983},
+    organization = {Russian Academy of Sciences}
+}
+
 @book{StatInf-CasellaBerger2002,
     title = {{Statistical Inference}},
     author = {Casella, George and Berger, Roger L.},
@@ -27,6 +38,20 @@
     isbn = {0-471-98632-1}
 }
 
+@article{SymMatandJacobians-MagnusNeudecker1986,
+    title = {Symmetry, 0-1 Matrices and Jacobians: A Review},
+    author = {Magnus, Jan R. and Neudecker, Heinz},
+    ISSN = {02664666, 14694360},
+    URL = {http://www.jstor.org/stable/3532421},
+    journal = {Econometric Theory},
+    number = {2},
+    pages = {157--190},
+    publisher = {Cambridge University Press},
+    urldate = {2023-10-03},
+    volume = {2},
+    year = {1986}
+}
+
 @book{MatrixAlgebra-AbadirMagnus2005,
     title = {Matrix Algebra},
     author = {Abadir, Karim M. and Magnus, Jan R.},
@@ -83,6 +108,31 @@
     doi = {10.1080/01621459.2015.1093944}
 }
 
+@article{FisherLectures-Cook2007,
+    author = {Cook, R. Dennis},
+    journal = {Statistical Science},
+    month = {02},
+    number = {1},
+    pages = {1--26},
+    publisher = {The Institute of Mathematical Statistics},
+    title = {{Fisher Lecture: Dimension Reduction in Regression}},
+    volume = {22},
+    year = {2007},
+    doi = {10.1214/088342306000000682}
+}
+
+@article{asymptoticMLE-BuraEtAl2018,
+    author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued},
+    title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models},
+    journal = {Statistics},
+    volume = {52},
+    number = {5},
+    pages = {1005--1024},
+    year = {2018},
+    publisher = {Taylor \& Francis},
+    doi = {10.1080/02331888.2018.1467420}
+}
+
 @article{tsir-DingCook2015,
     author = {Shanshan Ding and R. Dennis Cook},
     title = {Tensor sliced inverse regression},
@@ -117,3 +167,106 @@
     isbn = {978-94-015-8196-7},
     doi = {10.1007/978-94-015-8196-7_17}
 }
+
+@book{asymStats-van_der_Vaart1998,
+    title = {Asymptotic Statistics},
+    author = {{van der Vaart}, A.W.},
+    year = {1998},
+    publisher = {Cambridge University Press},
+    series = {Cambridge Series in Statistical and Probabilistic Mathematics},
+    isbn = {0-521-49603-9}
+}
+
+@book{measureTheory-Kusolitsch2011,
+    title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie},
+    subtitle = {{E}ine {E}inf{\"u}hrung},
+    author = {Kusolitsch, Norbert},
+    series = {Springer-Lehrbuch},
+    year = {2011},
+    publisher = {Springer Vienna},
+    isbn = {978-3-7091-0684-6},
+    doi = {10.1007/978-3-7091-0685-3}
+}
+
+@book{optimMatrixMani-AbsilEtAl2007,
+    title = {{Optimization Algorithms on Matrix Manifolds}},
+    author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.},
+    year = {2007},
+    publisher = {Princeton University Press},
+    isbn = {9780691132983},
+    note = {Full Online Text \url{https://press.princeton.edu/absil}}
+}
+
+@Inbook{geomMethodsOnLowRankMat-Uschmajew2020,
+    author = {Uschmajew, Andr{\'e} and Vandereycken, Bart},
+    editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas},
+    title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds},
+    bookTitle = {Handbook of Variational Methods for Nonlinear Geometric Data},
+    year = {2020},
+    publisher = {Springer International Publishing},
+    address = {Cham},
+    pages = {261--313},
+    isbn = {978-3-030-31351-7},
+    doi = {10.1007/978-3-030-31351-7_9}
+}
+
+@book{introToSmoothMani-Lee2012,
+    title = {Introduction to Smooth Manifolds},
+    author = {Lee, John M.},
+    year = {2012},
+    series = {Graduate Texts in Mathematics},
+    publisher = {Springer New York},
+    doi = {10.1007/978-1-4419-9982-5}
+}
+
+@book{introToRiemannianMani-Lee2018,
+    title = {Introduction to Riemannian Manifolds},
+    author = {Lee, John M.},
+    year = {2018},
+    series = {Graduate Texts in Mathematics},
+    publisher = {Springer International Publishing},
+    doi = {10.1007/978-3-319-91755-9}
+}
+
+@misc{MLEonManifolds-HajriEtAl2017,
+    title = {Maximum Likelihood Estimators on Manifolds},
+    author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick},
+    year = {2017},
+    journal = {Lecture Notes in Computer Science},
+    publisher = {Springer International Publishing},
+    pages = {692--700},
+    doi = {10.1007/978-3-319-68445-1_80}
+}
+
+@article{relativity-Einstain1916,
+    author = {Einstein, Albert},
+    title = {Die Grundlage der allgemeinen Relativitätstheorie},
+    year = {1916},
+    journal = {Annalen der Physik},
+    volume = {354},
+    number = {7},
+    pages = {769--822},
+    doi = {10.1002/andp.19163540702}
+}
+
+@article{MultilinearOperators-Kolda2006,
+    title = {Multilinear operators for higher-order decompositions.},
+    author = {Kolda, Tamara Gibson},
+    doi = {10.2172/923081},
+    url = {https://www.osti.gov/biblio/923081},
+    place = {United States},
+    year = {2006},
+    month = {4},
+    type = {Technical Report}
+}
+
+@book{aufbauAnalysis-kaltenbaeck2021,
+    title = {Aufbau Analysis},
+    author = {Kaltenb\"ack, Michael},
+    isbn = {978-3-88538-127-3},
+    series = {Berliner Studienreihe zur Mathematik},
+    edition = {27},
+    year = {2021},
+    publisher = {Heldermann Verlag}
+}
@@ -23,7 +23,7 @@
  *
  * with the parameter vector `theta` and a statistic `T` of `y`. The real valued
  * parameter vector `theta` is of dimension `p (p + 1) / 2` and the statistic
- * `T` has the same dimensions as a binary vector given by
+ * `T` has the same dimensions as the parameter vector given by
  *
  *     T(y) = vech(y y').
  *
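For concreteness, the statistic from the comment above can be computed in R as follows (an illustrative sketch; `vech` here is the usual column-major half-vectorization):

    y   <- c(1L, 0L, 1L)                        # binary vector, p = 3
    yyt <- tcrossprod(y)                        # y y', a 3 x 3 matrix
    Ty  <- yyt[lower.tri(yyt, diag = TRUE)]     # vech(y y'), length p (p + 1) / 2 = 6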
sim/ising.R (deleted, 204 lines)
@ -1,204 +0,0 @@
|
|||
library(tensorPredictors)
|
||||
library(mvbernoulli)
|
||||
|
||||
set.seed(161803399, "Mersenne-Twister", "Inversion", "Rejection")
|
||||
|
||||
### simulation configuration
|
||||
file.prefix <- "sim-ising"
|
||||
reps <- 100 # number of simulation replications
|
||||
max.iter <- 100 # maximum number of iterations for GMLM
|
||||
sample.sizes <- c(100, 200, 300, 500, 750) # sample sizes `n`
|
||||
N <- 2000 # validation set size
|
||||
p <- c(4, 4) # preditor dimensions (ONLY 4 by 4 allowed!)
|
||||
q <- c(2, 2) # response dimensions (ONLY 2 by 2 allowed!)
|
||||
r <- length(p)
|
||||
# parameter configuration
|
||||
rho <- -0.55
|
||||
c1 <- 1
|
||||
c2 <- 1
|
||||
|
||||
# initial consistency checks
|
||||
stopifnot(exprs = {
|
||||
r == 2
|
||||
all.equal(p, c(4, 4))
|
||||
all.equal(q, c(2, 2))
|
||||
})
|
||||
|
||||
### small helpers
|
||||
# 270 deg matrix layout rotation (90 deg clockwise)
|
||||
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
|
||||
# Auto-Regression Covariance Matrix
|
||||
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
|
||||
# Inverse of the AR matrix
|
||||
AR.inv <- function(rho, dim) {
|
||||
A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
|
||||
A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
|
||||
A / (1 - rho^2)
|
||||
}
|
||||
# projection matrix `P_A` as a projection onto the span of `A`
|
||||
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))
|
||||
|
||||
### setup Ising parameters (to get reasonable parameters)
|
||||
eta1 <- 0
|
||||
alphas <- Map(function(pj, qj) { # qj ignored, its 2
|
||||
linspace <- seq(-1, 1, length.out = pj)
|
||||
matrix(c(linspace, linspace^2), pj, 2)
|
||||
}, p, q)
|
||||
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
|
||||
|
||||
# data sampling routine
|
||||
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
|
||||
# generate response (sample axis is last axis)
|
||||
y <- runif(n, -1, 1) # Y ~ U[-1, 1]
|
||||
Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
|
||||
dim(Fy) <- c(2, 2, n)
|
||||
|
||||
# natural exponential family parameters
|
||||
eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
|
||||
eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))
|
||||
|
||||
# conditional Ising model parameters
|
||||
theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
|
||||
ltri <- which(lower.tri(eta_y2, diag = TRUE))
|
||||
diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
|
||||
theta_y[diagonal, ] <- eta_y1
|
||||
|
||||
# Sample X from conditional distribution
|
||||
X <- apply(theta_y, 2, ising_sample, n = 1)
|
||||
# convert (from compressed integer vector) to array data
|
||||
attr(X, "p") <- prod(p)
|
||||
X <- t(as.mvbmatrix(X))
|
||||
dim(X) <- c(p, n)
|
||||
storage.mode(X) <- "double"
|
||||
|
||||
# permute axis to requested get the sample axis
|
||||
if (sample.axis != r + 1L) {
|
||||
perm <- integer(r + 1L)
|
||||
perm[sample.axis] <- r + 1L
|
||||
perm[-sample.axis] <- seq_len(r)
|
||||
X <- aperm(X, perm)
|
||||
Fy <- aperm(Fy, perm)
|
||||
}
|
||||
|
||||
list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
|
||||
}
|
||||
|
||||
### Logging Errors and Warnings
|
||||
# Register a global warning and error handler for logging warnings/errors with
|
||||
# current simulation repetition session informatin allowing to reproduce problems
|
||||
exceptionLogger <- function(ex) {
|
||||
# retrieve current simulation repetition information
|
||||
rep.info <- get("rep.info", envir = .GlobalEnv)
|
||||
# setup an error log file with the same name as `file`
|
||||
log <- paste0(rep.info$file, ".log")
|
||||
# Write (append) condition message with reproduction info to the log
|
||||
cat("\n\n------------------------------------------------------------\n",
|
||||
sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
|
||||
rep.info$file, rep.info$n, rep.info$rep,
|
||||
paste(rep.info$.Random.seed, collapse = ","),
|
||||
as.character.error(ex)
|
||||
), sep = "", file = log, append = TRUE)
|
||||
# add Traceback (see: `traceback()` which the following is addapted from)
|
||||
n <- length(x <- .traceback(NULL, max.lines = -1L))
|
||||
if (n == 0L) {
|
||||
cat("No traceback available", "\n", file = log, append = TRUE)
|
||||
} else {
|
||||
for (i in 1L:n) {
|
||||
xi <- x[[i]]
|
||||
label <- paste0(n - i + 1L, ": ")
|
||||
m <- length(xi)
|
||||
srcloc <- if (!is.null(srcref <- attr(xi, "srcref"))) {
|
||||
srcfile <- attr(srcref, "srcfile")
|
||||
paste0(" at ", basename(srcfile$filename), "#", srcref[1L])
|
||||
}
|
||||
if (isTRUE(attr(xi, "truncated"))) {
|
||||
xi <- c(xi, " ...")
|
||||
m <- length(xi)
|
||||
}
|
||||
if (!is.null(srcloc)) {
|
||||
xi[m] <- paste0(xi[m], srcloc)
|
||||
}
|
||||
if (m > 1) {
|
||||
label <- c(label, rep(substr(" ", 1L,
|
||||
nchar(label, type = "w")), m - 1L))
|
||||
}
|
||||
cat(paste0(label, xi), sep = "\n", file = log, append = TRUE)
|
||||
}
|
||||
}
|
||||
}
|
||||
globalCallingHandlers(list(
|
||||
message = exceptionLogger, warning = exceptionLogger, error = exceptionLogger
|
||||
))


### for every sample size
start <- format(Sys.time(), "%Y%m%dT%H%M")
for (n in sample.sizes) {
    ### write new simulation result file
    file <- paste0(paste(file.prefix, start, n, sep = "-"), ".csv")
    # CSV header, used to ensure correct value/column mapping when writing to file
    header <- outer(
        c("dist.subspace", "dist.projection", "error.pred"),  # measures
        c("gmlm", "pca", "hopca", "tsir"),                    # methods
        paste, sep = ".")
    cat(paste0(header, collapse = ","), "\n", sep = "", file = file)

    ### repeated simulation
    for (rep in seq_len(reps)) {
        ### Repetition session state info
        # Stores specific session variables before starting the current
        # simulation replication. This allows logging state information which
        # can be used to replicate a specific simulation repetition in case of
        # errors/warnings reported in the logs.
        rep.info <- list(n = n, rep = rep, file = file, .Random.seed = .Random.seed)

        ### sample (training) data
        c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)

        ### Fit data using different methods
        fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
            max.iter = max.iter, family = "ising")
        fit.hopca <- HOPCA(X, npc = q, sample.axis = sample.axis)
        fit.pca <- prcomp(mat(X, sample.axis), rank. = prod(q))
        fit.tsir <- NA # TSIR(X, y, q, sample.axis = sample.axis)

        ### Compute Reductions `B.*` where `B.*` spans the reduction subspaces
        B.true <- Reduce(`%x%`, rev(alphas))
        B.gmlm <- with(fit.gmlm, Reduce(`%x%`, rev(alphas)))
        B.hopca <- Reduce(`%x%`, rev(fit.hopca))
        B.pca <- fit.pca$rotation
        B.tsir <- NA # Reduce(`%x%`, rev(fit.tsir))

        # Subspace Distances: normalized `|| P_A - P_B ||_F` where
        # `P_A = A (A' A)^-1 A'` and the normalization ensures that, regardless
        # of the dimensions of `A, B`, the subspace distance lies in the range
        # `[0, 1]`.
        dist.subspace.gmlm <- dist.subspace(B.true, B.gmlm, normalize = TRUE)
        dist.subspace.hopca <- dist.subspace(B.true, B.hopca, normalize = TRUE)
        dist.subspace.pca <- dist.subspace(B.true, B.pca, normalize = TRUE)
        dist.subspace.tsir <- NA # dist.subspace(B.true, B.tsir, normalize = TRUE)

        # Projection Distances: spectral norm (2-norm) `|| P_A - P_B ||_2`.
        dist.projection.gmlm <- dist.projection(B.true, B.gmlm)
        dist.projection.hopca <- dist.projection(B.true, B.hopca)
        dist.projection.pca <- dist.projection(B.true, B.pca)
        dist.projection.tsir <- NA # dist.projection(B.true, B.tsir)
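
        # Both measures compare the projections `P(A) = A (A'A)^-1 A'` onto the
        # spans of the true and estimated reductions: `dist.subspace` uses the
        # (normalized) Frobenius norm, `dist.projection` the spectral norm.
        # Minimal sketch, assuming `dist.projection` accepts single-column
        # matrices: for orthogonal spans the distance is maximal, e.g.
        #   dist.projection(diag(2)[, 1, drop = FALSE], diag(2)[, 2, drop = FALSE]) # 1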

        ### Prediction Errors: (using a new independent sample of size `N`)
        c(X, Fy, y, sample.axis) %<-% sample.data(N, eta1, alphas, Omegas)
        # centered model matrix of the vectorized `X`s
        vecX <- scale(mat(X, sample.axis), center = TRUE, scale = FALSE)
        P.true <- proj(B.true)
        error.pred.gmlm <- norm(P.true - proj(B.gmlm), "2")
        error.pred.hopca <- norm(P.true - proj(B.hopca), "2")
        error.pred.pca <- norm(P.true - proj(B.pca), "2")
        error.pred.tsir <- NA # norm(P.true - proj(B.tsir), "2")

        # format estimation/prediction errors and write to file and console
        line <- paste0(Map(get, header), collapse = ",")
        cat(line, "\n", sep = "", file = file, append = TRUE)
        # report progress
        cat(sprintf("sample size: %d/%d - rep: %d/%d\n",
            which(n == sample.sizes), length(sample.sizes), rep, reps))
    }
}
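
# Post-processing sketch (assumptions: the CSV layout written above, and `file`
# still pointing at the last result file): per-method summaries of one result
# file could be computed along the lines of
# sim <- read.csv(file)
# colMeans(sim[startsWith(names(sim), "dist.subspace.")])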

134
sim/ising_2.R
@@ -1,134 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)

set.seed(141421356, "Mersenne-Twister", "Inversion", "Rejection")

### simulation configuration
reps <- 100         # number of simulation replications
max.iter <- 1000    # maximum number of iterations for GMLM
n <- 100            # sample size `n`
N <- 2000           # validation set size
p <- c(4, 4)        # predictor dimensions (ONLY 4 by 4 allowed!)
q <- c(2, 2)        # response dimensions (ONLY 2 by 2 allowed!)
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1

# initial consistency checks
stopifnot(exprs = {
    r == 2
    all.equal(p, c(4, 4))
    all.equal(q, c(2, 2))
})

### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
    A / (1 - rho^2)
}
# projection matrix `P_A` onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))

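# Sanity-check sketch for the helpers above: `AR.inv` is the closed-form
# (tridiagonal) inverse of the AR(1) covariance `AR`, which a quick numerical
# check confirms for the `rho` configured above.
local({
    d <- 4
    stopifnot(isTRUE(all.equal(AR(rho, d) %*% AR.inv(rho, d), diag(d))))
})
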
### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
# alphas <- Map(function(pj, qj) { # qj ignored, it's 2
#     linspace <- seq(-1, 1, length.out = pj)
#     matrix(c(linspace, rev(linspace)), pj, 2)
# }, p, q)
alphas <- Map(function(pj, qj) { # qj ignored, it's 2
    linspace <- seq(-1, 1, length.out = pj)
    matrix(c(linspace, linspace^2), pj, 2)
}, p, q)
# alphas <- Map(function(pj, qj) {
#     qr.Q(qr(matrix(rnorm(pj * qj), pj, qj)))
# }, p, q)
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))
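
# Illustration of the `alphas` built above: for `pj = 4` the two columns are an
# equispaced grid on [-1, 1] and its elementwise square (values rounded):
#   seq(-1, 1, length.out = 4)            # -1.00 -0.33  0.33  1.00
#   matrix(c(linspace, linspace^2), 4, 2) # a 4 x 2 factor matrix of rank 2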

# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is the last axis)
    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
    Fy <- rbind(cos(pi * y), sin(pi * y), -sin(pi * y), cos(pi * y))
    dim(Fy) <- c(2, 2, n)

    # natural exponential family parameters
    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))

    # conditional Ising model parameters
    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
    ltri <- which(lower.tri(eta_y2, diag = TRUE))
    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
    theta_y[diagonal, ] <- eta_y1

    # sample X from the conditional distribution
    X <- apply(theta_y, 2, ising_sample, n = 1)
    # convert from the compressed integer vector representation to array data
    attr(X, "p") <- prod(p)
    X <- t(as.mvbmatrix(X))
    dim(X) <- c(p, n)
    storage.mode(X) <- "double"

    # permute the axes such that the sample axis is the requested one
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }

    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}

### sample (training) data
c(X, Fy, y, sample.axis) %<-% sample.data(n, eta1, alphas, Omegas)

### Fit data using GMLM with logging

# logger to track the iterative change in the estimation process of GMLM
# log <- data.frame()
log.likelihood <- tensorPredictors:::make.gmlm.family("ising")$log.likelihood
B.true <- Reduce(`%x%`, rev(alphas))
logger <- function(iter, eta1.est, alphas.est, Omegas.est) {
    B.est <- Reduce(`%x%`, rev(alphas.est))

    err.alphas <- mapply(dist.subspace, alphas, alphas.est, MoreArgs = list(normalize = TRUE))
    err.Omegas <- mapply(norm, Map(`-`, Omegas, Omegas.est), MoreArgs = list(type = "F"))

    # rewrite the previous progress block in place (ANSI escape `\033[9A`
    # moves the cursor up 9 lines, `\033[2m`/`\033[0m` toggle dimmed text)
    if (iter > 0) { cat("\033[9A") }
    cat(sprintf("\n\033[2mIter: loss - dist\n\033[0m%4d: %8.3f - %8.3f",
            iter,
            log.likelihood(X, Fy, eta1.est, alphas.est, Omegas.est),
            dist.subspace(B.true, B.est, normalize = TRUE)
        ),
        "\033[2mMSE eta1\033[0m",
        mean((eta1 - eta1.est)^2),
        "\033[2msubspace distances of alphas\033[0m",
        do.call(paste, Map(sprintf, err.alphas, MoreArgs = list(fmt = "%8.3f"))),
        "\033[2mFrob. norm of Omega differences\033[0m",
        do.call(paste, Map(sprintf, err.Omegas, MoreArgs = list(fmt = "%8.3f"))),
        sep = "\n    "
    )
}

# now call the GMLM fitting routine with performance timing
tryCatch({
    system.time( # profvis::profvis(
        fit.gmlm <- GMLM.default(
            X, Fy, sample.axis = sample.axis, max.iter = max.iter,
            family = "ising", logger = logger
        )
    )
}, error = function(ex) {
    print(ex)
    traceback()
})
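
# Profiling sketch: the commented `profvis::profvis(` hint above suggests that
# `system.time(...)` can be swapped for `profvis::profvis(...)` to get an
# interactive flame graph of the fit (requires the `profvis` package), e.g.
# profvis::profvis(
#     fit.gmlm <- GMLM.default(X, Fy, sample.axis = sample.axis,
#         max.iter = max.iter, family = "ising", logger = logger)
# )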

@@ -1,207 +0,0 @@
library(tensorPredictors)
library(mvbernoulli)

# seed = first 8 digits of the Euler-Mascheroni constant gamma = 0.57721 56649 01532 86060
set.seed(57721566, "Mersenne-Twister", "Inversion", "Rejection")

### simulation configuration
file.prefix <- "sim-ising-small"
reps <- 100                                 # number of simulation replications
max.iter <- 1000                            # maximum number of iterations for GMLM
sample.sizes <- c(100, 200, 300, 500, 750)  # sample sizes `n`
N <- 2000                                   # validation set size
p <- c(2, 3)                                # predictor dimensions
q <- c(1, 1)                                # response dimensions
r <- length(p)
# parameter configuration
rho <- -0.55
c1 <- 1
c2 <- 1

# initial consistency checks
stopifnot(exprs = {
    r == 2
    length(p) == r
    all(q == 1)
})

### small helpers
# 270 deg matrix layout rotation (90 deg clockwise)
rot270 <- function(A) t(A)[, rev(seq_len(nrow(A))), drop = FALSE]
# Auto-Regression Covariance Matrix
AR <- function(rho, dim) rho^abs(outer(seq_len(dim), seq_len(dim), `-`))
# Inverse of the AR matrix
AR.inv <- function(rho, dim) {
    A <- diag(c(1, rep(rho^2 + 1, dim - 2), 1))
    A[abs(.row(dim(A)) - .col(dim(A))) == 1] <- -rho
    A / (1 - rho^2)
}
# projection matrix `P_A` onto the span of `A`
proj <- function(A) tcrossprod(A, A %*% solve(crossprod(A, A)))

### setup Ising parameters (to get reasonable parameters)
eta1 <- 0
alphas <- Map(function(pj, qj) {
    data <- linspace <- seq(-1, 1, length.out = pj)
    for (k in (seq_len(qj - 1) + 1)) {
        data <- c(data, linspace^k)
    }
    matrix(data, nrow = pj)
}, p, q)
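
# Illustration of the builder above: it stacks the polynomial columns
# `linspace^1, ..., linspace^qj`, e.g. for `pj = 3, qj = 2` it would yield
# `cbind(seq(-1, 1, length.out = 3), seq(-1, 1, length.out = 3)^2)`, i.e. the
# columns (-1, 0, 1) and (1, 0, 1). In this script `qj = 1`, so the loop body
# never runs and only the linear column remains.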
Omegas <- Map(AR, dim = p, MoreArgs = list(rho))

# data sampling routine
sample.data <- function(n, eta1, alphas, Omegas, sample.axis = r + 1L) {
    # generate response (sample axis is the last axis)
    y <- runif(n, -1, 1) # Y ~ U[-1, 1]
    Fy <- array(sin(pi * y), dim = c(q, n))

    # natural exponential family parameters
    eta_y1 <- c1 * (mlm(Fy, alphas) + c(eta1))
    eta_y2 <- c2 * Reduce(`%x%`, rev(Omegas))

    # conditional Ising model parameters
    theta_y <- matrix(rep(vech(eta_y2), n), ncol = n)
    ltri <- which(lower.tri(eta_y2, diag = TRUE))
    diagonal <- which(diag(TRUE, nrow(eta_y2))[ltri])
    theta_y[diagonal, ] <- eta_y1

    # sample X from the conditional distribution
    X <- apply(theta_y, 2, ising_sample, n = 1)
    # convert from the compressed integer vector representation to array data
    attr(X, "p") <- prod(p)
    X <- t(as.mvbmatrix(X))
    dim(X) <- c(p, n)
    storage.mode(X) <- "double"

    # permute the axes such that the sample axis is the requested one
    if (sample.axis != r + 1L) {
        perm <- integer(r + 1L)
        perm[sample.axis] <- r + 1L
        perm[-sample.axis] <- seq_len(r)
        X <- aperm(X, perm)
        Fy <- aperm(Fy, perm)
    }

    list(X = X, Fy = Fy, y = y, sample.axis = sample.axis)
}

### Logging Errors and Warnings
# Register a global warning and error handler for logging warnings/errors with
# the current simulation repetition session information, allowing to reproduce
# problems
exceptionLogger <- function(ex) {
    # retrieve current simulation repetition information
    rep.info <- get("rep.info", envir = .GlobalEnv)
    # set up an error log file with the same name as `file`
    log <- paste0(rep.info$file, ".log")
    # Write (append) the condition message with reproduction info to the log
    cat("\n\n------------------------------------------------------------\n",
        sprintf("file <- \"%s\"\nn <- %d\nrep <- %d\n.Random.seed <- c(%s)\n%s\nTraceback:\n",
            rep.info$file, rep.info$n, rep.info$rep,
            paste(rep.info$.Random.seed, collapse = ","),
            as.character.error(ex)
        ), sep = "", file = log, append = TRUE)
    # add traceback (see `traceback()`, from which the following is adapted)
    n <- length(x <- .traceback(NULL, max.lines = -1L))
    if (n == 0L) {
        cat("No traceback available", "\n", file = log, append = TRUE)
    } else {
        for (i in 1L:n) {
            xi <- x[[i]]
            label <- paste0(n - i + 1L, ": ")
            m <- length(xi)