add: EEG sim to LaTeX

Daniel Kapla 2022-05-25 18:19:08 +02:00
parent 62f1656d96
commit 6455d27386
4 changed files with 139 additions and 106 deletions


@@ -61,3 +61,12 @@
year = {2018},
doi = {10.3150/17-BEJ980}
}
@article{sdr-PfeifferKaplaBura2021,
author = {Pfeiffer, Ruth and Kapla, Daniel and Bura, Efstathia},
title = {{Least squares and maximum likelihood estimation of sufficient reductions in regressions with matrix-valued predictors}},
volume = {11},
year = {2021},
journal = {International Journal of Data Science and Analytics},
doi = {10.1007/s41060-020-00228-y}
}


@@ -358,96 +358,7 @@ Through that the gradient for all the parameter matrices is
\qquad{\color{gray}p_j\times q_j} && j = 1, ..., r.
\end{align*}
% Now we assume the residuals covariance has the form $\mat\Delta = \mat\Delta_1\otimes\mat\Delta_2$ where $\mat\Delta_1$, $\mat\Delta_2$ are $q\times q$, $p\times p$ covariance matrices, respectively. This is analogous to assuming that the $\mat{R}_i$'s are i.i.d. Matrix Normal distributed,
% \begin{displaymath}
% \mat{R}_i = \mat{X}_i - \mat\mu - \mat\beta\mat{f}_{y_i}\t{\mat\alpha} \sim \mathcal{MN}_{p\times q}(\mat 0, \mat\Delta_2, \mat\Delta_1).
% \end{displaymath}
% The density of the Matrix Normal (with mean zero) is equivalent to the density of the vectorized quantity, which is multivariate normally distributed with a Kronecker structured covariance
% \begin{align*}
% f(\mat R)
% &= \frac{1}{\sqrt{(2\pi)^{p q}|\mat\Delta|}}\exp\left(-\frac{1}{2}\t{\vec(\mat{R})} \mat\Delta^{-1}\vec(\mat{R})\right) \\
% &= \frac{1}{(2\pi)^{p q / 2}|\mat\Delta_1|^{p / 2}|\mat\Delta_2|^{q / 2}}\exp\left(-\frac{1}{2}\tr(\mat\Delta_1^{-1}\t{\mat{R}}\mat\Delta_2^{-1}\mat{R})\right)
% \end{align*}
% which, for given data, leads to the log-likelihood
% \begin{displaymath}
% l(\mat{\mu}, \mat\Delta_1, \mat\Delta_2) =
% -\frac{n p q}{2}\log 2\pi
% -\frac{n p}{2}\log|\mat{\Delta}_1|
% -\frac{n q}{2}\log|\mat{\Delta}_2|
% -\frac{1}{2}\sum_{i = 1}^n \tr(\mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i).
% \end{displaymath}
% \subsection{MLE covariance estimates}
% Our first order of business is to derive the MLE estimates of the covariance matrices $\mat\Delta_1$, $\mat\Delta_2$ (the mean estimate $\widehat{\mat\mu}$ is trivial). Therefore, we consider the differential with respect to changes in the covariance matrices,
% \begin{align*}
% \d l(\mat\Delta_1, \mat\Delta_2) &=
% -\frac{n p}{2}\d\log|\mat{\Delta}_1|
% -\frac{n q}{2}\d\log|\mat{\Delta}_2|
% -\frac{1}{2}\sum_{i = 1}^n
% \tr( (\d\mat\Delta_1^{-1})\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i
% + \mat\Delta_1^{-1}\t{\mat{R}_i}(\d\mat\Delta_2^{-1})\mat{R}_i) \\
% &=
% -\frac{n p}{2}\tr\mat{\Delta}_1^{-1}\d\mat{\Delta}_1
% -\frac{n q}{2}\tr\mat{\Delta}_2^{-1}\d\mat{\Delta}_2 \\
% &\qquad\qquad
% +\frac{1}{2}\sum_{i = 1}^n
% \tr( \mat\Delta_1^{-1}(\d\mat\Delta_1)\mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i
% + \mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}(\d\mat\Delta_2)\mat\Delta_2^{-1}\mat{R}_i) \\
% &= \frac{1}{2}\tr\!\Big(\Big(
% -n p \mat{I}_q + \mat\Delta_1^{-1}\sum_{i = 1}^n \t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i
% \Big)\mat{\Delta}_1^{-1}\d\mat{\Delta}_1\Big) \\
% &\qquad\qquad
% + \frac{1}{2}\tr\!\Big(\Big(
% -n q \mat{I}_p + \mat\Delta_2^{-1}\sum_{i = 1}^n \mat{R}_i\mat\Delta_1^{-1}\t{\mat{R}_i}
% \Big)\mat{\Delta}_2^{-1}\d\mat{\Delta}_2\Big) \overset{!}{=} 0.
% \end{align*}
% Setting $\d l$ to zero yields the MLE estimates as
% \begin{displaymath}
% \widehat{\mat{\mu}} = \overline{\mat X}{\color{gray}\quad(p\times q)}, \qquad
% \widehat{\mat\Delta}_1 = \frac{1}{n p}\sum_{i = 1}^n \t{\mat{R}_i}\widehat{\mat\Delta}_2^{-1}\mat{R}_i{\color{gray}\quad(q\times q)}, \qquad
% \widehat{\mat\Delta}_2 = \frac{1}{n q}\sum_{i = 1}^n \mat{R}_i\widehat{\mat\Delta}_1^{-1}\t{\mat{R}_i}{\color{gray}\quad(p\times p)}.
% \end{displaymath}
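% Note that $\widehat{\mat\Delta}_1$ and $\widehat{\mat\Delta}_2$ depend on each other, so these are fixed-point equations rather than closed form solutions. A simple (sketched) way to solve them is to alternate
% \begin{displaymath}
%     \widehat{\mat\Delta}_1^{(t+1)} = \frac{1}{n p}\sum_{i = 1}^n \t{\mat{R}_i}\big(\widehat{\mat\Delta}_2^{(t)}\big)^{-1}\mat{R}_i, \qquad
%     \widehat{\mat\Delta}_2^{(t+1)} = \frac{1}{n q}\sum_{i = 1}^n \mat{R}_i\big(\widehat{\mat\Delta}_1^{(t+1)}\big)^{-1}\t{\mat{R}_i},
% \end{displaymath}
% starting from, e.g., identity matrices, until the estimates stabilize (often called the ``flip-flop'' scheme for the matrix normal MLE).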
% Next, analogously to the above, we take the estimated log-likelihood and derive gradients with respect to $\mat{\alpha}$, $\mat{\beta}$.
% The estimated log-likelihood is obtained by replacing the unknown covariance matrices with their MLE estimates, leading to
% \begin{displaymath}
% \hat{l}(\mat\alpha, \mat\beta) =
% -\frac{n p q}{2}\log 2\pi
% -\frac{n p}{2}\log|\widehat{\mat{\Delta}}_1|
% -\frac{n q}{2}\log|\widehat{\mat{\Delta}}_2|
% -\frac{1}{2}\sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i)
% \end{displaymath}
% and its differential
% \begin{displaymath}
% \d\hat{l}(\mat\alpha, \mat\beta) =
% -\frac{n p}{2}\d\log|\widehat{\mat{\Delta}}_1|
% -\frac{n q}{2}\d\log|\widehat{\mat{\Delta}}_2|
% -\frac{1}{2}\sum_{i = 1}^n \d\tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i).
% \end{displaymath}
% We first take a closer look at the sum. After a bit of algebra, using $\d\mat A^{-1} = -\mat A^{-1}(\d\mat A)\mat A^{-1}$ and the definitions of $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$, the sum can be rewritten as
% \begin{displaymath}
% \frac{1}{2}\sum_{i = 1}^n \d\tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i)
% = \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\d\mat{R}_i)
% - \frac{np}{2}\d\log|\widehat{\mat\Delta}_1|
% - \frac{nq}{2}\d\log|\widehat{\mat\Delta}_2|.
% \end{displaymath}
% This means that most of the terms cancel and we get
% \begin{align*}
% \d\hat{l}(\mat\alpha, \mat\beta)
% &= \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\d\mat{R}_i) \\
% &= \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}((\d\mat\beta)\mat{f}_{y_i}\t{\mat\alpha} + \mat\beta\mat{f}_{y_i}\t{(\d\mat\alpha}))) \\
% &= \sum_{i = 1}^n \t{\vec(\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i\widehat{\mat{\Delta}}_1^{-1}\mat\alpha\t{\mat{f}_{y_i}})}\d\vec\mat\beta
% + \sum_{i = 1}^n \t{\vec(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat\beta\mat{f}_{y_i})}\d\vec\mat\alpha
% \end{align*}
% which means the gradients are
% \begin{align*}
% \nabla_{\mat\alpha}\hat{l}(\mat\alpha, \mat\beta)
% &= \sum_{i = 1}^n \widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat\beta\mat{f}_{y_i}
% = (\ten{R}\ttm[3]\widehat{\mat{\Delta}}_1^{-1}\ttm[2]\widehat{\mat{\Delta}}_2^{-1})_{(3)}\t{(\ten{F}\ttm[2]\mat\beta)_{(3)}}\\
% \nabla_{\mat\beta}\hat{l}(\mat\alpha, \mat\beta)
% &= \sum_{i = 1}^n \widehat{\mat{\Delta}}_2^{-1}\mat{R}_i\widehat{\mat{\Delta}}_1^{-1}\mat\alpha\t{\mat{f}_{y_i}}
% = (\ten{R}\ttm[3]\widehat{\mat{\Delta}}_1^{-1}\ttm[2]\widehat{\mat{\Delta}}_2^{-1})_{(2)}\t{(\ten{F}\ttm[3]\mat\alpha)_{(2)}}
% \end{align*}
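% A minimal base-R sketch of the two gradient sums above (illustrative names only; it assumes the residuals $\mat{R}_i$ and the $\mat{f}_{y_i}$ are stored in lists `Rs` and `fs`, and `Delta1`, `Delta2`, `alpha`, `beta` are plain matrices of matching dimensions):
% D1inv <- solve(Delta1); D2inv <- solve(Delta2)
% grad.alpha <- Reduce(`+`, Map(function(Ri, fi) D1inv %*% t(Ri) %*% D2inv %*% beta %*% fi, Rs, fs))
% grad.beta  <- Reduce(`+`, Map(function(Ri, fi) D2inv %*% Ri %*% D1inv %*% alpha %*% t(fi), Rs, fs))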
\paragraph{Comparison to the general case:} {\color{red} There are two main differences. First, the general case has a closed form solution for the gradient due to the explicit nature of the MLE estimate of $\widehat{\mat\Delta}$, in contrast to the mutually dependent MLE estimates $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$. On the other hand, the general case works with a covariance matrix of dramatically bigger dimensions ($p q \times p q$) compared to the two Kronecker components with dimensions $q \times q$ and $p \times p$. This means that the general case pays a large performance penalty for the dimensions of $\widehat{\mat\Delta}$, while in the Kronecker case an extra estimation step is required to determine $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$.}
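To put the dimensions into perspective: for the unreduced EEG data considered below ($p q = 256\cdot 64 = 16384$), $\widehat{\mat\Delta}$ is a $16384\times 16384$ matrix with roughly $2.7\cdot 10^{8}$ entries, whereas the two Kronecker factors together have only $256^2 + 64^2 = 69632$ entries.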
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -691,6 +602,65 @@ for $j = 1, ..., r$ until convergence or a maximum number of iterations is exceeded
% \includegraphics{hist_Ex01.png}
% \end{figure}
\begin{table}[!ht]
\centering
\begin{tabular}{*{3}{l} | *{7}{c}}
method & init & npc & Acc & Err & FPR & TPR & FNR & TNR & AUC (sd) \\ \hline
base & vlp & (3, 4) & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
new & vlp & (3, 4) & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
new & ls & (3, 4) & 0.74 & 0.26 & 0.51 & 0.88 & 0.12 & 0.49 & 0.77 (0.045) \\
ls & & (3, 4) & 0.75 & 0.25 & 0.49 & 0.88 & 0.12 & 0.51 & 0.78 (0.044) \\
ls$^*$ & & (3, 4) & 0.78 & 0.22 & 0.38 & 0.87 & 0.13 & 0.62 & 0.86 (0.034) \\ %
LSIR & & (3, 4) & 0.80 & 0.20 & 0.36 & 0.88 & 0.12 & 0.64 & 0.85 (0.036) \\
momentum & vlp & (3, 4) & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
momentum & ls & (3, 4) & 0.70 & 0.30 & 0.58 & 0.87 & 0.13 & 0.42 & 0.76 (0.046) \\
approx & vlp & (3, 4) & 0.68 & 0.32 & 0.62 & 0.86 & 0.14 & 0.38 & 0.74 (0.048) \\
approx & ls & (3, 4) & 0.73 & 0.27 & 0.53 & 0.88 & 0.12 & 0.47 & 0.78 (0.044) \\ \hline
ls & & (15, 15) & 0.75 & 0.25 & 0.47 & 0.87 & 0.13 & 0.53 & 0.78 (0.044) \\
ls$^*$ & & (15, 15) & 0.76 & 0.24 & 0.44 & 0.88 & 0.12 & 0.56 & 0.83 (0.039) \\ %
LSIR & & (15, 15) & 0.72 & 0.28 & 0.44 & 0.82 & 0.18 & 0.56 & 0.81 (0.040) \\
approx & ls & (15, 15) & 0.73 & 0.27 & 0.51 & 0.87 & 0.13 & 0.49 & 0.78 (0.044) \\ \hline
ls & & (20, 30) & 0.75 & 0.25 & 0.47 & 0.87 & 0.13 & 0.53 & 0.78 (0.044) \\
ls$^*$ & & (20, 30) & 0.77 & 0.23 & 0.36 & 0.84 & 0.16 & 0.64 & 0.79 (0.045) \\ %
LSIR & & (20, 30) & 0.79 & 0.21 & 0.36 & 0.87 & 0.13 & 0.64 & 0.83 (0.038) \\
approx & ls & (20, 30) & 0.63 & 0.37 & 1.00 & 1.00 & 0.00 & 0.00 & 0.51 (0.053) \\ \hline
HOPCA & & & 0.62 & 0.38 & 1.00 & 0.99 & 0.01 & 0.00 & 0.56 (0.053) \\
ls & & & 0.75 & 0.25 & 0.44 & 0.87 & 0.13 & 0.56 & 0.78 (0.044) \\
ls$^*$ & & & 0.68 & 0.32 & 0.51 & 0.79 & 0.21 & 0.49 & 0.66 (0.054) \\ %
approx & ls & & 0.75 & 0.25 & 0.44 & 0.87 & 0.13 & 0.56 & 0.78 (0.044) \\
\end{tabular}
\caption{\label{tab:eeg_sim}Recreation of the EEG data LOO-CV from \cite{sdr-PfeifferKaplaBura2021}, Section~7 (EEG Data) and Table~6, with the new methods. In the \emph{init} column, \emph{vlp} stands for the Van Loan and Pitsianis initialization scheme as described in \cite{sdr-PfeifferKaplaBura2021} and \emph{ls} for the initialization approach described above. The column \emph{npc} gives the number of principal components of the $(2D)^2$PCA preprocessing. Rows marked with $^*$ use the reduction $\ten{X}\times_{j\in[r]}\t{\widehat{\mat{\Delta}}_j^{-1}\widehat{\mat{\alpha}}_j}$ instead of $\ten{X}\times_{j\in[r]}\t{\widehat{\mat{\alpha}}_j}$.}
\end{table}
\begin{table}
\centering
\begin{tabular}{*{3}{l} | *{3}{r@{.}l}}
method & init & npc
& \multicolumn{2}{c}{elapsed}
& \multicolumn{2}{c}{sys.self}
& \multicolumn{2}{c}{user.self} \\ \hline
base & & (3, 4) & 0&079 & 0&402 & 0&220 \\
new & vlp & (3, 4) & 0&075 & 0&393 & 0&217 \\
new & ls & (3, 4) & 0&218 & 0&243 & 0&305 \\
ls & & (3, 4) & 0&003 & 0&006 & 0&006 \\
LSIR & & (3, 4) & 0&002 & 0&000 & 0&002 \\
momentum & vlp & (3, 4) & 0&143 & 0&595 & 0&359 \\
momentum & ls & (3, 4) & 0&297 & 0&252 & 0&385 \\
approx & vlp & (3, 4) & 0&044 & 0&240 & 0&152 \\
approx & ls & (3, 4) & 0&066 & 0&144 & 0&121 \\ \hline
ls & & (15, 15) & 0&012 & 0&059 & 0&034 \\
LSIR & & (15, 15) & 0&010 & 0&050 & 0&031 \\
approx & ls & (15, 15) & 0&813 & 3&911 & 2&325 \\ \hline
ls & & (20, 30) & 0&028 & 0&129 & 0&080 \\
LSIR & & (20, 30) & 0&030 & 0&142 & 0&100 \\
approx & ls & (20, 30) & 2&110 & 10&111 & 6&290 \\ \hline
HOPCA & & & 0&24 & 0&37 & 0&43 \\
ls & & & 1&252 & 6&215 & 3&681 \\
approx & ls & & 36&754 & 141&028 & 147&490 \\
\end{tabular}
\caption{\label{tab:eeg_time}Like Table~\ref{tab:eeg_sim}, but reporting the median per-fold run-time in seconds (\emph{elapsed}, \emph{sys.self} and \emph{user.self}).}
\end{table}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Bib and Index %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


@@ -49,6 +49,10 @@ npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1])
# setup methods for simulation (with unified API)
methods <- list(
hopca = list(
fun = function(X, Fy) list(alphas = hopca(X, npc = c(1L, 1L), 1L)),
is.applicable = function(npc) all(npc == c(256L, 64L)) # NOT reduced
),
kpir.base = list(
fun = toNewAPI(kpir.base),
is.applicable = function(npc) prod(npc) < 100
@@ -65,6 +69,13 @@ methods <- list(
fun = kpir.ls,
is.applicable = function(npc) TRUE
),
LSIR = list(
fun = function(X, Fy) {
res <- LSIR(matrix(X, nrow(X)), Fy, dim(X)[2], dim(X)[3])
list(alphas = list(res$beta, res$alpha))
},
is.applicable = function(npc) prod(npc) < 1000
),
kpir.momentum.vlp = list(
fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")),
is.applicable = function(npc) prod(npc) < 100
@@ -86,6 +97,9 @@ methods <- list(
# define AUC for reporting while simulation is running
auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1]
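# e.g. auc(c(0, 1, 1, 0), c(0.1, 0.8, 0.6, 0.4)) gives 1, since every case scores above every control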
# file to dump simulation results
log.file <- format(Sys.time(), "eeg_sim_%Y%m%dT%H%M.rds")
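# e.g. "eeg_sim_20220525T1700.rds", a timestamped name so repeated runs do not overwrite each other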
# init complete simulation as empty
sim <- NULL
for (npc in npcs) {
@@ -132,12 +146,18 @@ for (npc in npcs) {
time <- system.time(sdr <- method(X.train, c(y.train)))
# reduce training data and fit a GLM
x.train <- mlm(X.train, Map(t, sdr$alphas), modes = 2:3)
if ("Deltas" %in% names(sdr)) {
# the small deltas are `delta_j = Delta_j^-1 alpha_j`
deltas <- Map(solve, sdr$Deltas, sdr$alphas)
} else {
deltas <- sdr$alphas
}
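# rows marked (*) in the reported tables use these `delta_j` instead of the plain `alpha_j`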
x.train <- mlm(X.train, Map(t, deltas), modes = 2:3)
fit <- glm(y ~ x, family = binomial(link = "logit"),
data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train))))
# predict from reduced test data
x.test <- mlm(X.test, Map(t, sdr$alphas), modes = 2:3)
x.test <- mlm(X.test, Map(t, deltas), modes = 2:3)
y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response")
loo.cv[i, "y_pred"] <- y.pred
@@ -155,15 +175,16 @@
cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred))))
# dump simulation (after each fold) to file
saveRDS(sim, "eeg_sim.rds")
saveRDS(sim, log.file)
}
}
################################################################################
### Simulation Stats ###
################################################################################
# sim <- readRDS("eeg_sim.rds")
# sim <- readRDS("eeg_sim_<date-time>.rds")
# sim <- readRDS("eeg_sim_20220524T2100.rds")
# sim <- readRDS("eeg_sim_20220525T1700.rds")
metrics <- list(
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
@@ -191,7 +212,7 @@ do.stats <- function(group) {
data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE)
}
# Call stats for each grouping
stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ")))
stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ", drop = TRUE)))
rownames(stats) <- NULL
print(stats, digits = 2)
@@ -200,3 +221,46 @@ print(stats, digits = 2)
times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median)
print(times, digits = 2)
## stats: 2022.05.24
# method npc Acc Err FPR TPR FNR TNR AUC sd(AUC)
# 1 kpir.base (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
# 2 kpir.new.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
# 3 kpir.new.ls (3, 4) 0.74 0.26 0.51 0.88 0.12 0.49 0.77 0.045
# 4 kpir.ls (3, 4) 0.75 0.25 0.49 0.88 0.12 0.51 0.78 0.044
# (*) kpir.ls (3, 4) 0.78 0.22 0.38 0.87 0.13 0.62 0.86 0.034
# 5 kpir.momentum.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047
# 6 kpir.momentum.ls (3, 4) 0.70 0.30 0.58 0.87 0.13 0.42 0.76 0.046
# 7 kpir.approx.vlp (3, 4) 0.68 0.32 0.62 0.86 0.14 0.38 0.74 0.048
# 8 kpir.approx.ls (3, 4) 0.73 0.27 0.53 0.88 0.12 0.47 0.78 0.044
# 9 kpir.ls (15, 15) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044
# (*) kpir.ls (15, 15) 0.76 0.24 0.44 0.88 0.12 0.56 0.83 0.039
# 10 kpir.approx.ls (15, 15) 0.73 0.27 0.51 0.87 0.13 0.49 0.78 0.044
# 11 kpir.ls (20, 30) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044
# (*) kpir.ls (20, 30) 0.77 0.23 0.36 0.84 0.16 0.64 0.79 0.045
# 12 kpir.approx.ls (20, 30) 0.63 0.37 1.00 1.00 0.00 0.00 0.51 0.053
# 13 kpir.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044
# (*) kpir.ls (256, 64) 0.68 0.32 0.51 0.79 0.21 0.49 0.66 0.054
# 14 kpir.approx.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044
#
# (*) Using reduction matrices `Map(solve, sdr$Deltas, sdr$alphas)` instead
# of only `sdr$alpha`.
## times: 2022.05.24
# method npc elapsed sys.self user.self
# 1 kpir.base (3, 4) 0.079 0.402 0.220
# 2 kpir.new.vlp (3, 4) 0.075 0.393 0.217
# 3 kpir.new.ls (3, 4) 0.218 0.243 0.305
# 4 kpir.ls (3, 4) 0.003 0.006 0.006
# 5 kpir.momentum.vlp (3, 4) 0.143 0.595 0.359
# 6 kpir.momentum.ls (3, 4) 0.297 0.252 0.385
# 7 kpir.approx.vlp (3, 4) 0.044 0.240 0.152
# 8 kpir.approx.ls (3, 4) 0.066 0.144 0.121
# 9 kpir.ls (15, 15) 0.012 0.059 0.034
# 10 kpir.approx.ls (15, 15) 0.813 3.911 2.325
# 11 kpir.ls (20, 30) 0.028 0.129 0.080
# 12 kpir.approx.ls (20, 30) 2.110 10.111 6.290
# 13 kpir.ls (256, 64) 1.252 6.215 3.681
# 14 kpir.approx.ls (256, 64) 36.754 141.028 147.490


@@ -32,11 +32,6 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L,
La.svd(mcrossprod(X, mode = mode), ncol)$u
}, modes, dim(Fy)[modes])
# # Scaling of alpha, such that `tr(alpha_i' alpha_i) = tr(alpha_j' alpha_j)`
# # for `i, j = 1, ..., r`.
# traces <- unlist(Map(function(alpha) sum(alpha^2), alphas))
# alphas <- Map(`*`, prod(traces)^(1 / length(alphas)) / traces, alphas)
# Call history callback (logger) before the first iteration
if (is.function(logger)) { do.call(logger, c(0L, NA, rev(alphas))) }
@@ -52,11 +47,6 @@
# TODO: alphas[[j]] <- t(solve(mcrossprod(Z, j), mcrossprod(Z, X, j)))
}
# # Scaling of alpha, such that `tr(alpha_i' alpha_i) = tr(alpha_j' alpha_j)`
# # for `i, j = 1, ..., r`.
# traces <- unlist(Map(function(alpha) sum(alpha^2), alphas))
# alphas <- Map(`*`, prod(traces)^(1 / length(alphas)) / traces, alphas)
# Call logger (invoke history callback)
if (is.function(logger)) { do.call(logger, c(iter, NA, rev(alphas))) }