From 6455d27386a3285f777993cb34510defd7542046 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 25 May 2022 18:19:08 +0200 Subject: [PATCH] add: EEG sim to LaTeX --- LaTeX/main.bib | 9 +++ LaTeX/main.tex | 150 ++++++++++++++--------------------- simulations/eeg_sim.R | 76 ++++++++++++++++-- tensorPredictors/R/kpir_ls.R | 10 --- 4 files changed, 139 insertions(+), 106 deletions(-) diff --git a/LaTeX/main.bib b/LaTeX/main.bib index b7ac7ab..f0d0dce 100644 --- a/LaTeX/main.bib +++ b/LaTeX/main.bib @@ -61,3 +61,12 @@ year = {2018}, doi = {10.3150/17-BEJ980} } + +@article{sdr-PfeifferKaplaBura2021, + author = {Pfeiffer, Ruth and Kapla, Daniel and Bura, Efstathia}, + title = {{Least squares and maximum likelihood estimation of sufficient reductions in regressions with matrix-valued predictors}}, + volume = {11}, + year = {2021}, + journal = {International Journal of Data Science and Analytics}, + doi = {10.1007/s41060-020-00228-y} +} diff --git a/LaTeX/main.tex b/LaTeX/main.tex index 4392d1d..8a52719 100644 --- a/LaTeX/main.tex +++ b/LaTeX/main.tex @@ -358,96 +358,7 @@ Through that the gradient for all the parameter matrices is \qquad{\color{gray}p_j\times q_j} && j = 1, ..., r. \end{align*} -% Now we assume the residuals covariance has the form $\mat\Delta = \mat\Delta_1\otimes\mat\Delta_2$ where $\mat\Delta_1$, $\mat\Delta_2$ are $q\times q$, $p\times p$ covariance matrices, respectively. This is analog to the case that $\mat{R}_i$'s are i.i.d. Matrix Normal distribution -% \begin{displaymath} -% \mat{R}_i = \mat{X}_i - \mat\mu - \mat\beta\mat{f}_{y_i}\t{\mat\alpha} \sim \mathcal{MN}_{p\times q}(\mat 0, \mat\Delta_2, \mat\Delta_1). -% \end{displaymath} -% The density of the Matrix Normal (with mean zero) is equivalent to the vectorized quantities being multivariate normal distributed with Kronecker structured covariance -% \begin{align*} -% f(\mat R) -% &= \frac{1}{\sqrt{(2\pi)^{p q}|\mat\Delta|}}\exp\left(-\frac{1}{2}\t{\vec(\mat{R})} \mat\Delta^{-1}\vec(\mat{R})\right) \\ -% &= \frac{1}{(2\pi)^{p q / 2}|\mat\Delta_1|^{p / 2}|\mat\Delta_2|^{q / 2}}\exp\left(-\frac{1}{2}\tr(\mat\Delta_1^{-1}\t{\mat{R}}\mat\Delta_2^{-1}\mat{R})\right) -% \end{align*} -% which leads for given data to the log-likelihood -% \begin{displaymath} -% l(\mat{\mu}, \mat\Delta_1, \mat\Delta_2) = -% -\frac{n p q}{2}\log 2\pi -% -\frac{n p}{2}\log|\mat{\Delta}_1| -% -\frac{n q}{2}\log|\mat{\Delta}_2| -% -\frac{1}{2}\sum_{i = 1}^n \tr(\mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i). -% \end{displaymath} -% \subsection{MLE covariance estimates} -% Out first order of business is to derive the MLE estimated of the covariance matrices $\mat\Delta_1$, $\mat\Delta_2$ (the mean estimate $\widehat{\mat\mu}$ is trivial). 
Therefore, we look at the differentials with respect to changes in the covariance matrices as -% \begin{align*} -% \d l(\mat\Delta_1, \mat\Delta_2) &= -% -\frac{n p}{2}\d\log|\mat{\Delta}_1| -% -\frac{n q}{2}\d\log|\mat{\Delta}_2| -% -\frac{1}{2}\sum_{i = 1}^n -% \tr( (\d\mat\Delta_1^{-1})\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i -% + \mat\Delta_1^{-1}\t{\mat{R}_i}(\d\mat\Delta_2^{-1})\mat{R}_i) \\ -% &= -% -\frac{n p}{2}\tr\mat{\Delta}_1^{-1}\d\mat{\Delta}_1 -% -\frac{n q}{2}\tr\mat{\Delta}_2^{-1}\d\mat{\Delta}_2 \\ -% &\qquad\qquad -% +\frac{1}{2}\sum_{i = 1}^n -% \tr( \mat\Delta_1^{-1}(\d\mat\Delta_1)\mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i -% + \mat\Delta_1^{-1}\t{\mat{R}_i}\mat\Delta_2^{-1}(\d\mat\Delta_2)\mat\Delta_2^{-1}\mat{R}_i) \\ -% &= \frac{1}{2}\tr\!\Big(\Big( -% -n p \mat{I}_q + \mat\Delta_1^{-1}\sum_{i = 1}^n \t{\mat{R}_i}\mat\Delta_2^{-1}\mat{R}_i -% \Big)\mat{\Delta}_1^{-1}\d\mat{\Delta}_1\Big) \\ -% &\qquad\qquad -% + \frac{1}{2}\tr\!\Big(\Big( -% -n q \mat{I}_p + \mat\Delta_2^{-1}\sum_{i = 1}^n \mat{R}_i\mat\Delta_1^{-1}\t{\mat{R}_i} -% \Big)\mat{\Delta}_2^{-1}\d\mat{\Delta}_2\Big) \overset{!}{=} 0. -% \end{align*} -% Setting $\d l$ to zero yields the MLE estimates as -% \begin{displaymath} -% \widehat{\mat{\mu}} = \overline{\mat X}{\color{gray}\quad(p\times q)}, \qquad -% \widehat{\mat\Delta}_1 = \frac{1}{n p}\sum_{i = 1}^n \t{\mat{R}_i}\widehat{\mat\Delta}_2^{-1}\mat{R}_i{\color{gray}\quad(q\times q)}, \qquad -% \widehat{\mat\Delta}_2 = \frac{1}{n q}\sum_{i = 1}^n \mat{R}_i\widehat{\mat\Delta}_1^{-1}\t{\mat{R}_i}{\color{gray}\quad(p\times p)}. -% \end{displaymath} -% Next, analog to above, we take the estimated log-likelihood and derive gradients with respect to $\mat{\alpha}$, $\mat{\beta}$. -% The estimated log-likelihood derives by replacing the unknown covariance matrices by there MLE estimates leading to -% \begin{displaymath} -% \hat{l}(\mat\alpha, \mat\beta) = -% -\frac{n p q}{2}\log 2\pi -% -\frac{n p}{2}\log|\widehat{\mat{\Delta}}_1| -% -\frac{n q}{2}\log|\widehat{\mat{\Delta}}_2| -% -\frac{1}{2}\sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i) -% \end{displaymath} -% and its differential -% \begin{displaymath} -% \d\hat{l}(\mat\alpha, \mat\beta) = -% -\frac{n p}{2}\d\log|\widehat{\mat{\Delta}}_1| -% -\frac{n q}{2}\d\log|\widehat{\mat{\Delta}}_2| -% -\frac{1}{2}\sum_{i = 1}^n \d\tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i). -% \end{displaymath} -% We first take a closer look at the sum. After a bit of algebra using $\d\mat A^{-1} = -\mat A^{-1}(\d\mat A)\mat A^{-1}$ and the definitions of $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$ the sum can be rewritten -% \begin{displaymath} -% \frac{1}{2}\sum_{i = 1}^n \d\tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i) -% = \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\d\mat{R}_i) -% - \frac{np}{2}\d\log|\widehat{\mat\Delta}_1| -% - \frac{nq}{2}\d\log|\widehat{\mat\Delta}_2|. 
-% \end{displaymath}
-% This means that most of the derivative cancels out and we get
-% \begin{align*}
-%     \d\hat{l}(\mat\alpha, \mat\beta)
-%     &= \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\d\mat{R}_i) \\
-%     &= \sum_{i = 1}^n \tr(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}((\d\mat\beta)\mat{f}_{y_i}\t{\mat\alpha} + \mat\beta\mat{f}_{y_i}\t{(\d\mat\alpha}))) \\
-%     &= \sum_{i = 1}^n \t{\vec(\widehat{\mat{\Delta}}_2^{-1}\mat{R}_i\widehat{\mat{\Delta}}_1^{-1}\mat\alpha\t{\mat{f}_{y_i}})}\d\vec\mat\beta
-%     + \sum_{i = 1}^n \t{\vec(\widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat\beta\mat{f}_{y_i})}\d\vec\mat\alpha
-% \end{align*}
-% which means the gradients are
-% \begin{align*}
-%     \nabla_{\mat\alpha}\hat{l}(\mat\alpha, \mat\beta)
-%     &= \sum_{i = 1}^n \widehat{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\widehat{\mat{\Delta}}_2^{-1}\mat\beta\mat{f}_{y_i}
-%     = (\ten{R}\ttm[3]\widehat{\mat{\Delta}}_1^{-1}\ttm[2]\widehat{\mat{\Delta}}_2^{-1})_{(3)}\t{(\ten{F}\ttm[2]\mat\beta)_{(3)}}\\
-%     \nabla_{\mat\beta}\hat{l}(\mat\alpha, \mat\beta)
-%     &= \sum_{i = 1}^n \widehat{\mat{\Delta}}_2^{-1}\mat{R}_i\widehat{\mat{\Delta}}_1^{-1}\mat\alpha\t{\mat{f}_{y_i}}
-%     = (\ten{R}\ttm[3]\widehat{\mat{\Delta}}_1^{-1}\ttm[2]\widehat{\mat{\Delta}}_2^{-1})_{(2)}\t{(\ten{F}\ttm[3]\mat\alpha)_{(2)}}
-% \end{align*}
-
-\paragraph{Comparison to the general case:} There are two main differences, first the general case has a closed form solution for the gradient due to the explicit nature of the MLE estimate of $\widehat{\mat\Delta}$ compared to the mutually dependent MLE estimates $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$. On the other hand the general case has dramatically bigger dimensions of the covariance matrix ($p q \times p q$) compared to the two Kronecker components with dimensions $q \times q$ and $p \times p$. This means that in the general case there is a huge performance penalty in the dimensions of $\widehat{\mat\Delta}$ while in the other case an extra estimation is required to determine $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$.
+\paragraph{Comparison to the general case:} {\color{red} There are two main differences. First, the general case admits a closed-form gradient because the MLE estimate of $\widehat{\mat\Delta}$ is explicit, whereas the Kronecker factor MLE estimates $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$ are mutually dependent. Second, the covariance matrix of the general case has dramatically bigger dimensions ($p q \times p q$) than the two Kronecker components of dimensions $q \times q$ and $p \times p$.
Consequently, the general case suffers a substantial performance penalty from the size of $\widehat{\mat\Delta}$, while the Kronecker case requires an additional estimation step to determine $\widehat{\mat\Delta}_1$ and $\widehat{\mat\Delta}_2$.}

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -691,6 +602,65 @@ for $j = 1, ..., r$ until convergence or a maximum number of iterations is excee
 % \includegraphics{hist_Ex01.png}
 % \end{figure}
 
+\begin{table}[!ht]
+    \centering
+    \begin{tabular}{*{3}{l} | *{7}{c}}
+        method   & init & npc      & Acc  & Err  & FPR  & TPR  & FNR  & TNR  & AUC (sd) \\ \hline
+        base     & vlp  & (3, 4)   & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
+        new      & vlp  & (3, 4)   & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
+        new      & ls   & (3, 4)   & 0.74 & 0.26 & 0.51 & 0.88 & 0.12 & 0.49 & 0.77 (0.045) \\
+        ls       &      & (3, 4)   & 0.75 & 0.25 & 0.49 & 0.88 & 0.12 & 0.51 & 0.78 (0.044) \\
+        ls$^*$   &      & (3, 4)   & 0.78 & 0.22 & 0.38 & 0.87 & 0.13 & 0.62 & 0.86 (0.034) \\ %
+        LSIR     &      & (3, 4)   & 0.80 & 0.20 & 0.36 & 0.88 & 0.12 & 0.64 & 0.85 (0.036) \\
+        momentum & vlp  & (3, 4)   & 0.70 & 0.30 & 0.60 & 0.87 & 0.13 & 0.40 & 0.75 (0.047) \\
+        momentum & ls   & (3, 4)   & 0.70 & 0.30 & 0.58 & 0.87 & 0.13 & 0.42 & 0.76 (0.046) \\
+        approx   & vlp  & (3, 4)   & 0.68 & 0.32 & 0.62 & 0.86 & 0.14 & 0.38 & 0.74 (0.048) \\
+        approx   & ls   & (3, 4)   & 0.73 & 0.27 & 0.53 & 0.88 & 0.12 & 0.47 & 0.78 (0.044) \\ \hline
+        ls       &      & (15, 15) & 0.75 & 0.25 & 0.47 & 0.87 & 0.13 & 0.53 & 0.78 (0.044) \\
+        ls$^*$   &      & (15, 15) & 0.76 & 0.24 & 0.44 & 0.88 & 0.12 & 0.56 & 0.83 (0.039) \\ %
+        LSIR     &      & (15, 15) & 0.72 & 0.28 & 0.44 & 0.82 & 0.18 & 0.56 & 0.81 (0.040) \\
+        approx   & ls   & (15, 15) & 0.73 & 0.27 & 0.51 & 0.87 & 0.13 & 0.49 & 0.78 (0.044) \\ \hline
+        ls       &      & (20, 30) & 0.75 & 0.25 & 0.47 & 0.87 & 0.13 & 0.53 & 0.78 (0.044) \\
+        ls$^*$   &      & (20, 30) & 0.77 & 0.23 & 0.36 & 0.84 & 0.16 & 0.64 & 0.79 (0.045) \\ %
+        LSIR     &      & (20, 30) & 0.79 & 0.21 & 0.36 & 0.87 & 0.13 & 0.64 & 0.83 (0.038) \\
+        approx   & ls   & (20, 30) & 0.63 & 0.37 & 1.00 & 1.00 & 0.00 & 0.00 & 0.51 (0.053) \\ \hline
+        HOPCA    &      &          & 0.62 & 0.38 & 1.00 & 0.99 & 0.01 & 0.00 & 0.56 (0.053) \\
+        ls       &      &          & 0.75 & 0.25 & 0.44 & 0.87 & 0.13 & 0.56 & 0.78 (0.044) \\
+        ls$^*$   &      &          & 0.68 & 0.32 & 0.51 & 0.79 & 0.21 & 0.49 & 0.66 (0.054) \\ %
+        approx   & ls   &          & 0.75 & 0.25 & 0.44 & 0.87 & 0.13 & 0.56 & 0.78 (0.044) \\
+    \end{tabular}
+    \caption{\label{tab:eeg_sim}Recreation of the EEG data LOO-CV from \cite{sdr-PfeifferKaplaBura2021}, Section~7 (EEG Data, Table~6), extended with the new methods. In the \emph{init} column, \emph{vlp} stands for the Van Loan and Pitsianis initialization scheme as described in \cite{sdr-PfeifferKaplaBura2021} and \emph{ls} for the approach described above. The column \emph{npc} gives the number of principal components of the $(2D)^2$~PCA preprocessing; an empty \emph{npc} entry corresponds to the full predictor dimensions $(256, 64)$, i.e.\ no reduction by the preprocessing.
The reduction is computed as $\ten{X}\times_{j\in[r]}\t{\widehat{\mat{\alpha}}_j}$, except for the rows marked with $^*$, where the reduction is $\ten{X}\times_{j\in[r]}\t{(\widehat{\mat{\Delta}}_j^{-1}\widehat{\mat{\alpha}}_j)}$ instead.}
+\end{table}
+
+\begin{table}
+    \centering
+    \begin{tabular}{*{3}{l} | *{3}{r@{.}l}}
+        method   & init & npc
+            & \multicolumn{2}{c}{elapsed}
+            & \multicolumn{2}{c}{sys.self}
+            & \multicolumn{2}{c}{user.self} \\ \hline
+        base     &      & (3, 4)   &  0&079 &   0&402 &   0&220 \\
+        new      & vlp  & (3, 4)   &  0&075 &   0&393 &   0&217 \\
+        new      & ls   & (3, 4)   &  0&218 &   0&243 &   0&305 \\
+        ls       &      & (3, 4)   &  0&003 &   0&006 &   0&006 \\
+        LSIR     &      & (3, 4)   &  0&002 &   0&000 &   0&002 \\
+        momentum & vlp  & (3, 4)   &  0&143 &   0&595 &   0&359 \\
+        momentum & ls   & (3, 4)   &  0&297 &   0&252 &   0&385 \\
+        approx   & vlp  & (3, 4)   &  0&044 &   0&240 &   0&152 \\
+        approx   & ls   & (3, 4)   &  0&066 &   0&144 &   0&121 \\ \hline
+        ls       &      & (15, 15) &  0&012 &   0&059 &   0&034 \\
+        LSIR     &      & (15, 15) &  0&010 &   0&050 &   0&031 \\
+        approx   & ls   & (15, 15) &  0&813 &   3&911 &   2&325 \\ \hline
+        ls       &      & (20, 30) &  0&028 &   0&129 &   0&080 \\
+        LSIR     &      & (20, 30) &  0&030 &   0&142 &   0&100 \\
+        approx   & ls   & (20, 30) &  2&110 &  10&111 &   6&290 \\ \hline
+        HOPCA    &      &          &  0&24  &   0&37  &   0&43  \\
+        ls       &      &          &  1&252 &   6&215 &   3&681 \\
+        approx   & ls   &          & 36&754 & 141&028 & 147&490 \\
+    \end{tabular}
+    \caption{\label{tab:eeg_time}Like Table~\ref{tab:eeg_sim}, but reporting the median run-times in seconds.}
+\end{table}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%% Bib and Index %%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
diff --git a/simulations/eeg_sim.R b/simulations/eeg_sim.R
index 0695f8f..72e9d85 100644
--- a/simulations/eeg_sim.R
+++ b/simulations/eeg_sim.R
@@ -49,6 +49,10 @@ npcs <- list(c(3, 4), c(15, 15), c(20, 30), dim(X)[-1])
 
 # setup methods for simulation (with unified API)
 methods <- list(
+    hopca = list(
+        fun = function(X, Fy) list(alphas = hopca(X, npc = c(1L, 1L), 1L)),
+        is.applicable = function(npc) all(npc == c(256L, 64L)) # NOT reduced
+    ),
     kpir.base = list(
         fun = toNewAPI(kpir.base),
         is.applicable = function(npc) prod(npc) < 100
@@ -65,6 +69,13 @@ methods <- list(
         fun = kpir.ls,
         is.applicable = function(npc) TRUE
     ),
+    LSIR = list(
+        fun = function(X, Fy) {
+            res <- LSIR(matrix(X, nrow(X)), Fy, dim(X)[2], dim(X)[3])
+            list(alphas = list(res$beta, res$alpha))
+        },
+        is.applicable = function(npc) prod(npc) < 1000
+    ),
     kpir.momentum.vlp = list(
         fun = toNewAPI(function(X, Fy) kpir.momentum(X, Fy, init.method = "vlp")),
         is.applicable = function(npc) prod(npc) < 100
@@ -86,6 +97,9 @@ methods <- list(
 # define AUC for reporting while simulation is running
 auc <- function(y_true, y_pred) pROC::roc(y_true, y_pred, quiet = TRUE)$auc[1]
 
+# file to dump simulation results
+log.file <- format(Sys.time(), "eeg_sim_%Y%m%dT%H%M.rds")
+
 # init complete simulation as empty
 sim <- NULL
 for (npc in npcs) {
@@ -132,12 +146,18 @@ for (npc in npcs) {
         time <- system.time(sdr <- method(X.train, c(y.train)))
 
         # reduce training data and fit a GLM
-        x.train <- mlm(X.train, Map(t, sdr$alphas), modes = 2:3)
+        if ("Deltas" %in% names(sdr)) {
+            # the small deltas are `delta_j = Delta_j^-1 alpha_j`
+            deltas <- Map(solve, sdr$Deltas, sdr$alphas)
+        } else {
+            deltas <- sdr$alphas
+        }
+        x.train <- mlm(X.train, Map(t, deltas), modes = 2:3)
         fit <- glm(y ~ x, family = binomial(link = "logit"),
                    data = data.frame(y = y[-i], x = matrix(x.train, nrow(x.train))))
 
         # predict from reduced test data
-        x.test <- mlm(X.test, Map(t, sdr$alphas), modes = 2:3)
+        x.test <- mlm(X.test, Map(t, 
deltas), modes = 2:3) y.pred <- predict(fit, data.frame(x = matrix(x.test, 1)), type = "response") loo.cv[i, "y_pred"] <- y.pred @@ -155,15 +175,16 @@ for (npc in npcs) { cat(sprintf(" (Done) AUC: %f\n", with(loo.cv, auc(y_true, y_pred)))) # dump simulation (after each fold) to file - saveRDS(sim, "eeg_sim.rds") + saveRDS(sim, log.file) } } - ################################################################################ ### Simulation Stats ### ################################################################################ -# sim <- readRDS("eeg_sim.rds") +# sim <- readRDS("eeg_sim_.rds") +# sim <- readRDS("eeg_sim_20220524T2100.rds") +# sim <- readRDS("eeg_sim_20220525T1700.rds") metrics <- list( # acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N). @@ -191,7 +212,7 @@ do.stats <- function(group) { data.frame(method = group$method[1], npc = group$npc[1], stat, check.names = FALSE) } # Call stats for each grouping -stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " "))) +stats <- do.call(rbind, Map(do.stats, split(sim, ~ method + npc, sep = " ", drop = TRUE))) rownames(stats) <- NULL print(stats, digits = 2) @@ -200,3 +221,46 @@ print(stats, digits = 2) times <- aggregate(cbind(elapsed, sys.self, user.self) ~ method + npc, sim, median) print(times, digits = 2) + +## stats: 2022.05.24 +# method npc Acc Err FPR TPR FNR TNR AUC sd(AUC) +# 1 kpir.base (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 +# 2 kpir.new.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 +# 3 kpir.new.ls (3, 4) 0.74 0.26 0.51 0.88 0.12 0.49 0.77 0.045 +# 4 kpir.ls (3, 4) 0.75 0.25 0.49 0.88 0.12 0.51 0.78 0.044 +# (*) kpir.ls (3, 4) 0.78 0.22 0.38 0.87 0.13 0.62 0.86 0.034 +# 5 kpir.momentum.vlp (3, 4) 0.70 0.30 0.60 0.87 0.13 0.40 0.75 0.047 +# 6 kpir.momentum.ls (3, 4) 0.70 0.30 0.58 0.87 0.13 0.42 0.76 0.046 +# 7 kpir.approx.vlp (3, 4) 0.68 0.32 0.62 0.86 0.14 0.38 0.74 0.048 +# 8 kpir.approx.ls (3, 4) 0.73 0.27 0.53 0.88 0.12 0.47 0.78 0.044 +# 9 kpir.ls (15, 15) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044 +# (*) kpir.ls (15, 15) 0.76 0.24 0.44 0.88 0.12 0.56 0.83 0.039 +# 10 kpir.approx.ls (15, 15) 0.73 0.27 0.51 0.87 0.13 0.49 0.78 0.044 +# 11 kpir.ls (20, 30) 0.75 0.25 0.47 0.87 0.13 0.53 0.78 0.044 +# (*) kpir.ls (20, 30) 0.77 0.23 0.36 0.84 0.16 0.64 0.79 0.045 +# 12 kpir.approx.ls (20, 30) 0.63 0.37 1.00 1.00 0.00 0.00 0.51 0.053 +# 13 kpir.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044 +# (*) kpir.ls (256, 64) 0.68 0.32 0.51 0.79 0.21 0.49 0.66 0.054 +# 14 kpir.approx.ls (256, 64) 0.75 0.25 0.44 0.87 0.13 0.56 0.78 0.044 +# +# (*) Using reduction matrices `Map(solve, sdr$Deltas, sdr$alphas)` instead +# of only `sdr$alpha`. 
+ +## times: 2022.05.24 +# method npc elapsed sys.self user.self +# 1 kpir.base (3, 4) 0.079 0.402 0.220 +# 2 kpir.new.vlp (3, 4) 0.075 0.393 0.217 +# 3 kpir.new.ls (3, 4) 0.218 0.243 0.305 +# 4 kpir.ls (3, 4) 0.003 0.006 0.006 +# 5 kpir.momentum.vlp (3, 4) 0.143 0.595 0.359 +# 6 kpir.momentum.ls (3, 4) 0.297 0.252 0.385 +# 7 kpir.approx.vlp (3, 4) 0.044 0.240 0.152 +# 8 kpir.approx.ls (3, 4) 0.066 0.144 0.121 +# 9 kpir.ls (15, 15) 0.012 0.059 0.034 +# 10 kpir.approx.ls (15, 15) 0.813 3.911 2.325 +# 11 kpir.ls (20, 30) 0.028 0.129 0.080 +# 12 kpir.approx.ls (20, 30) 2.110 10.111 6.290 +# 13 kpir.ls (256, 64) 1.252 6.215 3.681 +# 14 kpir.approx.ls (256, 64) 36.754 141.028 147.490 + + diff --git a/tensorPredictors/R/kpir_ls.R b/tensorPredictors/R/kpir_ls.R index a795b44..ec3c185 100644 --- a/tensorPredictors/R/kpir_ls.R +++ b/tensorPredictors/R/kpir_ls.R @@ -32,11 +32,6 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L, La.svd(mcrossprod(X, mode = mode), ncol)$u }, modes, dim(Fy)[modes]) - # # Scaling of alpha, such that `tr(alpha_i' alpha_i) = tr(alpha_j' alpha_j)`` - # # for `i, j = 1, ..., r`. - # traces <- unlist(Map(function(alpha) sum(alpha^2))) - # alphas <- Map(`*`, prod(traces)^(1 / length(alphas)) / traces, alphas) - # Call history callback (logger) before the first iteration if (is.function(logger)) { do.call(logger, c(0L, NA, rev(alphas))) } @@ -52,11 +47,6 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L, # TODO: alphas[[j]] <- t(solve(mcrossprod(Z, j), mcrossprod(Z, X, j))) } - # # Scaling of alpha, such that `tr(alpha_i' alpha_i) = tr(alpha_j' alpha_j)`` - # # for `i, j = 1, ..., r`. - # traces <- unlist(Map(function(alpha) sum(alpha^2))) - # alphas <- Map(`*`, prod(traces)^(1 / length(alphas)) / traces, alphas) - # Call logger (invoke history callback) if (is.function(logger)) { do.call(logger, c(iter, NA, rev(alphas))) }
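
Aside (not part of the patch): the storage and inversion gap discussed in the "Comparison to the general case" paragraph of the LaTeX changes above can be illustrated with a minimal R sketch. All dimensions and covariance factors below are made-up placeholders, not quantities produced by the simulation.

# Toy illustration of the full (p q) x (p q) covariance versus its Kronecker factors.
p <- 16                                      # toy size; the EEG predictors are 256 x 64,
q <- 32                                      # i.e. p q = 16384 in the EEG application
Delta_1 <- diag(q) + 0.5                     # q x q factor (compound symmetry, positive definite)
Delta_2 <- diag(p) + 0.5                     # p x p factor (compound symmetry, positive definite)

# General case: a single (p q) x (p q) covariance has to be stored and inverted.
Delta <- kronecker(Delta_1, Delta_2)         # 512 x 512 here; 16384 x 16384 at EEG size (~2 GB dense)
object.size(Delta)                           # roughly 2 MB for this toy example
object.size(Delta_1) + object.size(Delta_2)  # only a few KB for the two Kronecker factors

# Kronecker case: (Delta_1 %x% Delta_2)^-1 = Delta_1^-1 %x% Delta_2^-1, so the
# O((p q)^3) inversion is replaced by two cheap O(q^3) and O(p^3) inversions.
all.equal(solve(Delta), kronecker(solve(Delta_1), solve(Delta_2)))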