From 82816e96a6af84a10e20632bdd2480f289b591b1 Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Nov 2025 17:21:33 +0100 Subject: [PATCH 1/2] add: reg.factor (regularization factor), add: FFT to EEG sim --- dataAnalysis/eeg/02_eeg_2d.R | 39 +++++++++++++++---------- dataAnalysis/eeg/03_eeg_3d.R | 31 +++++--------------- tensorPredictors/R/TSIR.R | 11 +++---- tensorPredictors/R/gmlm_tensor_normal.R | 26 +++++++++-------- 4 files changed, 51 insertions(+), 56 deletions(-) diff --git a/dataAnalysis/eeg/02_eeg_2d.R b/dataAnalysis/eeg/02_eeg_2d.R index e2d7ae7..eb901db 100644 --- a/dataAnalysis/eeg/02_eeg_2d.R +++ b/dataAnalysis/eeg/02_eeg_2d.R @@ -52,10 +52,10 @@ c(X, y) %<-% readRDS("eeg_data_2d.rds") #' #' @param X 3D EEG data (preprocessed or not) #' @param F binary responce `y` as a 3D tensor, every obs. is a 1 x 1 matrix -loo.predict.gmlm <- function(X, y) { +loo.predict.gmlm <- function(X, y, ...) { unlist(parallel::mclapply(seq_along(y), function(i) { # Fit with i'th observation removed - fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L) + fit <- gmlm_tensor_normal(X[ , , -i], as.integer(y[-i]), sample.axis = 3L, ...) # Reduce the entire data set r <- as.vector(mlm(X, fit$betas, modes = 1:2, transpose = TRUE)) @@ -75,26 +75,35 @@ loo.predict.gmlm <- function(X, y) { }, mc.cores = getOption("mc.cores", max(1L, parallel::detectCores() - 1L)))) } +proj.fft <- function(beta1, nr.freq = 5L) { + F <- fft(beta1) + Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F) +} + # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict.gmlm(preprocess(X, 3, 4), y) y.hat.15.15 <- loo.predict.gmlm(preprocess(X, 15, 15), y) y.hat.20.30 <- loo.predict.gmlm(preprocess(X, 20, 30), y) y.hat <- loo.predict.gmlm(X, y) +y.hat.fft <- loo.predict.gmlm(X, y, proj.betas = list(proj.fft, NULL)) # classification performance measures table by leave-one-out cross-validation -(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { - sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), - function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) -})) -#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat -#> acc 0.79508197 0.78688525 0.78688525 0.78688525 -#> err 0.20491803 0.21311475 0.21311475 0.21311475 -#> fpr 0.35555556 0.40000000 0.40000000 0.40000000 -#> tpr 0.88311688 0.89610390 0.89610390 0.89610390 -#> fnr 0.11688312 0.10389610 0.10389610 0.10389610 -#> tnr 0.64444444 0.60000000 0.60000000 0.60000000 -#> auc 0.85108225 0.83838384 0.83924964 0.83896104 -#> auc.sd 0.03584791 0.03760531 0.03751307 0.03754553 +(loo.cv <- apply( + cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, + function(y.pred) { + sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), + function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) + } +)) +#> y.hat.3.4 y.hat.15.15 y.hat.20.30 y.hat y.hat.fft +#> acc 0.79508197 0.78688525 0.78688525 0.78688525 0.81147541 +#> err 0.20491803 0.21311475 0.21311475 0.21311475 0.18852459 +#> fpr 0.35555556 0.40000000 0.40000000 0.40000000 0.33333333 +#> tpr 0.88311688 0.89610390 0.89610390 0.89610390 0.89610390 +#> fnr 0.11688312 0.10389610 0.10389610 0.10389610 0.10389610 +#> tnr 0.64444444 0.60000000 0.60000000 0.60000000 0.66666667 +#> auc 0.85194805 0.83838384 0.83924964 0.83896104 0.84646465 +#> auc.sd 0.03574475 0.03760531 0.03751754 0.03754553 0.03751864 ################################## Tensor SIR ################################## diff --git a/dataAnalysis/eeg/03_eeg_3d.R b/dataAnalysis/eeg/03_eeg_3d.R index fe3709d..ff07e93 100644 --- a/dataAnalysis/eeg/03_eeg_3d.R +++ b/dataAnalysis/eeg/03_eeg_3d.R @@ -42,29 +42,6 @@ auc.sd <- function(y.true, y.pred) { sqrt(pROC::var(pROC::roc(y.true, y.pred, quiet = TRUE, direction = "<"))) } - -# # unified API for all reduction procedures -# GMLM <- list( -# fit = function(X, y) tensorPredictors::gmlm_tensor_normal(X, as.integer(y), sample.axis = 4L), -# reduce = function(X, fit) mlm(X, fit$betas, 1:3, TRUE), -# applicable = function(X) TRUE -# ) -# TSIR <- list( -# fit = function(X, y) tensorPredictors::TSIR(X, y, c(1L, 1L, 1L), sample.axis = 4L), -# reduce = function(X, fit) mlm(X, fit, 1:3, TRUE), -# applicable = function(X) TRUE -# ) -# KPIR_LS <- list( -# fit = function(X, y) { -# if (any(dim(X)[-4] > dim(X)[4])) { -# stop("Dimensions too big") -# } -# tensorPredictors::kpir.ls(X, as.integer(y), sample.axis = 4L) -# }, -# reduce = function(X, fit) if (is.null(fit)) NA else mlm(X, fit$alphas, 1:3, TRUE), -# applicable = function(X) all(dim(X)[1:3] <= dim(X)[4]) -# ) - #' Leave-one-out prediction using TSIR #' #' @param method reduction method to be applied @@ -105,14 +82,20 @@ c(X, y) %<-% readRDS("eeg_data_3d.rds") ##################################### GMLM ##################################### +proj.fft <- function(beta1, nr.freq = 5L) { + F <- fft(beta1) + Re(fft(`[<-`(F, head(order(abs(F)), -nr.freq), 0+0i), inverse = TRUE)) / length(F) +} + # perform preprocessed (reduced) and raw (not reduced) leave-one-out prediction y.hat.3.4 <- loo.predict(gmlm_tensor_normal, preprocess(X, 3, 4, 3), y) y.hat.15.15 <- loo.predict(gmlm_tensor_normal, preprocess(X, 15, 15, 3), y) y.hat.20.30 <- loo.predict(gmlm_tensor_normal, preprocess(X, 20, 30, 3), y) y.hat <- loo.predict(gmlm_tensor_normal, X, y) +y.hat.fft <- loo.predict(gmlm_tensor_normal, X, y, proj.betas = list(proj.fft, NULL, NULL)) # classification performance measures table by leave-one-out cross-validation -(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat), 2, function(y.pred) { +(loo.cv <- apply(cbind(y.hat.3.4, y.hat.15.15, y.hat.20.30, y.hat, y.hat.fft), 2, function(y.pred) { sapply(c("acc", "err", "fpr", "tpr", "fnr", "tnr", "auc", "auc.sd"), function(FUN) { match.fun(FUN)(as.integer(y) - 1L, y.pred) }) })) diff --git a/tensorPredictors/R/TSIR.R b/tensorPredictors/R/TSIR.R index 4ee49a8..fe0ec2f 100644 --- a/tensorPredictors/R/TSIR.R +++ b/tensorPredictors/R/TSIR.R @@ -6,7 +6,8 @@ TSIR <- function(X, y, sample.axis = 1L, nr.slices = 10L, # default slices, ignored if y is a factor or integer max.iter = 50L, - cond.threshold = Inf, eps = sqrt(.Machine$double.eps), + reg.threshold = Inf, reg.factor = 0.2, + eps = 1e-6, slice.method = c("cut", "ecdf"), # ignored if y is a factor or integer logger = NULL ) { @@ -122,13 +123,13 @@ TSIR <- function(X, y, Omegas <- Map(function(k) prod(dim(X)[-k])^-1 * mcrossprod(X, mode = k), modes) # Check condition of sample covariances and perform regularization if needed - if (is.finite(cond.threshold)) { + if (is.finite(reg.threshold)) { for (j in seq_along(Omegas)) { # Compute min and max eigen values min_max <- range(eigen(Omegas[[j]], TRUE, TRUE)$values) - # The condition is approximately `kappa(Omegas[[j]]) > cond.threshold` - if (min_max[2] > cond.threshold * min_max[1]) { - Omegas[[j]] <- Omegas[[j]] + diag(0.2 * min_max[2], nrow(Omegas[[j]])) + # The condition is approximately `kappa(Omegas[[j]]) > reg.threshold` + if (min_max[2] > reg.threshold * min_max[1]) { + diag(Omegas[[j]]) <- diag(Omegas[[j]]) + reg.factor * min_max[2] } } } diff --git a/tensorPredictors/R/gmlm_tensor_normal.R b/tensorPredictors/R/gmlm_tensor_normal.R index da7c4ac..db155c3 100644 --- a/tensorPredictors/R/gmlm_tensor_normal.R +++ b/tensorPredictors/R/gmlm_tensor_normal.R @@ -5,7 +5,8 @@ #' @export gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), max.iter = 100L, proj.betas = NULL, proj.Omegas = NULL, logger = NULL, - cond.threshold = 25, eps = 1e-6 + reg.threshold = 25, reg.factor = 0.2, + eps = 1e-6 ) { # Special case for `F` being vector valued, add dims of size 1 if (is.null(dim(F))) { @@ -25,13 +26,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), dimF <- head(dim(F), -1) modes <- seq_along(dimX) - # Ensure the Omega and beta projections lists are lists - if (!is.list(proj.Omegas)) { - proj.Omegas <- rep(NULL, length(modes)) - } - if (!is.list(proj.betas)) { - proj.betas <- rep(NULL, length(modes)) - } + # # Ensure the Omega and beta projections lists are lists + # if (!is.list(proj.Omegas)) { + # proj.Omegas <- rep(NULL, length(modes)) # thats just NULL, which it already is + # } + # if (!is.list(proj.betas)) { + # proj.betas <- rep(NULL, length(modes)) + # } ### Phase 1: Computing initial values (`dim<-` ensures we have an "array") @@ -106,12 +107,13 @@ gmlm_tensor_normal <- function(X, F, sample.axis = length(dim(X)), # with regularization of the j'th mode covariance estimate `Sigma_j` for (j in seq_along(Sigmas)) { # Regularize Covariances - if (is.finite(cond.threshold)) { + if (is.finite(reg.threshold)) { # Compute min and max eigen values min_max <- range(eigen(Sigmas[[j]], TRUE, TRUE)$values) - # The condition is approximately `kappa(Sigmas[[j]]) > cond.threshold` - if (min_max[2] > cond.threshold * min_max[1]) { - Sigmas[[j]] <- Sigmas[[j]] + diag(0.2 * min_max[2], nrow(Sigmas[[j]])) + # The condition is approximately `kappa(Sigmas[[j]]) > reg.threshold` + if (min_max[2] > reg.threshold * min_max[1]) { + cat(sprintf("\033[2mReg (%d): cond(Sigma_%d) = %f\033[0m\n", iter, j, min_max[2] / min_max[1])) + diag(Sigmas[[j]]) <- diag(Sigmas[[j]]) + reg.factor * min_max[2] } } # Compute (unconstraint but regularized) Omega_j as covariance inverse From e9c4a2a9715d56526b195e3ddd94a3eec736535f Mon Sep 17 00:00:00 2001 From: daniel Date: Wed, 5 Nov 2025 17:24:33 +0100 Subject: [PATCH 2/2] sync --- LaTeX/main.bib | 100 ++++++++++++++++++++++++++++++------------------ LaTeX/paper.tex | 59 ++++++++-------------------- 2 files changed, 78 insertions(+), 81 deletions(-) diff --git a/LaTeX/main.bib b/LaTeX/main.bib index b912c2d..9f0cc03 100644 --- a/LaTeX/main.bib +++ b/LaTeX/main.bib @@ -1,12 +1,16 @@ @book{AbadirMagnus2005, - author = {Abadir, Karim M. and Magnus, Jan R.}, - year = {2005}, - title = {Matrix Algebra}, - publisher = {Cambridge University Press}, - doi = {10.1017/CBO9780511810800}, - collection = {Econometric Exercises}, - place = {Cambridge}, - series = {Econometric Exercises} + author = {Abadir, Karim M. and Magnus, Jan R.}, + year = {2005}, + title = {Matrix algebra}, + volume = {1}, + pages = {xxx+434}, + publisher = {Cambridge University Press, Cambridge}, + doi = {10.1017/CBO9780511810800}, + url = {https://doi.org/10.1017/CBO9780511810800}, + isbn = {978-0-521-53746-9; 0-521-53746-0}, + mrclass = {15-01 (91B02)}, + mrnumber = {2408356}, + series = {Econometric Exercises} } @book{AbsilEtAl2007, @@ -47,15 +51,16 @@ } @book{Arnold1981, - author = {Arnold, Steven F}, - year = {1981}, - title = {The theory of linear models and multivariate analysis}, - publisher = {Wiley}, - address = {New York, NY [u.a.]}, - isbn = {0471050652}, - keywords = {Multivariate Analyse}, - language = {eng}, - series = {Wiley series in probability and mathematical statistics : Probability and mathematical statistics} + author = {Arnold, Steven F.}, + year = {1981}, + title = {The theory of linear models and multivariate analysis}, + pages = {xvi+475}, + publisher = {John Wiley \& Sons, Inc., New York}, + isbn = {0-471-05065-2}, + mrclass = {62-01 (62J05)}, + mrnumber = {606011}, + mrreviewer = {Pi-Erh\ Lin}, + series = {Wiley Series in Probability and Mathematical Statistics} } @article{BanerjeeEtAl2008, @@ -438,11 +443,19 @@ } @book{Cook1998, - author = {Cook, Dennis R.}, - year = {1998}, - title = {Regression Graphics: Ideas for studying regressions through graphics}, - publisher = {Wiley}, - address = {New York} + author = {Cook, R. Dennis}, + year = {1998}, + title = {Regression graphics}, + pages = {xviii+349}, + publisher = {John Wiley \& Sons, Inc., New York}, + doi = {10.1002/9780470316931}, + url = {https://doi.org/10.1002/9780470316931}, + isbn = {0-471-19365-8}, + mrclass = {62-01 (62J05)}, + mrnumber = {1645673}, + mrreviewer = {Jon\ Stene}, + note = {Ideas for studying regressions through graphics, A Wiley-Interscience Publication}, + series = {Wiley Series in Probability and Statistics: Probability and Statistics} } @article{Cook2000, @@ -590,17 +603,19 @@ } @article{CoxWermuth1994, - author = {D. R. Cox and Nanny Wermuth}, - year = {1994}, - title = {A Note on the Quadratic Exponential Binary Distribution}, - journal = {Biometrika}, - volume = {81}, - number = {2}, - pages = {403--408}, - publisher = {[Oxford University Press, Biometrika Trust]}, - issn = {00063444}, - url = {http://www.jstor.org/stable/2336971}, - urldate = {2024-04-11} + author = {Cox, D. R. and Wermuth, Nanny}, + year = {1994}, + title = {A note on the quadratic exponential binary distribution}, + journal = {Biometrika}, + volume = {81}, + number = {2}, + pages = {403--408}, + issn = {0006-3444,1464-3510}, + doi = {10.1093/biomet/81.2.403}, + url = {https://doi.org/10.1093/biomet/81.2.403}, + fjournal = {Biometrika}, + mrclass = {62E15}, + mrnumber = {1294901} } @book{Dai2012, @@ -721,11 +736,20 @@ } @article{DrtonEtAl2020, - author = {Mathias Drton and Satoshi Kuriki and Peter D. Hoff}, - year = {2020}, - title = {Existence and uniqueness of the Kronecker covariance MLE}, - journal = {The Annals of Statistics}, - url = {https://api.semanticscholar.org/CorpusID:212718000} + author = {Drton, Mathias and Kuriki, Satoshi and Hoff, Peter}, + year = {2021}, + title = {Existence and uniqueness of the {K}ronecker covariance {MLE}}, + journal = {Ann. Statist.}, + volume = {49}, + number = {5}, + pages = {2721--2754}, + issn = {0090-5364,2168-8966}, + doi = {10.1214/21-aos2052}, + url = {https://doi.org/10.1214/21-aos2052}, + fjournal = {The Annals of Statistics}, + mrclass = {62H12 (62R01)}, + mrnumber = {4338381}, + mrreviewer = {Burcu\ H.\ Ucer} } @article{DrydenEtAl2009, diff --git a/LaTeX/paper.tex b/LaTeX/paper.tex index 8919c38..8e80308 100644 --- a/LaTeX/paper.tex +++ b/LaTeX/paper.tex @@ -5,13 +5,9 @@ \usepackage[utf8]{inputenc} \usepackage[LSF, T1]{fontenc} \usepackage{chessfss} -% \usepackage{fullpage} \usepackage{amsmath, amssymb, amstext, amsthm, scalerel, bm, pifont} \usepackage[dvipsnames]{xcolor} % dvipsnames loads more named colors \usepackage{graphicx} % colors, images and graphics -\usepackage{tikz} % TikZ (TeX ist kein Zeichenprogramm) -\usepackage{environ} % for dynamic TikZ picture scaling -\usepackage{algorithm, algpseudocode} % Pseudo Codes / Algorithms \usepackage[ style = authoryear, % citation style isbn = false, % show isbn? @@ -32,13 +28,9 @@ doi = true, bibencoding = utf8 ]{biblatex} - - \usepackage[pdftex, colorlinks, allcolors=blue]{hyperref} % Load as last package! Redefines commands \usepackage[noabbrev, capitalize, nameinlink]{cleveref} % but this after hyperref -\usetikzlibrary{calc, perspective, datavisualization} - \setcounter{MaxMatrixCols}{20} % Sets the max nr. of columns in AMSmath's matrix envs to 20 % Document meta into @@ -119,21 +111,21 @@ % \newcommand*{\ten}[1]{\mathcal{#1}} % \DeclareMathOperator{\sym}{sym} \renewcommand*{\vec}{\operatorname{vec}} -\newcommand*{\unvec}{\operatorname{vec^{-1}}} -\newcommand*{\reshape}[1]{\operatorname{reshape}_{#1}} +% \newcommand*{\unvec}{\operatorname{vec^{-1}}} +% \newcommand*{\reshape}[1]{\operatorname{reshape}_{#1}} \newcommand*{\vech}{\operatorname{vech}} \newcommand*{\rank}{\operatorname{rank}} \newcommand*{\diag}{\operatorname{diag}} -\newcommand*{\perm}[1]{\mathfrak{S}_{#1}} % set of permutations of size #1 -\newcommand*{\len}[1]{\#{#1}} % length of #1 -\DeclareMathOperator*{\ttt}{\circledast} +% \newcommand*{\perm}[1]{\mathfrak{S}_{#1}} % set of permutations of size #1 +% \newcommand*{\len}[1]{\#{#1}} % length of #1 +% \DeclareMathOperator*{\ttt}{\circledast} \DeclareMathOperator{\tr}{tr} \DeclareMathOperator{\var}{Var} \DeclareMathOperator{\cov}{Cov} \DeclareMathOperator{\Span}{span} \DeclareMathOperator{\E}{\operatorname{\mathbb{E}}} % \DeclareMathOperator{\independent}{{\bot\!\!\!\bot}} -\DeclareMathOperator*{\argmin}{{arg\,min}} +% \DeclareMathOperator*{\argmin}{{arg\,min}} \DeclareMathOperator*{\argmax}{{arg\,max}} \newcommand*{\D}{\textnormal{D}} % derivative \renewcommand*{\H}{\textnormal{H}} % hessian @@ -146,21 +138,21 @@ \renewcommand{\checkmark}{{\color{Green}\ding{51}}} \newcommand{\xmark}{{\color{Red!70}\ding{55}}} -% Pseudo Code Commands -\newcommand{\algorithmicbreak}{\textbf{break}} -\newcommand{\Break}{\State \algorithmicbreak} +% % Pseudo Code Commands +% \newcommand{\algorithmicbreak}{\textbf{break}} +% \newcommand{\Break}{\State \algorithmicbreak} % Special Matrix Sets (Manifolds) -\newcommand{\MatMani}[2]{\mathbb{R}^{{#1}\times {#2}}} +% \newcommand{\MatMani}[2]{\mathbb{R}^{{#1}\times {#2}}} \newcommand{\StiefelNonCompact}[2]{\mathbb{R}_{*}^{{#1}\times {#2}}} \newcommand{\Stiefel}[2]{\mathrm{St}^{{#1}\times {#2}}} -\newcommand{\MatRankMani}[3]{\mathbb{R}_{\rank={#1}}^{{#2}\times {#3}}} -\newcommand{\DiagZeroMat}[1]{\mathbb{R}_{\diag=0}^{{#1}\times {#1}}} +% \newcommand{\MatRankMani}[3]{\mathbb{R}_{\rank={#1}}^{{#2}\times {#3}}} +% \newcommand{\DiagZeroMat}[1]{\mathbb{R}_{\diag=0}^{{#1}\times {#1}}} \newcommand{\SymMat}[1]{\mathrm{Sym}^{{#1}\times {#1}}} -\newcommand{\SkewSymMat}[1]{\mathrm{Skew}^{{#1}\times {#1}}} +% \newcommand{\SkewSymMat}[1]{\mathrm{Skew}^{{#1}\times {#1}}} \newcommand{\SymPosDefMat}[1]{\mathrm{Sym}_{++}^{{#1}\times {#1}}} -\newcommand{\SymDiagZeroMat}[1]{\mathrm{Sym}_{\diag=0}^{p\times p}} -\newcommand{\SymBand}[2]{\mathrm{SymBand}^{{#1}\times {#1}}_{#2}} +% \newcommand{\SymDiagZeroMat}[1]{\mathrm{Sym}_{\diag=0}^{p\times p}} +% \newcommand{\SymBand}[2]{\mathrm{SymBand}^{{#1}\times {#1}}_{#2}} \newcommand{\OrthogonalGrp}[1]{\mathrm{O}(#1)} \newcommand{\SpecialOrthogonalGrp}[1]{\mathrm{SO}(#1)} @@ -233,23 +225,6 @@ \makeatother -%%% Scaling TikZ pictures using the `environ' package -% see: https://tex.stackexchange.com/questions/6388/how-to-scale-a-tikzpicture-to-textwidth -\makeatletter -\newsavebox{\measure@tikzpicture} -\NewEnviron{scaletikzpicturetowidth}[1]{% - \def\tikz@width{#1}% - \def\tikzscale{1}\begin{lrbox}{\measure@tikzpicture}% - \BODY - \end{lrbox}% - \pgfmathparse{#1/\wd\measure@tikzpicture}% - \edef\tikzscale{\pgfmathresult}% - \BODY -} -\makeatother - - - \newcommand{\smoothFunc}[2][\infty]{{C^{#2}(#1)}} \newcommand{\localSmoothFunc}[3][\infty]{{C^{#3}_{#1}(#2)}} \newcommand{\tangentSpace}[2]{\ensuremath{T_{#1}{#2}}} @@ -450,9 +425,7 @@ The reduction in vectorized form is $\vec\ten{R}(\ten{X})=\t{\mat{B}}\vec(\ten{X \begin{figure} \centering - \begin{scaletikzpicturetowidth}{0.5\textwidth} - \input{images/reduction.tex} - \end{scaletikzpicturetowidth} + \includegraphics{images/reduction.pdf} \caption{\label{fig:SDRvisual}Visual depiction of the sufficient reduction in \cref{thm:sdr}.} \end{figure}