From 7a68948d26f6137b829d7271abed3b1228ee95d9 Mon Sep 17 00:00:00 2001 From: daniel Date: Thu, 11 Apr 2024 16:08:00 +0200 Subject: [PATCH] effie --- LaTeX/main.bib | 2069 ++++++++++++++++++++++++----------------------- LaTeX/paper.tex | 281 ++++--- 2 files changed, 1196 insertions(+), 1154 deletions(-) diff --git a/LaTeX/main.bib b/LaTeX/main.bib index feb3fa7..fc88e7c 100644 --- a/LaTeX/main.bib +++ b/LaTeX/main.bib @@ -1,2321 +1,2350 @@ +@misc{MukherjeeEtAl2020, + title={Estimation in Tensor Ising Models}, + author={Somabha Mukherjee and Jaesung Son and Bhaswar B. Bhattacharya}, + year={2020}, + eprint={2008.12882}, + archivePrefix={arXiv}, + primaryClass={math.ST} +} + +@misc{LiuEtAl2023, + title={Tensor Recovery in High-Dimensional Ising Models}, + author={Tianyu Liu and Somabha Mukherjee and Rahul Biswas}, + year={2023}, + eprint={2304.00530}, + archivePrefix={arXiv}, + primaryClass={math.ST} +} + @book{AbadirMagnus2005, - title = {Matrix Algebra}, author = {Abadir, Karim M. and Magnus, Jan R.}, - year = {2005}, - publisher = {Cambridge University Press}, collection = {Econometric Exercises}, doi = {10.1017/CBO9780511810800}, place = {Cambridge}, - series = {Econometric Exercises} + publisher = {Cambridge University Press}, + series = {Econometric Exercises}, + title = {Matrix Algebra}, + year = {2005} } @book{AbsilEtAl2007, - title = {{Optimization Algorithms on Matrix Manifolds}}, author = {Absil, P.-A. and Mahony, R. and Sepulchre, R.}, - year = {2008}, - pages = {xvi+224}, - publisher = {Princeton University Press, Princeton, NJ}, doi = {10.1515/9781400830244}, isbn = {978-0-691-13298-3}, mrclass = {90-02 (58E17 90C30 90C52)}, mrnumber = {2364186}, note = {Full Online Text \url{https://press.princeton.edu/absil}}, - url = {https://doi.org/10.1515/9781400830244} + pages = {xvi+224}, + publisher = {Princeton University Press, Princeton, NJ}, + title = {{Optimization Algorithms on Matrix Manifolds}}, + url = {https://doi.org/10.1515/9781400830244}, + year = {2008} } @article{AdragniCook2009, - title = {Sufficient dimension reduction and prediction in regression}, author = {Adragni, Kofi P. and Cook, R. Dennis}, - year = {2009}, - journal = {Philos. Trans. R. Soc. Lond. Ser. A Math. Phys. Eng. Sci.}, - fjournal = {Philosophical Transactions of the Royal Society of London. Series A. Mathematical, Physical and Engineering Sciences}, - volume = {367}, - number = {1906}, - pages = {4385--4405}, doi = {10.1098/rsta.2009.0110}, + fjournal = {Philosophical Transactions of the Royal Society of London. Series A. Mathematical, Physical and Engineering Sciences}, issn = {1364-503X,1471-2962}, + journal = {Philos. Trans. R. Soc. Lond. Ser. A Math. Phys. Eng. Sci.}, mrclass = {62J02 (62H25)}, mrnumber = {2546393}, - url = {https://doi.org/10.1098/rsta.2009.0110} + number = {1906}, + pages = {4385--4405}, + title = {Sufficient dimension reduction and prediction in regression}, + url = {https://doi.org/10.1098/rsta.2009.0110}, + volume = {367}, + year = {2009} } @book{Anderson2003, - title = {An Introduction to Multivariate Statistical Analysis}, - author = {T. W. Anderson}, - year = {2003}, - publisher = {Wiley}, address = {New York, NY}, - edition = {third} + author = {T. W. 
Anderson}, + edition = {third}, + publisher = {Wiley}, + title = {An Introduction to Multivariate Statistical Analysis}, + year = {2003} } @book{Arnold1981, - title = {The theory of linear models and multivariate analysis}, - author = {Arnold, Steven F}, - year = {1981}, - publisher = {Wiley}, address = {New York, NY [u.a.]}, + author = {Arnold, Steven F}, isbn = {0471050652}, keywords = {Multivariate Analyse}, language = {eng}, - series = {Wiley series in probability and mathematical statistics : Probability and mathematical statistics} + publisher = {Wiley}, + series = {Wiley series in probability and mathematical statistics : Probability and mathematical statistics}, + title = {The theory of linear models and multivariate analysis}, + year = {1981} } @article{BanerjeeEtAl2008, - title = {Model Selection Through Sparse Maximum Likelihood Estimation for Multivariate Gaussian or Binary Data}, author = {Onureena Banerjee and Laurent El Ghaoui and Alexandre d'Aspremont}, - year = {2008}, journal = {Journal of Machine Learning Research}, - volume = {9}, number = {15}, pages = {485-516}, - url = {http://jmlr.org/papers/v9/banerjee08a.html} + title = {Model Selection Through Sparse Maximum Likelihood Estimation for Multivariate Gaussian or Binary Data}, + url = {http://jmlr.org/papers/v9/banerjee08a.html}, + volume = {9}, + year = {2008} } @article{BasserPajevic2000, - title = {Statistical artifacts in diffusion tensor MRI (DT-MRI) caused by background noise}, author = {Basser, Peter J. and Pajevic, Sinisa}, - year = {2000}, + doi = {10.1002/1522-2594(200007)44:1<41::AID-MRM8>3.0.CO;2-O}, journal = {Magnetic Resonance in Medicine}, - volume = {44}, number = {1}, pages = {41-50}, - doi = {10.1002/1522-2594(200007)44:1<41::AID-MRM8>3.0.CO;2-O}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/1522-2594%28200007%2944%3A1%3C41%3A%3AAID-MRM8%3E3.0.CO%3B2-O} + title = {Statistical artifacts in diffusion tensor MRI (DT-MRI) caused by background noise}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/1522-2594%28200007%2944%3A1%3C41%3A%3AAID-MRM8%3E3.0.CO%3B2-O}, + volume = {44}, + year = {2000} } @article{BasserPajevic2003, - title = {A normal distribution for tensor-valued random variables: applications to diffusion tensor MRI}, author = {Basser, Peter J and Pajevic, Sinisa}, - year = {2003}, journal = {IEEE transactions on medical imaging}, - volume = {22}, number = {7}, pages = {785--794}, - publisher = {IEEE} + publisher = {IEEE}, + title = {A normal distribution for tensor-valued random variables: applications to diffusion tensor MRI}, + volume = {22}, + year = {2003} } @article{BasserPajevic2007, - title = {Spectral decomposition of a 4th-order covariance tensor: Applications to diffusion tensor MRI}, author = {Peter J. 
Basser and Sinisa Pajevic}, - year = {2007}, - journal = {Signal Processing}, - volume = {87}, - number = {2}, - pages = {220-236}, doi = {10.1016/j.sigpro.2006.02.050}, issn = {0165-1684}, + journal = {Signal Processing}, note = {Tensor Signal Processing}, - url = {https://www.sciencedirect.com/science/article/pii/S0165168406001678} + number = {2}, + pages = {220-236}, + title = {Spectral decomposition of a 4th-order covariance tensor: Applications to diffusion tensor MRI}, + url = {https://www.sciencedirect.com/science/article/pii/S0165168406001678}, + volume = {87}, + year = {2007} } @article{Besag1974, - title = {Spatial Interaction and the Statistical Analysis of Lattice Systems}, author = {Besag, Julian}, - year = {1974}, + doi = {10.1111/j.2517-6161.1974.tb00999.x}, journal = {Journal of the Royal Statistical Society: Series B (Methodological)}, - volume = {36}, number = {2}, pages = {192-225}, - doi = {10.1111/j.2517-6161.1974.tb00999.x}, - url = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.2517-6161.1974.tb00999.x} + title = {Spatial Interaction and the Statistical Analysis of Lattice Systems}, + url = {https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.2517-6161.1974.tb00999.x}, + volume = {36}, + year = {1974} } @incollection{Bottou1998, - title = {Online Algorithms and Stochastic Approximations}, - author = {Bottou, L\'{e}on}, - year = {1998}, - publisher = {Cambridge University Press}, address = {Cambridge, UK}, + author = {Bottou, L\'{e}on}, booktitle = {Online Learning and Neural Networks}, editor = {Saad, David}, - note = {\url{http://leon.bottou.org/papers/bottou-98x} revised, Oct 2012} + note = {\url{http://leon.bottou.org/papers/bottou-98x} revised, Oct 2012}, + publisher = {Cambridge University Press}, + title = {Online Algorithms and Stochastic Approximations}, + year = {1998} } @article{Brenner2018, - title = {Serially measured pre-diagnostic levels of serum cytokines and risk of brain cancer in active component military personnel}, author = {Brenner, A.V. and Inskip, P.D. and Rusiecki, J. and Rabkin, C.S. and Engels, J. and Pfeiffer, R.M.}, - year = {2018}, + document_type = {Article}, + doi = {10.1038/s41416-018-0272-x}, journal = {British Journal of Cancer}, - volume = {119}, number = {7}, pages = {893-900}, - document_type = {Article}, - doi = {10.1038/s41416-018-0272-x} + title = {Serially measured pre-diagnostic levels of serum cytokines and risk of brain cancer in active component military personnel}, + volume = {119}, + year = {2018} } @article{BrownEtAl2001, - title = {Bayesian discrimination with longitudinal data}, author = {Brown, P. J. and Kenward, M. G. and Bassett, E. E.}, - year = {2001}, - journal = {Biostatistics}, - volume = {2}, - number = {4}, - pages = {417-432}, doi = {10.1093/biostatistics/2.4.417}, issn = {1465-4644}, - month = {12} + journal = {Biostatistics}, + month = {12}, + number = {4}, + pages = {417-432}, + title = {Bayesian discrimination with longitudinal data}, + volume = {2}, + year = {2001} } @article{BuraCook2001a, - title = {Extending Sliced Inverse Regression}, author = {Efstathia Bura and R. 
Dennis Cook}, - year = {2001}, + doi = {10.1198/016214501753208979}, journal = {Journal of the American Statistical Association}, - volume = {96}, number = {455}, pages = {996-1003}, publisher = {Taylor & Francis}, - doi = {10.1198/016214501753208979} + title = {Extending Sliced Inverse Regression}, + volume = {96}, + year = {2001} } @article{BuraCook2001b, - title = {Estimating the structural dimension of regressions via parametric inverse regression}, author = {Bura, Efstathia and Cook, R. Dennis}, - year = {2001}, - journal = {J. R. Stat. Soc. Ser. B Stat. Methodol.}, fjournal = {Journal of the Royal Statistical Society. Series B: Statistical Methodology}, - volume = {63}, + issn = {1369-7412}, + journal = {J. R. Stat. Soc. Ser. B Stat. Methodol.}, number = {2}, pages = {393--410}, publisher = {Wiley-Blackwell}, - issn = {1369-7412} + title = {Estimating the structural dimension of regressions via parametric inverse regression}, + volume = {63}, + year = {2001} } @article{BuraDuarteForzani2016, - title = {Sufficient Reductions in Regressions With Exponential Family Inverse Predictors}, author = {Efstathia Bura and Sabrina Duarte and Liliana Forzani}, - year = {2016}, - journal = {J. Amer. Statist. Assoc.}, + doi = {10.1080/01621459.2015.1093944}, fjournal = {Journal of the American Statistical Association}, - volume = {111}, + journal = {J. Amer. Statist. Assoc.}, number = {515}, pages = {1313--1329}, publisher = {Taylor \& Francis}, - doi = {10.1080/01621459.2015.1093944} + title = {Sufficient Reductions in Regressions With Exponential Family Inverse Predictors}, + volume = {111}, + year = {2016} } @article{BuraEtAl2018, - title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models}, author = {Bura, Efstathia and Duarte, Sabrina and Forzani, Liliana and E. Smucler and M. Sued}, - year = {2018}, - journal = {Statistics}, + doi = {10.1080/02331888.2018.1467420}, fjournal = {Statistics. A Journal of Theoretical and Applied Statistics}, - volume = {52}, + journal = {Statistics}, number = {5}, pages = {1005--1024}, publisher = {Taylor \& Francis}, - doi = {10.1080/02331888.2018.1467420} + title = {Asymptotic theory for maximum likelihood estimates in reduced-rank multivariate generalized linear models}, + volume = {52}, + year = {2018} } @article{BuraEtAl2022, - title = {Sufficient reductions in regression with mixed predictors}, author = {Bura, Efstathia and Forzani, Liliana and Garc\'{i}a Arancibia, Rodrigo and Llop, Pamela and Tomassi, Diego}, - year = {2022}, - journal = {J. Mach. Learn. Res.}, fjournal = {Journal of Machine Learning Research (JMLR)}, - volume = {23}, - number = {102}, - pages = {1--47}, issn = {1532-4435,1533-7928}, + journal = {J. Mach. Learn. Res.}, mrclass = {62H12 (62H22 62H25 62J02 65C20)}, mrnumber = {4576687}, - url = {http://jmlr.org/papers/v23/21-0175.html} + number = {102}, + pages = {1--47}, + title = {Sufficient reductions in regression with mixed predictors}, + url = {http://jmlr.org/papers/v23/21-0175.html}, + volume = {23}, + year = {2022} } @article{BuraForzani2015, - title = {{Sufficient Reductions in Regressions With Elliptically Contoured Inverse Predictors}}, author = {Bura, Efstathia and Forzani, Liliana}, - year = {2015}, - journal = {J. Amer. Statist. Assoc.}, + doi = {10.1080/01621459.2014.914440}, fjournal = {Journal of the American Statistical Association}, - volume = {110}, + issn = {0162-1459,1537-274X}, + journal = {J. Amer. Statist. 
Assoc.}, + mrclass = {62J02 (62J05 62P10)}, number = {509}, pages = {420--434}, publisher = {Taylor \& Francis}, - doi = {10.1080/01621459.2014.914440}, - issn = {0162-1459,1537-274X}, - mrclass = {62J02 (62J05 62P10)} + title = {{Sufficient Reductions in Regressions With Elliptically Contoured Inverse Predictors}}, + volume = {110}, + year = {2015} } @article{Burdick1995, - title = {An introduction to tensor products with applications to multiway data analysis}, author = {Donald S. Burdick}, - year = {1995}, - journal = {Chemometrics and Intelligent Laboratory Systems}, - volume = {28}, - number = {2}, - pages = {229-237}, doi = {10.1016/0169-7439(95)80060-M}, issn = {0169-7439}, - url = {https://www.sciencedirect.com/science/article/pii/016974399580060M} + journal = {Chemometrics and Intelligent Laboratory Systems}, + number = {2}, + pages = {229-237}, + title = {An introduction to tensor products with applications to multiway data analysis}, + url = {https://www.sciencedirect.com/science/article/pii/016974399580060M}, + volume = {28}, + year = {1995} } @article{Burges2010, - title = {Dimension Reduction: A Guided Tour}, author = {Christopher J. C. Burges}, - year = {2010}, + doi = {10.1561/2200000002}, + issn = {1935-8237}, journal = {Foundations and Trends® in Machine Learning}, - volume = {2}, number = {4}, pages = {275-365}, - doi = {10.1561/2200000002}, - issn = {1935-8237} + title = {Dimension Reduction: A Guided Tour}, + volume = {2}, + year = {2010} } @article{CandesEtAl2008, - title = {Enhancing sparsity by reweighted {$l_1$} minimization}, author = {Cand\`es, Emmanuel J. and Wakin, Michael B. and Boyd, Stephen P.}, - year = {2008}, - journal = {J. Fourier Anal. Appl.}, - fjournal = {The Journal of Fourier Analysis and Applications}, - volume = {14}, - number = {5-6}, - pages = {877--905}, doi = {10.1007/s00041-008-9045-x}, + fjournal = {The Journal of Fourier Analysis and Applications}, issn = {1069-5869,1531-5851}, + journal = {J. Fourier Anal. Appl.}, mrclass = {90C25 (90C27 94A12)}, mrnumber = {2461611}, - url = {https://doi.org/10.1007/s00041-008-9045-x} + number = {5-6}, + pages = {877--905}, + title = {Enhancing sparsity by reweighted {$l_1$} minimization}, + url = {https://doi.org/10.1007/s00041-008-9045-x}, + volume = {14}, + year = {2008} } @article{CarrollLi1995, - title = {Binary regressors in dimension reduction models: a new look at treatment comparisons}, author = {Carroll, Raymond James and Li, Ker-Chau}, - year = {1995}, - journal = {Statist. Sinica}, fjournal = {Statistica Sinica}, - volume = {5}, - number = {2}, - pages = {667--688}, issn = {1017-0405,1996-8507}, + journal = {Statist. 
Sinica}, mrclass = {62J02 (62G05)}, mrnumber = {1347614}, - url = {https://api.semanticscholar.org/CorpusID:1648354} + number = {2}, + pages = {667--688}, + title = {Binary regressors in dimension reduction models: a new look at treatment comparisons}, + url = {https://api.semanticscholar.org/CorpusID:1648354}, + volume = {5}, + year = {1995} } @book{CasellaBerger2002, - title = {{Statistical Inference}}, author = {Casella, George and Berger, Roger L.}, - year = {2002}, - publisher = {Thomson Learning}, edition = {2}, isbn = {0-534-24312-6}, - series = {Duxbury Advanced Series} + publisher = {Thomson Learning}, + series = {Duxbury Advanced Series}, + title = {{Statistical Inference}}, + year = {2002} } @article{ChakrabortyEtAl2022, - title = {Ordered conditional approximation of Potts models}, author = {Anirban Chakraborty and Matthias Katzfuss and Joseph Guinness}, - year = {2022}, - journal = {Spatial Statistics}, - volume = {52}, - pages = {100708}, doi = {10.1016/j.spasta.2022.100708}, issn = {2211--6753}, - url = {https://www.sciencedirect.com/science/article/pii/S2211675322000690} + journal = {Spatial Statistics}, + pages = {100708}, + title = {Ordered conditional approximation of Potts models}, + url = {https://www.sciencedirect.com/science/article/pii/S2211675322000690}, + volume = {52}, + year = {2022} } @article{ChenEtAl2021, - title = {Tensor Canonical Correlation Analysis With Convergence and Statistical Guarantees}, author = {Chen, You-Lin and Kolar, Mladen and Tsay, Ruey S.}, - year = {2021}, + doi = {10.1080/10618600.2020.1856118}, journal = {Journal of Computational and Graphical Statistics}, - volume = {30}, number = {3}, pages = {728--744}, - doi = {10.1080/10618600.2020.1856118} + title = {Tensor Canonical Correlation Analysis With Convergence and Statistical Guarantees}, + volume = {30}, + year = {2021} } @article{ChengEtAl2014, - title = {A sparse Ising model with covariates}, author = {Cheng, Jie and Levina, Elizaveta and Wang, Pei and Zhu, Ji}, - year = {2014}, + doi = {10.1111/biom.12202}, journal = {Biometrics}, - volume = {70}, number = {4}, pages = {943-953}, - doi = {10.1111/biom.12202} + title = {A sparse Ising model with covariates}, + volume = {70}, + year = {2014} } @article{ChenZouCook2010, - title = {Coordinate-independent sparse sufficient dimension reduction and variable selection}, author = {Chen, Xin and Zou, Changliang and Cook, R. Dennis}, - year = {2010}, - journal = {Ann. Statist.}, + doi = {10.1214/10-AOS826}, fjournal = {The Annals of Statistics}, - volume = {38}, + journal = {Ann. Statist.}, + month = {12}, number = {6}, pages = {3696--3723}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/10-AOS826}, - month = {12}, - url = {https://doi.org/10.1214/10-AOS826} + title = {Coordinate-independent sparse sufficient dimension reduction and variable selection}, + url = {https://doi.org/10.1214/10-AOS826}, + volume = {38}, + year = {2010} } @article{ChiaroCookLi2002, - title = {Sufficient dimension reduction in regressions with categorical predictors}, author = {Chiaromonte, F. and Cook, R. Dennis and Li, B.}, - year = {2002}, - journal = {Ann. Statist.}, fjournal = {The Annals of Statistics}, - volume = {30}, + issue = {2}, + journal = {Ann. 
Statist.}, pages = {475-497}, publisher = {The Institute of Mathematical Statistics}, - issue = {2} + title = {Sufficient dimension reduction in regressions with categorical predictors}, + volume = {30}, + year = {2002} } @article{ChiaromonteCook2002, - title = {Sufficient dimension reduction and graphics in regression}, author = {Chiaromonte, Francesca and Cook, R. Dennis}, - year = {2002}, - journal = {Ann. Inst. Statist. Math.}, - fjournal = {Annals of the Institute of Statistical Mathematics}, - volume = {54}, - number = {4}, - pages = {768--795}, doi = {10.1023/A:1022411301790}, + fjournal = {Annals of the Institute of Statistical Mathematics}, issn = {0020-3157,1572-9052}, + journal = {Ann. Inst. Statist. Math.}, mrclass = {62J05 (62H99)}, mrnumber = {1954046}, - url = {https://doi.org/10.1023/A:1022411301790} + number = {4}, + pages = {768--795}, + title = {Sufficient dimension reduction and graphics in regression}, + url = {https://doi.org/10.1023/A:1022411301790}, + volume = {54}, + year = {2002} } @article{ClevelandDevlin1988, - title = {{Locally Weighted Regression: An Approach to Regression Analysis by Local Fitting}}, author = {William S. Cleveland and Susan J. Devlin}, - year = {1988}, - journal = {J. Amer. Statist. Assoc.}, + doi = {10.1080/01621459.1988.10478639}, fjournal = {Journal of the American Statistical Association}, - volume = {83}, + journal = {J. Amer. Statist. Assoc.}, number = {403}, pages = {596--610}, publisher = {Taylor \& Francis}, - doi = {10.1080/01621459.1988.10478639} + title = {{Locally Weighted Regression: An Approach to Regression Analysis by Local Fitting}}, + volume = {83}, + year = {1988} } @incollection{Comon2002, - title = {{Tensor Decompositions: State of the Art and Applications}}, author = {Comon, Pierre}, - year = {2002}, - publisher = {Oxford University Press}, booktitle = {{Mathematics in Signal Processing V}}, doi = {10.1093/oso/9780198507345.003.0001}, eprint = {https://academic.oup.com/book/0/chapter/422056726/chapter-pdf/52392862/isbn-9780198507345-book-part-1.pdf}, isbn = {9780198507345}, - month = {06} + month = {06}, + publisher = {Oxford University Press}, + title = {{Tensor Decompositions: State of the Art and Applications}}, + year = {2002} } @inproceedings{Comon2009, - title = {Tensors versus matrices usefulness and unexpected properties}, author = {Comon, Pierre}, - year = {2009}, - volume = {}, + booktitle = {2009 IEEE/SP 15th Workshop on Statistical Signal Processing}, + doi = {10.1109/SSP.2009.5278471}, number = {}, pages = {781-788}, - booktitle = {2009 IEEE/SP 15th Workshop on Statistical Signal Processing}, - doi = {10.1109/SSP.2009.5278471} + title = {Tensors versus matrices usefulness and unexpected properties}, + volume = {}, + year = {2009} } @book{Conway1997, - title = {A Course in Functional Analysis}, - author = {Conway, John B.}, - year = {1997}, - number = {96}, address = {New York}, + author = {Conway, John B.}, edition = {2nd ed}, isbn = {978-0-387-97245-9}, - series = {Graduate Texts in Mathematics} + number = {96}, + series = {Graduate Texts in Mathematics}, + title = {A Course in Functional Analysis}, + year = {1997} } @articla{Cook1994, - title = {Using dimension-reduction subspaces to identify important inputs in models of physical systems}, author = {Cook, Dennis R.}, - year = {1994}, journal = {Proc. Sect. Phys. Eng. 
Sci.}, - pages = {18--25} + pages = {18--25}, + title = {Using dimension-reduction subspaces to identify important inputs in models of physical systems}, + year = {1994} } @book{Cook1998, - title = {Regression Graphics: Ideas for studying regressions through graphics}, + address = {New York}, author = {Cook, Dennis R.}, - year = {1998}, publisher = {Wiley}, - address = {New York} + title = {Regression Graphics: Ideas for studying regressions through graphics}, + year = {1998} } @article{Cook2000, - title = {Save: a method for dimension reduction and graphics in regression}, author = {R. Dennis Cook}, - year = {2000}, + doi = {10.1080/03610920008832598}, journal = {Communications in Statistics - Theory and Methods}, - volume = {29}, number = {9-10}, pages = {2109-2121}, publisher = {Taylor \& Francis}, - doi = {10.1080/03610920008832598}, - url = {https://doi.org/10.1080/03610920008832598} + title = {Save: a method for dimension reduction and graphics in regression}, + url = {https://doi.org/10.1080/03610920008832598}, + volume = {29}, + year = {2000} } @article{Cook2007, - title = {{Fisher Lecture: Dimension Reduction in Regression}}, author = {Cook, R. Dennis}, - year = {2007}, + doi = {10.1214/088342306000000682}, journal = {Statistical Science}, - volume = {22}, + month = {02}, number = {1}, pages = {1--26}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/088342306000000682}, - month = {02} + title = {{Fisher Lecture: Dimension Reduction in Regression}}, + volume = {22}, + year = {2007} } @article{Cook2018, - title = {Principal Components, Sufficient Dimension Reduction, and Envelopes}, author = {Cook, R. Dennis}, - year = {2018}, + doi = {10.1146/annurev-statistics-031017-100257}, journal = {Annual Review of Statistics and Its Application}, - volume = {5}, number = {1}, pages = {533-559}, - doi = {10.1146/annurev-statistics-031017-100257} + title = {Principal Components, Sufficient Dimension Reduction, and Envelopes}, + volume = {5}, + year = {2018} } @article{CookForzani2008, - title = {Principal fitted components for dimension reduction in regression}, author = {Cook, R. D. and Forzani, L.}, - year = {2008}, journal = {Statistical Science}, - volume = {23}, number = {4}, - pages = {485-501} + pages = {485-501}, + title = {Principal fitted components for dimension reduction in regression}, + volume = {23}, + year = {2008} } @article{CookForzani2009, - title = {Likelihood-based sufficient dimension reduction}, author = {R. Dennis Cook and Liliana Forzani}, - year = {2009}, + doi = {10.1198/jasa.2009.0106}, + issn = {0162-1459}, journal = {Journal of the American Statistical Association}, - volume = {104}, + month = {3}, number = {485}, pages = {197--208}, publisher = {Taylor and Francis Ltd.}, - doi = {10.1198/jasa.2009.0106}, - issn = {0162-1459}, - month = {3} + title = {Likelihood-based sufficient dimension reduction}, + volume = {104}, + year = {2009} } @article{CookLi2002, - title = {Dimension reduction for conditional mean in regression}, author = {Cook, R.D. 
and Li, B.}, - year = {2002}, - journal = {The Annals of Statistics}, + doi = {10.1214/aos/1021379861}, fjournal = {The Annals of Statistics}, - volume = {30}, + journal = {The Annals of Statistics}, number = {2}, pages = {455--474}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/aos/1021379861} + title = {Dimension reduction for conditional mean in regression}, + volume = {30}, + year = {2002} } @article{CookLi2004, - title = {Determining the dimension of iterative {H}essian transformation}, author = {Cook, R. Dennis and Li, Bing}, - year = {2004}, - journal = {Ann. Statist.}, - fjournal = {The Annals of Statistics}, - volume = {32}, - number = {6}, - pages = {2501--2531}, doi = {10.1214/009053604000000661}, + fjournal = {The Annals of Statistics}, issn = {0090-5364,2168-8966}, + journal = {Ann. Statist.}, mrclass = {62G08 (62G09 62H05)}, mrnumber = {2153993}, - url = {https://doi.org/10.1214/009053604000000661} + number = {6}, + pages = {2501--2531}, + title = {Determining the dimension of iterative {H}essian transformation}, + url = {https://doi.org/10.1214/009053604000000661}, + volume = {32}, + year = {2004} } @article{CookLi2009, - title = {Dimension reduction in regressions with exponential family predictors}, author = {Cook, R. Dennis and Li, Lexin}, - year = {2009}, - journal = {J. Comput. Graph. Statist.}, - fjournal = {Journal of Computational and Graphical Statistics}, - volume = {18}, - number = {3}, - pages = {774--791}, doi = {10.1198/jcgs.2009.08005}, + fjournal = {Journal of Computational and Graphical Statistics}, issn = {1061-8600,1537-2715}, + journal = {J. Comput. Graph. Statist.}, mrclass = {62J05}, mrnumber = {2572637}, - url = {https://doi.org/10.1198/jcgs.2009.08005} -} + number = {3}, + pages = {774--791}, + title = {Dimension reduction in regressions with exponential family predictors}, + url = {https://doi.org/10.1198/jcgs.2009.08005}, + volume = {18}, + year = {2009} +} + +@article{CookLiChiaromonte2010, + author = {R. Dennis Cook and Bing Li and Francesca Chiaromonte}, + issn = {10170405, 19968507}, + journal = {Statistica Sinica}, + number = {3}, + pages = {927--960}, + publisher = {Institute of Statistical Science, Academia Sinica}, + title = {Envelope Models for Parsimonious and Efficient Multivariate Linear Regression}, + url = {http://www.jstor.org/stable/24309466}, + urldate = {2024-03-29}, + volume = {20}, + year = {2010} +} @article{CookNachtsheim1994, - title = {Reweighting to Achieve Elliptically Contoured Covariates in Regression}, author = {R. Dennis Cook and Christopher J. Nachtsheim}, - year = {1994}, + issn = {01621459}, journal = {Journal of the American Statistical Association}, - volume = {89}, number = {426}, pages = {592--599}, publisher = {[American Statistical Association, Taylor \& Francis, Ltd.]}, - issn = {01621459}, + title = {Reweighting to Achieve Elliptically Contoured Covariates in Regression}, url = {http://www.jstor.org/stable/2290862}, - urldate = {2024-01-18} -} + urldate = {2024-01-18}, + volume = {89}, + year = {1994} +} @article{CookWeisberg1991, - title = {Sliced Inverse Regression for Dimension Reduction: Comment}, author = {Cook, R. 
Dennis and Sanford Weisberg}, - year = {1991}, + issn = {01621459}, journal = {Journal of the American Statistical Association}, - volume = {86}, number = {414}, pages = {328-332}, publisher = {[American Statistical Association, Taylor & Francis, Ltd.]}, - issn = {01621459}, - url = {http://www.jstor.org/stable/2290564} + title = {Sliced Inverse Regression for Dimension Reduction: Comment}, + url = {http://www.jstor.org/stable/2290564}, + volume = {86}, + year = {1991} } @book{Dai2012, - title = {Multivariate {B}ernoulli distribution models}, author = {Dai, Bin}, - year = {2012}, - pages = {109}, - publisher = {ProQuest LLC, Ann Arbor, MI}, isbn = {978-1267-53750-8}, mrclass = {99-05}, mrnumber = {3078422}, note = {Thesis (Ph.D.)--The University of Wisconsin - Madison}, - url = {http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:dissertation&res_dat=xri:pqm&rft_dat=xri:pqdiss:3522035} + pages = {109}, + publisher = {ProQuest LLC, Ann Arbor, MI}, + title = {Multivariate {B}ernoulli distribution models}, + url = {http://gateway.proquest.com/openurl?url_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:dissertation&res_dat=xri:pqm&rft_dat=xri:pqdiss:3522035}, + year = {2012} } @article{DaiDingWahba2013, - title = {Multivariate {B}ernoulli distribution}, author = {Dai, Bin and Ding, Shilin and Wahba, Grace}, - year = {2013}, - journal = {Bernoulli}, - fjournal = {Bernoulli. Official Journal of the Bernoulli Society for Mathematical Statistics and Probability}, - volume = {19}, - number = {4}, - pages = {1465--1483}, doi = {10.3150/12-BEJSP10}, + fjournal = {Bernoulli. Official Journal of the Bernoulli Society for Mathematical Statistics and Probability}, issn = {1350-7265,1573-9759}, + journal = {Bernoulli}, mrclass = {62E15 (60E05 62G05 62H10 62H12 62J12)}, mrnumber = {3102559}, - url = {https://doi.org/10.3150/12-BEJSP10} + number = {4}, + pages = {1465--1483}, + title = {Multivariate {B}ernoulli distribution}, + url = {https://doi.org/10.3150/12-BEJSP10}, + volume = {19}, + year = {2013} } @article{Dawid1981, - title = {Some Matrix-Variate Distribution Theory: Notational Considerations and a Bayesian Application}, author = {A. P. Dawid}, - year = {1981}, + issn = {00063444}, journal = {Biometrika}, - volume = {68}, number = {1}, pages = {265--274}, publisher = {[Oxford University Press, Biometrika Trust]}, - issn = {00063444}, + title = {Some Matrix-Variate Distribution Theory: Notational Considerations and a Bayesian Application}, url = {http://www.jstor.org/stable/2335827}, - urldate = {2024-01-12} + urldate = {2024-01-12}, + volume = {68}, + year = {1981} } @article{DeAlmeidaEtAl2007, - title = {PARAFAC-based unified tensor modeling for wireless communication systems with application to blind multiuser equalization}, author = {André L.F. {de Almeida} and Gérard Favier and João Cesar M. 
Mota}, - year = {2007}, - journal = {Signal Processing}, - volume = {87}, - number = {2}, - pages = {337-351}, doi = {https://doi.org/10.1016/j.sigpro.2005.12.014}, issn = {0165-1684}, + journal = {Signal Processing}, note = {Tensor Signal Processing}, - url = {https://www.sciencedirect.com/science/article/pii/S0165168406001757} + number = {2}, + pages = {337-351}, + title = {PARAFAC-based unified tensor modeling for wireless communication systems with application to blind multiuser equalization}, + url = {https://www.sciencedirect.com/science/article/pii/S0165168406001757}, + volume = {87}, + year = {2007} } @article{DeesMandic2019, - title = {A Statistically Identifiable Model for Tensor-Valued Gaussian Random Variables}, author = {Bruno Scalzo Dees and Danilo P. Mandic}, - year = {2019}, journal = {ArXiv}, + title = {A Statistically Identifiable Model for Tensor-Valued Gaussian Random Variables}, + url = {https://api.semanticscholar.org/CorpusID:207847615}, volume = {abs/1911.02915}, - url = {https://api.semanticscholar.org/CorpusID:207847615} + year = {2019} } @article{DeLathauwerCastaing2007, - title = {Tensor-based techniques for the blind separation of DS-CDMA signals}, author = {Lieven {De Lathauwer} and Joséphine Castaing}, - year = {2007}, - journal = {Signal Processing}, - volume = {87}, - number = {2}, - pages = {322-336}, doi = {10.1016/j.sigpro.2005.12.015}, issn = {0165-1684}, + journal = {Signal Processing}, note = {Tensor Signal Processing}, - url = {https://www.sciencedirect.com/science/article/pii/S0165168406001745} + number = {2}, + pages = {322-336}, + title = {Tensor-based techniques for the blind separation of DS-CDMA signals}, + url = {https://www.sciencedirect.com/science/article/pii/S0165168406001745}, + volume = {87}, + year = {2007} } @article{deLeeuwMichailidis2000, - title = {Discussion article on the paper by Lange, Hunter \& Yang (2000)}, author = {J. de Leeuw and G. Michailidis}, - year = {2000}, journal = {Journal of Computational and Graphical Statistics}, + pages = {26-31}, + title = {Discussion article on the paper by Lange, Hunter \& Yang (2000)}, volume = {9}, - pages = {26-31} + year = {2000} } @article{delPino1989, - title = {The unifying role of iterative generalized least squares in statistical algorithms}, author = {G. del Pino}, - year = {1989}, journal = {Statistical Science}, + pages = {394-408}, + title = {The unifying role of iterative generalized least squares in statistical algorithms}, volume = {4}, - pages = {394-408} + year = {1989} } @article{DingCook2014, - title = {Dimension folding PCA and PFC for matrix-valued predictors}, author = {Shanshan Ding and Cook, R. Dennis}, - year = {2014}, + doi = {10.5705/ss.2012.138}, journal = {Statistica Sinica}, - volume = {24}, pages = {463--492}, publisher = {Institute of Statistical Science}, - doi = {10.5705/ss.2012.138} + title = {Dimension folding PCA and PFC for matrix-valued predictors}, + volume = {24}, + year = {2014} } @article{DingCook2015, - title = {Tensor sliced inverse regression}, author = {Shanshan Ding and R. 
Dennis Cook}, - year = {2015}, - journal = {Journal of Multivariate Analysis}, - volume = {133}, - pages = {216-231}, doi = {10.1016/j.jmva.2014.08.015}, - issn = {0047-259X} + issn = {0047-259X}, + journal = {Journal of Multivariate Analysis}, + pages = {216-231}, + title = {Tensor sliced inverse regression}, + volume = {133}, + year = {2015} } @article{DrtonEtAl2020, - title = {Existence and uniqueness of the Kronecker covariance MLE}, author = {Mathias Drton and Satoshi Kuriki and Peter D. Hoff}, - year = {2020}, journal = {The Annals of Statistics}, - url = {https://api.semanticscholar.org/CorpusID:212718000} + title = {Existence and uniqueness of the Kronecker covariance MLE}, + url = {https://api.semanticscholar.org/CorpusID:212718000}, + year = {2020} } @article{DrydenEtAl2009, - title = {{Non-Euclidean statistics for covariance matrices, with applications to diffusion tensor imaging}}, author = {Ian L. Dryden and Alexey Koloydenko and Diwei Zhou}, - year = {2009}, + doi = {10.1214/09-AOAS249}, journal = {The Annals of Applied Statistics}, - volume = {3}, number = {3}, pages = {1102 -- 1123}, publisher = {Institute of Mathematical Statistics}, - doi = {10.1214/09-AOAS249} + title = {{Non-Euclidean statistics for covariance matrices, with applications to diffusion tensor imaging}}, + volume = {3}, + year = {2009} } @misc{Dutilleul1990, - title = {Apport en analyse spectrale d'un p\'eriodogramme modifi\'e et mod\'elisation des s\'eries chronologiques avec r\'ep\'etitions en vue de leur comparaison en fr\'equence}, - author = {Pierre Dutilleul}, - year = {1990}, address = {Department of Mathematics. Universit\'e catholique de Louvian, Louvain-la-Neuve, Belgium}, - note = {Unpublished D.Sc. Dissertation} + author = {Pierre Dutilleul}, + note = {Unpublished D.Sc. Dissertation}, + title = {Apport en analyse spectrale d'un p\'eriodogramme modifi\'e et mod\'elisation des s\'eries chronologiques avec r\'ep\'etitions en vue de leur comparaison en fr\'equence}, + year = {1990} } @article{Dutilleul1999, - title = {The mle algorithm for the matrix normal distribution}, author = {Pierre Dutilleul}, - year = {1999}, + doi = {10.1080/00949659908811970}, journal = {Journal of Statistical Computation and Simulation}, - volume = {64}, number = {2}, pages = {105-123}, publisher = {Taylor & Francis}, - doi = {10.1080/00949659908811970} + title = {The mle algorithm for the matrix normal distribution}, + volume = {64}, + year = {1999} } @book{Eaton2007, - title = {Multivariate Statistics: A Vector Space Approach}, author = {Morris L. Eaton}, - year = {2007}, publisher = {Institute of Mathematical Statistics}, series = {Lecture Notes--Monograph Series, Volume 53}, - url = {https://projecteuclid.org/euclid.lnms/1196285102} + title = {Multivariate Statistics: A Vector Space Approach}, + url = {https://projecteuclid.org/euclid.lnms/1196285102}, + year = {2007} } @article{EdelmanEtAl1998, - title = {The Geometry of Algorithms with Orthogonality Constraints}, author = {Edelman, A. and Arias, T. 
and Smith, S.}, - year = {1998}, - journal = {SIAM Journal on Matrix Analysis and Applications}, - volume = {20}, - number = {2}, - pages = {303-353}, doi = {10.1137/S0895479895290954}, eprint = {https://doi.org/10.1137/S0895479895290954}, - url = {https://doi.org/10.1137/S0895479895290954} + journal = {SIAM Journal on Matrix Analysis and Applications}, + number = {2}, + pages = {303-353}, + title = {The Geometry of Algorithms with Orthogonality Constraints}, + url = {https://doi.org/10.1137/S0895479895290954}, + volume = {20}, + year = {1998} } @article{Einstein1916, - title = {Die Grundlage der allgemeinen Relativitätstheorie}, author = {Einstein, Albert}, - year = {1916}, + doi = {10.1002/andp.19163540702}, journal = {Annalen der Physik}, - volume = {354}, number = {7}, pages = {769-822}, - doi = {10.1002/andp.19163540702} + title = {Die Grundlage der allgemeinen Relativitätstheorie}, + volume = {354}, + year = {1916} } @article{Fan1993, - title = {Local Linear Regression Smoothers and Their Minimax Efficiencies}, author = {Jianqing Fan}, - year = {1993}, journal = {Annals of Statistics}, - volume = {21}, pages = {196-216}, - url = {https://api.semanticscholar.org/CorpusID:9375835} + title = {Local Linear Regression Smoothers and Their Minimax Efficiencies}, + url = {https://api.semanticscholar.org/CorpusID:9375835}, + volume = {21}, + year = {1993} } @article{FanGijbels1992, - title = {Variable Bandwidth and Local Linear Regression Smoothers}, author = {Jianqing Fan and Irene Gijbels}, - year = {1992}, + issn = {00905364}, journal = {The Annals of Statistics}, - volume = {20}, number = {4}, pages = {2008--2036}, publisher = {Institute of Mathematical Statistics}, - issn = {00905364}, + title = {Variable Bandwidth and Local Linear Regression Smoothers}, url = {http://www.jstor.org/stable/2242378}, - urldate = {2024-01-25} + urldate = {2024-01-25}, + volume = {20}, + year = {1992} } @article{FanLi2001, - title = {Variable Selection via Nonconcave Penalized Likelihood and its Oracle Properties}, author = {Jianqing Fan and Runze Li}, - year = {2001}, + doi = {10.1198/016214501753382273}, + eprint = {https://doi.org/10.1198/016214501753382273}, journal = {Journal of the American Statistical Association}, - volume = {96}, number = {456}, pages = {1348-1360}, publisher = {Taylor & Francis}, - doi = {10.1198/016214501753382273}, - eprint = {https://doi.org/10.1198/016214501753382273}, - url = {https://doi.org/10.1198/016214501753382273} + title = {Variable Selection via Nonconcave Penalized Likelihood and its Oracle Properties}, + url = {https://doi.org/10.1198/016214501753382273}, + volume = {96}, + year = {2001} } @article{FertlBura2022a, - title = {Conditional variance estimator for sufficient dimension reduction}, author = {Fertl, Lukas and Bura, Efstathia}, - year = {2022}, - journal = {Bernoulli}, - fjournal = {Bernoulli. Official Journal of the Bernoulli Society for Mathematical Statistics and Probability}, - volume = {28}, - number = {3}, - pages = {1862--1891}, doi = {10.3150/21-bej1402}, + fjournal = {Bernoulli. 
Official Journal of the Bernoulli Society for Mathematical Statistics and Probability}, issn = {1350-7265,1573-9759}, + journal = {Bernoulli}, mrclass = {62G08}, mrnumber = {4411514}, - url = {https://doi.org/10.3150/21-bej1402} + number = {3}, + pages = {1862--1891}, + title = {Conditional variance estimator for sufficient dimension reduction}, + url = {https://doi.org/10.3150/21-bej1402}, + volume = {28}, + year = {2022} } @article{FertlBura2022b, - title = {The ensemble conditional variance estimator for sufficient dimension reduction}, author = {Fertl, Lukas and Bura, Efstathia}, - year = {2022}, - journal = {Electron. J. Stat.}, - fjournal = {Electronic Journal of Statistics}, - volume = {16}, - number = {1}, - pages = {1595--1634}, doi = {10.1214/22-EJS1994}, + fjournal = {Electronic Journal of Statistics}, issn = {1935-7524}, + journal = {Electron. J. Stat.}, mrclass = {62G07 (62G08)}, mrnumber = {4390504}, - url = {https://doi.org/10.1214/22-EJS1994} + number = {1}, + pages = {1595--1634}, + title = {The ensemble conditional variance estimator for sufficient dimension reduction}, + url = {https://doi.org/10.1214/22-EJS1994}, + volume = {16}, + year = {2022} } @article{Fisher1922, - title = {On the Mathematical Foundations of Theoretical Statistics}, author = {R. A. Fisher}, - year = {1922}, + issn = {02643952}, journal = {Philosophical Transactions of the Royal Society of London. Series A, Containing Papers of a Mathematical or Physical Character}, - volume = {222}, number = {}, pages = {309--368}, publisher = {The Royal Society}, - issn = {02643952}, + title = {On the Mathematical Foundations of Theoretical Statistics}, url = {http://www.jstor.org/stable/91208}, - urldate = {2024-01-22} + urldate = {2024-01-22}, + volume = {222}, + year = {1922} } @misc{friedbergEtAl2020, - title = {Local {L}inear {F}orests}, author = {Friedberg, Rina and Tibshirani, Julie and Athey, Susan and Wager, Stefan}, - year = {2020}, howpublished = {arXiv:1807.11408 [cs, econ, math, stat]}, note = {\url{http://arxiv.org/abs/1807.11408}}, - urldate = {2021-03-08} + title = {Local {L}inear {F}orests}, + urldate = {2021-03-08}, + year = {2020} } @article{Friedman1991, - title = {Multivariate Adaptive Regression Splines}, author = {Jerome H. Friedman}, - year = {1991}, + issn = {00905364}, journal = {The Annals of Statistics}, - volume = {19}, number = {1}, pages = {1--67}, publisher = {Institute of Mathematical Statistics}, - issn = {00905364}, - url = {http://www.jstor.org/stable/2241837} + title = {Multivariate Adaptive Regression Splines}, + url = {http://www.jstor.org/stable/2241837}, + volume = {19}, + year = {1991} } @article{FukumizuEtAl2009, - title = {Kernel dimension reduction in regression}, author = {Fukumizu, Kenji and Bach, Francis R. and Jordan, Michael I.}, - year = {2009}, - journal = {Ann. Statist.}, + doi = {10.1214/08-AOS637}, fjournal = {The Annals of Statistics}, - volume = {37}, + journal = {Ann. 
Statist.}, + month = {08}, number = {4}, pages = {1871--1905}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/08-AOS637}, - month = {08} + title = {Kernel dimension reduction in regression}, + volume = {37}, + year = {2009} } @misc{GhojoghEtAl2021, - title = {Sufficient Dimension Reduction for High-Dimensional Regression and Low-Dimensional Embedding: Tutorial and Survey}, - author = {Benyamin Ghojogh and Ali Ghodsi and Fakhri Karray and Mark Crowley}, - year = {2021}, archiveprefix = {arXiv}, + author = {Benyamin Ghojogh and Ali Ghodsi and Fakhri Karray and Mark Crowley}, doi = {10.48550/arXiv.2110.09620}, eprint = {2110.09620}, - primaryclass = {stat.ME} + primaryclass = {stat.ME}, + title = {Sufficient Dimension Reduction for High-Dimensional Regression and Low-Dimensional Embedding: Tutorial and Survey}, + year = {2021} } @article{GirkaEtAl2024, - title = {Tensor generalized canonical correlation analysis}, author = {Fabien Girka and Arnaud Gloaguen and Laurent {Le Brusquet} and Violetta Zujovic and Arthur Tenenhaus}, - year = {2024}, - journal = {Information Fusion}, - volume = {102}, doi = {10.1016/j.inffus.2023.102045}, - issn = {1566-2535} + issn = {1566-2535}, + journal = {Information Fusion}, + title = {Tensor generalized canonical correlation analysis}, + volume = {102}, + year = {2024} } @article{GlobersonTishby2003, - title = {Sufficient Dimensionality Reduction}, author = {Amir Globerson and Naftali Tishby}, - year = {2003}, journal = {Journal of Machine Learning Research}, + title = {Sufficient Dimensionality Reduction}, + url = {https://api.semanticscholar.org/CorpusID:5095858}, volume = {3}, - url = {https://api.semanticscholar.org/CorpusID:5095858} + year = {2003} } @book{GolubVanLoanl996, - title = {Matrix Computations}, author = {Golub, Gene H. and Van Loan, Charles F.}, - year = {1996}, + edition = {Third}, publisher = {The Johns Hopkins University Press}, - edition = {Third} + title = {Matrix Computations}, + year = {1996} } @book{GoodfellowEtAl2016, - title = {Deep Learning}, author = {Ian Goodfellow and Yoshua Bengio and Aaron Courville}, - year = {2016}, publisher = {MIT Press}, - url = {\url{http://www.deeplearningbook.org}} + title = {Deep Learning}, + url = {\url{http://www.deeplearningbook.org}}, + year = {2016} } @article{Green1984, - title = {Iteratively Reweighted Least Squares for Maximum Likelihood Estimation, and some Robust and Resistant Alternatives}, author = {P. J. Green}, - year = {1984}, + issn = {00359246}, journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, - volume = {46}, number = {2}, pages = {149--192}, publisher = {[Royal Statistical Society, Wiley]}, - issn = {00359246}, - url = {http://www.jstor.org/stable/2345503} + title = {Iteratively Reweighted Least Squares for Maximum Likelihood Estimation, and some Robust and Resistant Alternatives}, + url = {http://www.jstor.org/stable/2345503}, + volume = {46}, + year = {1984} } @article{GreenewaldHero2014, - title = {Robust Kronecker Product PCA for Spatio-Temporal Covariance Estimation}, author = {Kristjan H. Greenewald and Alfred O. 
Hero}, - year = {2014}, journal = {IEEE Transactions on Signal Processing}, - volume = {63}, pages = {6368-6378}, - url = {https://api.semanticscholar.org/CorpusID:15582097} + title = {Robust Kronecker Product PCA for Spatio-Temporal Covariance Estimation}, + url = {https://api.semanticscholar.org/CorpusID:15582097}, + volume = {63}, + year = {2014} } @book{Gurney1997, - title = {An Introduction to Neural Networks}, - author = {Gurney, Kevin}, - year = {1997}, - publisher = {Taylor \& Francis, Inc.}, address = {USA}, - isbn = {1857286731} + author = {Gurney, Kevin}, + isbn = {1857286731}, + publisher = {Taylor \& Francis, Inc.}, + title = {An Introduction to Neural Networks}, + year = {1997} } @misc{HajriEtAl2017, - title = {Maximum Likelihood Estimators on Manifolds}, author = {Hajri, Hatem and Said, Salem and Berthoumieu, Yannick}, - year = {2017}, + doi = {10.1007/978-3-319-68445-1_80}, journal = {Lecture Notes in Computer Science}, pages = {692-700}, publisher = {Springer International Publishing}, - doi = {10.1007/978-3-319-68445-1_80} + title = {Maximum Likelihood Estimators on Manifolds}, + year = {2017} } @article{HallLi1993, - title = {On almost Linearity of Low Dimensional Projections from High Dimensional Data}, author = {Hall, P. and Li, KC.}, - year = {1993}, journal = {Annals of Statistics}, - volume = {21}, + language = {English}, number = {2}, pages = { 867-889}, - language = {English} + title = {On almost Linearity of Low Dimensional Projections from High Dimensional Data}, + volume = {21}, + year = {1993} +} + +@article{HaoEtAl2021, + author = {Botao Hao and Boxiang Wang and Pengyuan Wang and Jingfei Zhang and Jian Yang and Will Wei Sun}, + journal = {Journal of Machine Learning Research}, + number = {64}, + pages = {1--43}, + title = {Sparse Tensor Additive Regression}, + url = {http://jmlr.org/papers/v22/19-769.html}, + volume = {22}, + year = {2021} } @book{Harville1997, - title = {Matrix Algebra From a Statistician's Perspective}, - author = {David A. Harville}, - year = {1997}, - publisher = {Springer-Verlag}, address = {New York}, + author = {David A. Harville}, chapter = {15}, - edition = {1} + edition = {1}, + publisher = {Springer-Verlag}, + title = {Matrix Algebra From a Statistician's Perspective}, + year = {1997} } @book{HastieTibshirani1990, - title = {Generalized additive models}, author = {Hastie, Trevor John and Tibshirani, Robert J.}, - year = {1990}, - volume = {43}, - pages = {xvi+335}, - publisher = {Chapman and Hall, Ltd., London}, isbn = {0-412-34390-8}, mrclass = {62J02 (62-07 62G05 62J20)}, mrnumber = {1082147}, - series = {Monographs on Statistics and Applied Probability} + pages = {xvi+335}, + publisher = {Chapman and Hall, Ltd., London}, + series = {Monographs on Statistics and Applied Probability}, + title = {Generalized additive models}, + volume = {43}, + year = {1990} } @article{HillarLim2013, - title = {Most Tensor Problems Are NP-Hard}, - author = {Hillar, Christopher J. and Lim, Lek\-Heng}, - year = {2013}, - journal = {J. ACM}, - volume = {60}, - number = {6}, - publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, articleno = {45}, + author = {Hillar, Christopher J. and Lim, Lek\-Heng}, doi = {10.1145/2512329}, issn = {0004-5411}, issue_date = {November 2013}, + journal = {J. 
ACM}, + number = {6}, numpages = {39}, - url = {https://doi.org/10.1145/2512329} + publisher = {Association for Computing Machinery}, + title = {Most Tensor Problems Are NP-Hard}, + url = {https://doi.org/10.1145/2512329}, + volume = {60}, + year = {2013} } @misc{Hinton2012, - title = {{Neural Networks for Machine Learning}}, author = {Hinton, Geoffrey E.}, - year = {2012}, note = {Coursera Lecture 6 - Online; accessed Jan 18, 2024}, - url = {www.cs.toronto.edu/~hinton/coursera/lecture6/lec6.pdf} + title = {{Neural Networks for Machine Learning}}, + url = {www.cs.toronto.edu/~hinton/coursera/lecture6/lec6.pdf}, + year = {2012} } @article{Hoff2011, - title = {{Separable covariance arrays via the Tucker product, with applications to multivariate relational data}}, author = {Peter D. Hoff}, - year = {2011}, + doi = {10.1214/11-BA606}, journal = {Bayesian Analysis}, - volume = {6}, + keywords = {Gaussian, matrix normal, multiway data, network, tensor, Tucker decomposition}, number = {2}, pages = {179 -- 196}, publisher = {International Society for Bayesian Analysis}, - doi = {10.1214/11-BA606}, - keywords = {Gaussian, matrix normal, multiway data, network, tensor, Tucker decomposition} + title = {{Separable covariance arrays via the Tucker product, with applications to multivariate relational data}}, + volume = {6}, + year = {2011} } @article{Hoff2015, - title = {{Multilinear tensor regression for longitudinal relational data}}, author = {Peter D. Hoff}, - year = {2015}, + doi = {10.1214/15-AOAS839}, journal = {The Annals of Applied Statistics}, - volume = {9}, + keywords = {Array normal, Bayesian inference, event data, international relations, network, Tucker product, vector autoregression}, number = {3}, pages = {1169 -- 1193}, publisher = {Institute of Mathematical Statistics}, - doi = {10.1214/15-AOAS839}, - keywords = {Array normal, Bayesian inference, event data, international relations, network, Tucker product, vector autoregression} + title = {{Multilinear tensor regression for longitudinal relational data}}, + volume = {9}, + year = {2015} } @article{Hornik1991, - title = {Approximation capabilities of multilayer feedforward networks}, author = {Hornik, Kurt}, - year = {1991}, + issn = {0893-6080}, journal = {Neural Networks}, - volume = {4}, + note = {\url{https://doi.org/10.1016/0893-6080(91)90009-T}}, number = {2}, pages = {251-257}, - issn = {0893-6080}, - note = {\url{https://doi.org/10.1016/0893-6080(91)90009-T}} + title = {Approximation capabilities of multilayer feedforward networks}, + volume = {4}, + year = {1991} } @article{HuLeeWang2022, - title = {Generalized Tensor Decomposition With Features on Multiple Modes}, author = {Hu, Jiaxin and Lee, Chanwoo and Wang, Miaoyan}, - year = {2022}, + doi = {10.1080/10618600.2021.1978471}, journal = {Journal of Computational and Graphical Statistics}, - volume = {31}, number = {1}, pages = {204-218}, publisher = {Taylor \& Francis}, - doi = {10.1080/10618600.2021.1978471} + title = {Generalized Tensor Decomposition With Features on Multiple Modes}, + volume = {31}, + year = {2022} } @article{Ising1925, - title = {{Beitrag zur Theorie des Ferromagnetismus}}, author = {Ising, Ernst}, - year = {1925}, - journal = {Zeitschrift f\"ur Physik}, - volume = {31}, - number = {1}, - pages = {253-258}, doi = {10.1007/BF02980577}, issn = {0044-3328}, - month = {2} + journal = {Zeitschrift f\"ur Physik}, + month = {2}, + number = {1}, + pages = {253-258}, + title = {{Beitrag zur Theorie des Ferromagnetismus}}, + volume = {31}, + year = {1925} } 
@book{JamesEtAl2021, - title = {An introduction to statistical learning---with applications in {R}}, author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert}, - year = {2021}, - pages = {xv+607}, - publisher = {Springer, New York}, doi = {10.1007/978-1-0716-1418-1}, edition = {Second}, isbn = {978-1-0716-1418-1}, mrclass = {62-01 (62-04 62H30 62Jxx 62M45 62N01)}, mrnumber = {4309209}, + pages = {xv+607}, + publisher = {Springer, New York}, series = {Springer Texts in Statistics}, - url = {https://doi.org/10.1007/978-1-0716-1418-1} + title = {An introduction to statistical learning---with applications in {R}}, + url = {https://doi.org/10.1007/978-1-0716-1418-1}, + year = {2021} } @incollection{JennyHaselmayerKapla2021, - title = {Measuring incivility in parliamentary debates : validating a sentiment analysis procedure with calls to order in the Austrian Parliament}, - author = {Jenny, Marcelo and Haselmayer, Martin and Kapla, Daniel}, - year = {2021}, - pages = {1--11}, - publisher = {Routledge}, address = {London}, + author = {Jenny, Marcelo and Haselmayer, Martin and Kapla, Daniel}, booktitle = {Political Incivility in the Parliamentary, Electoral and Media Arena : Crossing Boundaries}, editor = {Walter, Annemarie S.}, isbn = {978-0-367-46273-4}, - series = {Routledge studies on political parties and party systems} + pages = {1--11}, + publisher = {Routledge}, + series = {Routledge studies on political parties and party systems}, + title = {Measuring incivility in parliamentary debates : validating a sentiment analysis procedure with calls to order in the Austrian Parliament}, + year = {2021} } @book{JohnsonEtAl1997, - title = {{Discrete Multivariate Distributions}}, author = {Johnson, Norman L. and Kotz, Samuel and Balakrishnan, N.}, - year = {1997}, - pages = {xxii+299}, - publisher = {John Wiley \& Sons, Inc., New York}, isbn = {0-471-12844-9}, mrclass = {62E15 (60C05 60E05 62H05)}, mrnumber = {1429617}, note = {A Wiley-Interscience Publication}, - series = {Wiley Series in Probability and Statistics: Applied Probability and Statistics} + pages = {xxii+299}, + publisher = {John Wiley \& Sons, Inc., New York}, + series = {Wiley Series in Probability and Statistics: Applied Probability and Statistics}, + title = {{Discrete Multivariate Distributions}}, + year = {1997} } @article{Jolliffe1982, - title = {A Note on the Use of Principal Components in Regression}, author = {Ian T. Jolliffe}, - year = {1982}, + issn = {00359254, 14679876}, journal = {Journal of the Royal Statistical Society. 
Series C (Applied Statistics)}, - volume = {31}, number = {3}, pages = {300--303}, publisher = {[Wiley, Royal Statistical Society]}, - issn = {00359254, 14679876}, - url = {http://www.jstor.org/stable/2348005} + title = {A Note on the Use of Principal Components in Regression}, + url = {http://www.jstor.org/stable/2348005}, + volume = {31}, + year = {1982} } @article{JungEtAl2019, - title = {Penalized Orthogonal Iteration for Sparse Estimation of Generalized Eigenvalue Problem}, author = {Sungkyu Jung and Jeongyoun Ahn and Yongho Jeon}, - year = {2019}, + doi = {10.1080/10618600.2019.1568014}, journal = {Journal of Computational and Graphical Statistics}, - volume = {28}, number = {3}, pages = {710-721}, publisher = {Taylor & Francis}, - doi = {10.1080/10618600.2019.1568014} + title = {Penalized Orthogonal Iteration for Sparse Estimation of Generalized Eigenvalue Problem}, + volume = {28}, + year = {2019} } @book{Kaltenbaeck2021, - title = {Aufbau Analysis}, author = {Kaltenb\"ack, Michael}, - year = {2021}, - publisher = {Heldermann Verlag}, edition = {27}, isbn = {978-3-88538-127-3}, - series = {Berliner Studienreihe zur Mathematik} + publisher = {Heldermann Verlag}, + series = {Berliner Studienreihe zur Mathematik}, + title = {Aufbau Analysis}, + year = {2021} } @article{Kapla2019, - title = {Comparison of Different Word Embeddings and Neural Network Types for Sentiment Analysis of German Political Speeches}, author = {Kapla, Daniel}, + title = {Comparison of Different Word Embeddings and Neural Network Types for Sentiment Analysis of German Political Speeches}, year = {2019} } @article{KaplaFertlBura2022, - title = {Fusing sufficient dimension reduction with neural networks}, author = {Kapla, Daniel and Fertl, Lukas and Bura, Efstathia}, - year = {2022}, - journal = {Comput. Statist. Data Anal.}, - fjournal = {Computational Statistics \& Data Analysis}, - volume = {168}, - pages = {Paper No. 107390, 20}, doi = {10.1016/j.csda.2021.107390}, + fjournal = {Computational Statistics \& Data Analysis}, issn = {0167-9473,1872-7352}, + journal = {Comput. Statist. Data Anal.}, mrclass = {99-01}, mrnumber = {4343643}, - url = {https://doi.org/10.1016/j.csda.2021.107390} + pages = {Paper No. 107390, 20}, + title = {Fusing sufficient dimension reduction with neural networks}, + url = {https://doi.org/10.1016/j.csda.2021.107390}, + volume = {168}, + year = {2022} } @misc{KingmaWelling2019, - title = {An {I}ntroduction to {V}ariational {A}utoencoders}, author = {Kingma, Diederik P. and Welling, Max}, - year = 2019, howpublished = {arXiv:1906.02691 [cs.LG]}, - note = {\url{http://arxiv.org/abs/1906.02691}} + note = {\url{http://arxiv.org/abs/1906.02691}}, + title = {An {I}ntroduction to {V}ariational {A}utoencoders}, + year = 2019 } @inproceedings{KofidisRegalia2005, - title = {Tensor Approximation and Signal Processing Applications}, author = {Eleftherios Kofidis and Phillip A. 
Regalia}, - year = {2005}, - url = {https://api.semanticscholar.org/CorpusID:13667742} + title = {Tensor Approximation and Signal Processing Applications}, + url = {https://api.semanticscholar.org/CorpusID:13667742}, + year = {2005} } @article{Kolda2006, - title = {Multilinear operators for higher-order decompositions.}, author = {Kolda, Tamara Gibson}, - year = {2006}, doi = {10.2172/923081}, month = {4}, place = {United States}, + title = {Multilinear operators for higher-order decompositions.}, type = {Technical Report}, - url = {https://www.osti.gov/biblio/923081} + url = {https://www.osti.gov/biblio/923081}, + year = {2006} } @article{KoldaBader2009, - title = {Tensor Decompositions and Applications}, author = {Kolda, Tamara G. and Bader, Brett W.}, - year = {2009}, + doi = {10.1137/07070111X}, journal = {SIAM Review}, - volume = {51}, number = {3}, pages = {455-500}, - doi = {10.1137/07070111X} + title = {Tensor Decompositions and Applications}, + volume = {51}, + year = {2009} } @book{KolloVonRosen2005, - title = {Advanced Multivariate Statistics with Matrices}, author = {Kollo, T\~onu and von Rosen, Dietrich}, - year = {2005}, - publisher = {Springer Dordrecht}, doi = {10.1007/1-4020-3419-9}, editor = {Hazewinkel, M.}, - isbn = {978-1-4020-3419-0} + isbn = {978-1-4020-3419-0}, + publisher = {Springer Dordrecht}, + title = {Advanced Multivariate Statistics with Matrices}, + year = {2005} } @inproceedings{KongEtAl2005, - title = {Generalized 2D principal component analysis}, author = {Hui Kong and Xuchun Li and Lei Wang and Earn Khwang Teoh and Jian-Gang Wang and R. Venkateswarlu}, - year = {2005}, - volume = {1}, - number = {}, - pages = {108-113}, booktitle = {Proceedings. 2005 IEEE International Joint Conference on Neural Networks, 2005.}, doi = {10.1109/IJCNN.2005.1555814}, - issn = {2161-4393} + issn = {2161-4393}, + number = {}, + pages = {108-113}, + title = {Generalized 2D principal component analysis}, + volume = {1}, + year = {2005} } @article{Kramer1991, - title = {Nonlinear principal component analysis using autoassociative neural networks}, author = {Kramer, Mark A.}, - year = {1991}, journal = {AIChE Journal}, - volume = {37}, + note = {\url{https://doi.org/10.1002/aic.690370209}}, number = {2}, pages = {233-243}, - note = {\url{https://doi.org/10.1002/aic.690370209}} + title = {Nonlinear principal component analysis using autoassociative neural networks}, + volume = {37}, + year = {1991} } @book{Kroonenberg2008, - title = {Applied Multiway Data Analysis}, - author = {Kroonenberg, Pieter M.}, - year = {2008}, - publisher = {John Wiley \& Sons, Ltd}, address = {New York}, + author = {Kroonenberg, Pieter M.}, doi = {10.1002/9780470238004}, - isbn = {9780470238004} + isbn = {9780470238004}, + publisher = {John Wiley \& Sons, Ltd}, + title = {Applied Multiway Data Analysis}, + year = {2008} } @book{Kusolitsch2011, - title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie}, author = {Kusolitsch, Norbert}, - year = {2011}, - publisher = {Springer Vienna}, doi = {10.1007/978-3-7091-0685-3}, isbn = {978-3-7091-0684-6}, + publisher = {Springer Vienna}, series = {Springer-Lehrbuch}, - subtitle = {{E}ine {E}inf{\"u}hrung} + subtitle = {{E}ine {E}inf{\"u}hrung}, + title = {{M}a\ss{}- und {W}ahrscheinlichkeitstheorie}, + year = {2011} } @article{LandgrafLee2020, - title = {Dimensionality reduction for binary data through the projection of natural parameters}, author = {Andrew J. 
Landgraf and Yoonkyung Lee}, - year = {2020}, - journal = {Journal of Multivariate Analysis}, - volume = {180}, - pages = {104668}, doi = {10.1016/j.jmva.2020.104668}, - issn = {0047-259X} + issn = {0047-259X}, + journal = {Journal of Multivariate Analysis}, + pages = {104668}, + title = {Dimensionality reduction for binary data through the projection of natural parameters}, + volume = {180}, + year = {2020} } @book{Lauritzen1996, - title = {{Graphical Models}}, author = {Lauritzen, Steffen L}, - year = {1996}, - publisher = {Oxford University Press}, doi = {10.1093/oso/9780198522195.001.0001}, isbn = {9780198522195}, - month = {05} + month = {05}, + publisher = {Oxford University Press}, + title = {{Graphical Models}}, + year = {1996} } @article{LauritzenRichardson2002, - title = {Chain Graph Models and Their Causal Interpretations}, author = {Steffen L. Lauritzen and Thomas S. Richardson}, - year = {2002}, + issn = {13697412, 14679868}, journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)}, - volume = {64}, number = {3}, pages = {321--361}, publisher = {[Royal Statistical Society, Wiley]}, - issn = {13697412, 14679868}, + title = {Chain Graph Models and Their Causal Interpretations}, url = {http://www.jstor.org/stable/3088778}, - urldate = {2024-01-20} + urldate = {2024-01-20}, + volume = {64}, + year = {2002} } @article{LeBihanEtAl2001, - title = {Diffusion tensor imaging: Concepts and applications}, author = {Le Bihan, Denis and Mangin, Jean-Fran\c{c}ois and Poupon, Cyril and Clark, Chris A. and Pappata, Sabina and Molko, Nicolas and Chabriat, Hughes}, - year = {2001}, + doi = {https://doi.org/10.1002/jmri.1076}, journal = {Journal of Magnetic Resonance Imaging}, - volume = {13}, number = {4}, pages = {534-546}, - doi = {https://doi.org/10.1002/jmri.1076}, - url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/jmri.1076} + title = {Diffusion tensor imaging: Concepts and applications}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/jmri.1076}, + volume = {13}, + year = {2001} } @book{Lee2012, - title = {Introduction to Smooth Manifolds}, author = {Lee, John M.}, - year = {2012}, + doi = {10.1007/978-1-4419-9982-5}, journal = {Graduate Texts in Mathematics}, publisher = {Springer New York}, - doi = {10.1007/978-1-4419-9982-5} + title = {Introduction to Smooth Manifolds}, + year = {2012} } @book{Lee2018, - title = {Introduction to Riemannian Manifolds}, author = {Lee, John M.}, - year = {2018}, + doi = {10.1007/978-3-319-91755-9}, journal = {Graduate Texts in Mathematics}, publisher = {Springer International Publishing}, - doi = {10.1007/978-3-319-91755-9} + title = {Introduction to Riemannian Manifolds}, + year = {2018} } @article{LengPan2018, - title = {{Covariance estimation via sparse Kronecker structures}}, author = {Leng, Chenlei and Pan, Guangming}, - year = {2018}, + doi = {10.3150/17-BEJ980}, journal = {Bernoulli}, - volume = {24}, number = {4B}, pages = {3833 -- 3863}, publisher = {Bernoulli Society for Mathematical Statistics and Probability}, - doi = {10.3150/17-BEJ980} + title = {{Covariance estimation via sparse Kronecker structures}}, + volume = {24}, + year = {2018} } @article{Lenz1920, - title = {Beitrag zum Verst{\"a}ndnis der magnetischen Erscheinungen in festen K{\"o}rpern}, author = {W. 
Lenz}, - year = {1920}, journal = {European Physical Journal A}, - volume = {21}, pages = {613--615}, - url = {https://cds.cern.ch/record/460663} + title = {Beitrag zum Verst{\"a}ndnis der magnetischen Erscheinungen in festen K{\"o}rpern}, + url = {https://cds.cern.ch/record/460663}, + volume = {21}, + year = {1920} } @article{LeporeEtAl2008, - title = {Generalized Tensor-Based Morphometry of HIV/AIDS Using Multivariate Statistics on Deformation Tensors}, author = {Lepore, Natasha and Brun, Caroline and Chou, Yi-Yu and Chiang, Ming-Chang and Dutton, Rebecca A. and Hayashi, Kiralee M. and Luders, Eileen and Lopez, Oscar L. and Aizenstein, Howard J. and Toga, Arthur W. and Becker, James T. and Thompson, Paul M.}, - year = {2008}, + doi = {10.1109/TMI.2007.906091}, journal = {IEEE Transactions on Medical Imaging}, - volume = {27}, number = {1}, pages = {129-141}, - doi = {10.1109/TMI.2007.906091} + title = {Generalized Tensor-Based Morphometry of HIV/AIDS Using Multivariate Statistics on Deformation Tensors}, + volume = {27}, + year = {2008} } @article{LeurgansRoss1992, - title = {{Multilinear Models: Applications in Spectroscopy}}, author = {Sue Leurgans and Robert T. Ross}, - year = {1992}, + doi = {10.1214/ss/1177011225}, journal = {Statistical Science}, - volume = {7}, + keywords = {Multi-mode factor analysis, nonlinear least-squares, PARAFAC, three-way arrays}, number = {3}, pages = {289 -- 310}, publisher = {Institute of Mathematical Statistics}, - doi = {10.1214/ss/1177011225}, - keywords = {Multi-mode factor analysis, nonlinear least-squares, PARAFAC, three-way arrays} + title = {{Multilinear Models: Applications in Spectroscopy}}, + volume = {7}, + year = {1992} } @article{LezonEtAl2006, - title = {Using the principle of entropy maximization to infer genetic interaction networks from gene expression patterns}, author = {Timothy R. Lezon and Jayanth R. Banavar and Marek Cieplak and Amos Maritan and Nina V. Fedoroff}, - year = {2006}, + doi = {10.1073/pnas.0609152103}, journal = {Proceedings of the National Academy of Sciences}, - volume = {103}, number = {50}, pages = {19033-19038}, - doi = {10.1073/pnas.0609152103} + title = {Using the principle of entropy maximization to infer genetic interaction networks from gene expression patterns}, + volume = {103}, + year = {2006} } @article{Li1991, - title = {{Sliced Inverse Regression for Dimension Reduction}}, author = {Li, Ker-Chau}, - year = {1991}, - journal = {J. Amer. Statist. Assoc.}, + doi = {10.1080/01621459.1991.10475035}, fjournal = {Journal of the American Statistical Association}, - volume = {86}, + journal = {J. Amer. Statist. Assoc.}, number = {414}, pages = {316--327}, - doi = {10.1080/01621459.1991.10475035} + title = {{Sliced Inverse Regression for Dimension Reduction}}, + volume = {86}, + year = {1991} } @article{Li1992, - title = {On principal {H}essian directions for data visualization and dimension reduction: another application of {S}tein's lemma}, author = {Li, Ker-Chau}, - year = {1992}, - journal = {J. Amer. Statist. Assoc.}, + doi = {10.1080/01621459.1992.10476258}, fjournal = {Journal of the American Statistical Association}, - volume = {87}, + issn = {0162-1459,1537-274X}, + journal = {J. Amer. Statist. 
Assoc.}, number = {420}, pages = {1025--1039}, publisher = {Taylor \& Francis}, - doi = {10.1080/01621459.1992.10476258}, - issn = {0162-1459,1537-274X} + title = {On principal {H}essian directions for data visualization and dimension reduction: another application of {S}tein's lemma}, + volume = {87}, + year = {1992} } @book{Li2018, - title = {Sufficient dimension reduction}, author = {Li, Bing}, - year = {2018}, - volume = {161}, - pages = {xxi+283}, - publisher = {CRC Press, Boca Raton, FL}, doi = {10.1201/9781315119427}, isbn = {978-1-4987-0447-2}, mrclass = {62-02 (62G08 62H12 62H20 62L10)}, mrnumber = {3838449}, note = {Methods and applications with R}, + pages = {xxi+283}, + publisher = {CRC Press, Boca Raton, FL}, series = {Monographs on Statistics and Applied Probability}, - url = {https://doi.org/10.1201/9781315119427} + title = {Sufficient dimension reduction}, + url = {https://doi.org/10.1201/9781315119427}, + volume = {161}, + year = {2018} } @article{LiArtemiouLi2011, - title = {Principal support vector machines for linear and nonlinear sufficient dimension reduction}, author = {Li, Bing and Artemiou, Andreas and Li, Lexin}, - year = {2011}, - journal = {Ann. Statist.}, + doi = {10.1214/11-AOS932}, fjournal = {The Annals of Statistics}, - volume = {39}, + journal = {Ann. Statist.}, + month = {12}, number = {6}, pages = {3182--3210}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/11-AOS932}, - month = {12}, - url = {https://doi.org/10.1214/11-AOS932} + title = {Principal support vector machines for linear and nonlinear sufficient dimension reduction}, + url = {https://doi.org/10.1214/11-AOS932}, + volume = {39}, + year = {2011} } @article{LiKimAltman2010, - title = {{On dimension folding of matrix- or array-valued statistical objects}}, author = {Bing Li and Min Kyung Kim and Naomi Altman}, - year = {2010}, + doi = {10.1214/09-AOS737}, journal = {The Annals of Statistics}, - volume = {38}, + keywords = {directional regression, electroencephalography, Kronecker envelope, sliced average variance estimate, sliced inverse regression}, number = {2}, pages = {1094 -- 1121}, publisher = {Institute of Mathematical Statistics}, - doi = {10.1214/09-AOS737}, - keywords = {directional regression, electroencephalography, Kronecker envelope, sliced average variance estimate, sliced inverse regression} + title = {{On dimension folding of matrix- or array-valued statistical objects}}, + volume = {38}, + year = {2010} } @article{Lin2019, - title = {Riemannian Geometry of Symmetric Positive Definite Matrices via Cholesky Decomposition}, author = {Lin, Zhenhua}, - year = {2019}, + doi = {10.1137/18M1221084}, journal = {SIAM Journal on Matrix Analysis and Applications}, - volume = {40}, number = {4}, pages = {1353--1370}, - doi = {10.1137/18M1221084} + title = {Riemannian Geometry of Symmetric Positive Definite Matrices via Cholesky Decomposition}, + volume = {40}, + year = {2019} } @inbook{LiuKoike2007, - title = {Extending Multivariate Space-Time Geostatistics for Environmental Data Analysis}, author = {Chunxue Liu and Katsuaki Koike}, - year = {2007}, + doi = {10.1007/s11004-007-9085-9}, journal = {Mathematical Geology}, pages = {289--305}, publisher = {International Association for Mathematical Geology}, - doi = {10.1007/s11004-007-9085-9} + title = {Extending Multivariate Space-Time Geostatistics for Environmental Data Analysis}, + year = {2007} } @article{LiWang2007, - title = {On Directional Regression for Dimension Reduction}, author = {Bing Li and Shaoli Wang}, 
- year = {2007}, + doi = {10.1198/016214507000000536}, journal = {Journal of the American Statistical Association}, - volume = {102}, number = {479}, pages = {997-1008}, publisher = {Taylor \& Francis}, - doi = {10.1198/016214507000000536} + title = {On Directional Regression for Dimension Reduction}, + volume = {102}, + year = {2007} } @article{LiZhaChiaromonte2005, - title = {Contour regression: A general approach to dimension reduction}, author = {Li, Bing and Zha, Hongyuan and Chiaromonte, Francesca}, - year = {2005}, - journal = {Ann. Statist.}, + doi = {10.1214/009053605000000192}, fjournal = {The Annals of Statistics}, - volume = {33}, + journal = {Ann. Statist.}, number = {4}, pages = {1580--1616}, publisher = {The Institute of Mathematical Statistics}, - doi = {10.1214/009053605000000192}, - url = {https://doi.org/10.1214/009053605000000192} + title = {Contour regression: A general approach to dimension reduction}, + url = {https://doi.org/10.1214/009053605000000192}, + volume = {33}, + year = {2005} +} + +@article{LiZhang2017, + author = {Lexin Li and Xin Zhang}, + doi = {10.1080/01621459.2016.1193022}, + journal = {Journal of the American Statistical Association}, + number = {519}, + pages = {1131-1146}, + publisher = {Taylor & Francis}, + title = {Parsimonious Tensor Response Regression}, + volume = {112}, + year = {2017} +} + +@article{Lock2018, + author = {Eric F. Lock}, + doi = {10.1080/10618600.2017.1401544}, + journal = {Journal of Computational and Graphical Statistics}, + number = {3}, + pages = {638-647}, + publisher = {Taylor \& Francis}, + title = {Tensor-on-Tensor Regression}, + volume = {27}, + year = {2018} } @article{LuoLi2016, - title = {Combining eigenvalues and variation of eigenvectors for order determination}, author = {Luo, Wei and Li, Bing}, - year = {2016}, - journal = {Biometrika}, - volume = {103}, - number = {4}, - pages = {875--887}, doi = {10.1093/biomet/asw051}, issn = {0006-3444, 1464-3510}, + journal = {Biometrika}, month = {12}, + number = {4}, + pages = {875--887}, + title = {Combining eigenvalues and variation of eigenvectors for order determination}, url = {https://academic.oup.com/biomet/article-lookup/doi/10.1093/biomet/asw051}, - urldate = {2021-10-06} + urldate = {2021-10-06}, + volume = {103}, + year = {2016} } @article{LuoLi2021, - title = {On order determination by predictor augmentation}, author = {Luo, Wei and Li, Bing}, - year = {2021}, - journal = {Biometrika}, - volume = {108}, - number = {3}, - pages = {557--574}, doi = {10.1093/biomet/asaa077}, issn = {0006-3444, 1464-3510}, + journal = {Biometrika}, month = {08}, + number = {3}, + pages = {557--574}, + title = {On order determination by predictor augmentation}, url = {https://academic.oup.com/biomet/article/108/3/557/5917626}, - urldate = {2021-10-06} + urldate = {2021-10-06}, + volume = {108}, + year = {2021} } @article{LuZimmerman2005, - title = {The likelihood ratio test for a separable covariance matrix}, author = {Nelson Lu and Dale L. 
Zimmerman}, - year = {2005}, - journal = {Statistics \& Probability Letters}, - volume = {73}, - number = {4}, - pages = {449-457}, doi = {10.1016/j.spl.2005.04.020}, issn = {0167-7152}, - url = {https://www.sciencedirect.com/science/article/pii/S0167715205001495} + journal = {Statistics \& Probability Letters}, + number = {4}, + pages = {449-457}, + title = {The likelihood ratio test for a separable covariance matrix}, + url = {https://www.sciencedirect.com/science/article/pii/S0167715205001495}, + volume = {73}, + year = {2005} } @article{MagnusNeudecker1986, - title = {Symmetry, 0-1 Matrices and Jacobians: A Review}, author = {Magnus, Jan R. and Neudecker, Heinz}, - year = {1986}, + issn = {02664666, 14694360}, journal = {Econometric Theory}, - volume = {2}, number = {2}, pages = {157--190}, publisher = {Cambridge University Press}, - issn = {02664666, 14694360}, + title = {Symmetry, 0-1 Matrices and Jacobians: A Review}, url = {http://www.jstor.org/stable/3532421}, - urldate = {2023-10-03} + urldate = {2023-10-03}, + volume = {2}, + year = {1986} } @book{MagnusNeudecker1999, - title = {Matrix differential calculus with applications in statistics and econometrics}, author = {Magnus, Jan R. and Neudecker, Heinz}, - year = {1999}, - pages = {xviii+395}, - publisher = {John Wiley \& Sons, Ltd., Chichester}, isbn = {0-471-98633-X}, mrclass = {15-01 (26-01 62-01)}, mrnumber = {1698873}, note = {Revised reprint of the 1988 original}, - series = {Wiley Series in Probability and Statistics} + pages = {xviii+395}, + publisher = {John Wiley \& Sons, Ltd., Chichester}, + series = {Wiley Series in Probability and Statistics}, + title = {Matrix differential calculus with applications in statistics and econometrics}, + year = {1999} } @article{ManceurDutilleul2013, - title = {Maximum likelihood estimation for the tensor normal distribution: Algorithm, minimum sample size, and empirical bias and dispersion}, author = {Ameur M. Manceur and Pierre Dutilleul}, - year = {2013}, - journal = {Journal of Computational and Applied Mathematics}, - volume = {239}, - pages = {37-49}, doi = {10.1016/j.cam.2012.09.017}, issn = {0377-0427}, - url = {https://www.sciencedirect.com/science/article/pii/S0377042712003810} + journal = {Journal of Computational and Applied Mathematics}, + pages = {37-49}, + title = {Maximum likelihood estimation for the tensor normal distribution: Algorithm, minimum sample size, and empirical bias and dispersion}, + url = {https://www.sciencedirect.com/science/article/pii/S0377042712003810}, + volume = {239}, + year = {2013} } @incollection{MardiaGoodall1993, - title = {Spatial-temporal analysis of multivariate environmental monitoring data}, author = {Mardia, Kanti V. and Goodall, Colin R.}, - year = {1993}, - volume = {6}, - pages = {347--386}, - publisher = {North-Holland, Amsterdam}, booktitle = {Multivariate environmental statistics}, isbn = {0-444-89804-2}, mrclass = {62H11}, mrnumber = {1268443}, - series = {North-Holland Ser. Statist. Probab.} + pages = {347--386}, + publisher = {North-Holland, Amsterdam}, + series = {North-Holland Ser. Statist. 
Probab.}, + title = {Spatial-temporal analysis of multivariate environmental monitoring data}, + volume = {6}, + year = {1993} } @inproceedings{MartinFernandez2004, - title = {3D Bayesian Regularization of Diffusion Tensor MRI Using Multivariate Gaussian Markov Random Fields}, - author = {Mart{\'i}n-Fern{\'a}ndez, Marcos and Westin, Carl-Fredrik and Alberola-L{\'o}pez, Carlos}, - year = {2004}, - pages = {351--359}, - publisher = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, + author = {Mart{\'i}n-Fern{\'a}ndez, Marcos and Westin, Carl-Fredrik and Alberola-L{\'o}pez, Carlos}, booktitle = {Medical Image Computing and Computer-Assisted Intervention -- MICCAI 2004}, editor = {Barillot, Christian and Haynor, David R. and Hellier, Pierre}, - isbn = {978-3-540-30135-6} + isbn = {978-3-540-30135-6}, + pages = {351--359}, + publisher = {Springer Berlin Heidelberg}, + title = {3D Bayesian Regularization of Diffusion Tensor MRI Using Multivariate Gaussian Markov Random Fields}, + year = {2004} } @article{MaZhu2013, - title = {A review on dimension reduction}, author = {Ma, Yanyuan and Zhu, Liping}, - year = {2013}, - journal = {Int. Stat. Rev.}, - fjournal = {International Statistical Review. Revue Internationale de Statistique}, - volume = {81}, - number = {1}, - pages = {134--150}, doi = {10.1111/j.1751-5823.2012.00182.x}, + fjournal = {International Statistical Review. Revue Internationale de Statistique}, issn = {0306-7734,1751-5823}, + journal = {Int. Stat. Rev.}, mrclass = {62G08 (62-02 62H12)}, mrnumber = {3047506}, - url = {https://doi.org/10.1111/j.1751-5823.2012.00182.x} + number = {1}, + pages = {134--150}, + title = {A review on dimension reduction}, + url = {https://doi.org/10.1111/j.1751-5823.2012.00182.x}, + volume = {81}, + year = {2013} } @book{McCullagh1987, - title = {Tensor Methods in Statistics}, author = {McCullagh, Peter}, - year = {1987}, - publisher = {Chapman and Hall/CRC}, doi = {10.1201/9781351077118}, - subtitle = {Monographs on Statistics and Applied Probability} + publisher = {Chapman and Hall/CRC}, + subtitle = {Monographs on Statistics and Applied Probability}, + title = {Tensor Methods in Statistics}, + year = {1987} } @article{McCullochPitts1943, - title = {A Logical Calculus of the Ideas Immanent in Nervous Activity}, author = {Mc{C}ulloch, Warren S and Pitts, Walter}, - year = {1943}, journal = {Bulletin of Mathematical Biophysics}, - volume = {5}, pages = {115--133}, - publisher = {Springer} + publisher = {Springer}, + title = {A Logical Calculus of the Ideas Immanent in Nervous Activity}, + volume = {5}, + year = {1943} } @article{Nadarajah2005, - title = {A generalized normal distribution}, author = {Saralees Nadarajah}, - year = {2005}, + doi = {10.1080/02664760500079464}, journal = {Journal of Applied Statistics}, - volume = {32}, number = {7}, pages = {685--694}, publisher = {Taylor \& Francis}, - doi = {10.1080/02664760500079464} + title = {A generalized normal distribution}, + volume = {32}, + year = {2005} } @inproceedings{Nesterov1983, - title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$}, author = {Nesterov, Yurii Evgen'evich}, - year = {1983}, - volume = {269}, - number = {3}, - pages = {543--547}, booktitle = {Doklady Akademii Nauk}, - organization = {Russian Academy of Sciences} + number = {3}, + organization = {Russian Academy of Sciences}, + pages = {543--547}, + title = {A method of solving a convex programming problem with convergence rate $O(1/k^2)$}, + volume = {269}, + year = {1983} } 
@article{NguyenEtAl2017, - title = {Inverse statistical problems: from the inverse {I}sing problem to data science}, author = {H. Chau Nguyen and Riccardo Zecchina and Johannes Berg}, - year = {2017}, + doi = {10.1080/00018732.2017.1341604}, journal = {Advances in Physics}, - volume = {66}, number = {3}, pages = {197--261}, publisher = {Taylor \& Francis}, - doi = {10.1080/00018732.2017.1341604} + title = {Inverse statistical problems: from the inverse {I}sing problem to data science}, + volume = {66}, + year = {2017} } @article{Niss2005, - title = {{History of the Lenz-Ising Model 1920--1950: From Ferromagnetic to Cooperative Phenomena}}, author = {Niss, Martin}, - year = {2005}, - journal = {Arch. Hist. Exact Sci.}, + doi = {10.1007/s00407-004-0088-3}, fjournal = {Archive for History of Exact Sciences}, - volume = {59}, + issn = {1432-0657}, + journal = {Arch. Hist. Exact Sci.}, number = {3}, pages = {267--318}, - doi = {10.1007/s00407-004-0088-3}, - issn = {1432-0657} + title = {{History of the Lenz-Ising Model 1920--1950: From Ferromagnetic to Cooperative Phenomena}}, + volume = {59}, + year = {2005} } @article{OhlsonEtAl2013, - title = {The multilinear normal distribution: Introduction and some basic properties}, author = {Ohlson, Martin and Ahmad, Mumtaz Rauf and von Rosen, Dietrich}, - year = {2013}, - journal = {Journal of Multivariate Analysis}, - volume = {113}, - pages = {37-47}, doi = {10.1016/j.jmva.2011.05.015}, issn = {0047-259X}, - url = {https://www.sciencedirect.com/science/article/pii/S0047259X11001047} + journal = {Journal of Multivariate Analysis}, + pages = {37-47}, + title = {The multilinear normal distribution: Introduction and some basic properties}, + url = {https://www.sciencedirect.com/science/article/pii/S0047259X11001047}, + volume = {113}, + year = {2013} } @article{Oseledets2011, - title = {Tensor-Train Decomposition}, author = {Oseledets, I. 
V.}, - year = {2011}, + doi = {10.1137/090752286}, journal = {SIAM Journal on Scientific Computing}, - volume = {33}, number = {5}, pages = {2295-2317}, - doi = {10.1137/090752286} + title = {Tensor-Train Decomposition}, + volume = {33}, + year = {2011} } @article{PanMaiZhang2018, - title = {Covariate-Adjusted Tensor Classification in High dimensions}, author = {Yuqing Pan and Qing Mai and Xin Zhang}, - year = {2018}, + doi = {10.1080/01621459.2018.1497500}, + eprint = {https://doi.org/10.1080/01621459.2018.1497500}, journal = {Journal of the American Statistical Association}, - volume = {0}, number = {ja}, pages = {1-41}, publisher = {Taylor & Francis}, - doi = {10.1080/01621459.2018.1497500}, - eprint = {https://doi.org/10.1080/01621459.2018.1497500}, - url = {https://doi.org/10.1080/01621459.2018.1497500} + title = {Covariate-Adjusted Tensor Classification in High dimensions}, + url = {https://doi.org/10.1080/01621459.2018.1497500}, + volume = {0}, + year = {2018} } @book{Pepe03, - title = {The Statistical Evaluation of Medical Tests for Classification and Prediction}, + address = {New York}, author = {Pepe, M.S.}, - year = {2003}, publisher = {Oxford University Press}, - address = {New York} + title = {The Statistical Evaluation of Medical Tests for Classification and Prediction}, + year = {2003} } @article{PfeifferForzaniBura2012, - title = {Sufficient dimension reduction for longitudinally measured predictors}, author = {Pfeiffer, Ruth and Forzani, Liliana and Bura, Efstathia}, - year = {2012}, - journal = {Statistics in medicine}, - volume = {31}, - pages = {2414-27}, doi = {10.1002/sim.4437}, - month = {09} + journal = {Statistics in medicine}, + month = {09}, + pages = {2414-27}, + title = {Sufficient dimension reduction for longitudinally measured predictors}, + volume = {31}, + year = {2012} } @article{PfeifferKaplaBura2021, - title = {{Least squares and maximum likelihood estimation of sufficient reductions in regressions with matrix-valued predictors}}, author = {Pfeiffer, Ruth and Kapla, Daniel and Bura, Efstathia}, - year = {2021}, + doi = {10.1007/s41060-020-00228-y}, journal = {International Journal of Data Science and Analytics}, + title = {{Least squares and maximum likelihood estimation of sufficient reductions in regressions with matrix-valued predictors}}, volume = {11}, - doi = {10.1007/s41060-020-00228-y} -} - -@article{LiZhang2017, -author = {Lexin Li and Xin Zhang}, -title = {Parsimonious Tensor Response Regression}, -journal = {Journal of the American Statistical Association}, -volume = {112}, -number = {519}, -pages = {1131-1146}, -year = {2017}, -publisher = {Taylor & Francis}, -doi = {10.1080/01621459.2016.1193022}, - - -URL = { - - https://doi.org/10.1080/01621459.2016.1193022 - - - -}, -eprint = { - https://doi.org/10.1080/01621459.2016.1193022} + year = {2021} } @inproceedings{RabusseauKadri2016, - author = {Rabusseau, Guillaume and Kadri, Hachem}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. 
Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Low-Rank Regression with Tensor Responses}, - url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Paper.pdf}, - volume = {29}, - year = {2016} -} - - -@article{HaoEtAl2021, - author = {Botao Hao and Boxiang Wang and Pengyuan Wang and Jingfei Zhang and Jian Yang and Will Wei Sun}, - title = {Sparse Tensor Additive Regression}, - journal = {Journal of Machine Learning Research}, - year = {2021}, - volume = {22}, - number = {64}, - pages = {1--43}, - url = {http://jmlr.org/papers/v22/19-769.html} + author = {Rabusseau, Guillaume and Kadri, Hachem}, + booktitle = {Advances in Neural Information Processing Systems}, + editor = {D. Lee and M. Sugiyama and U. Luxburg and I. Guyon and R. Garnett}, + pages = {}, + publisher = {Curran Associates, Inc.}, + title = {Low-Rank Regression with Tensor Responses}, + url = {https://proceedings.neurips.cc/paper_files/paper/2016/file/3806734b256c27e41ec2c6bffa26d9e7-Paper.pdf}, + volume = {29}, + year = {2016} } @article{Rosenblatt1958, - title = {The perceptron: A probabilistic model for information storage and organization in the brain}, author = {Frank Rosenblatt}, - year = {1958}, + doi = {10.1037/h0042519}, journal = {Psychological Review}, - volume = {65}, number = {6}, pages = {386--408}, - doi = {10.1037/h0042519} + title = {The perceptron: A probabilistic model for information storage and organization in the brain}, + volume = {65}, + year = {1958} } @inproceedings{Rumelhart1986, - title = {Learning internal representations by error propagation}, author = {David E. Rumelhart and Geoffrey E. Hinton and Ronald J. Williams}, - year = {1986}, - url = {https://api.semanticscholar.org/CorpusID:62245742} + title = {Learning internal representations by error propagation}, + url = {https://api.semanticscholar.org/CorpusID:62245742}, + year = {1986} } @article{RuppertWand1994, - title = {Multivariate Locally Weighted Least Squares Regression}, author = {D. Ruppert and M. P. 
Wand}, - year = {1994}, + issn = {00905364}, journal = {The Annals of Statistics}, - volume = {22}, number = {3}, pages = {1346--1370}, publisher = {Institute of Mathematical Statistics}, - issn = {00905364}, + title = {Multivariate Locally Weighted Least Squares Regression}, url = {http://www.jstor.org/stable/2242229}, - urldate = {2024-01-25} + urldate = {2024-01-25}, + volume = {22}, + year = {1994} } @inproceedings{ShanEtAl2008, - title = {Unified Principal Component Analysis with generalized Covariance Matrix for face recognition}, author = {Shiguang Shan and Bo Cao and Yu Su and Laiyun Qing and Xilin Chen and Wen Gao}, - year = {2008}, - volume = {}, - number = {}, - pages = {1-7}, booktitle = {2008 IEEE Conference on Computer Vision and Pattern Recognition}, doi = {10.1109/CVPR.2008.4587375}, - issn = {1063-6919} + issn = {1063-6919}, + number = {}, + pages = {1-7}, + title = {Unified Principal Component Analysis with generalized Covariance Matrix for face recognition}, + volume = {}, + year = {2008} } @inproceedings{ShashuaHazan2005, - title = {Non-Negative Tensor Factorization with Applications to Statistics and Computer Vision}, - author = {Shashua, Amnon and Hazan, Tamir}, - year = {2005}, - pages = {792--799}, - publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, + author = {Shashua, Amnon and Hazan, Tamir}, booktitle = {Proceedings of the 22nd International Conference on Machine Learning}, doi = {10.1145/1102351.1102451}, isbn = {1595931805}, location = {Bonn, Germany}, numpages = {8}, - series = {ICML '05} + pages = {792--799}, + publisher = {Association for Computing Machinery}, + series = {ICML '05}, + title = {Non-Negative Tensor Factorization with Applications to Statistics and Computer Vision}, + year = {2005} } @article{Soize2008, - title = {Tensor-valued random fields for meso-scale stochastic model of anisotropic elastic microstructure and probabilistic analysis of representative volume element size}, author = {C. Soize}, - year = {2008}, - journal = {Probabilistic Engineering Mechanics}, - volume = {23}, - number = {2}, - pages = {307-323}, doi = {10.1016/j.probengmech.2007.12.019}, issn = {0266-8920}, + journal = {Probabilistic Engineering Mechanics}, note = {5th International Conference on Computational Stochastic Mechanics}, - url = {https://www.sciencedirect.com/science/article/pii/S0266892007000562} + number = {2}, + pages = {307-323}, + title = {Tensor-valued random fields for meso-scale stochastic model of anisotropic elastic microstructure and probabilistic analysis of representative volume element size}, + url = {https://www.sciencedirect.com/science/article/pii/S0266892007000562}, + volume = {23}, + year = {2008} } @article{SoloveychikTrushin2016, - title = {Gaussian and robust Kronecker product covariance estimation: Existence and uniqueness}, author = {I. Soloveychik and D. Trushin}, - year = {2016}, - journal = {Journal of Multivariate Analysis}, - volume = {149}, - pages = {92-113}, doi = {10.1016/j.jmva.2016.04.001}, issn = {0047-259X}, - url = {https://www.sciencedirect.com/science/article/pii/S0047259X16300070} + journal = {Journal of Multivariate Analysis}, + pages = {92-113}, + title = {Gaussian and robust Kronecker product covariance estimation: Existence and uniqueness}, + url = {https://www.sciencedirect.com/science/article/pii/S0047259X16300070}, + volume = {149}, + year = {2016} } @misc{SongHero2023, - title = {On Separability of Covariance in Multiway Data Analysis}, - author = {Dogyoon Song and Alfred O. 
Hero}, - year = {2023}, archiveprefix = {arXiv}, + author = {Dogyoon Song and Alfred O. Hero}, doi = {10.48550/arXiv.2302.02415}, eprint = {2302.02415}, - primaryclass = {math.ST} + primaryclass = {math.ST}, + title = {On Separability of Covariance in Multiway Data Analysis}, + year = {2023} } @article{SrivastavaEtAl2008, - title = {Models with a Kronecker product covariance structure: Estimation and testing}, author = {Srivastava, Muni Shanker and von Rosen, Tatjana and von Rosen, Dietrich}, - year = {2008}, - journal = {Mathematical Methods of Statistics}, - volume = {17}, - number = {4}, - pages = {357-370}, day = {01}, doi = {10.3103/S1066530708040066}, issn = {1934-8045}, - month = {Dec} + journal = {Mathematical Methods of Statistics}, + month = {Dec}, + number = {4}, + pages = {357-370}, + title = {Models with a Kronecker product covariance structure: Estimation and testing}, + volume = {17}, + year = {2008} } @article{SrivastavaEtAl2014, - title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, - year = {2014}, journal = {Journal of Machine Learning Research}, - volume = {15}, number = {56}, pages = {1929-1958}, - url = {http://jmlr.org/papers/v15/srivastava14a.html} + title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, + url = {http://jmlr.org/papers/v15/srivastava14a.html}, + volume = {15}, + year = {2014} } @book{SrivastavaKhatri1979, - title = {An introduction to multivariate statistics}, - author = {Srivastava, Muni Shanker and Khatri, Chinubal G.}, - year = {1979}, - publisher = {North Holland}, address = {New York, NY [u.a.]}, + author = {Srivastava, Muni Shanker and Khatri, Chinubal G.}, isbn = {0444003029}, - language = {eng} + language = {eng}, + publisher = {North Holland}, + title = {An introduction to multivariate statistics}, + year = {1979} } @article{Steinberger2018, - title = {On conditional moments of high-dimensional random vectors given lower-dimensional projections}, author = {Steinberger, Lukas and Leeb, Hannes}, - year = {2018}, - journal = {Bernoulli}, + doi = {10.3150/16-BEJ888}, fjournal = {Bernoulli}, - volume = {24}, + journal = {Bernoulli}, number = {1}, pages = {565--591}, publisher = {Bernoulli Society for Mathematical Statistics and Probability}, - doi = {10.3150/16-BEJ888} + title = {On conditional moments of high-dimensional random vectors given lower-dimensional projections}, + volume = {24}, + year = {2018} } @article{Tseng1993, - title = {Dual coordinate ascent methods for non-strictly convex minimization}, author = {Paul Tseng}, - year = {1993}, journal = {Mathematical Programming}, - volume = {59}, number = {1}, - pages = {231-247} + pages = {231-247}, + title = {Dual coordinate ascent methods for non-strictly convex minimization}, + volume = {59}, + year = {1993} } @article{TsiligkaridisHero2013, - title = {Covariance Estimation in High Dimensions Via Kronecker Product Expansions}, author = {Tsiligkaridis, Theodoros and Hero, Alfred O.}, - year = {2013}, + doi = {10.1109/TSP.2013.2279355}, journal = {IEEE Transactions on Signal Processing}, - volume = {61}, number = {21}, pages = {5347-5360}, - doi = {10.1109/TSP.2013.2279355} + title = {Covariance Estimation in High Dimensions Via Kronecker Product Expansions}, + volume = {61}, + year = {2013} } @inbook{Uschmajew2020, - title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds}, - author = {Uschmajew, Andr{\'e} and 
Vandereycken, Bart}, - year = {2020}, - pages = {261--313}, - publisher = {Springer International Publishing}, address = {Cham}, + author = {Uschmajew, Andr{\'e} and Vandereycken, Bart}, booktitle = {Handbook of Variational Methods for Nonlinear Geometric Data}, doi = {10.1007/978-3-030-31351-7_9}, editor = {Grohs, Philipp and Holler, Martin and Weinmann, Andreas}, - isbn = {978-3-030-31351-7} + isbn = {978-3-030-31351-7}, + pages = {261--313}, + publisher = {Springer International Publishing}, + title = {Geometric Methods on Low-Rank Matrix and Tensor Manifolds}, + year = {2020} } @book{vanderVaart1998, - title = {Asymptotic Statistics}, author = {{van der Vaart}, A.W.}, - year = {1998}, - publisher = {Cambridge University Press}, isbn = {0-521-49603-9}, + publisher = {Cambridge University Press}, series = {Asymptotic Statistics}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics} + series = {Cambridge Series in Statistical and Probabilistic Mathematics}, + title = {Asymptotic Statistics}, + year = {1998} } @inbook{VanLoanPitsianis1993, - title = {Approximation with Kronecker Products}, - author = {Van Loan, C. F. and Pitsianis, N.}, - year = {1993}, - pages = {293--314}, - publisher = {Springer Netherlands}, address = {Dordrecht}, + author = {Van Loan, C. F. and Pitsianis, N.}, booktitle = {Linear Algebra for Large Scale and Real-Time Applications}, doi = {10.1007/978-94-015-8196-7_17}, editor = {Moonen, Marc S. and Golub, Gene H. and De Moor, Bart L. R.}, - isbn = {978-94-015-8196-7} + isbn = {978-94-015-8196-7}, + pages = {293--314}, + publisher = {Springer Netherlands}, + title = {Approximation with Kronecker Products}, + year = {1993} } @article{WainwrightJordan2008, - title = {Graphical Models, Exponential Families, and Variational Inference}, author = {Martin J. Wainwright and Michael I. Jordan}, - year = {2008}, - journal = {Foundations and Trends® in Machine Learning}, - volume = {1}, - number = {1--2}, - pages = {1-305}, doi = {10.1561/2200000001}, issn = {1935-8237}, - url = {http://dx.doi.org/10.1561/2200000001} + journal = {Foundations and Trends® in Machine Learning}, + number = {1--2}, + pages = {1-305}, + title = {Graphical Models, Exponential Families, and Variational Inference}, + url = {http://dx.doi.org/10.1561/2200000001}, + volume = {1}, + year = {2008} } @article{WangEtAl2022, - title = {{Kronecker-structured covariance models for multiway data}}, author = {Yu Wang and Zeyu Sun and Dogyoon Song and Alfred Hero}, - year = {2022}, + doi = {10.1214/22-SS139}, journal = {Statistics Surveys}, - volume = {16}, number = {none}, pages = {238 -- 270}, publisher = {Amer. Statist. Assoc., the Bernoulli Soc., the Inst. Math. Statist., and the Statist. Soc. Canada}, - doi = {10.1214/22-SS139} + title = {{Kronecker-structured covariance models for multiway data}}, + volume = {16}, + year = {2022} } @article{WangLi2020, - title = {Learning from Binary Multiway Data: Probabilistic Tensor Decomposition and its Statistical Optimality}, author = {Miaoyan Wang and Lexin Li}, - year = {2020}, journal = {Journal of Machine Learning Research}, - volume = {21}, number = {154}, - pages = {1--38} + pages = {1--38}, + title = {Learning from Binary Multiway Data: Probabilistic Tensor Decomposition and its Statistical Optimality}, + volume = {21}, + year = {2020} } @article{WangXia2008, - title = {{Sliced Regression for Dimension Reduction}}, author = {Hansheng Wang and Yingcun Xia}, - year = {2008}, - journal = {J. Amer. Statist. 
Assoc.}, + doi = {10.1198/016214508000000418}, fjournal = {Journal of the American Statistical Association}, - volume = {103}, + journal = {J. Amer. Statist. Assoc.}, number = {482}, pages = {811--821}, publisher = {Taylor \& Francis}, - doi = {10.1198/016214508000000418} + title = {{Sliced Regression for Dimension Reduction}}, + volume = {103}, + year = {2008} } @book{Whittaker1990, - title = {Graphical models in applied multivariate statistics}, author = {Whittaker, Joe}, - year = {1990}, - pages = {xiv+448}, - publisher = {John Wiley \& Sons, Ltd., Chichester}, isbn = {0-471-91750-8}, mrclass = {62-02 (62H17 62J12)}, mrnumber = {1112133}, - series = {Wiley Series in Probability and Mathematical Statistics: Probability and Mathematical Statistics} + pages = {xiv+448}, + publisher = {John Wiley \& Sons, Ltd., Chichester}, + series = {Wiley Series in Probability and Mathematical Statistics: Probability and Mathematical Statistics}, + title = {Graphical models in applied multivariate statistics}, + year = {1990} } @book{Whittaker2009, - title = {Graphical Models in Applied Multivariate Statistics}, author = {J. Whittaker}, - year = {2009}, - publisher = {Wiley} + publisher = {Wiley}, + title = {Graphical Models in Applied Multivariate Statistics}, + year = {2009} } @article{Xia2007, - title = {A constructive approach to the estimation of dimension reduction directions}, author = {Xia, Yingcun}, - year = {2007}, - journal = {Ann. Statist.}, - fjournal = {The Annals of Statistics}, - volume = {35}, - number = {6}, - pages = {2654--2690}, doi = {10.1214/009053607000000352}, + fjournal = {The Annals of Statistics}, issn = {0090-5364,2168-8966}, + journal = {Ann. Statist.}, mrclass = {62G08 (62G09 62H05)}, mrnumber = {2382662}, - url = {https://doi.org/10.1198/016214508000000805} + number = {6}, + pages = {2654--2690}, + title = {A constructive approach to the estimation of dimension reduction directions}, + url = {https://doi.org/10.1198/016214508000000805}, + volume = {35}, + year = {2007} } @article{Xia2008, - title = {A multiple-index model and dimension reduction}, author = {Xia, Yingcun}, - year = {2008}, - journal = {J. Amer. Statist. Assoc.}, - fjournal = {Journal of the American Statistical Association}, - volume = {103}, - number = {484}, - pages = {1631--1640}, doi = {10.1198/016214508000000805}, + fjournal = {Journal of the American Statistical Association}, issn = {0162-1459,1537-274X}, + journal = {J. Amer. Statist. Assoc.}, mrclass = {62E20 (62F10 62G05)}, mrnumber = {2504209}, - url = {https://doi.org/10.1198/016214508000000805} + number = {484}, + pages = {1631--1640}, + title = {A multiple-index model and dimension reduction}, + url = {https://doi.org/10.1198/016214508000000805}, + volume = {103}, + year = {2008} } @article{XiaEtAl2002, - title = {{An Adaptive Estimation of Dimension Reduction Space}}, author = {Xia, Yingcun and Tong, Howell and Li, W. K. and Zhu, Li-Xing}, - year = {2002}, - journal = {J. R. Stat. Soc. Ser. B Stat. Methodol.}, + doi = {10.1111/1467-9868.03411}, fjournal = {Journal of the Royal Statistical Society. Series B: Statistical Methodology}, - volume = {64}, + issn = {1369-7412}, + journal = {J. R. Stat. Soc. Ser. B Stat. 
Methodol.}, + month = {08}, number = {3}, pages = {363-410}, - doi = {10.1111/1467-9868.03411}, - issn = {1369-7412}, - month = {08} + title = {{An Adaptive Estimation of Dimension Reduction Space}}, + volume = {64}, + year = {2002} } @article{YangEtAl2004, - title = {Two-dimensional {PCA}: a new approach to appearance-based face representation and recognition}, author = {Jian Yang and D. Zhang and A. F. Frangi and Jingyu Yang}, - year = {2004}, + doi = {10.1109/TPAMI.2004.1261097}, + issn = {0162-8828}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, - volume = {26}, number = {1}, pages = {131-137}, - doi = {10.1109/TPAMI.2004.1261097}, - issn = {0162-8828} + title = {Two-dimensional {PCA}: a new approach to appearance-based face representation and recognition}, + volume = {26}, + year = {2004} } @article{Ye2005, - title = {Generalized Low Rank Approximations of Matrices}, author = {Ye, Jieping}, - year = {2005}, - journal = {Machine Learning}, - volume = {61}, - number = {1}, - pages = {167--191}, day = {01}, doi = {10.1007/s10994-005-3561-6}, issn = {1573-0565}, - url = {https://doi.org/10.1007/s10994-005-3561-6} + journal = {Machine Learning}, + number = {1}, + pages = {167--191}, + title = {Generalized Low Rank Approximations of Matrices}, + url = {https://doi.org/10.1007/s10994-005-3561-6}, + volume = {61}, + year = {2005} } @article{YeLim2016, - title = {Schubert Varieties and Distances between Subspaces of Different Dimensions}, author = {Ye, Ke and Lim, Lek-Heng}, - year = {2016}, + doi = {10.1137/15M1054201}, journal = {SIAM Journal on Matrix Analysis and Applications}, - volume = {37}, number = {3}, pages = {1176-1197}, - doi = {10.1137/15M1054201} + title = {Schubert Varieties and Distances between Subspaces of Different Dimensions}, + volume = {37}, + year = {2016} } @inbook{Yin2010, - title = {Sufficient Dimension Reduction in Regression}, author = {Yin, Xiangrong}, - year = {2010}, + booktitle = {High-Dimensional Data Analysis}, pages = {257--273}, publisher = {WORLD SCIENTIFIC / HIGHER EDUCATION PRESS, CHINA}, - booktitle = {High-Dimensional Data Analysis}, - url = {https://doi.org/10.1142/9789814324861_0009} + title = {Sufficient Dimension Reduction in Regression}, + url = {https://doi.org/10.1142/9789814324861_0009}, + year = {2010} } @article{YinHilafu2015, - title = {Sequential sufficient dimension reduction for large {$p$}, small {$n$} problems}, author = {Yin, Xiangrong and Hilafu, Haileab}, - year = {2015}, - journal = {J. R. Stat. Soc. Ser. B. Stat. Methodol.}, - fjournal = {Journal of the Royal Statistical Society. Series B. Statistical Methodology}, - volume = {77}, - number = {4}, - pages = {879--892}, doi = {10.1111/rssb.12093}, + fjournal = {Journal of the Royal Statistical Society. Series B. Statistical Methodology}, issn = {1369-7412,1467-9868}, + journal = {J. R. Stat. Soc. Ser. B. Stat. Methodol.}, mrclass = {62H12}, mrnumber = {3382601}, mrreviewer = {Santiago\ Velilla}, - url = {https://doi.org/10.1111/rssb.12093} + number = {4}, + pages = {879--892}, + title = {Sequential sufficient dimension reduction for large {$p$}, small {$n$} problems}, + url = {https://doi.org/10.1111/rssb.12093}, + volume = {77}, + year = {2015} } @article{YinLiCook2008, - title = {Successive direction extraction for estimating the central subspace in a multiple-index regression}, author = {Yin, Xiangrong and Li, Bing and Cook, R. Dennis}, - year = {2008}, - journal = {J. 
Multivariate Anal.}, - fjournal = {Journal of Multivariate Analysis}, - volume = {99}, - number = {8}, - pages = {1733--1757}, doi = {10.1016/j.jmva.2008.01.006}, + fjournal = {Journal of Multivariate Analysis}, issn = {0047-259X,1095-7243}, + journal = {J. Multivariate Anal.}, mrclass = {62B05 (62H20)}, mrnumber = {2444817}, - url = {https://doi.org/10.1016/j.jmva.2008.01.006} + number = {8}, + pages = {1733--1757}, + title = {Successive direction extraction for estimating the central subspace in a multiple-index regression}, + url = {https://doi.org/10.1016/j.jmva.2008.01.006}, + volume = {99}, + year = {2008} } @article{YuBiYe2010, - title = {Matrix-variate and higher-order probabilistic projections}, author = {Shipeng Yu and Jinbo Bi and Jieping Ye}, - year = {2010}, journal = {Data Mining and Knowledge Discovery}, + pages = {372-392}, + title = {Matrix-variate and higher-order probabilistic projections}, volume = {22}, - pages = {372-392} + year = {2010} } @article{ZengZhu2010, - title = {An integral transform method for estimating the central mean and central subspaces}, author = {Peng Zeng and Yu Zhu}, - year = {2010}, - journal = {J. Multivariate Anal.}, + doi = {10.1016/j.jmva.2009.08.004}, fjournal = {Journal of Multivariate Analysis}, - volume = {101}, + issn = {0047-259X}, + journal = {J. Multivariate Anal.}, number = {1}, pages = {271--290}, - doi = {10.1016/j.jmva.2009.08.004}, - issn = {0047-259X}, - url = {https://www.sciencedirect.com/science/article/pii/S0047259X0900147X} + title = {An integral transform method for estimating the central mean and central subspaces}, + url = {https://www.sciencedirect.com/science/article/pii/S0047259X0900147X}, + volume = {101}, + year = {2010} } @article{ZhangLin2017, - title = {Tensor Envelope Partial Least-Squares Regression}, author = {Xin Zhang and Lexin Li}, - year = {2017}, + doi = {10.1080/00401706.2016.1272495}, journal = {Technometrics}, - volume = {59}, number = {4}, pages = {426-436}, publisher = {Taylor & Francis}, - doi = {10.1080/00401706.2016.1272495} + title = {Tensor Envelope Partial Least-Squares Regression}, + volume = {59}, + year = {2017} } @article{ZhangZhou2005, - title = {{(2D)2PCA}: Two-directional two-dimensional {PCA} for efficient face representation and recognition}, author = {Daoqiang Zhang and Zhi-Hua Zhou}, - year = {2005}, - journal = {Neurocomputing}, - volume = {69}, - number = {1}, - pages = {224-231}, doi = {10.1016/j.neucom.2005.06.004}, issn = {0925-2312}, + journal = {Neurocomputing}, note = {Neural Networks in Signal Processing}, - url = {https://www.sciencedirect.com/science/article/pii/S0925231205001785} -} - -@article{ZhouLi2014, - title = {Regularized matrix regression}, - author = {Zhou, Hua and Li, Lexin}, - year = {2014}, - journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)}, - volume = {76}, - number = {2}, - pages = {463--483}, - publisher = {[Royal Statistical Society, Wiley]} -} - -@article{ZhouLiZhu2013, - title = {Tensor regression with applications in neuroimaging data analysis}, - author = {Zhou, H. and Li, L. 
and Zhu, H.}, - year = {2013}, - journal = {Journal of the American Statistical Association}, - volume = {108}, - pages = {540-552}, - issue = {502} + number = {1}, + pages = {224-231}, + title = {{(2D)2PCA}: Two-directional two-dimensional {PCA} for efficient face representation and recognition}, + url = {https://www.sciencedirect.com/science/article/pii/S0925231205001785}, + volume = {69}, + year = {2005} } @article{ZhouEtAl2023, -author = {Jie Zhou, Will Wei Sun, Jingfei Zhang and Lexin Li}, -title = {Partially Observed Dynamic Tensor Response Regression}, -journal = {Journal of the American Statistical Association}, -volume = {118}, -number = {541}, -pages = {424-439}, -year = {2023}, -publisher = {Taylor & Francis}, -doi = {10.1080/01621459.2021.1938082}, -URL = { https://doi.org/10.1080/01621459.2021.1938082 -}, -eprint = { https://doi.org/10.1080/01621459.2021.1938082} + author = {Jie Zhou, Will Wei Sun, Jingfei Zhang and Lexin Li}, + doi = {10.1080/01621459.2021.1938082}, + journal = {Journal of the American Statistical Association}, + number = {541}, + pages = {424-439}, + publisher = {Taylor & Francis}, + title = {Partially Observed Dynamic Tensor Response Regression}, + volume = {118}, + year = {2023} +} + +@article{ZhouLi2014, + author = {Zhou, Hua and Li, Lexin}, + journal = {Journal of the Royal Statistical Society. Series B (Statistical Methodology)}, + number = {2}, + pages = {463--483}, + publisher = {[Royal Statistical Society, Wiley]}, + title = {Regularized matrix regression}, + volume = {76}, + year = {2014} +} + +@article{ZhouLiZhu2013, + author = {Zhou, H. and Li, L. and Zhu, H.}, + issue = {502}, + journal = {Journal of the American Statistical Association}, + pages = {540-552}, + title = {Tensor regression with applications in neuroimaging data analysis}, + volume = {108}, + year = {2013} } @article{ZouChen2012, - title = {On the consistency of coordinate-independent sparse estimation with BIC}, author = {Zou, Changliang and Chen, Xin}, - year = {2012}, journal = {Journal of Multivariate Analysis}, - volume = {112}, number = {C}, - pages = {248-255} + pages = {248-255}, + title = {On the consistency of coordinate-independent sparse estimation with BIC}, + volume = {112}, + year = {2012} } diff --git a/LaTeX/paper.tex b/LaTeX/paper.tex index c0ec5de..bdf9fb3 100644 --- a/LaTeX/paper.tex +++ b/LaTeX/paper.tex @@ -281,23 +281,45 @@ \section{Introduction} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -In Statistics, tensors are a mathematical tool to represent data of complex structure. In this paper, \textit{tensors} are considered as a generalization of matrices to higher dimensions: A tensor is a multi-dimensional array of numbers. For example, a second-order tensor can be represented as a matrix, while a third-order tensor can be represented as a cube of matrices. +Tensors are a mathematical tool to represent data of complex structure in statistics. \textit{Tensors} are considered as a generalization of matrices to higher dimensions: A tensor is a multi-dimensional array of numbers. For example, a second-order tensor can be represented as a matrix, while a third-order tensor can be represented as a cube of matrices. -Complex data are collected at different times and/or under several conditions often involving a large number of multi-indexed variables represented as tensor-valued data \parencite{KoldaBader2009}. 
They occur in large-scale longitudinal studies \parencite[e.g.][]{Hoff2015}, in agricultural experiments and chemometrics and spectroscopy \parencite[e.g.][]{LeurgansRoss1992,Burdick1995}. Also, in signal and video processing where sensors produce multi-indexed data, e.g. over spatial, frequency, and temporal dimensions \parencite[e.g.][]{DeLathauwerCastaing2007,KofidisRegalia2005}, in telecommunications \parencite[e.g.][]{DeAlmeidaEtAl2007}. Other examples of multiway data include 3D images of the brain, where the modes are the 3 spatial dimensions, and spatio-temporal weather imaging data, a set of image sequences represented as 2 spatial modes and 1 temporal mode. +Complex data are collected at different times and/or under several conditions often involving a large number of multi-indexed variables represented as tensor-valued data \parencite{KoldaBader2009}. They occur in large-scale longitudinal studies \parencite[e.g.][]{Hoff2015}, in agricultural experiments and chemometrics and spectroscopy \parencite[e.g.][]{LeurgansRoss1992,Burdick1995}, in signal and video processing where sensors produce multi-indexed data, e.g. over spatial, frequency, and temporal dimensions \parencite[e.g.][]{DeLathauwerCastaing2007,KofidisRegalia2005}, and in telecommunications \parencite[e.g.][]{DeAlmeidaEtAl2007}. Other examples of multiway data include 3D images of the brain, where the modes are the 3 spatial dimensions, and spatio-temporal weather imaging data, a set of image sequences represented as 2 spatial modes and 1 temporal mode. -\begin{itemize} - \item Review \cite{ZhouLiZhu2013} and see how you compare with them. They focus on the forward regression model with a scalar response but they claim that "Exploit- ing the array structure in imaging data, the new method substantially reduces the dimensionality of imaging data, which leads to efficient estimation and prediction." - \item Read \cite{ZhouEtAl2023} to figure out the distribution they use for the tensor-valued predictors and briefly describe what they do. - \item Read \cite{RabusseauKadri2016} to figure out what they do. They seem to draw both the response and the predictors from tensor-normal with iid N(0,1) entries: "In order to leverage the tensor structure of the output data, we formulate the problem as the minimization of a least squares criterion subject to a multilinear rank constraint on the regression tensor. The rank constraint enforces the model to capture low-rank structure in the outputs and to explain dependencies between inputs and outputs in a low-dimensional multilinear subspace." - \item -\end{itemize} +% \begin{itemize} +% \item Review \cite{ZhouLiZhu2013} and see how you compare with them. They focus on the forward regression model with a scalar response but they claim that "Exploiting the array structure in imaging data, the new method substantially reduces the dimensionality of imaging data, which leads to efficient estimation and prediction." +% \item Read \cite{ZhouEtAl2023} to figure out the distribution they use for the tensor-valued predictors and briefly describe what they do. +% \item Read \cite{RabusseauKadri2016} to figure out what they do. They seem to draw both the response and the predictors from tensor-normal with iid N(0,1) entries: "In order to leverage the tensor structure of the output data, we formulate the problem as the minimization of a least squares criterion subject to a multilinear rank constraint on the regression tensor. 
The rank constraint enforces the model to capture low-rank structure in the outputs and to explain dependencies between inputs and outputs in a low-dimensional multilinear subspace."
+% \end{itemize}
+
+% - RabusseauKadri2016 Y | x for tensor Y with vector x (HOLRR; Higher Order Low-Rank Regression)
+% - LiZhang2017 Y | x for tensor Y with vector x (envelope model)
+% - ZhouEtAl2023 Y | x for tensor Y with vector x (sparse and partially observed)
+
+% - ZhouLiZhu2013 y\in R (GLM) for y | Z, X for tensor X
+% - HaoEtAl2021 y\in R for y | X for tensor X (sparse, element wise B-splines)
+
+% Tensor regression models have been proposed to exploit the special structure of tensor covariates, e.g. \cite{HaoEtAl2021,ZhouLiZhu2013}, or tensor responses \cite{RabusseauKadri2016,LiZhang2017,ZhouEtAl2023} \cite{HaoEtAl2021} modeled a scalar response as a flexible nonparametric function of tensor covariates. \cite{ZhouLiZhu2013} assume the scalar response has a distribution in the exponential family given the tensor-valued predictors and model the link function as a multilinear function of the predictors. \cite{LiZhang2017} model the tensor-valued response as tensor normal. Rather than using $L_1$ type penalty functions to induce sparsity, they employ the envelope method (Cook, Li, and Chiaromonte Citation2010) to estimate the unknown regression coefficient. Moreover, the envelope method essentially identifies and uses the material information jointly. They develop an estimation algorithm and study the asymptotic properties of the estimator. the scalar response as These models try to utilize the sparse and low-rank structures in the tensors -- either in the regression coefficient tensor or the response tensor -- to boost performance on the regression task by reducing the number of free parameters.
+
+Tensor regression models have been proposed to leverage the structure inherent in tensor-valued data. For instance, \textcite{HaoEtAl2021,ZhouLiZhu2013} focus on tensor covariates, while \textcite{RabusseauKadri2016,LiZhang2017,ZhouEtAl2023} focus on tensor responses, and \textcite{Hoff2015,Lock2018} consider tensor-on-tensor regression. \textcite{HaoEtAl2021} model a scalar response as a flexible nonparametric function of tensor covariates. \textcite{ZhouLiZhu2013} assume the scalar response has a distribution in the exponential family given the tensor-valued predictors, with the link modeled as a multilinear function of the predictors. \textcite{RabusseauKadri2016} model the tensor-valued response via a linear model with tensor-valued regression coefficients subject to a multilinear rank constraint. \textcite{LiZhang2017} approach the problem with a similar linear model but, instead of a low-rank constraint, the error term is assumed to have a separable Kronecker product structure while using a generalization of the envelope model \parencite{CookLiChiaromonte2010}. \textcite{ZhouEtAl2023} focus on a partially observed tensor response given vector-valued predictors with mode-wise sparsity constraints in the regression coefficients. \textcite{Hoff2015} extends an existing bilinear regression model to a tensor-on-tensor regression model of conformable modes and dimensions based on a Tucker product. \textcite{Lock2018} uses a tensor contraction to build a penalized least squares model for a tensor with an arbitrary number of modes and dimensions.
+
+Our approach considers the general regression problem of fitting a response of general form (univariate, multivariate, tensor-valued) on a tensor-valued predictor.
We operate in the context of sufficient dimension reduction \parencite[e.g.][]{Cook1998,Li2018} based on inverse regression, which leads us to regressing the tensor-valued predictor on the response. In our setting, this necessitates transforming the response to tensor-valued functions, regardless of whether it is itself tensor-valued. Because of the setting, our method shares commonalities with the tensor regression models referred to above, yet the modeling and methodology are novel.
+Specifically, our tensor-to-tensor regression model is a generalized multi-linear model similar to the generalized linear model of \cite{ZhouLiZhu2013}. % but with tensor valued response by applying (a known) tensor valued function to the response in an inverse regression setting, reversing the role of response and predictors.
+To bypass the explosion of the number of parameters to estimate, we assume the inverse regression error covariance has Kronecker product structure, as do \textcite{LiZhang2017}. Our maximum likelihood-based estimation does not require any penalty terms, in contrast to the least squares and/or sparse approaches \cite{????}. In the case of a tensor (multilinear) normal, given the tensor-valued function of the response, our model exhibits similarities to the multilinear modeling of \textcite{Hoff2015}, but we use a generalized multilinear model and estimate the parameters with maximum likelihood instead of least squares. Moreover, a common issue in multilinear tensor regression models is the unidentifiability of the parameters, which we address in a completely different manner. For example, \cite{LiZhang2017} develop theory that is based on orthogonal projection matrices to uniquely identify a subspace, while our approach is more general as it uses manifold theory.

-Tensor regression models have been proposed to exploit the special structure of tensor covariates, e.g. \cite{HaoEtAl2021,ZhouLiZhu2013}, or tensor responses \cite{RabusseauKadri2016,LiZhang2017,ZhouEtAl2023} \cite{HaoEtAl2021} modeled a scalar response as a flexible nonparametric function of tensor covariates. \cite{ZhouLiZhu2013} assume the scalar response has a distribution in the exponential family given the tensor-valued predictors and model the link function as a multilinear function of the predictors. \cite{LiZhang2017} model the tensor-valued response as tensor normal. Rather than using $L_1$ type penalty functions to induce sparsity, they employ the envelope method (Cook, Li, and Chiaromonte Citation2010) to estimate the unknown regression coefficient. Moreover, the envelope method essentially identifies and uses the material information jointly. They develop an estimation algorithm and study the asymptotic properties of the estimator. the scalar response as These models try to utilize the sparse and low-rank structures in the tensors – either in the regression coefficient tensor or the response tensor – to boost performance on the regression task by reducing the number of free parameters.

+In this paper we present a model-based \emph{Sufficient Dimension Reduction} (SDR) method for tensor-valued data with distribution in the quadratic exponential family assuming a separable Kronecker product structure of the first and second moment. By generalizing the parameter space to embedded manifolds we obtain consistency and asymptotic normality results while allowing great modeling flexibility in the linear sufficient dimension reduction.
+
+The quadratic exponential family contains the tensor normal and the tensor Ising distributions, for continuous and binary tensor-valued random variables, respectively. Multilinear tensor normal models have been used in various applications, including medical imaging \parencite{BasserPajevic2007,DrydenEtAl2009}, spatio-temporal data analysis \parencite{GreenewaldHero2014}, regression analysis for longitudinal relational data \parencite{Hoff2015}. One of the most important uses of the multilinear normal (MLN) distribution, and hence tensor analysis, is perhaps in magnetic resonance imaging (MRI) \parencite{OhlsonEtAl2013}. A recent survey \parencite{WangEtAl2022} and references therein contain more information and potential applications of multilinear tensor normal models.

-In this paper we present a model-based \emph{Sufficient Dimension Reduction} (SDR) method for tensor-valued data with distribution in the quadratic exponential family assuming a Kronecker product structure of the first and second moment. By generalizing the parameter space to embedded manifolds we obtain consistency and asymtotic normality results while allowing great modeling flexibility in the linear sufficient dimension reduction.

+The Ising model for multivariate binary outcomes belongs to the class of discrete exponential families. Its defining feature is that the sufficient statistic involves a quadratic term to capture correlations arising from pairwise interactions.
+The tensor Ising model is a higher-order Ising model for tensor-valued binary outcomes.
+%From \cite{MukherjeeEtAl2020}
+Higher-order Ising models arise naturally in the study of multi-atom interactions in lattice gas models, such as the square-lattice eight-vertex model, the Ashkin-Teller model, and Suzuki's pseudo-3D anisotropic model, and more recently higher-order spin systems have also been proposed for modeling peer-group effects in social networks; see \textcite{MukherjeeEtAl2020} and the references therein. \textcite{MukherjeeEtAl2020} proposed a maximum pseudo-likelihood estimation algorithm for a one-parameter tensor-Ising model. \efi{Daniel: comment on what these guys do and contrast with your setting} In our approach, the parameter is not constrained to be scalar.
+We derive maximum likelihood estimates for all first and second order interactions and propose a gradient-based optimization algorithm.
+
+Our results in the framework of the quadratic exponential family for tensor-valued variables, i.e., consistency and asymptotic normality, apply to both tensor normal and tensor Ising models. The structure of this paper is as follows. In \cref{sec:notation} we introduce our notation. \Cref{sec:problem-formulation} describes the exact problem and in \cref{sec:gmlm-model} we introduce our model. Continuing in \cref{sec:ml-estimation} we provide the basis for a general maximum likelihood estimation procedure and derive specialized methods for the tensor normal as well as the tensor Ising distributions. \Cref{sec:manifolds} gives a short introduction to manifolds and provides the basis for applying the consistency and asymptotic normality results from \cref{sec:asymtotics}. Simulations for continuous and binary predictors are the subject of \cref{sec:simulations}. Finally, in \cref{sec:data-analysis} we apply our model to EEG data and perform a proof of concept data analysis example where a chess board is interpreted as a collection of binary $8\times 8$ matrices.
@@ -359,7 +381,7 @@ where the vectorized quantities $\vec{\ten{X}}\in\mathbb{R}^p$ and $\vec\ten{F}( %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Problem Formulation}\label{sec:problem-formulation} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -Our goal is to infer the cumulative distribution function (cdf) $F$ of $Y\mid \ten{X}$, where $\ten{X}$ is assumed to admit $r$-tensor structure of dimension $p_1\times ... \times p_r$ with continuous or discrete entries and the response $Y$ is unconstrained. The predictor $\ten{X}$ is a complex object; to simplify the problem we assume there exists a tensor valued function of lower dimension $\ten{R}:\ten{X}\mapsto \ten{R}(\ten{X})$ such that +Our goal is to infer the cumulative distribution function (cdf) $F$ of $Y\mid \ten{X}$, where $\ten{X}$ is assumed to admit $r$-tensor structure of dimension $p_1\times ... \times p_r$ with continuous or discrete entries, and the response $Y$ is unconstrained. The predictor $\ten{X}$ is a complex object; to simplify the problem we assume there exists a tensor-valued function of lower dimension $\ten{R}:\ten{X}\mapsto \ten{R}(\ten{X})$ such that \begin{displaymath} F(Y\mid \ten{X}) = F(Y\mid \ten{R}(\ten{X})). \end{displaymath} @@ -379,7 +401,7 @@ f_{\mat{\eta}_y}(\ten{X}\mid Y = y) &= h(\ten{X})\exp(\t{\mat{\eta}_y}\mat{t}(\ten{X}) - b(\mat{\eta}_y)) \nonumber \\ &= h(\ten{X})\exp(\langle \mat{t}_1(\ten{X}), \mat{\eta}_{1y} \rangle + \langle \mat{t}_2(\ten{X}), \mat{\eta}_{2y} \rangle - b(\mat{\eta}_{y})) \label{eq:quad-density} \end{align} -where $\mat{t}_1(\ten{X})=\vec \ten{X}$ and $\mat{t}_2(\ten{X})$ is linear in $\ten{X}\circ\ten{X}$. The dependence of $\ten{X}$ on $Y$ is fully captured in the natural parameter $\mat{\eta}_y$. The function $h$ is non-negative real-valued and $b$ is assumed to be at least twice continuously differentiable and strictly convex. An important feature of the \emph{quadratic exponential family} is that the distribution of its members is fully characterized by their first two moments. Distributions within the quadratic exponential family include the \emph{tensor normal} (\cref{sec:tensor-normal-estimation}) and \emph{tensor Ising model} (\cref{sec:ising_estimation}, a generalization of the (inverse) Ising model which is multi-variate Bernoulli with up to second order interactions) and mixtures of these two. +where $\mat{t}_1(\ten{X})=\vec \ten{X}$ and $\mat{t}_2(\ten{X})$ is linear in $\ten{X}\circ\ten{X}$. The dependence of $\ten{X}$ on $Y$ is fully captured in the natural parameter $\mat{\eta}_y$. The function $h$ is non-negative real-valued and $b$ is assumed to be at least twice continuously differentiable and strictly convex. An important feature of the \emph{quadratic exponential family} is that the distribution of its members is fully characterized by their first two moments. Distributions within the quadratic exponential family include the \emph{tensor normal} (\cref{sec:tensor-normal-estimation}) and \emph{tensor Ising model} (\cref{sec:ising_estimation}, a generalization of the (inverse)\footnote{\todo{}} Ising model which is multi-variate Bernoulli with up to second order interactions) and mixtures of these two. 
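+For illustration of \eqref{eq:quad-density}, consider the simplest univariate case $X\mid Y = y\sim\mathcal{N}(\mu_y, \sigma^2)$; this is a standard exponential family rewriting, stated here only as an example:
+\begin{displaymath}
+    f_{\mat{\eta}_y}(X\mid Y = y) = \exp\bigl(\eta_{1y} X + \eta_{2y} X^2 - b(\mat{\eta}_y)\bigr),
+    \qquad
+    \eta_{1y} = \frac{\mu_y}{\sigma^2}, \quad \eta_{2y} = -\frac{1}{2\sigma^2},
+\end{displaymath}
+with $\mat{t}_1(X) = X$, $\mat{t}_2(X) = X^2$, $h\equiv 1$ and $b(\mat{\eta}_y) = -\eta_{1y}^2/(4\eta_{2y}) + \log\sqrt{-\pi/\eta_{2y}}$; here the second natural parameter does not depend on $y$.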
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{The Generalized Multi-Linear Model}\label{sec:gmlm-model}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -440,7 +462,7 @@ Under the quadratic exponential family model \eqref{eq:quadratic-exp-fam}, a su
 The reduction in vectorized form is $\vec\ten{R}(\ten{X})=\t{\mat{B}}\vec(\ten{X} - \E\ten{X})$, where $\mat{B} = \bigotimes_{k = r}^{1}\mat{\beta}_k$ with $\Span(\mat{B}) = \Span(\{\mat{\eta}_{1y} - \E_{Y}\mat{\eta}_{1Y} : y\in\mathcal{S}_Y\})$, using $\mathcal{S}_Y$ to denote the set of values of the random variable $Y$.
 
-\cref{thm:sdr} obtains that the \emph{sufficient reduction} $\ten{R}(\ten{X})$ reduces $\ten{X}$ along each dimension linearly. The graph in \cref{fig:SDRvisual} is a visual depiction of the sufficient reduction.
+\cref{thm:sdr} establishes that the \emph{sufficient reduction} $\ten{R}(\ten{X})$ reduces $\ten{X}$ along each dimension linearly. The graph in \cref{fig:SDRvisual} is a visual depiction of the sufficient reduction.
 
 \begin{figure}
     \centering
@@ -464,7 +486,7 @@ The reduction in vectorized form is $\vec\ten{R}(\ten{X})=\t{\mat{B}}\vec(\ten{X
 \end{example}
 
 \begin{example}[Matrix valued $\mat{X}$ ($r = 2$)]
-    Assuming $\mat{X}$ to be matrix valued, that is $r = 2$, $\mat{\theta} = (\overline{\mat{\eta}}, \mat{\beta}_1, \mat{\beta}_2, \mat{\Omega}_1, \mat{\Omega}_2)$, where the intercept term $\overline{\mat{\eta}}\in\mathbb{R}^{p_1\times p_2}$ is now matrix valued. Similar to \cref{ex:vector-dist} with $\mat{F}_y\in\mathbb{R}^{q_1\times q_2}$ being matrix valued, the conditional density of $\mat{X}\mid Y = y$ reads
+    Assuming $\mat{X}$ to be matrix valued, that is $r = 2$, $\mat{\theta} = (\overline{\mat{\eta}}, \mat{\beta}_1, \mat{\beta}_2, \mat{\Omega}_1, \mat{\Omega}_2)$, where the intercept term $\overline{\mat{\eta}}\in\mathbb{R}^{p_1\times p_2}$ is now matrix valued. Similar to \cref{ex:vector-dist} with $\mat{F}_y\in\mathbb{R}^{q_1\times q_2}$ being matrix valued, the conditional density of $\mat{X}\mid Y = y$ is
     \begin{align*}
         f_{\mat{\theta}}(\mat{X}\mid Y = y)
             &= h(\mat{X})\exp(\langle\vec{\mat{X}}, \mat{\eta}_{1y}(\mat{\theta})\rangle + \langle\vec(\mat{X}\circ\mat{X}), \mat{\eta}_2(\mat{\theta})\rangle - b(\mat{\eta}_y(\mat{\theta}))) \\
@@ -502,7 +524,8 @@ A straightforward and general method for parameter estimation is \emph{gradient
     If $\mat{T}_2$ is the identity matrix $\mat{I}_{p(p + 1) / 2}$, then $\ten{G}_2(\mat{\eta}_y) = \ten{g}_2(\mat{\eta}_y)$.
 \end{theorem}
 
-Although the general case of any GMLM model can be fitted via gradient descent using \cref{thm:grad}, this may be very inefficient. In \cref{thm:grad}, $\mat{T}_2$ can be used to introduce flexible second moment structures. For example, it allows modeling effects differently for predictor components, as described in \cref{sec:ising_estimation} after Eqn. \eqref{eq:ising-cond-prob}. In the remainder, we focus on $\mat{T}_2$'s that are identity matrices. This approach simplifies the estimation algorithm and the speed of the numerical calculation in the case of tensor normal predictors. In the case of the tensor normal distribution, an iterative cyclic updating scheme is derived in \cref{sec:tensor-normal-estimation}, which has much faster convergence, is stable and does not require hyper parameters, as will be discussed later. On the other hand, the Ising model does not allow such a scheme. There we need to use a gradient based method, which is the subject of \cref{sec:ising_estimation}.
+Although the general case of any GMLM model can be fitted via gradient descent using \cref{thm:grad}, this may be very inefficient. In \cref{thm:grad}, $\mat{T}_2$ can be used to introduce flexible second moment structures. For example, it allows modeling effects differently for predictor components, as described in \cref{sec:ising_estimation} after Eqn. \eqref{eq:ising-cond-prob}. In the remainder, we focus on $\mat{T}_2$'s that are identity matrices. This approach simplifies the estimation algorithm and the speed of the numerical calculation in the case of tensor normal predictors. % In the case of the tensor normal distribution, +An iterative cyclic updating scheme is derived in \cref{sec:tensor-normal-estimation}, which has much faster convergence, is stable and does not require hyperparameters, as will be discussed later. On the other hand, the Ising model does not allow such a scheme. There we need to use a gradient-based method, which is the subject of \cref{sec:ising_estimation}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsection{Tensor Normal}\label{sec:tensor-normal-estimation} @@ -512,7 +535,14 @@ representing a vector of observations \parencite{OhlsonEtAl2013}. \textcite{Koll The defining feature of the matrix normal distribution, and its tensor extension, is the Kronecker product structure of its covariance. This formulation, where the covariates are multivariate normal with multiway covariance structure modeled as a Kronecker product of matrices of much lower dimension, aims to overcome the significant modeling and computational challenges arising from the high computational complexity of manipulating tensor representations \parencite[see, e.g.,][]{HillarLim2013,WangEtAl2022}. -% Multilinear tensor normal models have been used in various applications, including medical imaging \parencite{BasserPajevic2007,DrydenEtAl2009}, spatio-temporal data analysis \parencite{GreenewaldHero2014}, regression analysis for longitudinal relational data \parencite{Hoff2015}. One of the most important uses of the multilinear normal (MLN) distribution, and hence tensor analysis, is perhaps in magnetic resonance imaging (MRI) \parencite{OhlsonEtAl2013}. A recent survey \parencite{WangEtAl2022} and references therein contain more information and potential applications of multilinear tensor normal models. +Multilinear tensor normal %Kronecker-separable covariance +models have been used in various applications, including +medical imaging \parencite{BasserPajevic2007,DrydenEtAl2009}, spatio-temporal data analysis \parencite{GreenewaldHero2014}, regression analysis +for longitudinal relational data \parencite{Hoff2015}. +%, radar [AFJ10], and multiple-input-multiple-output (MIMO) communications [WJS08]. +One of the most important uses of the multilinear normal (MLN) distribution, and hence tensor analysis, is perhaps in magnetic resonance imaging (MRI) \parencite{OhlsonEtAl2013}. +A recent survey \parencite{WangEtAl2022} and references therein contain more information and potential applications of multilinear tensor normal models. +%The first occurrence of the \textit{matrix normal} we found, even though not explicitly called as such, was in \textcite{SrivastavaKhatri1979}. Suppose $\ten{X}\mid Y = y$ follows a tensor normal distribution with mean $\ten{\mu}_y$ and covariance $\mat{\Sigma} = \bigkron_{k = r}^{1}\mat{\Sigma}_k$. 
We assume the distribution is non-degenerate which means that the covariances $\mat{\Sigma}_k$ are symmetric positive definite matrices. Its density is given by \begin{displaymath} @@ -528,7 +558,7 @@ where we used that $\overline{\ten{\eta}} = 0$ due to $0 = \E\ten{X} = \E\E[\ten \ten{g}_1(\mat{\eta}_y) = \E[\ten{X}\mid Y = y] = \ten{\mu}_y, \qquad \ten{G}_2(\mat{\eta}_y) = \ten{g}_2(\mat{\eta}_y) = \E[\ten{X}\circ\ten{X}\mid Y = y] \equiv \bigkron_{k = r}^1\mat{\Sigma}_k + (\vec{\ten{\mu}}_y)\t{(\vec{\ten{\mu}}_y)}. \end{displaymath} -In practice, we assume we have a random sample of $n$ observations $(\ten{X}_i, \ten{F}_{y_i})$ from the joint distribution. We start the estimation process by demeaning them. Then, only the reduction matrices $\mat{\beta}_k$ and the scatter matrices $\mat{\Omega}_k$ need to be estimated. To solve the optimization problem \eqref{eq:mle}, with $\overline{\ten{\eta}} = 0$ we initialize the parameters using a simple heuristic approach. % For initial estimates $\hat{\mat{\beta}}_k^{(0)}$ we +In practice, we assume we have a random sample of $n$ observations $(\ten{X}_i, \ten{F}_{y_i})$ from the joint distribution. We start the estimation process by demeaning the data. Then, only the reduction matrices $\mat{\beta}_k$ and the scatter matrices $\mat{\Omega}_k$ need to be estimated. To solve the optimization problem \eqref{eq:mle}, with $\overline{\ten{\eta}} = 0$ we initialize the parameters using a simple heuristic approach. % For initial estimates $\hat{\mat{\beta}}_k^{(0)}$ we First, we compute moment based mode-wise marginal covariance estimates $\widehat{\mat{\Sigma}}_k(\ten{X})$ and $\widehat{\mat{\Sigma}}_k(\ten{F}_Y)$ as \begin{displaymath} \widehat{\mat{\Sigma}}_k(\ten{X}) = \frac{1}{n}\sum_{i = 1}^{n} (\ten{X}_i)_{(k)}\t{(\ten{X}_i)_{(k)}}, \qquad @@ -538,7 +568,7 @@ Then, for every mode $k = 1, \ldots, r$, we compute the first $j = 1, \ldots, q_ \begin{align*} \mat{U}_k &= (\mat{v}_1(\widehat{\mat{\Sigma}}_1(\ten{X})), \ldots, \mat{v}_{q_k}(\widehat{\mat{\Sigma}}_{q_k}(\ten{X}))), \\ \mat{D}_k &= \diag(\mat{v}_1(\widehat{\mat{\Sigma}}_1(\ten{X}))\mat{v}_1(\widehat{\mat{\Sigma}}_1(\ten{F}_{Y})), \ldots, \mat{v}_{q_k}(\widehat{\mat{\Sigma}}_{q_k}(\ten{X}))\mat{v}_{q_k}(\widehat{\mat{\Sigma}}_k(\ten{F}_{Y}))), \\ - \mat{V}_k &= (\mat{v}_1(\widehat{\mat{\Sigma}}_1(\ten{F}_Y), \ldots, \mat{v}_{q_k}(\widehat{\mat{\Sigma}}_{q_k}(\ten{F}_Y)). \\ + \mat{V}_k &= (\mat{v}_1(\widehat{\mat{\Sigma}}_1(\ten{F}_Y), \ldots, \mat{v}_{q_k}(\widehat{\mat{\Sigma}}_{q_k}(\ten{F}_Y)). \end{align*} The initial value of $\mat{\beta}_k$ is \begin{displaymath} @@ -561,7 +591,7 @@ Given $\hat{\mat{\beta}}_1, \ldots, \hat{\mat{\beta}}_r, \hat{\mat{\Omega}}_1, \hat{\mat{\Omega}}_j. \end{equation} %For the scatter matrices $\mat{\Omega}_j$, we need to fudge a bit. -Equating the partial gradient of the $j$th scatter matrix $\mat{\Omega}_j$ in \cref{thm:grad} to zero ( $\nabla_{\mat{\Omega}_j}l_n = 0$) gives a quadratic matrix equation. This is due to the dependence of $\ten{\mu}_y$ on $\mat{\Omega}_j$. In practice though, it is faster, more stable, and equally accurate to use mode-wise covariance estimates via the residuals +Equating the partial gradient of the $j$th scatter matrix $\mat{\Omega}_j$ in \cref{thm:grad} to zero ( $\nabla_{\mat{\Omega}_j}l_n = 0$) gives a quadratic matrix equation due to the dependence of $\ten{\mu}_y$ on $\mat{\Omega}_j$. 
In practice though, it is faster, more stable, and equally accurate to use mode-wise covariance estimates via the residuals \begin{displaymath} \hat{\ten{R}}_i = \ten{X}_i - \hat{\ten{\mu}}_{y_i} = \ten{X}_i - \ten{F}_{y_i}\mlm_{k = 1}^{r}\hat{\mat{\Omega}}_k^{-1}\hat{\mat{\beta}}_k. \end{displaymath} @@ -569,7 +599,7 @@ The estimates are computed via \begin{displaymath} \tilde{\mat{\Sigma}}_j = \sum_{i = 1}^n (\hat{\ten{R}}_i)_{(j)} \t{(\hat{\ten{R}}_i)_{(j)}}, \end{displaymath} -where $\tilde{s}\tilde{\mat{\Sigma}}_j = \hat{\mat{\Omega}}_j^{-1}$. For scaling we use that the mean squared error has to be equal to the trace of the covariance estimate, +where $\tilde{s}\tilde{\mat{\Sigma}}_j = \hat{\mat{\Omega}}_j^{-1}$. To decide on the scaling factor $\tilde{s}$ we use that the mean squared error has to be equal to the trace of the covariance estimate, \begin{displaymath} \frac{1}{n}\sum_{i = 1}^n \langle \hat{\ten{R}}_i, \hat{\ten{R}}_i \rangle = \tr\bigkron_{k = r}^{1}\hat{\mat{\Omega}}_k^{-1} = \prod_{k = 1}^{r}\tr{\hat{\mat{\Omega}}_k^{-1}} = \tilde{s}^r\prod_{k = 1}^{r}\tr{\tilde{\mat{\Sigma}}_k}, \end{displaymath} @@ -580,43 +610,23 @@ so that resulting in the estimates $\hat{\mat{\Omega}}_j = (\tilde{s}\tilde{\mat{\Sigma}}_j)^{-1}$. Estimation is then performed by updating the estimates $\hat{\mat{\beta}}_j$ via \eqref{eq:tensor_normal_beta_solution} for $j = 1, \ldots, r$, and then recompute the $\hat{\mat{\Omega}}_j$ estimates simultaneously keeping the $\hat{\mat{\beta}}_j$'s fixed. This procedure is repeated until convergence. % Convergence is very fast, experiments showed that convergence occures usualy in less than $10$ iterations. -A technical detail for numerical stability is to ensure that the scaled values $\tilde{s}\tilde{\mat{\Sigma}}_j$, assumed to be symmetric and positive definite, are well conditioned. Thus, we estimate the condition number of $\tilde{s}\tilde{\mat{\Sigma}}_j$ prior to computing the inverse. In case of ill- conditioning, we use the regularized $\hat{\mat{\Omega}}_j = (\tilde{s}\tilde{\mat{\Sigma}}_j + 0.2 \lambda_{1}(\tilde{s}\tilde{\mat{\Sigma}}_j)\mat{I}_{p_j})^{-1}$ instead, where $\lambda_{1}(\tilde{s}\tilde{\mat{\Sigma}}_j)$ is the first (maximum) eigenvalue. Experiments showed that this regularization is usually only required in the first few iterations. +A technical detail for numerical stability is to ensure that the scaled values $\tilde{s}\tilde{\mat{\Sigma}}_j$, assumed to be symmetric and positive definite, are well conditioned. Thus, we estimate the condition number of $\tilde{s}\tilde{\mat{\Sigma}}_j$ before computing the inverse. In case of ill-conditioning, we use the regularized $\hat{\mat{\Omega}}_j = (\tilde{s}\tilde{\mat{\Sigma}}_j + 0.2 \lambda_{1}(\tilde{s}\tilde{\mat{\Sigma}}_j)\mat{I}_{p_j})^{-1}$ instead, where $\lambda_{1}(\tilde{s}\tilde{\mat{\Sigma}}_j)$ is the first (maximum) eigenvalue. Experiments showed that this regularization is usually only required in the first few iterations. -Furthermore, if the parameter space follows a more general setting as in \cref{thm:param-manifold}, updating may produces estimates outside the parameter space. A simple and efficient method is to project every updated estimate onto the corresponding manifold. +Furthermore, if the parameter space follows a more general setting as in \cref{thm:param-manifold}, updating may produce estimates outside the parameter space. A simple and efficient method is to project every updated estimate onto the corresponding manifold. 
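+To make the covariance update above concrete, the following minimal NumPy sketch (for illustration only; the helper names, the array layout and the conditioning threshold \texttt{cond\_max} are our own choices and not part of the formal estimation procedure) computes the mode-wise scatter matrices $\tilde{\mat{\Sigma}}_j$, the scaling factor $\tilde{s}$ and the regularized $\hat{\mat{\Omega}}_j$ from given residuals $\hat{\ten{R}}_i$.
+\begin{verbatim}
+import numpy as np
+
+def unfold(T, k):
+    # mode-k matricization, using column-major (Fortran) order
+    return np.reshape(np.moveaxis(T, k, 0), (T.shape[k], -1), order="F")
+
+def scatter_update(residuals, cond_max=1e8):
+    # residuals: array of shape (n, p_1, ..., p_r) holding the R_hat_i
+    n, dims = residuals.shape[0], residuals.shape[1:]
+    r = len(dims)
+    # mode-wise (unscaled) scatter matrices Sigma_tilde_j
+    Sigmas = [sum(unfold(R, j) @ unfold(R, j).T for R in residuals)
+              for j in range(r)]
+    # scaling factor s_tilde: match the mean squared error with the
+    # trace of the Kronecker structured covariance estimate
+    mse = np.mean([np.sum(R**2) for R in residuals])
+    s = (mse / np.prod([np.trace(S) for S in Sigmas])) ** (1.0 / r)
+    Omegas = []
+    for S in Sigmas:
+        S = s * S
+        if np.linalg.cond(S) > cond_max:  # regularize if ill-conditioned
+            S = S + 0.2 * np.linalg.eigvalsh(S)[-1] * np.eye(S.shape[0])
+        Omegas.append(np.linalg.inv(S))
+    return Omegas
+\end{verbatim}
+In the full algorithm this step alternates with the $\hat{\mat{\beta}}_j$ updates of \eqref{eq:tensor_normal_beta_solution} until convergence.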
A standard method to compute the MLE of a Kronecker product is block-coordinate descent, also referred to as the ``flip-flop algorithm.'' This algorithm was proposed independently by \textcite{MardiaGoodall1993} and \textcite{Dutilleul1999} and was later called ``flip-flop'' algorithm by \textcite{LuZimmerman2005} for the computation of the maximum likelihood estimators of the components of a separable covariance matrix. \textcite{ManceurDutilleul2013} extended the ``flip-flop'' algorithm for the computation of the MLE of the separable covariance structure of a 3-way and 4-way normal distribution and obtained a lower bound for the sample size required for its existence. The same issue was also studied by \textcite{DrtonEtAl2020} in the case of a two-way array (matrix). Our algorithm uses a similar “flip-flop” approach by iteratively updating the $\mat{\beta}_k$'s and $\mat{\Omega}_k$'s, one after the other. -\subsubsection{Matrix and Tensor Normal} - -The tensor normal model is the extension of the matrix normal to tensor-valued random variables and a member of the quadratic exponential family \eqref{eq:quadratic-exp-fam} under \eqref{eq:eta2}. \textcite{Dawid1981,Arnold1981} introduced the term matrix normal and, in particular, \textcite{Arnold1981} provided several theoretical results, such as its density, moments and conditional distributions of its components. The matrix normal distribution is a bilinear normal distribution; a distribution of a two-way array, each component -representing a vector of observations \parencite{OhlsonEtAl2013}. \textcite{KolloVonRosen2005,Hoff2011,OhlsonEtAl2013} presented the extension of the bilinear to the multilinear normal distribution, what we call tensor normal in this paper, using a parallel extension of bilinear matrices to multilinear tensors \parencite{Comon2009}. - -The defining feature of the matrix normal distribution, and its tensor extension, is the Kronecker product structure of its covariance. This formulation, where the covariates are multivariate normal with multiway covariance structure modeled as a Kronecker -product of matrices of much lower dimension, aims to overcome the significant modeling and computational challenges arising from the -high computational complexity of manipulating tensor representations \parencite[see, e.g.,][]{HillarLim2013,WangEtAl2022}. - -Multilinear tensor normal %Kronecker-separable covariance -models have been used in various applications, including -medical imaging \parencite{BasserPajevic2007,DrydenEtAl2009}, spatio-temporal data analysis \parencite{GreenewaldHero2014}, regression analysis -for longitudinal relational data \parencite{Hoff2015}. -%, radar [AFJ10], and multiple-input-multiple-output (MIMO) communications [WJS08]. -One of the most important uses of the multilinear normal (MLN) distribution, and hence tensor analysis, is perhaps in magnetic resonance imaging (MRI) \parencite{OhlsonEtAl2013}. -A recent survey \parencite{WangEtAl2022} and references therein contain more information and potential applications of multilinear tensor normal models. -%The first occurrence of the \textit{matrix normal} we found, even though not explicitly called as such, was in \textcite{SrivastavaKhatri1979}. 
-
-
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Ising Model}\label{sec:ising_estimation}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-The Ising\footnote{Also known as the \emph{Lenz-Ising} model as the physical assumptions of the model where developed by both Lenz and Ising \parencite{Niss2005}. Ising gave a closed form solution for the 1-dimensional lattice, that is, a linear chain \parencite{Ising1925}.} model \parencite{Lenz1920,Ising1925,Niss2005} is a mathematical model originating in statistical physics to study ferromagnetism in a thermodynamic setting. It describes magentic dipoles (atomic ``spins'') which can take two states ($\pm 1$) while allowing two-way interactions between direct neighbours on a lattice, a discrete grid. The model assumes all elementary magnets to be the same, which translates to all having the same coupling strength (two-way interactions) governed by a single parameter relating to the temperature of the system. Nowadays, the Ising model, in its general form, allows for different coupling strength for every (symmetric) interaction as well as an external magnetic field acting on every magnetic dipole separately. A modern review is given by \textcite{NguyenEtAl2017}.
+The Ising\footnote{Also known as the \emph{Lenz-Ising} model as the physical assumptions of the model were developed by both Lenz and Ising \parencite{Niss2005}. Ising gave a closed form solution for the 1-dimensional lattice, that is, a linear chain \parencite{Ising1925}.} model \parencite{Lenz1920,Ising1925,Niss2005} is a mathematical model originating in statistical physics to study ferromagnetism in a thermodynamic setting. It describes magnetic dipoles (atomic ``spins'') which can take two states ($\pm 1$) while allowing two-way interactions between direct neighbours on a lattice, a discrete grid. The model assumes all elementary magnets to be the same, which translates to all having the same coupling strength (two-way interactions) governed by a single parameter relating to the temperature of the system. Nowadays, the Ising model, in its general form, allows for a different coupling strength for every (symmetric) interaction as well as an external magnetic field acting on every magnetic dipole separately. A review is given by \textcite{NguyenEtAl2017}.

In statistics, the Ising model is used to model multivariate binary data. That is, the states are $\{0, 1\}$ instead of $\pm 1$. It is related to a multitude of other models: \emph{Graphical Models} and \emph{Markov Random Fields} to describe conditional dependence \parencite{Lauritzen1996,WainwrightJordan2008,LauritzenRichardson2002}, \emph{Potts models} \parencite{Besag1974,ChakrabortyEtAl2022} which generalize the Ising model to multiple states, and the \emph{multivariate Bernoulli distribution} \parencite{Whittaker1990,JohnsonEtAl1997,DaiDingWahba2013} considering also interactions (three-way and higher), to name the most prominent.

-The $p$-dimensional Ising model is a discrete probability distribution on the set of $p$-dimensional binary vectors $\mat{x}\in\{0, 1\}^p$ with pmf given by
+The $p$-dimensional Ising model is a discrete probability distribution on the set of $p$-dimensional binary vectors $\mat{x}\in\{0, 1\}^p$ with probability mass function (pmf) given by
 \begin{displaymath}
 	P_{\mat{\gamma}}(\mat{x}) = p_0(\mat{\gamma})\exp(\t{\vech(\mat{x}\t{\mat{x}})}\mat{\gamma}).
\end{displaymath} @@ -624,20 +634,20 @@ The scaling factor $p_0(\mat{\gamma})\in\mathbb{R}_{+}$ ensures that $P_{\mat{\g \begin{equation}\label{eq:ising-partition-function} p_0(\mat{\gamma})^{-1} = \sum_{\mat{x}\in\{0, 1\}^p}\exp(\t{\vech(\mat{x}\t{\mat{x}})}\mat{\gamma}). \end{equation} -By an abuse of notation, we let $\mat{\gamma}_{j l}$ denote the element of $\mat{\gamma}$ corresponding to $\mat{x}_j\mat{x}_l$ in $\vech(\mat{x}\t{\mat{x}})$\footnote{Specifically, the element $\mat{\gamma}_{j l}$ of $\mat{\gamma}$ is a short hand for $\mat{\gamma}_{\iota(j, l)}$ with $\iota(j, l) = (\min(j, l) - 1)(2 p - \min(j, l)) / 2 + \max(j, l)$ mapping the matrix row index $j$ and column index $l$ to the corresponding half vectorization indices $\iota(j, l)$.}. The ``diagonal'' parameter $\mat{\gamma}_{j j}$ expresses the conditional log odds of $X_j = 1\mid X_{-j} = \mat{0}$, where the negative subscript in $X_{-j}$ describes the $p - 1$ dimensional vector $X$ with the $j$th element removed. The off diagonal entries $\mat{\gamma}_{j l}$, $j\neq l$, are equal to the conditional log odds of simultaneous occurrence $X_j = 1, X_l = 1 \mid X_{-j, -l} = \mat{0}$. More precisely, the conditional probabilities and the natural parameters are related via +Abusing notation, we let $\mat{\gamma}_{j l}$ denote the element of $\mat{\gamma}$ corresponding to $\mat{x}_j\mat{x}_l$ in $\vech(\mat{x}\t{\mat{x}})$\footnote{Specifically, the element $\mat{\gamma}_{j l}$ of $\mat{\gamma}$ is a short hand for $\mat{\gamma}_{\iota(j, l)}$ with $\iota(j, l) = (\min(j, l) - 1)(2 p - \min(j, l)) / 2 + \max(j, l)$ mapping the matrix row index $j$ and column index $l$ to the corresponding half vectorization indices $\iota(j, l)$.}. The ``diagonal'' parameter $\mat{\gamma}_{j j}$ expresses the conditional log odds of $X_j = 1\mid X_{-j} = \mat{0}$, where the negative subscript in $X_{-j}$ describes the $p - 1$ dimensional vector $X$ with the $j$th element removed. The off diagonal entries $\mat{\gamma}_{j l}$, $j\neq l$, are equal to the conditional log odds of simultaneous occurrence $X_j = 1, X_l = 1 \mid X_{-j, -l} = \mat{0}$. More precisely, the conditional probabilities and the natural parameters are related via \begin{align} \mat{\gamma}_{j j} &= \log\frac{P_{\mat{\gamma}}(X_j = 1\mid X_{-j} = \mat{0})}{1 - P_{\mat{\gamma}}(X_j = 1\mid X_{-j} = \mat{0})}, \nonumber \\ \mat{\gamma}_{j l} &= \log\frac{1 - P_{\mat{\gamma}}(X_j = 1\mid X_{-j} = \mat{0})P_{\mat{\gamma}}(X_l = 1\mid X_{-l} = \mat{0})}{P_{\mat{\gamma}}(X_j = 1\mid X_{-j} = \mat{0})P_{\mat{\gamma}}(X_l = 1\mid X_{-l} = \mat{0})}\frac{P_{\mat{\gamma}}(X_j = 1, X_l = 1\mid X_{-j, -l} = \mat{0})}{1 - P_{\mat{\gamma}}(X_j = 1, X_l = 1\mid X_{-j, -l} = \mat{0})} \label{eq:ising-two-way-log-odds}. \end{align} Conditional Ising models, incorporating the information of covariates $Y$ into the model, were considered by \textcite{ChengEtAl2014,BuraEtAl2022}. The direct way is to parameterize $\mat{\gamma} = \mat{\gamma}_y$ by the covariate $Y = y$ to model a conditional distribution $P_{\mat{\gamma}_y}(\mat{x}\mid Y = y)$. -We extend the conditional pmf by allowing the binary variables to be tensor valued; that is, we set $\mat{x} = \vec{\ten{X}}$, with dimension $p = \prod_{k = 1}^{r}p_k$ for $\ten{X}\in\{ 0, 1 \}^{p_1\times\cdots\times p_r}$. The tensor structure of $\ten{X}$ is accommodated by assuming Kronecker product constraints to the parameter vector $\mat{\gamma}_y$ in a similar fashion as for the tensor normal model. 
This means that we compare the pmf $P_{\mat{\gamma}_y}(\vec{\ten{X}} | Y = y)$ with the quadratic exponential family \eqref{eq:quadratic-exp-fam} with the natural parameters modeled by \eqref{eq:eta1} and \eqref{eq:eta2}. A detail to be considered is that the diagonal of $(\vec{\ten{X}})\t{(\vec{\ten{X}})}$ is equal to $\vec{\ten{X}}$. This gives the GMLM model as
+We extend the conditional pmf by allowing the binary variables to be tensor-valued; that is, we set $\mat{x} = \vec{\ten{X}}$, with dimension $p = \prod_{k = 1}^{r}p_k$ for $\ten{X}\in\{ 0, 1 \}^{p_1\times\cdots\times p_r}$. The tensor structure of $\ten{X}$ is accommodated by assuming Kronecker product constraints on the parameter vector $\mat{\gamma}_y$ in a similar fashion as for the tensor normal model. This means that we compare the pmf $P_{\mat{\gamma}_y}(\vec{\ten{X}} | Y = y)$ with the quadratic exponential family \eqref{eq:quadratic-exp-fam} with the natural parameters modeled by \eqref{eq:eta1} and \eqref{eq:eta2}. A detail to be considered is that the diagonal of $(\vec{\ten{X}})\t{(\vec{\ten{X}})}$ is equal to $\vec{\ten{X}}$, which results in the GMLM being expressed as
 \begin{align}
     P_{\mat{\gamma}_y}(\ten{X} \mid Y = y)
        &= p_0(\mat{\gamma}_y)\exp(\t{\vech((\vec{\ten{X}})\t{(\vec{\ten{X}})})}\mat{\gamma}_y) \nonumber \\
       &= p_0(\mat{\gamma}_y)\exp\Bigl(\Bigl\langle \ten{X}, \ten{F}_y\mlm_{k = 1}^{r}\mat{\beta}_k \Bigr\rangle + \Bigl\langle\ten{X}\mlm_{k = 1}^{r}\mat{\Omega}_k, \ten{X}\Bigr\rangle\Bigr)\label{eq:ising-cond-prob}
 \end{align}
-where we set $\overline{\ten{\eta}} = 0$ and $\mat{T}_2$ to the identity. This imposes an additional constraint to the model, the reason is that the diagonal elements of $\mat{\Omega} = \bigkron_{k = r}^{1}\mat{\Omega}_k$ take the role of $\overline{\ten{\eta}}$, although not fully. Having the diagonal of $\mat{\Omega}$ and $\overline{\ten{\eta}}$ handling the self interaction effects might lead to interference in the optimization routine. Another approach would be to use the $\mat{T}_2$ matrix to set the corresponding diagonal elements of $\mat{\Omega}$ to zero and let $\overline{\ten{\eta}}$ handle the self interaction effect. All of those approaches, namely setting $\overline{\ten{\eta}} = 0$, keeping $\overline{\ten{\eta}}$ or using $\mat{T}_2$, are theoretically solid and compatible with \cref{thm:grad,thm:param-manifold,thm:asymptotic-normality-gmlm}, assuming all axis dimensions $p_k$ are non-degenerate, that is $p_k > 1$ for all $k = 1, \ldots, r$. Regardless, under our modeling choice the relation between the natural parameters $\mat{\gamma}_y$ of the conditional Ising model and the GMLM parameters $\mat{\beta}_k$ and $\mat{\Omega}_k$ is
+where we set $\overline{\ten{\eta}} = 0$ and $\mat{T}_2$ to the identity. This imposes an additional constraint on the model; the reason is that the diagonal elements of $\mat{\Omega} = \bigkron_{k = r}^{1}\mat{\Omega}_k$ take the role of $\overline{\ten{\eta}}$, although not fully. Having the diagonal of $\mat{\Omega}$ and $\overline{\ten{\eta}}$ handle the self-interaction effects might lead to interference in the optimization routine. Another approach would be to use the $\mat{T}_2$ matrix to set the corresponding diagonal elements of $\mat{\Omega}$ to zero and let $\overline{\ten{\eta}}$ handle the self-interaction effect.
All of these approaches, namely setting $\overline{\ten{\eta}} = 0$, keeping $\overline{\ten{\eta}}$ or using $\mat{T}_2$, are theoretically solid and compatible with \cref{thm:grad,thm:param-manifold,thm:asymptotic-normality-gmlm}, assuming all axis dimensions $p_k$ are non-degenerate, that is $p_k > 1$ for all $k = 1, \ldots, r$. Regardless, under our modeling choice, the relation between the natural parameters $\mat{\gamma}_y$ of the conditional Ising model and the GMLM parameters $\mat{\beta}_k$ and $\mat{\Omega}_k$ is
 \begin{equation}\label{eq:ising-natural-params}
     % \t{\pinv{\mat{D}_p}}\mat{\gamma}_y
     %     = \vec(\mat{\Omega} + \diag(\mat{B}\vec{\ten{F}_y}))
 \end{equation}
 In contrast to the tensor normal GMLM, the matrices $\mat{\Omega}_k$ are only required to be symmetric. More specifically, we require $\mat{\Omega}_k$, for $k = 1, \ldots, r$, to be elements of an embedded submanifold of $\SymMat{p_k}$ (see: \cref{sec:kron-manifolds,sec:matrix-manifolds}). The mode-wise reduction matrices $\mat{\beta}_k$ are elements of an embedded submanifold of $\mathbb{R}^{p_k\times q_k}$. Common choices are listed in \cref{sec:matrix-manifolds}.
 
-To solve the optimization problem \eqref{eq:mle}, given a data set $(\ten{X}_i, y_i)$, for $i = 1, \ldots, n$, we use a variation of gradient descent.
+To solve the optimization problem \eqref{eq:mle}, given a data set $(\ten{X}_i, y_i)$, $i = 1, \ldots, n$, we use a variation of gradient descent.
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Initial Values}
-The first step is to get reasonable starting values. Experiments showed that a good starting value of $\mat{\beta}_k$, for $k = 1, \ldots, r$, it to use the tensor normal estimates from \cref{sec:tensor-normal-estimation}, considering $\ten{X}_i$ as continuous. For initial values of $\mat{\Omega}_k$, a different approach is required. Setting everything to the uninformed initial value, that is $\mat{\Omega}_k = \mat{0}$ as this corresponds to the conditional log odds to be $1:1$ for every component and pairwaide interaction. This is not possible, since $\mat{0}$ is a stationary point of the log-likelihood. This is directly observed by taking a look at the partial gradients of the log-likelihood in \cref{thm:grad}. Instead, we use a crude heuristic which threads every mode seperately and ignores any relation to the covariates. It is computationaly cheap and better than any of the alternatives we considered. For every $k = 1, \ldots, r$, let the $k$'th mode second moment estimate be
+The first step is to get reasonable starting values. Experiments showed that a good starting value of $\mat{\beta}_k$ is to use the tensor normal estimates from \cref{sec:tensor-normal-estimation} for $k = 1, \ldots, r$, considering $\ten{X}_i$ as continuous. For initial values of $\mat{\Omega}_k$, a different approach is required. Setting everything to the uninformed initial value $\mat{\Omega}_k = \mat{0}$, which corresponds to conditional log odds of $1:1$ for every component and pairwise interaction, is not possible, since $\mat{0}$ is a stationary point of the log-likelihood. This is directly observed by considering the partial gradients of the log-likelihood in \cref{thm:grad}. Instead, we use a crude heuristic which treats every mode separately and ignores any relation to the covariates.
It is computationally cheap and better than any of the alternatives we considered. For every $k = 1, \ldots, r$, let the $k$th mode second moment estimate be
 \begin{equation}\label{eq:ising-mode-moments}
     \hat{\mat{M}}_{2(k)} = \frac{p_k}{n p}\sum_{i = 1}^n (\ten{X}_i)_{(k)}\t{(\ten{X}_i)_{(k)}}
 \end{equation}
-which contains the $k$'th mode first moment estimate in its diagonal $\hat{\mat{M}}_{1(k)} = \diag\hat{\mat{M}}_{2(k)}$. Considering every column of the matricized observation $(\ten{X}_i)_{(k)}$ as a $p_k$ dimensional observation itself. The number of those artificially generated observations is $n \prod_{j\neq k}p_j$. Let $Z_k$ denote the random variable those artificial observations are realization of. Then, we can interpret the elements $(\hat{\mat{M}}_{1(k)})_{j}$ as the estimates of the probability $P((Z_k)_j = 1)$, that is the marginal probability of the $j$th element of $Z_k$ being $1$. Similar, for $l \neq j$ we have $(\hat{\mat{M}}_{2(k)})_{j l}$ estimating $P((Z_k)_j = 1, (Z_k)_l = 1)$, the marginal probability of two-way interactions. % Without any regard of accuracy ...
-Now, we set the diagonal elements of $\mat{\Omega}_k$ to zero. For the off diagonal elements of $\mat{\Omega}_k$, we equate the conditional probabilities $P((Z_k)_j = 1 \mid (Z_k)_{-j} = \mat{0})$ and $P((Z_k)_j = 1, (Z_k)_l = 1\mid (Z_k)_{-j, -l} = \mat{0})$ with the marginal probability estimates $(\hat{\mat{M}}_{1(k)})_{j}$ and $(\hat{\mat{M}}_{2(k)})_{j l}$, respectively. Use \eqref{eq:ising-two-way-log-odds} then gives the initial estimates $\hat{\mat{\Omega}}_k^{(0)}$, with $j \neq l$ component wise
+which contains the $k$th mode first moment estimate in its diagonal $\hat{\mat{M}}_{1(k)} = \diag\hat{\mat{M}}_{2(k)}$. We consider every column of the matricized observation $(\ten{X}_i)_{(k)}$ as a separate $p_k$ dimensional observation. The number of those artificially generated observations is $n \prod_{j\neq k}p_j$. Let $Z_k$ denote the random variable those artificial observations are realizations of. Then, we can interpret the elements $(\hat{\mat{M}}_{1(k)})_{j}$ as estimates of the probability $P((Z_k)_j = 1)$, that is, the marginal probability of the $j$th element of $Z_k$ being $1$. Similarly, for $l \neq j$ we have $(\hat{\mat{M}}_{2(k)})_{j l}$ estimating $P((Z_k)_j = 1, (Z_k)_l = 1)$, the marginal probability of two-way interactions. % Without any regard of accuracy ...
+Now, we set the diagonal elements of $\mat{\Omega}_k$ to zero. For the off-diagonal elements of $\mat{\Omega}_k$, we equate the conditional probabilities $P((Z_k)_j = 1 \mid (Z_k)_{-j} = \mat{0})$ and $P((Z_k)_j = 1, (Z_k)_l = 1\mid (Z_k)_{-j, -l} = \mat{0})$ with the marginal probability estimates $(\hat{\mat{M}}_{1(k)})_{j}$ and $(\hat{\mat{M}}_{2(k)})_{j l}$, respectively. Applying \eqref{eq:ising-two-way-log-odds} then gives the initial component-wise estimates $\hat{\mat{\Omega}}_k^{(0)}$,
 \begin{equation}\label{eq:ising-init-Omegas}
     (\hat{\mat{\Omega}}_k^{(0)})_{j j} = 0,
     \qquad
-    (\hat{\mat{\Omega}}_k^{(0)})_{j l} = \log\frac{1 - (\hat{\mat{M}}_{1(k)})_{j}(\hat{\mat{M}}_{1(k)})_{l}}{(\hat{\mat{M}}_{1(k)})_{j}(\hat{\mat{M}}_{1(k)})_{l}}\frac{(\hat{\mat{M}}_{2(k)})_{j l}}{1 - (\hat{\mat{M}}_{2(k)})_{j l}}.
+    (\hat{\mat{\Omega}}_k^{(0)})_{j l} = \log\frac{1 - (\hat{\mat{M}}_{1(k)})_{j}(\hat{\mat{M}}_{1(k)})_{l}}{(\hat{\mat{M}}_{1(k)})_{j}(\hat{\mat{M}}_{1(k)})_{l}}\frac{(\hat{\mat{M}}_{2(k)})_{j l}}{1 - (\hat{\mat{M}}_{2(k)})_{j l}}, \, j \neq l.
\end{equation} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \subsubsection{Gradient Optimization} -Given initial values, the gradients provided by \cref{thm:grad} can be evaluated for the Ising model. The first step therefore is to determine the values of the inverse link components $\ten{g}_1(\mat{\gamma}_y) = \E[\ten{X} \mid Y = y]$ and $\ten{G}_2(\mat{\gamma}_y) = \ten{g}_2(\mat{\gamma}_y) = \E[\ten{X}\circ\ten{X} \mid Y = y]$. An immediate simplification is that the first moment is a part of the second moment. Its values are determined via $\vec(\E[\ten{X} \mid Y = y]) = \diag(\E[\ten{X}\circ\ten{X} \mid Y = y]_{(1, \ldots, r)})$. This means only the second moment needs to be computed, or estimated (see: \cref{sec:ising-bigger-dim}) in the case of slightly bigger $p$. For the Ising model, the conditional second moment with parameters $\mat{\gamma}_y$ is given by the matricized relation +Given initial values, the gradients derived in \cref{thm:grad} can be evaluated for the Ising model. The first step therefore is to determine the values of the inverse link components $\ten{g}_1(\mat{\gamma}_y) = \E[\ten{X} \mid Y = y]$ and $\ten{G}_2(\mat{\gamma}_y) = \ten{g}_2(\mat{\gamma}_y) = \E[\ten{X}\circ\ten{X} \mid Y = y]$. An immediate simplification is that the first moment is a part of the second moment. Its values are determined via $\vec(\E[\ten{X} \mid Y = y]) = \diag(\E[\ten{X}\circ\ten{X} \mid Y = y]_{(1, \ldots, r)})$. This means only the second moment needs to be computed, or estimated (see: \cref{sec:ising-bigger-dim}) in the case of slightly bigger $p$. For the Ising model, the conditional second moment with parameters $\mat{\gamma}_y$ is given by the matricized relation \begin{equation}\label{eq:ising-m2} - \ten{g}_2(\ten{\gamma}_y)_{(1, \ldots, r)} = \E[(\vec{\ten{X}})\t{(\vec{\ten{X}})}\mid Y = y] = p_0(\mat{\gamma}_y)\sum_{\mat{x}\in\{0, 1\}^{p}}\mat{x}\t{\mat{x}}\exp(\t{\vech(\mat{x}\t{\mat{x}})}\mat{\gamma}_y). + \ten{g}_2(\ten{\gamma}_y)_{(1, \ldots, r)} = \E\left[(\vec{\ten{X}})\t{(\vec{\ten{X}})}\mid Y = y\right] = p_0(\mat{\gamma}_y)\sum_{\mat{x}\in\{0, 1\}^{p}}\mat{x}\t{\mat{x}}\exp(\t{\vech(\mat{x}\t{\mat{x}})}\mat{\gamma}_y). \end{equation} -The natural parameter $\mat{\gamma}_y$ is evaluated via \eqref{eq:ising-natural-params} enabling us to compute the partial gradients of the log-likelihood $l_n$ \eqref{eq:log-likelihood} for the Ising model by \cref{thm:grad} for the GMLM parameters $\mat{\beta}_k$ and $\mat{\Omega}_k$, $k = 1, \ldots, r$, at the current iterate $\mat{\theta}^{(I)} = (\mat{\beta}_1^{(I)}, \ldots, \mat{\beta}_r^{(I)}, \mat{\Omega}_1^{(I)}, \ldots, \mat{\Omega}_r^{(I)})$. Using classic gradient ascent for maximizing the log-likelihood, we have to specify a learning rate $\lambda\in\mathbb{R}_{+}$, usualy something like $10^{-3}$. The update rule is +The natural parameter $\mat{\gamma}_y$ is evaluated via \eqref{eq:ising-natural-params} enabling us to compute the partial gradients of the log-likelihood $l_n$ \eqref{eq:log-likelihood} for the Ising model by \cref{thm:grad} for the GMLM parameters $\mat{\beta}_k$ and $\mat{\Omega}_k$, $k = 1, \ldots, r$, at the current iterate $\mat{\theta}^{(I)} = (\mat{\beta}_1^{(I)}, \ldots, \mat{\beta}_r^{(I)}, \mat{\Omega}_1^{(I)}, \ldots, \mat{\Omega}_r^{(I)})$. Using classic gradient ascent for maximizing the log-likelihood, we have to specify a learning rate $\lambda\in\mathbb{R}_{+}$, usually a value close to $10^{-3}$. 
The update rule is
 \begin{displaymath}
     \mat{\theta}^{(I + 1)} = \mat{\theta}^{(I)} + \lambda\nabla_{\mat{\theta}} l_n(\mat{\theta})\bigr|_{\mat{\theta} = \mat{\theta}^{(I)}},
 \end{displaymath}
-which is iterated till convergence. In practice, iteration is performed until ether a maximum number of iterations is exhausted and/or some break condition is satisfied. A proper choice of the learning rate is needed as a too large learning rate $\lambda$ causes instabilities, while a too low learning rate requires an enormous amount of iterations. Generically, there are two approach against the need to determine a proper learning rate. First, \emph{line search methods} determin an appropriate step size for every iteration. This works great if the evaluation of the object function (the log-likelihood) is cheap. This is not the case in our setting, see \cref{sec:ising-bigger-dim}. The second approach is an \emph{adaptive learning rate}. The basic idea is to track specific statistics while optimizing and dynamically adapt the leaning rate via well tested heuristics using the gathered knowledge from past iterations. We opted to use an adaptive leaning rate approach, this not only removes the need to determine an appropriate leaning rate, but also accelerates learning.
+which is iterated until convergence. In practice, iteration is performed until either a maximum number of iterations is exhausted or some break condition is satisfied. A proper choice of the learning rate is needed, as a large learning rate $\lambda$ may cause instability, while a very low learning rate requires an enormous number of iterations. In general, there are two approaches to avoid the need to determine a proper learning rate. First, \emph{line search methods} determine an appropriate step size for every iteration. This works well if the evaluation of the objective function (the log-likelihood) is cheap. This is not the case in our setting, see \cref{sec:ising-bigger-dim}. The second approach is an \emph{adaptive learning rate}, where one tracks specific statistics while optimizing and dynamically adapts the learning rate via well-tested heuristics using the gathered knowledge from past iterations. We opted to use an adaptive learning rate approach, which not only removes the need to determine an appropriate learning rate, but also accelerates learning.

-Our method of choice is RMSprop, which stands for \emph{root mean squared propagation} \textcite{Hinton2012}. This is a well known method in machine learning for training neural networks. It is a variation of gradient descent with a per scalar parameter adaptive learning rate. It tracks a moving average of the element wise squared gradient $\mat{g}_2^{(I)}$, which is then used to scale (element wise) the gradient in the update rule. See \textcite{Hinton2012,GoodfellowEtAl2016} among others. The update rule using RMSprop for maximization\footnote{Instead of the more common minimization, therefore $+$ in the update of $\mat{\theta}$.} is
+Our method of choice is \emph{root mean squared propagation} (RMSprop) \parencite{Hinton2012}. This is a well-known method in machine learning for training neural networks. It is a variation of gradient descent with an adaptive learning rate for each scalar parameter. It tracks a moving average of the element-wise squared gradient $\mat{g}_2^{(I)}$, which is then used to scale (element-wise) the gradient in the update rule. See \textcite{Hinton2012,GoodfellowEtAl2016} among others.
The update rule using RMSprop for maximization\footnote{Instead of the more common minimization, therefore $+$ in the update of $\mat{\theta}$.} is
 \begin{align*}
     \mat{g}_2^{(I + 1)} &= \nu \mat{g}_2^{(I)} + (1 - \nu)\nabla l_n(\mat{\theta}^{(I)})\odot\nabla l_n(\mat{\theta}^{(I)}), \\
     \mat{\theta}^{(I + 1)} &= \mat{\theta}^{(I)} + \frac{\lambda}{\sqrt{\mat{g}_2^{(I + 1)}} + \epsilon}\odot\nabla l_n(\mat{\theta}^{(I)}).
 \end{align*}
-The parameters $\nu = 0.9$, $\lambda = 10^{-3}$ and $\epsilon\approx 1.49\cdot 10^{-8}$ are fixed. The initial value of $\mat{g}_2^{(0)} = \mat{0}$, the symbol $\odot$ denotes the Hadamard product, that is the element wise multiplication. The division and square root operation are performed element wise as well. According to our experiments, RMSprop requires in the range of $50$ till $1000$ iterations till convergence while gradient ascent with a learning rate of $10^{-3}$ is in the range of $1000$ till $10000$.
+The parameters $\nu = 0.9$, $\lambda = 10^{-3}$ and $\epsilon\approx 1.49\cdot 10^{-8}$ are fixed. The initial value is $\mat{g}_2^{(0)} = \mat{0}$; the symbol $\odot$ denotes the Hadamard product, that is, element-wise multiplication. The division and square root operations are performed element-wise as well. According to our experiments, RMSprop requires in the range of $50$ to $1000$ iterations until convergence, while gradient ascent with a learning rate of $10^{-3}$ requires in the range of $1000$ to $10000$ iterations.
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection{Small Data Sets}\label{sec:ising-small-data-sets}
@@ -1135,85 +1145,20 @@ Next, going over the PSQTs one by one, a few words about the preferred positions
 
 The results of our analysis in the previous paragraph agree with the configuration of the chess board most associated with observed chess game outcomes. This arrangement also aligns with the understanding of human chess players of an average configuration at any moment during the game.
 
 \section{Discussion}
+We have addressed sufficient dimension reduction for tensor-valued predictors in regression and classification problems. By proposing a generalized multilinear model for the inverse conditional distribution, we obtained a multilinear sufficient reduction with consistent and asymptotically normal parameter estimates. Moreover, our approach of leveraging manifolds to resolve the issue of unidentifiable parameters, which is required for proving the asymptotic results, leads to an even more flexible modeling framework. This allows building complex and potentially problem-specific parameter spaces that incorporate additional domain-specific knowledge into the model.
+
+An additional powerful extension of our model involves considering a sum of separable Kronecker predictors. This is motivated by the equivalence of a Kronecker product to a rank 1 tensor. By allowing a sum of a few separable Kronecker predictors, we remove the implicit rank 1 constraint. However, if this extension is to be applied to the SDR setting, as in this paper, it is crucial to ensure that the sum of Kronecker products forms a parameter manifold to apply our theory. While we anticipate that this approach can lead to intriguing and powerful models, there are certain details that need to be resolved first.
+
+\todo{finish!}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\printbibliography[heading=bibintoc, title={References}]
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\appendix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-\section{Examples}\label{app:examples}
-
-\begin{example}[Vectorization]\label{ex:vectorization}
-    Given a matrix
-    \begin{displaymath}
-        \mat{A} = \begin{pmatrix}
-            1 & 4 & 7 \\
-            2 & 5 & 8 \\
-            3 & 6 & 9
-        \end{pmatrix}
-    \end{displaymath}
-    its vectorization is $\vec{\mat{A}} = \t{(1, 2, 3, 4, 5, 6, 7, 8, 9)}$ and its half vectorization $\vech{\mat{A}} = \t{(1, 2, 3, 5, 6, 9)}$. Let a $\ten{A}$ be a tensor of dimension $3\times 3\times 3$ given by
-    \begin{displaymath}
-        \ten{A}_{:,:,1} = \begin{pmatrix}
-            1 & 4 & 7 \\
-            2 & 5 & 8 \\
-            3 & 6 & 9
-        \end{pmatrix},
-        \qquad
-        \ten{A}_{:,:,2} = \begin{pmatrix}
-            10 & 13 & 16 \\
-            11 & 14 & 17 \\
-            12 & 15 & 18
-        \end{pmatrix},
-        \qquad
-        \ten{A}_{:,:,3} = \begin{pmatrix}
-            19 & 22 & 25 \\
-            20 & 23 & 26 \\
-            21 & 24 & 27
-        \end{pmatrix}.
-    \end{displaymath}
-    Then the vectorization of $\ten{A}$ is given by
-    \begin{displaymath}
-        \vec{\ten{A}} = \t{(1, 2, 3, 4, ..., 26, 27)}\in\mathbb{R}^{27}.
-    \end{displaymath}
-\end{example}
-
-\begin{example}[Matricization]
-    Let $\ten{A}$ be the $3\times 4\times 2$ tensor given by
-    \begin{displaymath}
-        \ten{A}_{:,:,1} = \begin{pmatrix}
-            1 & 4 & 7 & 10 \\
-            2 & 5 & 8 & 11 \\
-            3 & 6 & 9 & 12
-        \end{pmatrix},
-        \ten{A}_{:,:,2} = \begin{pmatrix}
-            13 & 16 & 19 & 22 \\
-            14 & 17 & 20 & 23 \\
-            15 & 18 & 21 & 24
-        \end{pmatrix}.
-    \end{displaymath}
-    Its matricizations are then
-    \begin{gather*}
-        \ten{A}_{(1)} = \begin{pmatrix}
-            1 & 4 & 7 & 10 & 13 & 16 & 19 & 22 \\
-            2 & 5 & 8 & 11 & 14 & 17 & 20 & 23 \\
-            3 & 6 & 9 & 12 & 15 & 18 & 21 & 24
-        \end{pmatrix},
-        \qquad
-        \ten{A}_{(2)} = \begin{pmatrix}
-            1 & 2 & 3 & 13 & 14 & 15 \\
-            4 & 5 & 6 & 16 & 17 & 18 \\
-            7 & 8 & 9 & 19 & 20 & 21 \\
-            10 & 11 & 12 & 22 & 23 & 24
-        \end{pmatrix}, \\
-        \ten{A}_{(3)} = \begin{pmatrix}
-            1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 \\
-            13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 & 21 & 22 & 23 & 24
-        \end{pmatrix}.
-    \end{gather*}
-\end{example}
-
-
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Tensor Calculus and Multi Linear Algebra}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1776,9 +1721,77 @@ The following is a technical Lemma used in the proof of \cref{thm:asymptotic-nor
\end{proof}

+\section{Examples}\label{app:examples}
+
+\begin{example}[Vectorization]\label{ex:vectorization}
+    Given a matrix
+    \begin{displaymath}
+        \mat{A} = \begin{pmatrix}
+            1 & 4 & 7 \\
+            2 & 5 & 8 \\
+            3 & 6 & 9
+        \end{pmatrix}
+    \end{displaymath}
+    its vectorization is $\vec{\mat{A}} = \t{(1, 2, 3, 4, 5, 6, 7, 8, 9)}$ and its half vectorization $\vech{\mat{A}} = \t{(1, 2, 3, 5, 6, 9)}$. Let $\ten{A}$ be a tensor of dimension $3\times 3\times 3$ given by
+    \begin{displaymath}
+        \ten{A}_{:,:,1} = \begin{pmatrix}
+            1 & 4 & 7 \\
+            2 & 5 & 8 \\
+            3 & 6 & 9
+        \end{pmatrix},
+        \qquad
+        \ten{A}_{:,:,2} = \begin{pmatrix}
+            10 & 13 & 16 \\
+            11 & 14 & 17 \\
+            12 & 15 & 18
+        \end{pmatrix},
+        \qquad
+        \ten{A}_{:,:,3} = \begin{pmatrix}
+            19 & 22 & 25 \\
+            20 & 23 & 26 \\
+            21 & 24 & 27
+        \end{pmatrix}.
+    \end{displaymath}
+    Then the vectorization of $\ten{A}$ is given by
+    \begin{displaymath}
+        \vec{\ten{A}} = \t{(1, 2, 3, 4, \ldots, 26, 27)}\in\mathbb{R}^{27}.
+    \end{displaymath}
+\end{example}
+
+\begin{example}[Matricization]
+    Let $\ten{A}$ be the $3\times 4\times 2$ tensor given by
+    \begin{displaymath}
+        \ten{A}_{:,:,1} = \begin{pmatrix}
+            1 & 4 & 7 & 10 \\
+            2 & 5 & 8 & 11 \\
+            3 & 6 & 9 & 12
+        \end{pmatrix},
+        \ten{A}_{:,:,2} = \begin{pmatrix}
+            13 & 16 & 19 & 22 \\
+            14 & 17 & 20 & 23 \\
+            15 & 18 & 21 & 24
+        \end{pmatrix}.
+    \end{displaymath}
+    Its matricizations are then
+    \begin{gather*}
+        \ten{A}_{(1)} = \begin{pmatrix}
+            1 & 4 & 7 & 10 & 13 & 16 & 19 & 22 \\
+            2 & 5 & 8 & 11 & 14 & 17 & 20 & 23 \\
+            3 & 6 & 9 & 12 & 15 & 18 & 21 & 24
+        \end{pmatrix},
+        \qquad
+        \ten{A}_{(2)} = \begin{pmatrix}
+            1 & 2 & 3 & 13 & 14 & 15 \\
+            4 & 5 & 6 & 16 & 17 & 18 \\
+            7 & 8 & 9 & 19 & 20 & 21 \\
+            10 & 11 & 12 & 22 & 23 & 24
+        \end{pmatrix}, \\
+        \ten{A}_{(3)} = \begin{pmatrix}
+            1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 \\
+            13 & 14 & 15 & 16 & 17 & 18 & 19 & 20 & 21 & 22 & 23 & 24
+        \end{pmatrix}.
+    \end{gather*}
+\end{example}

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\printbibliography[heading=bibintoc, title={References}]
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{document}