fix: added second argument to mcrossprod, add: kpir.mle, wip: LaTeX

parent 6455d27386
commit 1838da8b3d
@@ -293,13 +293,13 @@ with $\pi$ being a permutation of $p q k r$ elements corresponding to permuting

\section{Kronecker Covariance Structure}
As before, we let the sample model for tensor valued observations and responses of order $r$ be
\begin{displaymath}
    \ten{X} = \ten{\mu} + \ten{F}\times_{j\in[r]}\mat{\alpha}_j + \ten{\epsilon}
\end{displaymath}
but the error tensor $\ten{\epsilon}\sim\mathcal{TN}(0, \mat{\Delta}_1, ..., \mat{\Delta}_r)$ is now Tensor Normal distributed with mean zero and covariance matrices $\mat{\Delta}_1, ..., \mat{\Delta}_r$.
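Equivalently, in vectorized form (a standard property of the Tensor Normal; note the reversed order of the Kronecker factors under column-major vectorization, written $\operatorname{vec}$ here),
\begin{displaymath}
    \operatorname{vec}(\ten{\epsilon}) \sim \mathcal{N}(0, \mat{\Delta}_r\otimes\cdots\otimes\mat{\Delta}_1).
\end{displaymath}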

The sample model for $n$ observations has the same form with an additional sample axis in the last mode of $\ten{X}$ and $\ten{F}$ with dimensions $p_1\times ...\times p_r\times n$ and $q_1\times ...\times q_r\times n$, respectively. Let the residual tensor be
\begin{displaymath}
    \ten{R} = \ten{X} - \ten{\mu} - \ten{F}\times_{j\in[r]}\mat{\alpha}_j.
\end{displaymath}

By the definition of the Tensor Normal, using the notation $p_{\lnot j} = \prod_{k\neq j}p_k$, we get for observations $\ten{X}, \ten{F}$ the log-likelihood in terms of the residuals as
@@ -315,7 +315,7 @@ For deriving the MLE estimates we compute the differential of the log-likelihood

\begin{displaymath}
    \d l =
        -\sum_{j = 1}^r \frac{n p_{\lnot j}}{2}\d\log|{\mat{\Delta}}_j|
        -\frac{1}{2}\sum_{j = 1}^r\langle {\ten{R}}\times_{k\in[r]\backslash j}{\mat{\Delta}}_k^{-1}\times_j\d{\mat{\Delta}}^{-1}_j, \ten{R} \rangle
        -\langle {\ten{R}}\times_{j\in[r]}{\mat{\Delta}}_j^{-1}, \d{\ten{R}} \rangle.
\end{displaymath}
Using $\d\log|\mat{A}| = \tr(\mat{A}^{-1}\d\mat{A})$ and $\d\mat{A}^{-1} = -\mat{A}^{-1}(\d\mat{A})\mat{A}^{-1}$ as well as $\langle\ten{A}, \ten{B}\rangle = \tr(\ten{A}_{(j)}\t{\ten{B}_{(j)}})$ for any $j = 1, ..., r$, we get the differential of the estimated log-likelihood as
@@ -349,14 +349,43 @@ as well as gradients (even though they are not really used, except in the case of
We continue by substituting the covariance estimates and get
\begin{align*}
    \d\hat{l} &= -\langle \ten{R}\times_{j\in[r]}{\widehat{\mat{\Delta}}}_j^{-1}, \d\ten{R} \rangle \\
        &= \sum_{j = 1}^r \langle \ten{R}\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1}, \ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k\times_j\d\widehat{\mat{\alpha}}_j \rangle \\
        &= \sum_{j = 1}^r \tr\big( (\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}\t{(\ten{R}\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1})_{(j)}}\d\widehat{\mat{\alpha}}_j \big).
\end{align*}
From this, the gradient for each of the parameter matrices is
\begin{align*}
    \nabla_{\widehat{\mat{\alpha}}_j}\hat{l} &= (\ten{R}\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1})_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}}
        \qquad{\color{gray}p_j\times q_j} && j = 1, ..., r.
\end{align*}
By equating the gradients to zero and expanding $\ten{R}$ we get a system of normal equations
\begin{gather*}
    0 \overset{!}{=} \nabla_{\widehat{\mat{\alpha}}_j}\hat{l}
    = ((\ten{X}-\widehat{\ten{\mu}}-\ten{F}\times_{l\in[r]}\widehat{\mat{\alpha}}_l)\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1})_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}} \\
    % rearranged:
    ((\ten{X}-\widehat{\ten{\mu}})\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1})_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}} = (\ten{F}\times_{k\in[r]}\widehat{\mat{\Delta}}_k^{-1}\widehat{\mat{\alpha}}_k)_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}}
\end{gather*}
with cross-dependent solutions for the MLE estimates $\widehat{\mat{\alpha}}_j$ given by
\begin{displaymath}
    \widehat{\mat{\alpha}}_j = \widehat{\mat{\Delta}}_j((\ten{X}-\widehat{\ten{\mu}})\times_{k\in[r]}{\widehat{\mat{\Delta}}}_k^{-1})_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}}[(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\Delta}}_k^{-1}\widehat{\mat{\alpha}}_k)_{(j)}\t{(\ten{F}\times_{k\in[r]\backslash j}\widehat{\mat{\alpha}}_k)_{(j)}}]^{-1}
\end{displaymath}
for $j = 1, ..., r$.

\begin{example}[Simple multivariate case ($r = 1$)]
    Let us consider the case $r = 1$, the usual multivariate regression setting for vector valued predictors. Here $\mat{F}$ denotes the $n\times q$ centered model matrix and the responses are collected in the $n\times p$ matrix $\mat{X}$ with the $n$ observations in the rows and the variables in the columns. Furthermore, let the residual estimate matrix be $\mat{R} = \mat{X} - \mat{F}\t{\widehat{\mat{\alpha}}_1}$. In this case the tensor MLE estimates simplify to their well known form
    \begin{align*}
        \widehat{\mat{\alpha}}_1
            &= \widehat{\mat{\Delta}}_1((\ten{X}-\widehat{\ten{\mu}})\times_{k\in[1]}{\widehat{\mat{\Delta}}}_k^{-1})_{(1)}\t{(\ten{F}\times_{k\in\emptyset}\widehat{\mat{\alpha}}_k)_{(1)}}[(\ten{F}\times_{k\in\emptyset}\widehat{\mat{\Delta}}_k^{-1}\widehat{\mat{\alpha}}_k)_{(1)}\t{(\ten{F}\times_{k\in\emptyset}\widehat{\mat{\alpha}}_k)_{(1)}}]^{-1} \\
            &= \widehat{\mat{\Delta}}_1\widehat{\mat{\Delta}}_1^{-1}(\ten{X}-\widehat{\ten{\mu}})_{(1)}\t{\ten{F}_{(1)}}[\ten{F}_{(1)}\t{\ten{F}_{(1)}}]^{-1} \\
            &= \t{\mat{X}}\mat{F}(\t{\mat{F}}\mat{F})^{-1}
    \end{align*}
    as well as the covariance estimate
    \begin{align*}
        \widehat{\mat{\Delta}}_1
            &= \frac{1}{n}(\ten{R}\times_{k\in\emptyset}{\widehat{\mat{\Delta}}}_k^{-1})_{(1)}\t{\ten{R}_{(1)}} = \frac{1}{n}\t{\mat{R}}\mat{R}
                = \frac{1}{n}\t{(\mat{X} - \mat{F}\t{\widehat{\mat{\alpha}}_1})}(\mat{X} - \mat{F}\t{\widehat{\mat{\alpha}}_1}).
    \end{align*}
    Note that $\ten{F}_{(1)} = \t{\mat{F}}$, $\ten{X}_{(1)} = \t{\mat{X}}$ and $\ten{R}_{(1)} = \t{\mat{R}}$.
\end{example}
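For a quick numerical sanity check of this reduction, a minimal R sketch with simulated data (all names are ad hoc, not part of the package):
\begin{verbatim}
n <- 100; p <- 4; q <- 2
F <- matrix(rnorm(n * q), n, q)                  # centered model matrix
alpha <- matrix(rnorm(p * q), p, q)
X <- F %*% t(alpha) + matrix(rnorm(n * p), n, p) # mean zero model
alpha.hat <- t(X) %*% F %*% solve(t(F) %*% F)    # \hat{alpha}_1
R <- X - F %*% t(alpha.hat)                      # residuals
Delta.hat <- crossprod(R) / n                    # \hat{Delta}_1
\end{verbatim}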

\paragraph{Comparison to the general case:} {\color{red} There are two main differences. First, the general case has a closed form solution for the gradient due to the explicit nature of the MLE estimate of $\widehat{\mat\Delta}$, compared to the mutually dependent MLE estimates $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$. On the other hand, the general case has dramatically bigger dimensions of the covariance matrix ($p q \times p q$) compared to the two Kronecker components with dimensions $q \times q$ and $p \times p$. This means that in the general case there is a huge performance penalty due to the dimensions of $\widehat{\mat\Delta}$, while in the other case an extra estimation is required to determine $\widehat{\mat\Delta}_1$, $\widehat{\mat\Delta}_2$.}

@@ -508,11 +537,11 @@ and therefore the gradients

\section{Thoughts on initial value estimation}
\todo{This section uses an alternative notation as it already tries to generalize to general multi-dimensional arrays. Furthermore, one of the main differences is that the observations are indexed in the \emph{last} mode. The benefit of this is that the mode product and parameter matrix indices match not only in the population model but also in sample versions.}
Let $\ten{X}_i, \ten{F}_i$ be order $r$ tensors of dimensions $p_1\times ... \times p_r$ and $q_1\times ... \times q_r$, respectively. Also denote the error tensor $\ten{\epsilon}_i$ of the same order and dimensions as $\ten{X}_i$. The considered model for the $i$'th observation is
\begin{displaymath}
    \ten{X}_i = \ten{\mu} + \ten{F}_i\times\{ \mat{\alpha}_1, ..., \mat{\alpha}_r \} + \ten{\epsilon}_i
\end{displaymath}
where we assume $\ten{\epsilon}_i$ to be i.i.d. mean zero tensor normal distributed $\ten{\epsilon}_i\sim\mathcal{TN}(0, \mat{\Delta}_1, ..., \mat{\Delta}_r)$ for $\mat{\Delta}_j\in\mathcal{S}^{p_j}_{++}$, $j = 1, ..., r$. Given $i = 1, ..., n$ observations, the collected model containing all observations is
\begin{displaymath}
    \ten{X} = \ten{\mu} + \ten{F}\times\{ \mat{\alpha}_1, ..., \mat{\alpha}_r, \mat{I}_n \} + \ten{\epsilon}
\end{displaymath}
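As a consistency check of this indexing convention (using the standard matricization identity for the mode product, with $\operatorname{vec}$ the column-major vectorization), unfolding the collected model along the sample mode gives
\begin{displaymath}
    \ten{X}_{(r+1)} = \mat{1}_n\t{\operatorname{vec}(\ten{\mu})} + \ten{F}_{(r+1)}\t{(\mat{\alpha}_r\otimes\cdots\otimes\mat{\alpha}_1)} + \ten{\epsilon}_{(r+1)}
\end{displaymath}
which for $r = 1$ recovers the multivariate regression form of the example above.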
@@ -18,6 +18,7 @@ export(hopca)
export(kpir.approx)
export(kpir.base)
export(kpir.ls)
export(kpir.mle)
export(kpir.momentum)
export(kpir.new)
export(mat)
@@ -2,11 +2,11 @@
#'
#' This is a special case of the "Khatri-Rao Product".
#'
#' @seealso \code{\link{rowKronecker}}
#'
#' @export
colKronecker <- function(A, B) {
    sapply(seq_len(ncol(A)), function(i) kronecker(A[, i], B[, i]))
}
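A small sanity check of `colKronecker` (illustrative only, not part of the package examples):

    # column i of the result is kronecker(A[, i], B[, i])
    A <- matrix(rnorm(2 * 3), 2, 3)
    B <- matrix(rnorm(4 * 3), 4, 3)
    C <- colKronecker(A, B)   # (2 * 4) x 3 matrix
    stopifnot(all.equal(C[, 2], kronecker(A[, 2], B[, 2])))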

#' Row wise kronecker product
@@ -14,7 +14,7 @@ colKronecker <- function(A, B) {
#' Also known as the "Face-splitting product". This is a special case of the
#' "Khatri-Rao Product".
#'
#' @seealso \code{\link{colKronecker}}
#'
#' @export
rowKronecker <- function(A, B) {
@@ -161,7 +161,7 @@ kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy)[-1]),
        inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)

        # backtracking loop
        for (delta in step.size * 0.618034^seq.int(0L, length = max.line.iter)) {
            # Update `alpha` and `beta` (note: add(+), the gradients are already
            # pointing into the negative slope direction of the loss because they
            # are the gradients of the log-likelihood [NOT the negative log-likelihood])
@@ -14,8 +14,6 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L,
            dims[sample.axis] <- length(Fy)
            dims
        })
    }
    # Check dimensions
    stopifnot(length(dim(X)) == length(dim(Fy)))
@@ -60,8 +58,6 @@ kpir.ls <- function(X, Fy, max.iter = 20L, sample.axis = 1L,
    Deltas <- Map(mcrossprod, list(R), mode = modes)
    Deltas <- Map(`*`, 1 / dim(X)[sample.axis], Deltas)

    list(alphas = structure(alphas, names = as.character(modes)),
         Deltas = structure(Deltas, names = as.character(modes)))
}
tensorPredictors/R/kpir_mle.R (new file, 90 lines)
@@ -0,0 +1,90 @@
#' Per mode (axis) MLE
#'
#' @export
kpir.mle <- function(X, Fy, max.iter = 500L, sample.axis = 1L,
    logger = NULL #, eps = .Machine$double.eps
) {
    ### Step 0: Setup/Initialization
    if (!is.array(Fy)) {
        # scalar response case (add new axis of size 1)
        dim(Fy) <- ifelse(seq_along(dim(X)) == sample.axis, dim(X)[sample.axis], 1L)
    }
    # Check dimensions and matching of axis (tensor order)
    stopifnot(exprs = {
        length(dim(X)) == length(dim(Fy))
        dim(X)[sample.axis] == dim(Fy)[sample.axis]
    })
    # warn about model constraints
    if (any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])) {
        warning("Degenerate case 'any(dim(Fy)[-sample.axis] >= dim(X)[-sample.axis])'")
    }

    # extract dimensions (for easier handling as local variables)
    modes <- seq_along(dim(X))[-sample.axis]    # predictor axis indices
    n <- dim(X)[sample.axis]                    # observation count (scalar)
    p <- dim(X)[-sample.axis]                   # predictor dimensions (vector)
    # q <- dim(Fy)[-sample.axis]                  # response dimensions (vector)
    # r <- length(dim(X)) - 1L                    # tensor order (scalar)

    # Means for X and Fy (a.k.a. averages over the sample axis)
    meanX <- apply(X, modes, mean, simplify = TRUE)
    meanFy <- apply(Fy, modes, mean, simplify = TRUE)
    # Center both X and Fy
    X <- sweep(X, modes, meanX)
    Fy <- sweep(Fy, modes, meanFy)


    ### Step 1: Initial value estimation
    alphas <- Map(function(mode, ncol) {
        La.svd(mcrossprod(X, mode = mode), ncol)$u
    }, modes, dim(Fy)[modes])
    # Residuals
    R <- X - mlm(Fy, alphas, modes = modes)
    # Covariance moment estimates
    Deltas <- Map(mcrossprod, list(R), mode = modes)
    Deltas <- Map(function(Delta, j) (n * prod(p[-j]))^(-1) * Delta,
        Deltas, seq_along(Deltas))

    # Call history callback (logger) before the first iteration
    if (is.function(logger)) { do.call(logger, c(0L, NA, alphas, Deltas)) }


    ### Step 2: Alternating estimate updates
    for (iter in seq_len(max.iter)) {
        # Compute covariance inverses
        Deltas.inv <- Map(solve, Deltas)

        # "standardize" X
        Z <- mlm(X, Deltas.inv, modes = modes)

        # Compute new alpha estimates
        alphas <- Map(function(j) {
            # MLE estimate for alpha_j | alpha_k, Delta_l for all k != j and l
            FF <- mlm(Fy, alphas[-j], modes = modes[-j])
            Deltas[[j]] %*% t(solve(
                t(mcrossprod(mlm(FF, Deltas.inv[-j], modes = modes[-j]), FF, mode = modes[j])),
                t(mcrossprod(Z, FF, mode = modes[j]))))
        }, seq_along(modes))

        # update residuals
        R <- X - mlm(Fy, alphas, modes = modes)

        # next Delta estimates (note: the mode-wise covariance *inverses*
        # enter the crossproduct, matching the MLE normal equations above)
        Deltas <- Map(function(j) {
            # MLE estimate for Delta_j | Delta_k, alpha_l for all k != j and l
            (n * prod(p[-j]))^(-1) * mcrossprod(
                mlm(R, Deltas.inv[-j], modes = modes[-j]), R, mode = modes[j])
        }, seq_along(modes))


        # TODO: break condition!!!
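        # One possible criterion (hypothetical sketch, assuming the previous
        # iterates were kept in `alphas.old` and a tolerance `eps` was chosen):
        #   if (all(mapply(function(a, a0) {
        #           norm(a - a0, "F") < eps * (1 + norm(a0, "F"))
        #       }, alphas, alphas.old))) break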

        # Call history callback
        if (is.function(logger)) { do.call(logger, c(iter, NA, alphas, Deltas)) }
    }

    list(alphas = structure(alphas, names = as.character(modes)),
         Deltas = structure(Deltas, names = as.character(modes)),
         meanX = meanX, meanFy = meanFy)
}
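An illustrative call of `kpir.mle` (simulated data; dimensions and the logger are made up for demonstration):

    n <- 100
    X  <- array(rnorm(n * 4 * 6), dim = c(n, 4, 6))   # sample.axis = 1 (default)
    Fy <- array(rnorm(n * 2 * 3), dim = c(n, 2, 3))
    fit <- kpir.mle(X, Fy, max.iter = 50L,
        logger = function(iter, ...) cat("iter:", iter, "\n"))
    str(fit$alphas)   # mode-wise coefficient estimates: 4 x 2 and 6 x 3
    str(fit$Deltas)   # mode-wise covariance estimates:  4 x 4 and 6 x 6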
@@ -137,7 +137,7 @@ kpir.momentum <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy)[-1]),
        inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)

        # backtracking loop
        for (delta in step.size * 0.618034^seq.int(0L, length = max.line.iter)) {
            # Update `alpha` and `beta` (note: add(+), the gradients are already
            # pointing into the negative slope direction of the loss because they
            # are the gradients of the log-likelihood [NOT the negative log-likelihood])
@@ -112,7 +112,7 @@ kpir.new <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy)[-1]),
        inner.prod <- sum(grad.alpha^2) + sum(grad.beta^2)

        # Line Search loop
        for (delta in step.size * 0.618034^seq.int(0L, length = max.line.iter)) {
            # Update `alpha` and `beta` (note: add(+), the gradients are already
            # pointing into the negative slope direction of the loss because they
            # are the gradients of the log-likelihood [NOT the negative log-likelihood])
@@ -14,8 +14,41 @@
#' @note equivalent to \code{tcrossprod(mat(A, mode))} with around the same
#'  performance but only allocates the result matrix.
#'
#' @examples
#' dimA <- c(2, 5, 7, 11)
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' stopifnot(exprs = {
#'     all.equal(mcrossprod(A, mode = 1),      tcrossprod(mat(A, 1)))
#'     all.equal(mcrossprod(A, , 2),           tcrossprod(mat(A, 2)))
#'     all.equal(mcrossprod(A, , mode = 3),    tcrossprod(mat(A, 3)))
#'     all.equal(mcrossprod(A, A, 4),          tcrossprod(mat(A, 4)))
#' })
#'
#' dimA <- c(2, 5, 7, 11)
#' dimB <- c(3, 5, 7, 11) # same as dimA except the first entry
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' B <- array(rnorm(prod(dimB)), dim = dimB)
#' stopifnot(all.equal(
#'     mcrossprod(A, B, 1),
#'     tcrossprod(mat(A, 1), mat(B, 1))
#' ))
#'
#' dimA <- c(5, 2, 7, 11)
#' dimB <- c(5, 3, 7, 11) # same as dimA except the second entry
#' A <- array(rnorm(prod(dimA)), dim = dimA)
#' B <- array(rnorm(prod(dimB)), dim = dimB)
#' stopifnot(all.equal(
#'     mcrossprod(A, B, mode = 2L),
#'     tcrossprod(mat(A, 2), mat(B, 2))
#' ))
#'
#' @export
mcrossprod <- function(A, B, mode = length(dim(A))) {
    storage.mode(A) <- "double"
    if (missing(B)) {
        .Call("C_mcrossprod_sym", A, as.integer(mode))
    } else {
        storage.mode(B) <- "double"
        .Call("C_mcrossprod", A, B, as.integer(mode))
    }
}
@@ -23,7 +23,7 @@
#' stopifnot(all.equal(C1, C4))
#'
#' # selected modes
#' C1 <- mlm(A, B[2:3], modes = 2:3)
#' C2 <- mlm(A, B[[2]], B[[3]], modes = 2:3)
#' C3 <- ttm(ttm(A, B[[2]], 2), B[[3]], 3)
#' stopifnot(all.equal(C1, C2))
@@ -32,7 +32,7 @@
#' # analog to matrix multiplication
#' A <- matrix(rnorm( 6), 2, 3)
#' B <- matrix(rnorm(12), 3, 4)
#' C <- matrix(rnorm(20), 5, 4)
#' stopifnot(all.equal(
#'     A %*% B %*% t(C),
#'     mlm(B, list(A, C))
@@ -56,6 +56,16 @@
#' stopifnot(all.equal(P1, P2))
#' stopifnot(all.equal(P1, P3))
#'
#' # Concatenation of MLMs is an MLM
#' dimX <- c(4, 8, 6, 3)
#' dimA <- c(3, 17, 19, 2)
#' dimB <- c(7, 11, 13, 5)
#' X <- array(rnorm(prod(dimX)), dim = dimX)
#' As <- Map(function(p, q) matrix(rnorm(p * q), p, q), dimA, dimX)
#' Bs <- Map(function(p, q) matrix(rnorm(p * q), p, q), dimB, dimA)
#' # (X x {A1, A2, A3, A4}) x {B1, B2, B3, B4} = X x {B1 A1, B2 A2, B3 A3, B4 A4}
#' stopifnot(all.equal(mlm(mlm(X, As), Bs), mlm(X, Map(`%*%`, Bs, As))))
#'
#' @export
mlm <- function(A, B, ..., modes = seq_along(B)) {
    # Collect all matrices in `B`
@@ -1,78 +0,0 @@ (removed: the old var.kronecker implementation)
#' Kronecker decomposed Variance Matrix estimation.
#'
#' @description Computes the kronecker decomposition factors of the variance
#'  matrix
#' \deqn{\var{X} = tr(L)tr(R) (L\otimes R).}
#'
#' @param X numeric matrix or 3d array.
#' @param shape in case of \code{X} being a matrix, this specifies the predictor
#'  shape, otherwise ignored.
#' @param center boolean specifying if \code{X} is centered before computing the
#'  left/right second moments. This is useful in the case of already centered
#'  data.
#'
#' @returns List containing
#' \describe{
#'   \item{lhs}{Left Hand Side \eqn{L} of the kronecker decomposed variance matrix}
#'   \item{rhs}{Right Hand Side \eqn{R} of the kronecker decomposed variance matrix}
#'   \item{trace}{Scaling factor \eqn{tr(L)tr(R)} for the variance matrix}
#' }
#'
#' @examples
#' n <- 503L   # nr. of observations
#' p <- 32L    # first predictor dimension
#' q <- 27L    # second predictor dimension
#' lhs <- 0.5^abs(outer(seq_len(q), seq_len(q), `-`))   # Left Var components
#' rhs <- 0.5^abs(outer(seq_len(p), seq_len(p), `-`))   # Right Var components
#' X <- rmvnorm(n, sigma = kronecker(lhs, rhs))         # Multivariate normal data
#'
#' # Estimate kronecker decomposed variance matrix
#' dim(X)   # c(n, p * q)
#' fit <- var.kronecker(X, shape = c(p, q))
#'
#' # equivalent
#' dim(X) <- c(n, p, q)
#' fit <- var.kronecker(X)
#'
#' # Compute complete estimated variance matrix
#' varX.hat <- fit$trace^-1 * kronecker(fit$lhs, fit$rhs)
#'
#' # or its inverse
#' varXinv.hat <- fit$trace * kronecker(solve(fit$lhs), solve(fit$rhs))
#'
var.kronecker <- function(X, shape = dim(X)[-1], center = TRUE) {
    # Get and check predictor dimensions
    n <- nrow(X)
    if (length(dim(X)) == 2L) {
        stopifnot(ncol(X) == prod(shape[1:2]))
        p <- as.integer(shape[1])       # Predictor "height"
        q <- as.integer(shape[2])       # Predictor "width"
        dim(X) <- c(n, p, q)
    } else if (length(dim(X)) == 3L) {
        p <- dim(X)[2]
        q <- dim(X)[3]
    } else {
        stop("'X' must be a matrix or 3-tensor")
    }

    if (isTRUE(center)) {
        # Center X; X[, i, j] <- X[, i, j] - mean(X[, i, j])
        X <- scale(X, scale = FALSE)

        print(range(attr(X, "scaled:center")))

        dim(X) <- c(n, p, q)
    }

    # Calc left/right side of kronecker structured covariance
    # var(X) = var.lhs %x% var.rhs
    var.lhs <- .rowMeans(apply(X, 1, crossprod), q * q, n)
    dim(var.lhs) <- c(q, q)
    var.rhs <- .rowMeans(apply(X, 1, tcrossprod), p * p, n)
    dim(var.rhs) <- c(p, p)

    # Estimate scaling factor tr(var(X)) = tr(var.lhs) tr(var.rhs)
    trace <- sum(X^2) / n

    list(lhs = var.lhs, rhs = var.rhs, trace = trace)
}
@@ -9,14 +9,17 @@
/* Tensor Times Matrix a.k.a. Mode Product */
extern SEXP ttm(SEXP A, SEXP X, SEXP mode);

/* Tensor Mode Crossproduct `A_(m) B_(m)^T` */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP mode);
/* Symmetric Tensor Mode Crossproduct `A_(m) A_(m)^T` */
extern SEXP mcrossprod_sym(SEXP A, SEXP mode);

/* List of registered routines (a.k.a. C entry points) */
static const R_CallMethodDef CallEntries[] = {
    // {"FastPOI_C_sub", (DL_FUNC) &FastPOI_C_sub, 5},  // NOT USED
    {"C_ttm",            (DL_FUNC) &ttm,            3},
    {"C_mcrossprod",     (DL_FUNC) &mcrossprod,     3},
    {"C_mcrossprod_sym", (DL_FUNC) &mcrossprod_sym, 2},
    {NULL, NULL, 0}
};
@@ -11,16 +11,100 @@
/**
 * Tensor Mode Crossproduct
 *
 *      C = A_(m) t(B_(m))
 *
 * For matrices `A` and `B`, the first mode `mcrossprod(A, B, 1)` is equivalent
 * to `A %*% t(B)` (`tcrossprod`). On the other hand, for mode two
 * `mcrossprod(A, B, 2)` the equivalence is `t(A) %*% B` (`crossprod`).
 *
 * @param A multi-dimensional array
 * @param B multi-dimensional array
 * @param m mode index (1-indexed)
 */
extern SEXP mcrossprod(SEXP A, SEXP B, SEXP m) {
    // get zero indexed mode
    int mode = asInteger(m) - 1;

    // get dimension attributes
    SEXP dimA = getAttrib(A, R_DimSymbol);
    SEXP dimB = getAttrib(B, R_DimSymbol);

    // validate mode (0-indexed, must be smaller than the tensor order)
    if (mode < 0 || length(dimA) <= mode || length(dimB) <= mode) {
        error("Illegal mode");
    }

    // the strides
    //  `stride[0] <- prod(dim(A)[seq_len(mode - 1)])`
    //  `stride[1] <- dim(A)[mode]`
    //  `stride[2] <- prod(dim(A)[-seq_len(mode)])`
    // Note: the middle stride is ignored (to be consistent with the sym version)
    int stride[3] = {1, 0, 1};
    for (int i = 0; i < length(dimA); ++i) {
        int size = INTEGER(dimA)[i];
        stride[0] *= (i < mode) ? size : 1;
        stride[2] *= (i > mode) ? size : 1;
        // check margin matching of `A` and `B`
        if (i != mode && size != INTEGER(dimB)[i]) {
            error("Dimension mismatch");
        }
    }
    // result dimensions
    int nrowC = INTEGER(dimA)[mode];
    int ncolC = INTEGER(dimB)[mode];

    // create response matrix C
    SEXP C = PROTECT(allocMatrix(REALSXP, nrowC, ncolC));

    // raw data access pointers
    double* a = REAL(A);
    double* b = REAL(B);
    double* c = REAL(C);

    // employ BLAS dgemm (Double-precision GEneral Matrix Matrix) operation
    // (C = alpha op(A) op(B) + beta C, op is the identity or transposition)
    const double zero = 0.0;
    const double one  = 1.0;
    if (mode == 0) {
        // mode 1: special case C = A_(1) B_(1)^T
        // C = 1 A B^T + 0 C
        F77_CALL(dgemm)("N", "T", &nrowC, &ncolC, &stride[2],
            &one, a, &nrowC, b, &ncolC,
            &zero, c, &nrowC FCONE FCONE);
    } else {
        // other modes written as an accumulated sum of matrix products
        // initialize C to zero
        memset(c, 0, nrowC * ncolC * sizeof(double));

        // sum over all slices of the modes > mode
        for (int i2 = 0; i2 < stride[2]; ++i2) {
            // C = 1 A^T B + 1 C
            F77_CALL(dgemm)("T", "N", &nrowC, &ncolC, &stride[0],
                &one, &a[i2 * stride[0] * nrowC], &stride[0],
                &b[i2 * stride[0] * ncolC], &stride[0],
                &one, c, &nrowC FCONE FCONE);
        }
    }

    // release C to the garbage collector
    UNPROTECT(1);

    return C;
}

/**
 * Symmetric Tensor Mode Crossproduct
 *
 *      C = A_(m) t(A_(m))
 *
 * For a matrix `A`, the first mode `mcrossprod_sym(A, 1)` is equivalent to
 * `A %*% t(A)` (`tcrossprod`). On the other hand, for mode two
 * `mcrossprod_sym(A, 2)` the equivalence is `t(A) %*% A` (`crossprod`).
 *
 * @param A multi-dimensional array
 * @param m mode index (1-indexed)
 */
extern SEXP mcrossprod_sym(SEXP A, SEXP m) {
    // get zero indexed mode
    int mode = asInteger(m) - 1;