wip: writing next method kpir_approx
This commit is contained in:
parent
b2c6d3ca56
commit
203028e255
|
@ -306,10 +306,11 @@ because with $\|\mat{R}_i\|_F^2 = \tr \mat{R}_i\t{\mat{R}_i} = \tr \t{\mat{R}_i}
|
||||||
\todo{ prove they are consistent, especially $\widetilde{\mat\Delta} = \tilde{s}^{-1}(\widetilde{\mat\Delta}_1\otimes\widetilde{\mat\Delta}_2)$!}
|
\todo{ prove they are consistent, especially $\widetilde{\mat\Delta} = \tilde{s}^{-1}(\widetilde{\mat\Delta}_1\otimes\widetilde{\mat\Delta}_2)$!}
|
||||||
|
|
||||||
The hoped for a benefit is that these covariance estimates are in a closed form which means there is no need for an additional iterative estimations step. Before we start with the derivation of the gradients define the following two quantities
|
The hoped for a benefit is that these covariance estimates are in a closed form which means there is no need for an additional iterative estimations step. Before we start with the derivation of the gradients define the following two quantities
|
||||||
\begin{displaymath}
|
\begin{align*}
|
||||||
\mat{S}_1 = \frac{1}{n}\sum_{i = 1}^n \t{\mat{R}_i}\widetilde{\mat{\Delta}}_2^{-1}\mat{R}_i\quad{\color{gray}(q\times q)}, \qquad
|
\mat{S}_1 = \frac{1}{n}\sum_{i = 1}^n \t{\mat{R}_i}\widetilde{\mat{\Delta}}_2^{-1}\mat{R}_i = \frac{1}{n}\ten{R}_{(3)}\t{(\ten{R}\ttm[2]\widetilde{\mat{\Delta}}_2^{-1})_{(3)}}\quad{\color{gray}(q\times q)}, \\
|
||||||
\mat{S}_2 = \frac{1}{n}\sum_{i = 1}^n \mat{R}_i\widetilde{\mat{\Delta}}_1^{-1}\t{\mat{R}_i}\quad{\color{gray}(p\times p)}.
|
\mat{S}_2 = \frac{1}{n}\sum_{i = 1}^n \mat{R}_i\widetilde{\mat{\Delta}}_1^{-1}\t{\mat{R}_i} = \frac{1}{n}\ten{R}_{(2)}\t{(\ten{R}\ttm[3]\widetilde{\mat{\Delta}}_1^{-1})_{(2)}}\quad{\color{gray}(p\times p)}.
|
||||||
\end{displaymath}
|
\end{align*}
|
||||||
|
\todo{Check tensor form!}
|
||||||
|
|
||||||
Now, the matrix normal with the covariance matrix of the vectorized quantities of the form $\mat{\Delta} = s^{-1}(\mat{\Delta}_1\otimes\mat{\Delta}_2)$ has the form
|
Now, the matrix normal with the covariance matrix of the vectorized quantities of the form $\mat{\Delta} = s^{-1}(\mat{\Delta}_1\otimes\mat{\Delta}_2)$ has the form
|
||||||
\begin{align*}
|
\begin{align*}
|
||||||
|
|
|
@ -0,0 +1,985 @@
|
||||||
|
library(tensorPredictors)
|
||||||
|
library(dplyr)
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
### Exec all methods for a given data set and collect logs ###
|
||||||
|
sim <- function(X, Fy, shape, alpha.true, beta.true, max.iter = 500L) {
|
||||||
|
|
||||||
|
# Logger creator
|
||||||
|
logger <- function(name) {
|
||||||
|
eval(substitute(function(iter, loss, alpha, beta, ...) {
|
||||||
|
hist[iter + 1L, ] <<- c(
|
||||||
|
iter = iter,
|
||||||
|
loss = loss,
|
||||||
|
dist = (dist <- dist.subspace(c(kronecker(alpha.true, beta.true)),
|
||||||
|
c(kronecker(alpha, beta)))),
|
||||||
|
dist.alpha = (dist.alpha <- dist.subspace(c(alpha.true), c(alpha))),
|
||||||
|
dist.beta = (dist.beta <- dist.subspace(c( beta.true), c(beta ))),
|
||||||
|
norm.alpha = norm(alpha, "F"),
|
||||||
|
norm.beta = norm(beta, "F")
|
||||||
|
)
|
||||||
|
|
||||||
|
cat(sprintf(
|
||||||
|
"%3d | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
|
||||||
|
iter, loss,
|
||||||
|
dist,
|
||||||
|
nrow(alpha), ncol(alpha), dist.alpha,
|
||||||
|
nrow(beta), ncol(beta), dist.beta
|
||||||
|
))
|
||||||
|
}, list(hist = as.symbol(paste0("hist.", name)))))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Initialize logger history targets
|
||||||
|
hist.base <- hist.new <- hist.momentum <- # hist.kron <-
|
||||||
|
data.frame(iter = seq(0L, max.iter),
|
||||||
|
loss = NA, dist = NA, dist.alpha = NA, dist.beta = NA,
|
||||||
|
norm.alpha = NA, norm.beta = NA
|
||||||
|
)
|
||||||
|
hist.kron <- NULL # TODO: fit kron version
|
||||||
|
|
||||||
|
# Base (old)
|
||||||
|
kpir.base(X, Fy, shape, max.iter = max.iter, logger = logger("base"))
|
||||||
|
|
||||||
|
# New (simple Gradient Descent)
|
||||||
|
kpir.new(X, Fy, shape, max.iter = max.iter, logger = logger("new"))
|
||||||
|
|
||||||
|
# Gradient Descent with Nesterov Momentum
|
||||||
|
kpir.momentum(X, Fy, shape, max.iter = max.iter, logger = logger("momentum"))
|
||||||
|
|
||||||
|
# # Residual Covariance Kronecker product assumpton version
|
||||||
|
# kpir.kron(X, Fy, shape, max.iter = max.iter, logger = logger("kron"))
|
||||||
|
|
||||||
|
# Add method tags
|
||||||
|
hist.base$type <- factor("base")
|
||||||
|
hist.new$type <- factor("new")
|
||||||
|
hist.momentum$type <- factor("momentum")
|
||||||
|
# hist.kron$type <- factor("kron")
|
||||||
|
|
||||||
|
# Combine results and return
|
||||||
|
rbind(hist.base, hist.new, hist.momentum, hist.kron)
|
||||||
|
}
|
||||||
|
|
||||||
|
## Generate some test data / DEBUG
|
||||||
|
n <- 200 # Sample Size
|
||||||
|
p <- sample(1:15, 1) # 11
|
||||||
|
q <- sample(1:15, 1) # 3
|
||||||
|
k <- sample(1:15, 1) # 7
|
||||||
|
r <- sample(1:15, 1) # 5
|
||||||
|
print(c(n, p, q, k, r))
|
||||||
|
|
||||||
|
hist <- NULL
|
||||||
|
for (rep in 1:20) {
|
||||||
|
alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
|
||||||
|
beta.true <- beta <- matrix(rnorm(p * k), p, k)
|
||||||
|
y <- rnorm(n)
|
||||||
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
||||||
|
sin(slope * y + offset)
|
||||||
|
},
|
||||||
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
||||||
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
||||||
|
))
|
||||||
|
Delta <- 0.5^abs(outer(seq_len(p * q), seq_len(p * q), `-`))
|
||||||
|
X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)
|
||||||
|
|
||||||
|
hist.sim <- sim(X, Fy, shape = c(p, q, k, r), alpha.true, beta.true)
|
||||||
|
hist.sim$repetition <- rep
|
||||||
|
|
||||||
|
hist <- rbind(hist, hist.sim)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
saveRDS(hist, file = "AR.rds")
|
||||||
|
hist$repetition <- factor(hist$repetition)
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = loss)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(loss)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = loss)) +
|
||||||
|
labs(
|
||||||
|
title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
||||||
|
l(hat(alpha), hat(beta)))),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(l(hat(alpha), hat(beta))),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_loss.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = dist)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(dist)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = dist)) +
|
||||||
|
labs(
|
||||||
|
title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_dist.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = dist.alpha)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = dist.alpha)) +
|
||||||
|
labs(
|
||||||
|
title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_dist_alpha.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = dist.beta)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = dist.beta)) +
|
||||||
|
labs(
|
||||||
|
title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_dist_beta.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = norm.alpha)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = norm.alpha)) +
|
||||||
|
labs(
|
||||||
|
title = expression(paste("Norm of ", hat(alpha))),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(abs(hat(alpha))[F]),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_norm_alpha.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
ggplot(hist, aes(x = iter, color = type, group = interaction(type, repetition))) +
|
||||||
|
geom_line(aes(y = norm.beta)) +
|
||||||
|
geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)),
|
||||||
|
aggregate(sub, list(type, repetition), tail, 1)
|
||||||
|
), aes(y = norm.beta)) +
|
||||||
|
labs(
|
||||||
|
title = expression(paste("Norm of ", hat(beta))),
|
||||||
|
subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
||||||
|
"20 repetitions, ", n == .(n), ", ",
|
||||||
|
p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
||||||
|
x = "nr. of iterations",
|
||||||
|
y = expression(abs(hat(beta))[F]),
|
||||||
|
color = "method"
|
||||||
|
) +
|
||||||
|
theme(legend.position = "bottom")
|
||||||
|
|
||||||
|
dev.print(png, file = "sim01_norm_beta.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# local({
|
||||||
|
# par(mfrow = c(2, 3), oma = c(2, 1, 1, 1), mar = c(3.1, 2.1, 2.1, 1.1), lwd = 2)
|
||||||
|
# plot(c(1, max.iter), range(c(hist.base$loss, hist.new$loss, hist.momentum$loss, hist.kron$loss), na.rm = TRUE),
|
||||||
|
# type = "n", log = "x", main = "loss")
|
||||||
|
# lines( hist.base$loss, col = 2)
|
||||||
|
# lines( hist.new$loss, col = 3)
|
||||||
|
# lines(hist.momentum$loss, col = 4)
|
||||||
|
# lines( hist.kron$loss, col = 5)
|
||||||
|
# yrange <- range(c(hist.base$step.size, hist.new$step.size, hist.momentum$step.size, hist.kron$step.size),
|
||||||
|
# na.rm = TRUE)
|
||||||
|
# plot(c(1, max.iter), yrange,
|
||||||
|
# type = "n", log = "x", main = "step.size")
|
||||||
|
# lines( hist.base$step.size, col = 2)
|
||||||
|
# lines( hist.new$step.size, col = 3)
|
||||||
|
# lines(hist.momentum$step.size, col = 4)
|
||||||
|
# lines( hist.kron$step.size, col = 5)
|
||||||
|
# # lines( hist.base$step.size, col = 4) # there is no step.size
|
||||||
|
# plot(0, 0, type = "l", bty = "n", xaxt = "n", yaxt = "n")
|
||||||
|
# legend("topleft", legend = c("Base", "GD", "GD + Momentum", "Kron + GD + Momentum"), col = 2:5,
|
||||||
|
# lwd = par("lwd"), xpd = TRUE, horiz = FALSE, cex = 1.2, bty = "n",
|
||||||
|
# x.intersp = 1, y.intersp = 1.5)
|
||||||
|
# # xpd = TRUE makes the legend plot to the figure
|
||||||
|
# plot(c(1, max.iter), range(c(hist.base$dist, hist.new$dist, hist.momentum$dist, hist.kron$dist), na.rm = TRUE),
|
||||||
|
# type = "n", log = "x", main = "dist")
|
||||||
|
# lines( hist.base$dist, col = 2)
|
||||||
|
# lines( hist.new$dist, col = 3)
|
||||||
|
# lines(hist.momentum$dist, col = 4)
|
||||||
|
# lines( hist.kron$dist, col = 5)
|
||||||
|
# plot(c(1, max.iter), range(c(hist.base$dist.alpha, hist.new$dist.alpha, hist.momentum$dist.alpha, hist.kron$dist.alpha), na.rm = TRUE),
|
||||||
|
# type = "n", log = "x", main = "dist.alpha")
|
||||||
|
# lines( hist.base$dist.alpha, col = 2)
|
||||||
|
# lines( hist.new$dist.alpha, col = 3)
|
||||||
|
# lines(hist.momentum$dist.alpha, col = 4)
|
||||||
|
# lines( hist.kron$dist.alpha, col = 5)
|
||||||
|
# plot(c(1, max.iter), range(c(hist.base$dist.beta, hist.new$dist.beta, hist.momentum$dist.beta, hist.kron$dist.beta), na.rm = TRUE),
|
||||||
|
# type = "n", log = "x", main = "dist.beta")
|
||||||
|
# lines( hist.base$dist.beta, col = 2)
|
||||||
|
# lines( hist.new$dist.beta, col = 3)
|
||||||
|
# lines(hist.momentum$dist.beta, col = 4)
|
||||||
|
# lines( hist.kron$dist.beta, col = 5)
|
||||||
|
# # par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
|
||||||
|
# # plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
|
||||||
|
# # legend('bottom', legend = c('GD', 'GD + Nesterov Momentum', 'Alternating'), col = 2:4,
|
||||||
|
# # lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = 'n')
|
||||||
|
# # # xpd = TRUE makes the legend plot to the figure
|
||||||
|
# })
|
||||||
|
# dev.print(png, file = "loss.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
||||||
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
||||||
|
# par(mfrow = c(2, 4))
|
||||||
|
# a2 <- sign(sum(sign(a1 * a2))) * a2
|
||||||
|
# a3 <- sign(sum(sign(a1 * a3))) * a3
|
||||||
|
# a4 <- sign(sum(sign(a1 * a4))) * a4
|
||||||
|
# b2 <- sign(sum(sign(b1 * b2))) * b2
|
||||||
|
# b3 <- sign(sum(sign(b1 * b3))) * b3
|
||||||
|
# b4 <- sign(sum(sign(b1 * b4))) * b4
|
||||||
|
|
||||||
|
# matrixImage(a1, main = expression(alpha))
|
||||||
|
# matrixImage(a2, main = expression(paste(hat(alpha)["Base"])))
|
||||||
|
# matrixImage(a3, main = expression(paste(hat(alpha)["GD"])))
|
||||||
|
# matrixImage(a4, main = expression(paste(hat(alpha)["GD+Nest"])))
|
||||||
|
# matrixImage(b1, main = expression(beta))
|
||||||
|
# matrixImage(b2, main = expression(paste(hat(beta)["Base"])))
|
||||||
|
# matrixImage(b3, main = expression(paste(hat(beta)["GD"])))
|
||||||
|
# matrixImage(b4, main = expression(paste(hat(beta)["GD+Nest"])))
|
||||||
|
# })
|
||||||
|
# dev.print(png, file = "estimates.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
# with(list(d1 = Delta, d2 = fit.base$Delta, d3 = fit.new$Delta, d4 = fit.momentum$Delta), {
|
||||||
|
# par(mfrow = c(2, 2))
|
||||||
|
|
||||||
|
# matrixImage(d1, main = expression(Delta))
|
||||||
|
# matrixImage(d2, main = expression(hat(Delta)["Base"]))
|
||||||
|
# matrixImage(d3, main = expression(hat(Delta)["GD"]))
|
||||||
|
# matrixImage(d4, main = expression(hat(Delta)["GD+Nest"]))
|
||||||
|
# })
|
||||||
|
# dev.print(png, file = "Delta.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
||||||
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
||||||
|
# par(mfrow = c(2, 2))
|
||||||
|
|
||||||
|
# matrixImage(kronecker(a1, b1), main = expression(B))
|
||||||
|
# matrixImage(kronecker(a2, b2), main = expression(hat(B)["Base"]))
|
||||||
|
# matrixImage(kronecker(a3, b3), main = expression(hat(B)["GD"]))
|
||||||
|
# matrixImage(kronecker(a4, b4), main = expression(hat(B)["GD+Nest"]))
|
||||||
|
# })
|
||||||
|
# dev.print(png, file = "B.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
||||||
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
||||||
|
# par(mfrow = c(3, 1), lwd = 1)
|
||||||
|
|
||||||
|
# d2 <- kronecker(a1, b1) - kronecker(a2, b2)
|
||||||
|
# d3 <- kronecker(a1, b1) - kronecker(a3, b3)
|
||||||
|
# d4 <- kronecker(a1, b1) - kronecker(a4, b4)
|
||||||
|
# xlim <- c(-1, 1) * max(abs(c(d2, d3, d4)))
|
||||||
|
# breaks <- seq(xlim[1], xlim[2], len = 41)
|
||||||
|
|
||||||
|
# hist(d2, main = expression(paste(base, (B - hat(B))[i])),
|
||||||
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
||||||
|
# lines(density(d2), col = 2)
|
||||||
|
# abline(v = range(d2), lty = 2)
|
||||||
|
# hist(d3, main = expression(paste(GD, (B - hat(B))[i])),
|
||||||
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
||||||
|
# lines(density(d3), col = 3)
|
||||||
|
# abline(v = range(d3), lty = 2)
|
||||||
|
# hist(d4, main = expression(paste(GD + Nest, (B - hat(B))[i])),
|
||||||
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
||||||
|
# lines(density(d4), col = 4)
|
||||||
|
# abline(v = range(d4), lty = 2)
|
||||||
|
# })
|
||||||
|
# dev.print(png, file = "hist.png", width = 768, height = 768, res = 125)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# options(width = 300)
|
||||||
|
# print(pr <- prof.tree::prof.tree("./Rprof.out"), limit = NULL
|
||||||
|
# , pruneFun = function(x) x$percent > 0.01)
|
||||||
|
|
||||||
|
# par(mfrow = c(2, 2))
|
||||||
|
# matrixImage(alpha, main = "alpha")
|
||||||
|
# matrixImage(fit$alpha, main = "fit$alpha")
|
||||||
|
# matrixImage(beta, main = "beta")
|
||||||
|
# matrixImage(fit$beta, main = "fit$beta")
|
||||||
|
|
||||||
|
# if (diff(dim(alpha) * dim(beta)) > 0) {
|
||||||
|
# par(mfrow = c(2, 1))
|
||||||
|
# } else {
|
||||||
|
# par(mfrow = c(1, 2))
|
||||||
|
# }
|
||||||
|
# matrixImage(kronecker(alpha, beta), main = "kronecker(alpha, beta)")
|
||||||
|
# matrixImage(kronecker(fit$alpha, fit$beta), main = "kronecker(fit$alpha, fit$beta)")
|
||||||
|
|
||||||
|
# matrixImage(Delta, main = "Delta")
|
||||||
|
# matrixImage(fit$Delta, main = "fit$Delta")
|
||||||
|
|
||||||
|
|
||||||
|
# local({
|
||||||
|
# a <- (-1 * (sum(sign(fit$alpha) * sign(alpha)) < 0)) * fit$alpha / mean(fit$alpha^2)
|
||||||
|
# b <- alpha / mean(alpha^2)
|
||||||
|
|
||||||
|
# norm(a - b, "F")
|
||||||
|
# })
|
||||||
|
# local({
|
||||||
|
# a <- (-1 * (sum(sign(fit$beta) * sign(beta)) < 0)) * fit$beta / mean(fit$beta^2)
|
||||||
|
# b <- beta / mean(beta^2)
|
||||||
|
|
||||||
|
# norm(a - b, "F")
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
# # Which Sequence?
|
||||||
|
# x <- y <- 1
|
||||||
|
# replicate(40, x <<- (y <<- x + y) - x)
|
||||||
|
|
||||||
|
|
||||||
|
# # Face-Splitting Product
|
||||||
|
# n <- 100
|
||||||
|
# p <- 3
|
||||||
|
# q <- 500
|
||||||
|
# A <- matrix(rnorm(n * p), n)
|
||||||
|
# B <- matrix(rnorm(n * q), n)
|
||||||
|
|
||||||
|
# faceSplit <- function(A, B) {
|
||||||
|
# C <- vapply(seq_len(ncol(A)), function(i) A[, i] * B, B)
|
||||||
|
# dim(C) <- c(nrow(A), ncol(A) * ncol(B))
|
||||||
|
# C
|
||||||
|
# }
|
||||||
|
|
||||||
|
# all.equal(
|
||||||
|
# tkhatriRao(A, B),
|
||||||
|
# faceSplit(A, B)
|
||||||
|
# )
|
||||||
|
# microbenchmark::microbenchmark(
|
||||||
|
# tkhatriRao(A, B),
|
||||||
|
# faceSplit(A, B)
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# dist.kron <- function(a0, b0, a1, b1) {
|
||||||
|
# sqrt(sum(a0^2) * sum(b0^2) -
|
||||||
|
# 2 * sum(diag(crossprod(a0, a1))) * sum(diag(crossprod(b0, b1))) +
|
||||||
|
# sum(a1^2) * sum(b1^2))
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
# alpha <- matrix(rnorm(q * r), q)
|
||||||
|
# beta <- matrix(rnorm(p * k), p)
|
||||||
|
# alpha.true <- matrix(rnorm(q * r), q)
|
||||||
|
# beta.true <- matrix(rnorm(p * k), p)
|
||||||
|
# all.equal(
|
||||||
|
# dist.kron(alpha, beta, alpha.true, beta.true),
|
||||||
|
# norm(kronecker(alpha, beta) - kronecker(alpha.true, beta.true), "F")
|
||||||
|
# )
|
||||||
|
|
||||||
|
# A <- matrix(rnorm(p^2), p)
|
||||||
|
# B <- matrix(rnorm(p^2), p)
|
||||||
|
|
||||||
|
# tr <- function(A) sum(diag(A))
|
||||||
|
# tr(crossprod(A, B))
|
||||||
|
# tr(tcrossprod(B, A))
|
||||||
|
|
||||||
|
# tr(crossprod(A, A))
|
||||||
|
# tr(tcrossprod(A, A))
|
||||||
|
# sum(A^2)
|
||||||
|
|
||||||
|
# alpha <- matrix(rnorm(q * r), q)
|
||||||
|
# beta <- matrix(rnorm(p * k), p)
|
||||||
|
|
||||||
|
# norm(kronecker(alpha, beta), "F")^2
|
||||||
|
# norm(alpha, "F")^2 * norm(beta, "F")^2
|
||||||
|
# tr(crossprod(kronecker(alpha, beta)))
|
||||||
|
# tr(tcrossprod(kronecker(alpha, beta)))
|
||||||
|
# tr(crossprod(kronecker(t(alpha), t(beta))))
|
||||||
|
# tr(crossprod(alpha)) * tr(crossprod(beta))
|
||||||
|
# tr(tcrossprod(alpha)) * tr(tcrossprod(beta))
|
||||||
|
# tr(crossprod(alpha)) * tr(tcrossprod(beta))
|
||||||
|
# sum(alpha^2) * sum(beta^2)
|
||||||
|
|
||||||
|
# alpha <- matrix(rnorm(q * r), q)
|
||||||
|
# beta <- matrix(rnorm(p * k), p)
|
||||||
|
# alpha.true <- matrix(rnorm(q * r), q)
|
||||||
|
# beta.true <- matrix(rnorm(p * k), p)
|
||||||
|
# microbenchmark::microbenchmark(
|
||||||
|
# norm(kronecker(alpha, beta), "F")^2,
|
||||||
|
# norm(alpha, "F")^2 * norm(beta, "F")^2,
|
||||||
|
# tr(crossprod(kronecker(alpha, beta))),
|
||||||
|
# tr(tcrossprod(kronecker(alpha, beta))),
|
||||||
|
# tr(crossprod(kronecker(t(alpha), t(beta)))),
|
||||||
|
# tr(crossprod(alpha)) * tr(crossprod(beta)),
|
||||||
|
# tr(tcrossprod(alpha)) * tr(tcrossprod(beta)),
|
||||||
|
# tr(crossprod(alpha)) * tr(tcrossprod(beta)),
|
||||||
|
# sum(alpha^2) * sum(beta^2),
|
||||||
|
|
||||||
|
# setup = {
|
||||||
|
# p <- sample(1:10, 1)
|
||||||
|
# q <- sample(1:10, 1)
|
||||||
|
# k <- sample(1:10, 1)
|
||||||
|
# r <- sample(1:10, 1)
|
||||||
|
# assign("alpha", matrix(rnorm(q * r), q), .GlobalEnv)
|
||||||
|
# assign("beta", matrix(rnorm(p * k), p), .GlobalEnv)
|
||||||
|
# assign("alpha.true", matrix(rnorm(q * r), q), .GlobalEnv)
|
||||||
|
# assign("beta.true", matrix(rnorm(p * k), p), .GlobalEnv)
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# p <- sample(1:15, 1) # 11
|
||||||
|
# q <- sample(1:15, 1) # 3
|
||||||
|
# k <- sample(1:15, 1) # 7
|
||||||
|
# r <- sample(1:15, 1) # 5
|
||||||
|
# A <- matrix(rnorm(q * r), q)
|
||||||
|
# B <- matrix(rnorm(p * k), p)
|
||||||
|
# a <- matrix(rnorm(q * r), q)
|
||||||
|
# b <- matrix(rnorm(p * k), p)
|
||||||
|
|
||||||
|
# all.equal(
|
||||||
|
# kronecker(A + a, B + b),
|
||||||
|
# kronecker(A, B) + kronecker(A, b) + kronecker(a, B) + kronecker(a, b)
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
# p <- 200L
|
||||||
|
# n <- 100L
|
||||||
|
# R <- matrix(rnorm(n * p), n)
|
||||||
|
# A <- matrix(rnorm(p^2), p) # Base Matrix
|
||||||
|
# B <- A + 0.01 * matrix(rnorm(p^2), p) # Distortion / Update of A
|
||||||
|
# A.inv <- solve(A)
|
||||||
|
|
||||||
|
# microbenchmark::microbenchmark(
|
||||||
|
# solve = R %*% solve(B),
|
||||||
|
# neumann.raw = R %*% (A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv),
|
||||||
|
# neumann.fun = {
|
||||||
|
# AD <- A.inv %*% (A - B)
|
||||||
|
# res <- A.inv + AD %*% A.inv
|
||||||
|
# res <- A.inv + AD %*% res
|
||||||
|
# R %*% res
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
|
# all.equal(
|
||||||
|
# A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
|
||||||
|
# {
|
||||||
|
# DA <- (A - B) %*% A.inv
|
||||||
|
# res <- A.inv + A.inv %*% DA
|
||||||
|
# res <- A.inv + res %*% DA
|
||||||
|
# res
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
|
# all.equal(
|
||||||
|
# A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
|
||||||
|
# {
|
||||||
|
# AD <- A.inv %*% (A - B)
|
||||||
|
# res <- A.inv + AD %*% A.inv
|
||||||
|
# res <- A.inv + AD %*% res
|
||||||
|
# res
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
|
# #####
|
||||||
|
# sym <- function(A) A + t(A)
|
||||||
|
# n <- 101
|
||||||
|
# p <- 7
|
||||||
|
# q <- 11
|
||||||
|
# r <- 3
|
||||||
|
# k <- 5
|
||||||
|
# R <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
|
||||||
|
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
|
||||||
|
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
|
||||||
|
# beta <- array(rnorm(p * k), dim = c(p = p, k = k))
|
||||||
|
# Delta.1 <- sym(matrix(rnorm(q * q), q, q))
|
||||||
|
# dim(Delta.1) <- c(q = q, q = q)
|
||||||
|
# Delta.2 <- sym(matrix(rnorm(p * p), p, p))
|
||||||
|
# dim(Delta.2) <- c(p = p, p = p)
|
||||||
|
|
||||||
|
# Delta <- kronecker(Delta.1, Delta.2)
|
||||||
|
|
||||||
|
# grad.alpha.1 <- local({
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
||||||
|
# dim(.R) <- c(q, p, n)
|
||||||
|
# .F <- sapply(seq_len(n), function(i) beta %*% F[i, , ])
|
||||||
|
# dim(.F) <- c(p, r, n)
|
||||||
|
|
||||||
|
# .C <- sapply(seq_len(n), function(i) .R[i, , ] %*% .F[i, , ])
|
||||||
|
# dim(.C) <- c(n, q, r)
|
||||||
|
# colSums(.C)
|
||||||
|
# })
|
||||||
|
# grad.alpha.2 <- local({
|
||||||
|
# # Delta.1^-1 R' Delta.2^-1
|
||||||
|
# .R <- aperm(R, c(2, 1, 3))
|
||||||
|
# dim(.R) <- c(q, n * p)
|
||||||
|
# .R <- solve(Delta.1) %*% .R
|
||||||
|
# dim(.R) <- c(q, n, p)
|
||||||
|
# .R <- aperm(.R, c(3, 2, 1))
|
||||||
|
# dim(.R) <- c(p, n * q)
|
||||||
|
# .R <- solve(Delta.2) %*% .R
|
||||||
|
# dim(.R) <- c(p, n, q)
|
||||||
|
# .R <- aperm(.R, c(2, 3, 1)) # n x q x p
|
||||||
|
|
||||||
|
# # beta F
|
||||||
|
# .F <- aperm(F, c(2, 1, 3))
|
||||||
|
# dim(.F) <- c(k, n * r)
|
||||||
|
# .F <- beta %*% .F
|
||||||
|
# dim(.F) <- c(p, n, r)
|
||||||
|
# .F <- aperm(.F, c(2, 1, 3)) # n x p x r
|
||||||
|
|
||||||
|
# # (Delta.1^-1 R' Delta.2^-1) (beta F)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# dim(.F) <- c(n * p, r)
|
||||||
|
# crossprod(.R, .F)
|
||||||
|
# })
|
||||||
|
|
||||||
|
# all.equal(
|
||||||
|
# grad.alpha.1,
|
||||||
|
# grad.alpha.2
|
||||||
|
# )
|
||||||
|
|
||||||
|
# all.equal({
|
||||||
|
# .R <- matrix(0, q, p)
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .R <- .R + Di.1 %*% t(R[i, , ]) %*% Di.2
|
||||||
|
# }
|
||||||
|
# .R
|
||||||
|
# }, {
|
||||||
|
# .R <- R
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
||||||
|
# dim(.R) <- c(q, p, n)
|
||||||
|
# .R <- aperm(.R, c(3, 1, 2))
|
||||||
|
# colSums(.R)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
||||||
|
# dim(.R) <- c(q, p, n)
|
||||||
|
# .R <- aperm(.R, c(3, 1, 2))
|
||||||
|
# .R
|
||||||
|
# }, {
|
||||||
|
# .R <- R
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# .R <- .R %*% solve(Delta.1)
|
||||||
|
# dim(.R) <- c(n, p, q)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# .R <- .R %*% solve(Delta.2)
|
||||||
|
# dim(.R) <- c(n, q, p)
|
||||||
|
# .R
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .F <- matrix(0, p, r)
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .F <- .F + beta %*% F[i, , ]
|
||||||
|
# }
|
||||||
|
# .F
|
||||||
|
# }, {
|
||||||
|
# .F <- apply(F, 1, function(Fi) beta %*% Fi)
|
||||||
|
# dim(.F) <- c(p, r, n)
|
||||||
|
# .F <- aperm(.F, c(3, 1, 2))
|
||||||
|
# colSums(.F)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .F <- apply(F, 1, function(Fi) beta %*% Fi)
|
||||||
|
# dim(.F) <- c(p, r, n)
|
||||||
|
# .F <- aperm(.F, c(3, 1, 2))
|
||||||
|
# colSums(.F)
|
||||||
|
# }, {
|
||||||
|
# .F <- aperm(F, c(1, 3, 2))
|
||||||
|
# dim(.F) <- c(n * r, k)
|
||||||
|
# .F <- tcrossprod(.F, beta)
|
||||||
|
# dim(.F) <- c(n, r, p)
|
||||||
|
# t(colSums(.F))
|
||||||
|
# })
|
||||||
|
|
||||||
|
# all.equal({
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# grad.alpha <- 0
|
||||||
|
# grad.beta <- 0
|
||||||
|
# dim(R) <- c(n, p, q)
|
||||||
|
# dim(F) <- c(n, k, r)
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# grad.alpha <- grad.alpha + (
|
||||||
|
# Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
||||||
|
# )
|
||||||
|
# grad.beta <- grad.beta + (
|
||||||
|
# Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
||||||
|
# )
|
||||||
|
# }
|
||||||
|
|
||||||
|
# g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
||||||
|
# }, {
|
||||||
|
# # Note that the order is important since for grad.beta the residuals do NOT
|
||||||
|
# # need to be transposes.
|
||||||
|
|
||||||
|
# # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
|
||||||
|
# dim(R) <- c(n * p, q)
|
||||||
|
# .R <- R %*% solve(Delta.1)
|
||||||
|
# dim(.R) <- c(n, p, q)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# .R <- .R %*% solve(Delta.2)
|
||||||
|
# dim(.R) <- c(n, q, p)
|
||||||
|
|
||||||
|
# # gradient with respect to beta
|
||||||
|
# # Responces times beta (alpha f_i')
|
||||||
|
# dim(F) <- c(n * k, r)
|
||||||
|
# .F <- tcrossprod(F, alpha)
|
||||||
|
# dim(.F) <- c(n, k, q)
|
||||||
|
# .F <- aperm(.F, c(1, 3, 2))
|
||||||
|
# # Matricize
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# dim(.F) <- c(n * q, k)
|
||||||
|
# grad.beta <- crossprod(.R, .F)
|
||||||
|
|
||||||
|
# # gradient with respect to beta
|
||||||
|
# # Responces times alpha
|
||||||
|
# dim(F) <- c(n, k, r)
|
||||||
|
# .F <- aperm(F, c(1, 3, 2))
|
||||||
|
# dim(.F) <- c(n * r, k)
|
||||||
|
# .F <- tcrossprod(.F, beta)
|
||||||
|
# dim(.F) <- c(n, r, p)
|
||||||
|
# .F <- aperm(.F, c(1, 3, 2))
|
||||||
|
# # Transpose stand. residuals
|
||||||
|
# dim(.R) <- c(n, q, p)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# # Matricize
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# dim(.F) <- c(n * p, r)
|
||||||
|
# grad.alpha <- crossprod(.R, .F)
|
||||||
|
|
||||||
|
# g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
# microbenchmark::microbenchmark(R1 = {
|
||||||
|
# Di.1 <- solve(Delta.1)
|
||||||
|
# Di.2 <- solve(Delta.2)
|
||||||
|
# grad.alpha <- 0 # matrix(0, q, r)
|
||||||
|
# grad.beta <- 0 # matrix(0, p, k)
|
||||||
|
# dim(R) <- c(n, p, q)
|
||||||
|
# dim(F) <- c(n, k, r)
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# grad.alpha <- grad.alpha + (
|
||||||
|
# Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
||||||
|
# )
|
||||||
|
# grad.beta <- grad.beta + (
|
||||||
|
# Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
||||||
|
# )
|
||||||
|
# }
|
||||||
|
|
||||||
|
# g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
||||||
|
# }, R3 = {
|
||||||
|
# # Note that the order is important since for grad.beta the residuals do NOT
|
||||||
|
# # need to be transposes.
|
||||||
|
|
||||||
|
# # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
|
||||||
|
# dim(R) <- c(n * p, q)
|
||||||
|
# .R <- R %*% solve(Delta.1)
|
||||||
|
# dim(.R) <- c(n, p, q)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# .R <- .R %*% solve(Delta.2)
|
||||||
|
# dim(.R) <- c(n, q, p)
|
||||||
|
|
||||||
|
# # gradient with respect to beta
|
||||||
|
# # Responces times beta (alpha f_i')
|
||||||
|
# dim(F) <- c(n * k, r)
|
||||||
|
# .F <- tcrossprod(F, alpha)
|
||||||
|
# dim(.F) <- c(n, k, q)
|
||||||
|
# .F <- aperm(.F, c(1, 3, 2))
|
||||||
|
# # Matricize
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# dim(.F) <- c(n * q, k)
|
||||||
|
# grad.beta <- crossprod(.R, .F)
|
||||||
|
|
||||||
|
# # gradient with respect to beta
|
||||||
|
# # Responces times alpha
|
||||||
|
# dim(F) <- c(n, k, r)
|
||||||
|
# .F <- aperm(F, c(1, 3, 2))
|
||||||
|
# dim(.F) <- c(n * r, k)
|
||||||
|
# .F <- tcrossprod(.F, beta)
|
||||||
|
# dim(.F) <- c(n, r, p)
|
||||||
|
# .F <- aperm(.F, c(1, 3, 2))
|
||||||
|
# # Transpose stand. residuals
|
||||||
|
# dim(.R) <- c(n, q, p)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# # Matricize
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# dim(.F) <- c(n * p, r)
|
||||||
|
# grad.alpha <- crossprod(.R, .F)
|
||||||
|
|
||||||
|
# g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
# n <- 100
|
||||||
|
# p <- 7
|
||||||
|
# q <- 11
|
||||||
|
# k <- 3
|
||||||
|
# r <- 5
|
||||||
|
|
||||||
|
# X <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
|
||||||
|
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
|
||||||
|
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
|
||||||
|
# beta <- array(rnorm(p * k), dim = c(p = p, k = k))
|
||||||
|
|
||||||
|
# all.equal({
|
||||||
|
# R <- array(NA, dim = c(n, p, q))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
|
||||||
|
# }
|
||||||
|
# R
|
||||||
|
# }, {
|
||||||
|
# X - (F %x_3% alpha %x_2% beta)
|
||||||
|
# }, check.attributes = FALSE)
|
||||||
|
|
||||||
|
# microbenchmark::microbenchmark(base = {
|
||||||
|
# R <- array(NA, dim = c(n, p, q))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
|
||||||
|
# }
|
||||||
|
# R
|
||||||
|
# }, ttm = {
|
||||||
|
# X - (F %x_3% alpha %x_2% beta)
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# n <- 100; p <- 7; q <- 11; k <- 3; r <- 5
|
||||||
|
# sym <- function(x) t(x) + x
|
||||||
|
# Di.1 <- sym(matrix(rnorm(q^2), q, q))
|
||||||
|
# Di.2 <- sym(matrix(rnorm(p^2), p, p))
|
||||||
|
# R <- array(rnorm(n, p, q), dim = c(n, p, q))
|
||||||
|
# F <- array(rnorm(n, k, r), dim = c(n, k, r))
|
||||||
|
# alpha <- matrix(rnorm(q * r), q, r)
|
||||||
|
# beta <- matrix(rnorm(p * k), p, k)
|
||||||
|
|
||||||
|
# all.equal({
|
||||||
|
# .R <- array(NA, dim = c(n, p, q))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .R[i, , ] <- Di.2 %*% R[i, , ] %*% Di.1
|
||||||
|
# }
|
||||||
|
# .R
|
||||||
|
# }, {
|
||||||
|
# R %x_3% Di.1 %x_2% Di.2
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Rt <- array(NA, dim = c(n, q, p))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Rt[i, , ] <- Di.1 %*% t(R[i, , ]) %*% Di.2
|
||||||
|
# }
|
||||||
|
# .Rt
|
||||||
|
# }, {
|
||||||
|
# .Rt <- R %x_3% Di.1 %x_2% Di.2
|
||||||
|
# aperm(.Rt, c(1, 3, 2))
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Fa <- array(NA, dim = c(n, q, k))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Fa[i, , ] <- alpha %*% t(F[i, , ])
|
||||||
|
# }
|
||||||
|
# .Fa
|
||||||
|
# }, {
|
||||||
|
# aperm(F %x_3% alpha, c(1, 3, 2))
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Fb <- array(NA, dim = c(n, p, r))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Fb[i, , ] <- beta %*% F[i, , ]
|
||||||
|
# }
|
||||||
|
# .Fb
|
||||||
|
# }, {
|
||||||
|
# F %x_2% beta
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .F <- array(NA, dim = c(n, p, q))
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .F[i, , ] <- beta %*% F[i, , ] %*% t(alpha)
|
||||||
|
# }
|
||||||
|
# .F
|
||||||
|
# }, {
|
||||||
|
# F %x_3% alpha %x_2% beta
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Ga <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
||||||
|
# }
|
||||||
|
# .Ga
|
||||||
|
# }, {
|
||||||
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# .Fb <- F %x_2% beta
|
||||||
|
# dim(.Fb) <- c(n * p, r)
|
||||||
|
# crossprod(.R, .Fb)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Gb <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
||||||
|
# }
|
||||||
|
# .Gb
|
||||||
|
# }, {
|
||||||
|
# .Rt <- aperm(R %x_3% Di.1 %x_2% Di.2, c(1, 3, 2))
|
||||||
|
# dim(.Rt) <- c(n * q, p)
|
||||||
|
# .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))
|
||||||
|
# dim(.Fa) <- c(n * q, k)
|
||||||
|
# crossprod(.Rt, .Fa)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Ga <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
||||||
|
# }
|
||||||
|
# .Gb <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
||||||
|
# }
|
||||||
|
# c(.Ga, .Gb)
|
||||||
|
# }, {
|
||||||
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
||||||
|
# .Fb <- F %x_2% beta
|
||||||
|
# .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))
|
||||||
|
|
||||||
|
# dim(.R) <- c(n * p, q)
|
||||||
|
# dim(.Fb) <- c(n * p, r)
|
||||||
|
# .Ga <- crossprod(.R, .Fb)
|
||||||
|
|
||||||
|
# dim(.R) <- c(n, p, q)
|
||||||
|
# .R <- aperm(.R, c(1, 3, 2))
|
||||||
|
# dim(.R) <- c(n * q, p)
|
||||||
|
# dim(.Fa) <- c(n * q, k)
|
||||||
|
# .Gb <- crossprod(.R, .Fa)
|
||||||
|
|
||||||
|
# c(.Ga, .Gb)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# .Ga <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
||||||
|
# }
|
||||||
|
# .Gb <- 0
|
||||||
|
# for (i in 1:n) {
|
||||||
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
||||||
|
# }
|
||||||
|
# c(.Ga, .Gb)
|
||||||
|
# }, {
|
||||||
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
||||||
|
|
||||||
|
# .Ga <- tcrossprod(mat(.R, 3), mat(F %x_2% beta, 3))
|
||||||
|
# .Gb <- tcrossprod(mat(.R, 2), mat(F %x_3% alpha, 2))
|
||||||
|
|
||||||
|
# c(.Ga, .Gb)
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# n <- 101; p <- 5; q <- 7
|
||||||
|
|
||||||
|
# sym <- function(x) crossprod(x)
|
||||||
|
# D1 <- sym(matrix(rnorm(q^2), q, q))
|
||||||
|
# D2 <- sym(matrix(rnorm(p^2), p, p))
|
||||||
|
|
||||||
|
# X <- tensorPredictors:::rmvnorm(n, sigma = kronecker(D1, D2))
|
||||||
|
# dim(X) <- c(n, p, q)
|
||||||
|
|
||||||
|
# D1.hat <- tcrossprod(mat(X, 3)) / n
|
||||||
|
# D2.hat <- tcrossprod(mat(X, 2)) / n
|
||||||
|
|
||||||
|
# local({
|
||||||
|
# par(mfrow = c(2, 2))
|
||||||
|
# matrixImage(D1, main = "D1")
|
||||||
|
# matrixImage(D1.hat, main = "D1.hat")
|
||||||
|
# matrixImage(D2, main = "D2")
|
||||||
|
# matrixImage(D2.hat, main = "D2.hat")
|
||||||
|
# })
|
||||||
|
|
||||||
|
# sum(X^2) / n
|
||||||
|
# sum(diag(D1.hat))
|
||||||
|
# sum(diag(D2.hat))
|
||||||
|
# sum(diag(kronecker(D1, D2)))
|
||||||
|
# sum(diag(kronecker(D1.hat / sqrt(sum(diag(D1.hat))),
|
||||||
|
# D2.hat / sqrt(sum(diag(D1.hat))))))
|
||||||
|
|
||||||
|
# all.equal({
|
||||||
|
# mat(X, 1) %*% kronecker(D1.hat, D2.hat)
|
||||||
|
# }, {
|
||||||
|
# mat(X %x_3% D1.hat %x_2% D2.hat, 1)
|
||||||
|
# })
|
||||||
|
# all.equal({
|
||||||
|
# C <- mat(X, 1) %*% kronecker(D1.hat, D2.hat) * (n / sum(X^2))
|
||||||
|
# dim(C) <- c(n, p, q)
|
||||||
|
# C
|
||||||
|
# }, {
|
||||||
|
# (X %x_3% D1.hat %x_2% D2.hat) / sum(diag(D1.hat))
|
||||||
|
# })
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# D.1 <- tcrossprod(mat(X, 3))
|
||||||
|
# D.2 <- tcrossprod(mat(X, 2))
|
||||||
|
# tr <- sum(diag(D.1))
|
||||||
|
# D.1 <- D.1 / sqrt(n * tr)
|
||||||
|
# D.2 <- D.2 / sqrt(n * tr)
|
||||||
|
|
||||||
|
# sum(diag(kronecker(D1, D2)))
|
||||||
|
# sum(diag(kronecker(D.1, D.2)))
|
||||||
|
# det(kronecker(D1, D2))
|
||||||
|
# det(kronecker(D.1, D.2))
|
||||||
|
# det(D.1)^p * det(D.2)^q
|
||||||
|
|
||||||
|
# log(det(kronecker(D.1, D.2)))
|
||||||
|
# p * log(det(D.1)) + q * log(det(D.2))
|
|
@ -0,0 +1,136 @@
|
||||||
|
#' Using unbiased (but not MLE) estimates for the Kronecker decomposed
|
||||||
|
#' covariance matrices Delta_1, Delta_2 for approximating the log-likelihood
|
||||||
|
#' giving a closed form solution for the gradient.
|
||||||
|
#'
|
||||||
|
#' Delta_1 = n^-1 sum_i R_i' R_i,
|
||||||
|
#' Delta_2 = n^-1 sum_i R_i R_i'.
|
||||||
|
#'
|
||||||
|
#' @export
|
||||||
|
kpir.approx <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
||||||
|
max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
|
||||||
|
nesterov.scaling = function(a, t) 0.5 * (1 + sqrt(1 + (2 * a)^2)),
|
||||||
|
eps = .Machine$double.eps,
|
||||||
|
logger = NULL
|
||||||
|
) {
|
||||||
|
|
||||||
|
# Check if X and Fy have same number of observations
|
||||||
|
stopifnot(nrow(X) == NROW(Fy))
|
||||||
|
n <- nrow(X) # Number of observations
|
||||||
|
|
||||||
|
# Get and check predictor dimensions
|
||||||
|
if (length(dim(X)) == 2L) {
|
||||||
|
stopifnot(!missing(shape))
|
||||||
|
stopifnot(ncol(X) == prod(shape[1:2]))
|
||||||
|
p <- as.integer(shape[1]) # Predictor "height"
|
||||||
|
q <- as.integer(shape[2]) # Predictor "width"
|
||||||
|
} else if (length(dim(X)) == 3L) {
|
||||||
|
p <- dim(X)[2]
|
||||||
|
q <- dim(X)[3]
|
||||||
|
dim(X) <- c(n, p * q)
|
||||||
|
} else {
|
||||||
|
stop("'X' must be a matrix or 3-tensor")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get and check response dimensions
|
||||||
|
if (!is.array(Fy)) {
|
||||||
|
Fy <- as.array(Fy)
|
||||||
|
}
|
||||||
|
if (length(dim(Fy)) == 1L) {
|
||||||
|
k <- r <- 1L
|
||||||
|
dim(Fy) <- c(n, 1L)
|
||||||
|
} else if (length(dim(Fy)) == 2L) {
|
||||||
|
stopifnot(!missing(shape))
|
||||||
|
stopifnot(ncol(Fy) == prod(shape[3:4]))
|
||||||
|
k <- as.integer(shape[3]) # Response functional "height"
|
||||||
|
r <- as.integer(shape[4]) # Response functional "width"
|
||||||
|
} else if (length(dim(Fy)) == 3L) {
|
||||||
|
k <- dim(Fy)[2]
|
||||||
|
r <- dim(Fy)[3]
|
||||||
|
dim(Fy) <- c(n, k * r)
|
||||||
|
} else {
|
||||||
|
stop("'Fy' must be a vector, matrix or 3-tensor")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### Step 1: (Approx) Least Squares solution for `X = Fy B' + epsilon`
|
||||||
|
# Vectorize
|
||||||
|
dim(Fy) <- c(n, k * r)
|
||||||
|
dim(X) <- c(n, p * q)
|
||||||
|
# Solve
|
||||||
|
cpFy <- crossprod(Fy) # TODO: Check/Test and/or replace
|
||||||
|
if (n <= k * r || qr(cpFy)$rank < k * r) {
|
||||||
|
# In case of under-determined system replace the inverse in the normal
|
||||||
|
# equation by the Moore-Penrose Pseudo Inverse
|
||||||
|
B <- t(matpow(cpFy, -1) %*% crossprod(Fy, X))
|
||||||
|
} else {
|
||||||
|
# Compute OLS estimate by the Normal Equation
|
||||||
|
B <- t(solve(cpFy, crossprod(Fy, X)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# De-Vectroize (from now on tensor arithmetics)
|
||||||
|
dim(Fy) <- c(n, k, r)
|
||||||
|
dim(X) <- c(n, p, q)
|
||||||
|
|
||||||
|
# Decompose `B = alpha x beta` into `alpha` and `beta`
|
||||||
|
c(alpha0, beta0) %<-% approx.kronecker(B, c(q, r), c(p, k))
|
||||||
|
|
||||||
|
# Compute residuals
|
||||||
|
R <- X - (Fy %x_3% alpha0 %x_2% beta0)
|
||||||
|
|
||||||
|
# Covariance estimates and scaling factor
|
||||||
|
Delta.1 <- tcrossprod(mat(R, 3))
|
||||||
|
Delta.2 <- tcrossprod(mat(R, 2))
|
||||||
|
s <- sum(diag(Delta.1))
|
||||||
|
|
||||||
|
# Inverse Covariances
|
||||||
|
Delta.1.inv <- solve(Delta.1)
|
||||||
|
Delta.2.inv <- solve(Delta.2)
|
||||||
|
|
||||||
|
# cross dependent covariance estimates
|
||||||
|
S.1 <- n^-1 * tcrossprod(mat(R, 3), mat(R %x_2% Delta.2.inv, 3))
|
||||||
|
S.2 <- n^-1 * tcrossprod(mat(R, 2), mat(R %x_3% Delta.1.inv, 2))
|
||||||
|
|
||||||
|
# Evaluate negative log-likelihood (2 pi term dropped)
|
||||||
|
loss <- -0.5 * n * (p * q * log(s) - p * log(det(Delta.1)) -
|
||||||
|
q * log(det(Delta.2)) - s * sum(S.1 * Delta.1.inv))
|
||||||
|
|
||||||
|
# Gradient "generating" tensor
|
||||||
|
G <- (sum(S.1 * Delta.1.inv) - p * q / s) * R
|
||||||
|
G <- G + R %x_2% ((diag(q, p, p) - s * (Delta.2.inv %*% S.2)) %*% Delta.2.inv)
|
||||||
|
G <- G + R %x_3% ((diag(p, q, q) - s * (Delta.1.inv %*% S.1)) %*% Delta.1.inv)
|
||||||
|
G <- G + s * (R %x_2% Delta.2.inv %x_3% Delta.1.inv)
|
||||||
|
|
||||||
|
# Call history callback (logger) before the first iteration
|
||||||
|
if (is.function(logger)) {
|
||||||
|
logger(0L, loss, alpha0, beta0, Delta.1, Delta.2, NA)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
### Step 2: MLE estimate with LS solution as starting value
|
||||||
|
a0 <- 0
|
||||||
|
a1 <- 1
|
||||||
|
alpha1 <- alpha0
|
||||||
|
beta1 <- beta0
|
||||||
|
|
||||||
|
# main descent loop
|
||||||
|
no.nesterov <- TRUE
|
||||||
|
break.reason <- NA
|
||||||
|
for (iter in seq_len(max.iter)) {
|
||||||
|
if (no.nesterov) {
|
||||||
|
# without extrapolation as fallback
|
||||||
|
alpha.moment <- alpha1
|
||||||
|
beta.moment <- beta1
|
||||||
|
} else {
|
||||||
|
# extrapolation using previous direction
|
||||||
|
alpha.moment <- alpha1 + ((a0 - 1) / a1) * (alpha1 - alpha0)
|
||||||
|
beta.moment <- beta1 + ((a0 - 1) / a1) * ( beta1 - beta0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extrapolated residuals
|
||||||
|
R <- X - (Fy %x_3% alpha.moment %x_2% beta.moment)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
list(loss = loss, alpha = alpha1, beta = beta1, Delta = Delta)
|
||||||
|
}
|
|
@ -1,12 +1,50 @@
|
||||||
#' (Slightly altered) old implementation
|
#' (Slightly altered) old implementation
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
kpir.base <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
|
kpir.base <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
||||||
method = c("mle", "ls"),
|
method = c("mle", "ls"),
|
||||||
eps1 = 1e-10, eps2 = 1e-10, max.iter = 500L,
|
eps1 = 1e-10, eps2 = 1e-10, max.iter = 500L,
|
||||||
logger = NULL
|
logger = NULL
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
# Check if X and Fy have same number of observations
|
||||||
|
stopifnot(nrow(X) == NROW(Fy))
|
||||||
|
n <- nrow(X) # Number of observations
|
||||||
|
|
||||||
|
# Get and check predictor dimensions
|
||||||
|
if (length(dim(X)) == 2L) {
|
||||||
|
stopifnot(!missing(shape))
|
||||||
|
stopifnot(ncol(X) == prod(shape[1:2]))
|
||||||
|
p <- as.integer(shape[1]) # Predictor "height"
|
||||||
|
q <- as.integer(shape[2]) # Predictor "width"
|
||||||
|
} else if (length(dim(X)) == 3L) {
|
||||||
|
p <- dim(X)[2]
|
||||||
|
q <- dim(X)[3]
|
||||||
|
dim(X) <- c(n, p * q)
|
||||||
|
} else {
|
||||||
|
stop("'X' must be a matrix or 3-tensor")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get and check response dimensions
|
||||||
|
if (!is.array(Fy)) {
|
||||||
|
Fy <- as.array(Fy)
|
||||||
|
}
|
||||||
|
if (length(dim(Fy)) == 1L) {
|
||||||
|
k <- r <- 1L
|
||||||
|
dim(Fy) <- c(n, 1L)
|
||||||
|
} else if (length(dim(Fy)) == 2L) {
|
||||||
|
stopifnot(!missing(shape))
|
||||||
|
stopifnot(ncol(Fy) == prod(shape[3:4]))
|
||||||
|
k <- as.integer(shape[3]) # Response functional "height"
|
||||||
|
r <- as.integer(shape[4]) # Response functional "width"
|
||||||
|
} else if (length(dim(Fy)) == 3L) {
|
||||||
|
k <- dim(Fy)[2]
|
||||||
|
r <- dim(Fy)[3]
|
||||||
|
dim(Fy) <- c(n, k * r)
|
||||||
|
} else {
|
||||||
|
stop("'Fy' must be a vector, matrix or 3-tensor")
|
||||||
|
}
|
||||||
|
|
||||||
log.likelihood <- function(par, X, Fy, Delta.inv, da, db) {
|
log.likelihood <- function(par, X, Fy, Delta.inv, da, db) {
|
||||||
alpha <- matrix(par[1:prod(da)], da[1L])
|
alpha <- matrix(par[1:prod(da)], da[1L])
|
||||||
beta <- matrix(par[(prod(da) + 1):length(par)], db[1L])
|
beta <- matrix(par[(prod(da) + 1):length(par)], db[1L])
|
||||||
|
@ -17,10 +55,6 @@ kpir.base <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
|
||||||
# Validate method using unexact matching.
|
# Validate method using unexact matching.
|
||||||
method <- match.arg(method)
|
method <- match.arg(method)
|
||||||
|
|
||||||
# ## Step 1:
|
|
||||||
# # OLS estimate of the model `X = F_y B + epsilon`.
|
|
||||||
# B <- t(solve(crossprod(Fy), crossprod(Fy, X)))
|
|
||||||
|
|
||||||
### Step 1: (Approx) Least Squares solution for `X = Fy B' + epsilon`
|
### Step 1: (Approx) Least Squares solution for `X = Fy B' + epsilon`
|
||||||
cpFy <- crossprod(Fy)
|
cpFy <- crossprod(Fy)
|
||||||
if (n <= k * r || qr(cpFy)$rank < k * r) {
|
if (n <= k * r || qr(cpFy)$rank < k * r) {
|
||||||
|
@ -33,7 +67,7 @@ kpir.base <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Estimate alpha, beta as nearest kronecker approximation.
|
# Estimate alpha, beta as nearest kronecker approximation.
|
||||||
c(alpha, beta) %<-% approx.kronecker(B, c(t, r), c(p, k))
|
c(alpha, beta) %<-% approx.kronecker(B, c(q, r), c(p, k))
|
||||||
|
|
||||||
if (method == "ls") {
|
if (method == "ls") {
|
||||||
# Estimate Delta.
|
# Estimate Delta.
|
||||||
|
@ -63,13 +97,13 @@ kpir.base <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
|
||||||
for (iter in 1:max.iter) {
|
for (iter in 1:max.iter) {
|
||||||
# Optimize log-likelihood for alpha, beta with fixed Delta.
|
# Optimize log-likelihood for alpha, beta with fixed Delta.
|
||||||
opt <- optim(c(alpha, beta), log.likelihood, gr = NULL,
|
opt <- optim(c(alpha, beta), log.likelihood, gr = NULL,
|
||||||
X, Fy, matpow(Delta, -1), c(t, r), c(p, k))
|
X, Fy, matpow(Delta, -1), c(q, r), c(p, k))
|
||||||
# Store previous alpha, beta and Delta (for break consition).
|
# Store previous alpha, beta and Delta (for break consition).
|
||||||
Delta.last <- Delta
|
Delta.last <- Delta
|
||||||
B.last <- B
|
B.last <- B
|
||||||
# Extract optimized alpha, beta.
|
# Extract optimized alpha, beta.
|
||||||
alpha <- matrix(opt$par[1:(t * r)], t, r)
|
alpha <- matrix(opt$par[1:(q * r)], q, r)
|
||||||
beta <- matrix(opt$par[(t * r + 1):length(opt$par)], p, k)
|
beta <- matrix(opt$par[(q * r + 1):length(opt$par)], p, k)
|
||||||
# Calc new Delta with likelihood optimized alpha, beta.
|
# Calc new Delta with likelihood optimized alpha, beta.
|
||||||
B <- kronecker(alpha, beta)
|
B <- kronecker(alpha, beta)
|
||||||
resid <- X - tcrossprod(Fy, B)
|
resid <- X - tcrossprod(Fy, B)
|
||||||
|
@ -85,9 +119,9 @@ kpir.base <- function(X, Fy, p, t, k = 1L, r = 1L, d1 = 1L, d2 = 1L,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check break condition 1.
|
# Check break condition 1.
|
||||||
if (norm(Delta - Delta.last, 'F') < eps1 * norm(Delta, 'F')) {
|
if (norm(Delta - Delta.last, "F") < eps1 * norm(Delta, "F")) {
|
||||||
# Check break condition 2.
|
# Check break condition 2.
|
||||||
if (norm(B - B.last, 'F') < eps2 * norm(B, 'F')) {
|
if (norm(B - B.last, "F") < eps2 * norm(B, "F")) {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,7 @@ kpir.kron <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
||||||
# De-Vectroize (from now on tensor arithmetics)
|
# De-Vectroize (from now on tensor arithmetics)
|
||||||
dim(Fy) <- c(n, k, r)
|
dim(Fy) <- c(n, k, r)
|
||||||
dim(X) <- c(n, p, q)
|
dim(X) <- c(n, p, q)
|
||||||
|
|
||||||
# Decompose `B = alpha x beta` into `alpha` and `beta`
|
# Decompose `B = alpha x beta` into `alpha` and `beta`
|
||||||
c(alpha0, beta0) %<-% approx.kronecker(B, c(q, r), c(p, k))
|
c(alpha0, beta0) %<-% approx.kronecker(B, c(q, r), c(p, k))
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
#' Gradient Descent Bases Tensor Predictors method with Nesterov Accelerated
|
#' Gradient Descent based Tensor Predictors method with Nesterov Accelerated
|
||||||
#' Momentum
|
#' Momentum
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
kpir.momentum <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
kpir.momentum <- function(X, Fy, shape = c(dim(X)[-1], dim(Fy[-1])),
|
||||||
max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
|
max.iter = 500L, max.line.iter = 50L, step.size = 1e-3,
|
||||||
nesterov.scaling = function(a, t) { 0.5 * (1 + sqrt(1 + (2 * a)^2)) },
|
nesterov.scaling = function(a, t) 0.5 * (1 + sqrt(1 + (2 * a)^2)),
|
||||||
eps = .Machine$double.eps,
|
eps = .Machine$double.eps,
|
||||||
logger = NULL
|
logger = NULL
|
||||||
) {
|
) {
|
||||||
|
|
Loading…
Reference in New Issue