2022-04-29 16:37:25 +00:00
|
|
|
library(tensorPredictors)
|
|
|
|
library(dplyr)
|
|
|
|
library(ggplot2)
|
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
## Logger callbacks
|
|
|
|
log.prog <- function(max.iter) {
|
|
|
|
function(iter, loss, alpha, beta, ...) {
|
|
|
|
select <- as.integer(seq(1, max.iter, len = 30) <= iter)
|
|
|
|
cat("\r[", paste(c(" ", "=")[1 + select], collapse = ""),
|
|
|
|
"] ", iter, "/", max.iter, sep = "")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-04-29 16:37:25 +00:00
|
|
|
### Exec all methods for a given data set and collect logs ###
|
2022-05-18 11:02:01 +00:00
|
|
|
sim <- function(X, Fy, alpha.true, beta.true, max.iter = 500L) {
|
2022-04-29 16:37:25 +00:00
|
|
|
|
|
|
|
# Logger creator
|
|
|
|
logger <- function(name) {
|
|
|
|
eval(substitute(function(iter, loss, alpha, beta, ...) {
|
2022-05-18 16:33:28 +00:00
|
|
|
tryCatch({
|
|
|
|
hist[iter + 1L, ] <<- c(
|
|
|
|
iter = iter,
|
|
|
|
loss = loss,
|
|
|
|
dist = (dist <- dist.subspace(c(kronecker(alpha.true, beta.true)),
|
|
|
|
c(kronecker(alpha, beta)))),
|
|
|
|
dist.alpha = (dist.alpha <- dist.subspace(c(alpha.true), c(alpha))),
|
|
|
|
dist.beta = (dist.beta <- dist.subspace(c( beta.true), c(beta ))),
|
|
|
|
norm.alpha = norm(alpha, "F"),
|
|
|
|
norm.beta = norm(beta, "F"),
|
|
|
|
mse = mean((X - mlm(Fy, alpha, beta, modes = 3:2))^2)
|
|
|
|
)},
|
|
|
|
error = function(e) {
|
|
|
|
cat("Error in ", name,
|
|
|
|
", dim(alpha): ", dim(alpha),
|
|
|
|
", dim(alpha.true): ", dim(alpha.true),
|
|
|
|
", dim(beta)", dim(beta),
|
|
|
|
", dim(beta.true)", dim(beta.true),
|
|
|
|
"\n")
|
|
|
|
stop(e)
|
|
|
|
})
|
2022-04-29 16:37:25 +00:00
|
|
|
|
|
|
|
cat(sprintf(
|
2022-05-05 11:35:07 +00:00
|
|
|
"%s(%3d) | l = %-12.4f - dist = %-.4e - alpha(%d, %d) = %-.4e - beta(%d, %d) = %-.4e\n",
|
|
|
|
name, iter, loss, dist,
|
2022-04-29 16:37:25 +00:00
|
|
|
nrow(alpha), ncol(alpha), dist.alpha,
|
|
|
|
nrow(beta), ncol(beta), dist.beta
|
|
|
|
))
|
|
|
|
}, list(hist = as.symbol(paste0("hist.", name)))))
|
|
|
|
}
|
|
|
|
|
|
|
|
# Initialize logger history targets
|
2022-05-18 16:33:28 +00:00
|
|
|
hist.base <-
|
|
|
|
hist.new.vlp <- hist.new.ls <-
|
|
|
|
hist.ls <-
|
|
|
|
hist.momentum.vlp <- hist.momentum.ls <-
|
|
|
|
hist.approx.vlp <- hist.approx.ls <-
|
2022-04-29 16:37:25 +00:00
|
|
|
data.frame(iter = seq(0L, max.iter),
|
|
|
|
loss = NA, dist = NA, dist.alpha = NA, dist.beta = NA,
|
2022-05-18 11:02:01 +00:00
|
|
|
norm.alpha = NA, norm.beta = NA, mse = NA
|
2022-04-29 16:37:25 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
# Base (old)
|
2022-05-18 11:02:01 +00:00
|
|
|
kpir.base(X, Fy, max.iter = max.iter, logger = logger("base"))
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-18 16:33:28 +00:00
|
|
|
# New (simple Gradient Descent, using VLP initialization)
|
|
|
|
kpir.new(X, Fy, max.iter = max.iter, init.method = "vlp",
|
|
|
|
logger = logger("new.vlp"))
|
|
|
|
kpir.new(X, Fy, max.iter = max.iter, init.method = "ls",
|
|
|
|
logger = logger("new.ls"))
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
# Least Squares estimate (alternating estimation)
|
2022-05-24 19:07:40 +00:00
|
|
|
kpir.ls(X, Fy, sample.axis = 1L, max.iter = max.iter, logger = logger("ls"))
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
# Gradient Descent with Nesterov Momentum
|
2022-05-18 16:33:28 +00:00
|
|
|
kpir.momentum(X, Fy, max.iter = max.iter, init.method = "vlp",
|
|
|
|
logger = logger("momentum.vlp"))
|
|
|
|
kpir.momentum(X, Fy, max.iter = max.iter, init.method = "ls",
|
|
|
|
logger = logger("momentum.ls"))
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-05 11:35:07 +00:00
|
|
|
# Approximated MLE with Nesterov Momentum
|
2022-05-18 16:33:28 +00:00
|
|
|
kpir.approx(X, Fy, max.iter = max.iter, init.method = "vlp",
|
|
|
|
logger = logger("approx.vlp"))
|
|
|
|
kpir.approx(X, Fy, max.iter = max.iter, init.method = "ls",
|
|
|
|
logger = logger("approx.ls"))
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-04-29 16:37:25 +00:00
|
|
|
# Add method tags
|
2022-05-18 16:33:28 +00:00
|
|
|
hist.base$method <- factor("base")
|
|
|
|
hist.new.vlp$method <- factor("new")
|
|
|
|
hist.new.ls$method <- factor("new")
|
|
|
|
hist.ls$method <- factor("ls")
|
|
|
|
hist.momentum.vlp$method <- factor("momentum")
|
|
|
|
hist.momentum.ls$method <- factor("momentum")
|
|
|
|
hist.approx.vlp$method <- factor("approx")
|
|
|
|
hist.approx.ls$method <- factor("approx")
|
|
|
|
# Add init. method tag
|
|
|
|
hist.base$init <- factor("vlp")
|
|
|
|
hist.new.vlp$init <- factor("vlp")
|
|
|
|
hist.new.ls$init <- factor("ls")
|
|
|
|
hist.ls$init <- factor("ls")
|
|
|
|
hist.momentum.vlp$init <- factor("vlp")
|
|
|
|
hist.momentum.ls$init <- factor("ls")
|
|
|
|
hist.approx.vlp$init <- factor("vlp")
|
|
|
|
hist.approx.ls$init <- factor("ls")
|
2022-04-29 16:37:25 +00:00
|
|
|
|
|
|
|
# Combine results and return
|
2022-05-18 16:33:28 +00:00
|
|
|
rbind(
|
|
|
|
hist.base,
|
|
|
|
hist.new.vlp, hist.new.ls,
|
|
|
|
hist.ls,
|
|
|
|
hist.momentum.vlp, hist.momentum.ls,
|
|
|
|
hist.approx.vlp, hist.approx.ls
|
|
|
|
)
|
2022-05-05 11:35:07 +00:00
|
|
|
}
|
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
## Plot helper function
|
|
|
|
plot.hist2 <- function(hist, response, type = "all", ...) {
|
|
|
|
# Extract final results from history
|
2022-05-18 16:33:28 +00:00
|
|
|
sub <- na.omit(hist[c("iter", response, "method", "init", "repetition")])
|
|
|
|
sub <- aggregate(sub, list(sub$method, sub$init, sub$repetition), tail, 1)
|
2022-05-06 20:27:24 +00:00
|
|
|
|
|
|
|
# Setup ggplot
|
|
|
|
p <- ggplot(hist, aes_(x = quote(iter),
|
|
|
|
y = as.name(response),
|
|
|
|
color = quote(method),
|
2022-05-18 16:33:28 +00:00
|
|
|
linetype = quote(init),
|
|
|
|
group = quote(interaction(method, repetition, init))))
|
2022-05-06 20:27:24 +00:00
|
|
|
# Add requested layers
|
|
|
|
if (type == "all") {
|
|
|
|
p <- p + geom_line(na.rm = TRUE)
|
|
|
|
p <- p + geom_point(data = sub)
|
|
|
|
} else if (type == "mean") {
|
2022-05-18 16:33:28 +00:00
|
|
|
p <- p + geom_line(alpha = 0.4, na.rm = TRUE, linetype = "dotted")
|
|
|
|
p <- p + geom_point(data = sub, alpha = 0.4)
|
|
|
|
p <- p + geom_line(aes(group = interaction(method, init)),
|
2022-05-06 20:27:24 +00:00
|
|
|
stat = "summary", fun = "mean", na.rm = TRUE)
|
|
|
|
} else if (type == "median") {
|
2022-05-18 16:33:28 +00:00
|
|
|
p <- p + geom_line(alpha = 0.4, na.rm = TRUE, linetype = "dotted")
|
|
|
|
p <- p + geom_point(data = sub, alpha = 0.4)
|
|
|
|
p <- p + geom_line(aes(group = interaction(method, init)),
|
2022-05-06 20:27:24 +00:00
|
|
|
stat = "summary", fun = "median", na.rm = TRUE)
|
|
|
|
}
|
|
|
|
# return with theme and annotations
|
|
|
|
p + labs(...) + theme(legend.position = "bottom")
|
2022-04-29 16:37:25 +00:00
|
|
|
}
|
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
################################################################################
|
|
|
|
### Sim 1 / vec(X) has AR(0.5) Covariance ###
|
|
|
|
################################################################################
|
|
|
|
|
2022-04-29 16:37:25 +00:00
|
|
|
## Generate some test data / DEBUG
|
|
|
|
n <- 200 # Sample Size
|
2022-05-18 11:02:01 +00:00
|
|
|
p <- sample(2:15, 1) # 11
|
|
|
|
q <- sample(2:15, 1) # 7
|
|
|
|
k <- min(sample(1:15, 1), p - 1) # 3
|
|
|
|
r <- min(sample(1:15, 1), q - 1) # 5
|
2022-04-29 16:37:25 +00:00
|
|
|
print(c(n, p, q, k, r))
|
|
|
|
|
|
|
|
hist <- NULL
|
2022-05-05 11:35:07 +00:00
|
|
|
reps <- 20
|
|
|
|
for (rep in 1:reps) {
|
|
|
|
cat(sprintf("%4d / %d simulation rep. started\n", rep, reps))
|
|
|
|
|
2022-04-29 16:37:25 +00:00
|
|
|
alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
|
|
|
|
beta.true <- beta <- matrix(rnorm(p * k), p, k)
|
|
|
|
y <- rnorm(n)
|
|
|
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
|
|
|
sin(slope * y + offset)
|
|
|
|
},
|
|
|
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
|
|
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
|
|
|
))
|
|
|
|
Delta <- 0.5^abs(outer(seq_len(p * q), seq_len(p * q), `-`))
|
|
|
|
X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)
|
2022-05-18 11:02:01 +00:00
|
|
|
dim(X) <- c(n, p, q)
|
|
|
|
dim(Fy) <- c(n, k, r)
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
hist.sim <- sim(X, Fy, alpha.true, beta.true)
|
2022-04-29 16:37:25 +00:00
|
|
|
hist.sim$repetition <- rep
|
|
|
|
|
|
|
|
hist <- rbind(hist, hist.sim)
|
|
|
|
}
|
|
|
|
|
2022-05-05 11:35:07 +00:00
|
|
|
# Save simulation results
|
|
|
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
|
|
|
saveRDS(hist, file = sprintf("AR_%s.rds", datetime))
|
2022-04-29 16:37:25 +00:00
|
|
|
|
2022-05-05 11:35:07 +00:00
|
|
|
# for GGPlot2, as factors for grouping
|
2022-04-29 16:37:25 +00:00
|
|
|
hist$repetition <- factor(hist$repetition)
|
|
|
|
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
# Save simulation results
|
|
|
|
sim.name <- "sim01"
|
|
|
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
|
|
|
saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime))
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
# for GGPlot2, as factors for grouping
|
|
|
|
hist$repetition <- factor(hist$repetition)
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
for (response in c("loss", "dist", "dist.alpha", "dist.beta")) {
|
|
|
|
for (fun in c("all", "mean", "median")) {
|
|
|
|
print(plot.hist2(hist, response, fun, title = fun) + coord_trans(x = "log1p"))
|
|
|
|
dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun),
|
|
|
|
width = 768, height = 768, res = 125)
|
|
|
|
}
|
|
|
|
}
|
2022-05-05 11:35:07 +00:00
|
|
|
|
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
################################################################################
|
|
|
|
### Sim 2 / X has AR(0.707) %x% AR(0.707) Covariance ###
|
|
|
|
################################################################################
|
2022-05-05 11:35:07 +00:00
|
|
|
|
|
|
|
n <- 200 # Sample Size
|
|
|
|
p <- 11 # sample(1:15, 1)
|
2022-05-18 11:02:01 +00:00
|
|
|
q <- 7 # sample(1:15, 1)
|
|
|
|
k <- 3 # sample(1:15, 1)
|
2022-05-05 11:35:07 +00:00
|
|
|
r <- 5 # sample(1:15, 1)
|
|
|
|
print(c(n, p, q, k, r))
|
|
|
|
|
|
|
|
hist <- NULL
|
|
|
|
reps <- 20
|
2022-05-18 11:02:01 +00:00
|
|
|
max.iter <- 2
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
Delta.1 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`))
|
|
|
|
Delta.2 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`))
|
2022-05-05 11:35:07 +00:00
|
|
|
for (rep in 1:reps) {
|
2022-05-18 16:33:28 +00:00
|
|
|
cat(sprintf("\n\033[1m%4d / %d simulation rep. started\033[0m\n", rep, reps))
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
alpha.1.true <- alpha.1 <- matrix(rnorm(q * r), q, r)
|
|
|
|
alpha.2.true <- alpha.2 <- matrix(rnorm(p * k), p, k)
|
2022-05-05 11:35:07 +00:00
|
|
|
y <- rnorm(n)
|
|
|
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
|
|
|
sin(slope * y + offset)
|
|
|
|
},
|
|
|
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
|
|
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
|
|
|
))
|
2022-05-18 11:02:01 +00:00
|
|
|
dim(Fy) <- c(n, k, r)
|
2022-05-18 16:33:28 +00:00
|
|
|
X <- mlm(Fy, alpha.1, alpha.2, modes = 3:2)
|
2022-05-24 19:07:40 +00:00
|
|
|
X <- X + rtensornorm(n, 0, Delta.1, Delta.2, sample.axis = 1L)
|
2022-05-05 11:35:07 +00:00
|
|
|
|
2022-05-18 16:33:28 +00:00
|
|
|
hist.sim <- sim(X, Fy, alpha.1.true, alpha.2.true, max.iter = max.iter)
|
2022-05-05 11:35:07 +00:00
|
|
|
hist.sim$repetition <- rep
|
|
|
|
|
|
|
|
hist <- rbind(hist, hist.sim)
|
|
|
|
}
|
|
|
|
|
|
|
|
# Save simulation results
|
2022-05-06 20:27:24 +00:00
|
|
|
sim.name <- "sim02"
|
2022-05-05 11:35:07 +00:00
|
|
|
datetime <- format(Sys.time(), "%Y%m%dT%H%M")
|
2022-05-06 20:27:24 +00:00
|
|
|
saveRDS(hist, file = sprintf("%s_%s.rds", sim.name, datetime))
|
2022-05-05 11:35:07 +00:00
|
|
|
|
|
|
|
# for GGPlot2, as factors for grouping
|
|
|
|
hist$repetition <- factor(hist$repetition)
|
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
for (response in c("loss", "mse", "dist", "dist.alpha", "dist.beta")) {
|
2022-05-06 20:27:24 +00:00
|
|
|
for (fun in c("all", "mean", "median")) {
|
2022-05-18 16:33:28 +00:00
|
|
|
title <- paste(fun, paste(c("n", "p", "q", "k", "r"), c(n, p, q, k, r), sep = "=", collapse = ", "))
|
|
|
|
print(plot.hist2(hist, response, fun, title = title) +
|
|
|
|
coord_trans(x = "log1p"))
|
2022-05-06 20:27:24 +00:00
|
|
|
dev.print(png, file = sprintf("%s_%s_%s_%s.png", sim.name, datetime, response, fun),
|
|
|
|
width = 768, height = 768, res = 125)
|
2022-05-18 11:02:01 +00:00
|
|
|
if (response != "loss") {
|
2022-05-18 16:33:28 +00:00
|
|
|
print(plot.hist2(hist, response, fun, title = title) +
|
|
|
|
coord_trans(x = "log1p", y = "log1p"))
|
2022-05-18 11:02:01 +00:00
|
|
|
dev.print(png, file = sprintf("%s_%s_%s_%s_log.png", sim.name, datetime, response, fun),
|
|
|
|
width = 768, height = 768, res = 125)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-05-18 16:33:28 +00:00
|
|
|
stats <- local({
|
|
|
|
# final result from history
|
|
|
|
sub <- na.omit(hist)
|
|
|
|
sub <- aggregate(sub, list(
|
|
|
|
method = sub$method, init = sub$init, repetition = sub$repetition
|
|
|
|
), tail, 1)
|
|
|
|
|
|
|
|
# aggregate statistics over repetitions
|
|
|
|
stats.mean <- aggregate(subset(sub, select = c("loss", "mse", "dist.alpha", "dist.beta")),
|
|
|
|
list(method = sub$method, init = sub$init), mean)
|
|
|
|
stats.sd <- aggregate(subset(sub, select = c("loss", "mse", "dist.alpha", "dist.beta")),
|
|
|
|
list(method = sub$method, init = sub$init), sd)
|
|
|
|
|
|
|
|
# merge mean and sd stats together
|
|
|
|
merge(stats.mean, stats.sd, by = c("method", "init"), suffixes = c(".mean", ".sd"))
|
|
|
|
})
|
|
|
|
print(stats, digits = 2)
|
|
|
|
# method init loss.mean mse.mean dist.alpha.mean dist.beta.mean loss.sd mse.sd dist.alpha.sd dist.beta.sd
|
|
|
|
# 1 approx ls 5457 0.99 0.033 0.030 163 0.025 0.017 0.012
|
|
|
|
# 2 approx vlp 6819 3.99 0.267 0.287 1995 12.256 0.448 0.457
|
|
|
|
# 3 base vlp -2642 1.82 0.248 0.271 1594 2.714 0.447 0.458
|
|
|
|
# 4 momentum ls -3479 0.99 0.037 0.035 95 0.025 0.017 0.015
|
|
|
|
# 5 momentum vlp -2704 1.78 0.233 0.260 1452 2.658 0.438 0.448
|
|
|
|
# 6 new ls -3479 0.99 0.037 0.035 95 0.025 0.017 0.015
|
|
|
|
# 7 new vlp -2704 1.78 0.233 0.260 1452 2.658 0.438 0.448
|
|
|
|
|
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
################################################################################
|
|
|
|
### Sim 3 ###
|
|
|
|
################################################################################
|
|
|
|
n <- 200
|
|
|
|
p <- c(7, 11, 5) # response dimensions (order 3)
|
|
|
|
q <- c(3, 6, 2) # predictor dimensions (order 3)
|
|
|
|
|
|
|
|
# currently only kpir.ls suppoert higher orders (order > 2)
|
|
|
|
sim3 <- function(X, Fy, alphas.true, max.iter = 500L) {
|
|
|
|
|
|
|
|
# Logger creator
|
|
|
|
logger <- function(name) {
|
|
|
|
eval(substitute(function(iter, loss, alpha, beta, ...) {
|
|
|
|
hist[iter + 1L, ] <<- c(
|
|
|
|
iter = iter,
|
|
|
|
loss = loss,
|
|
|
|
mse = (mse <- mean((X - mlm(Fy, alpha, beta, modes = 3:2))^2)),
|
|
|
|
(dist <- unlist(Map(dist.subspace, alphas, alphas.true)))
|
|
|
|
)
|
|
|
|
|
|
|
|
cat(sprintf(
|
|
|
|
"%s(%3d) | loss: %-12.4f - mse: %-12.4f - sum(dist): %-.4e\n",
|
|
|
|
name, iter, loss, sum(dist)
|
|
|
|
))
|
|
|
|
}, list(hist = as.symbol(paste0("hist.", name)))))
|
2022-05-05 11:35:07 +00:00
|
|
|
}
|
2022-05-18 11:02:01 +00:00
|
|
|
|
|
|
|
# Initialize logger history targets
|
|
|
|
hist.ls <-
|
|
|
|
do.call(data.frame, c(list(
|
|
|
|
iter = seq(0, r), loss = NA, mse = NA),
|
|
|
|
dist = rep(NA, length(dim(X)) - 1L)
|
|
|
|
))
|
|
|
|
|
|
|
|
# Approximated MLE with Nesterov Momentum
|
2022-05-24 19:07:40 +00:00
|
|
|
kpir.ls(X, Fy, sample.axis = 1L, max.iter = max.iter, logger = logger("ls"))
|
2022-05-18 11:02:01 +00:00
|
|
|
|
|
|
|
# Add method tags
|
|
|
|
hist.ls$method <- factor("ls")
|
|
|
|
|
|
|
|
# # Combine results and return
|
|
|
|
# rbind(hist.base, hist.new, hist.momentum, hist.approx, hist.ls)
|
|
|
|
hist.ls
|
|
|
|
}
|
|
|
|
|
|
|
|
sample.data3 <- function(n, p, q) {
|
|
|
|
stopifnot(length(p) == length(q))
|
|
|
|
stopifnot(all(q <= p))
|
|
|
|
|
|
|
|
Deltas <- Map(function(nrow) {
|
|
|
|
|
|
|
|
}, p)
|
|
|
|
|
|
|
|
list(X, Fy, alphas, Deltas)
|
2022-05-05 11:35:07 +00:00
|
|
|
}
|
|
|
|
|
2022-05-18 11:02:01 +00:00
|
|
|
|
|
|
|
|
2022-05-06 20:27:24 +00:00
|
|
|
################################################################################
|
|
|
|
### WIP ###
|
|
|
|
################################################################################
|
|
|
|
n <- 200 # Sample Size
|
|
|
|
p <- 11 # sample(1:15, 1)
|
|
|
|
q <- 3 # sample(1:15, 1)
|
|
|
|
k <- 7 # sample(1:15, 1)
|
|
|
|
r <- 5 # sample(1:15, 1)
|
|
|
|
print(c(n, p, q, k, r))
|
|
|
|
|
|
|
|
alpha.true <- alpha <- matrix(rnorm(q * r), q, r)
|
|
|
|
beta.true <- beta <- matrix(rnorm(p * k), p, k)
|
|
|
|
y <- rnorm(n)
|
|
|
|
Fy <- do.call(cbind, Map(function(slope, offset) {
|
|
|
|
sin(slope * y + offset)
|
|
|
|
},
|
|
|
|
head(rep(seq(1, ceiling(0.5 * k * r)), each = 2), k * r),
|
|
|
|
head(rep(c(0, pi / 2), ceiling(0.5 * k * r)), k * r)
|
|
|
|
))
|
|
|
|
X <- tcrossprod(Fy, kronecker(alpha, beta)) + CVarE:::rmvnorm(n, sigma = Delta)
|
|
|
|
|
|
|
|
Delta.1 <- sqrt(0.5)^abs(outer(seq_len(q), seq_len(q), `-`))
|
|
|
|
Delta.2 <- sqrt(0.5)^abs(outer(seq_len(p), seq_len(p), `-`))
|
|
|
|
Delta <- kronecker(Delta.1, Delta.2)
|
|
|
|
|
|
|
|
shape <- c(p, q, k, r)
|
|
|
|
|
|
|
|
# Base (old)
|
|
|
|
Rprof(fit.base <- kpir.base(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
|
|
|
|
|
|
|
# New (simple Gradient Descent)
|
|
|
|
Rprof(fit.new <- kpir.new(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
|
|
|
|
|
|
|
# Gradient Descent with Nesterov Momentum
|
|
|
|
Rprof(fit.momentum <- kpir.momentum(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
|
|
|
|
|
|
|
# # Residual Covariance Kronecker product assumpton version
|
|
|
|
# Rprof(fit.kron <- kpir.kron(X, Fy, shape, max.iter = 500, logger = prog(500)))
|
|
|
|
|
|
|
|
# Approximated MLE with Nesterov Momentum
|
|
|
|
Rprof("kpir.approx.Rprof")
|
|
|
|
fit.approx <- kpir.approx(X, Fy, shape, max.iter = 500, logger = prog(500))
|
|
|
|
summaryRprof("kpir.approx.Rprof")
|
|
|
|
|
|
|
|
par(mfrow = c(2, 2))
|
|
|
|
matrixImage(Delta, main = expression(Delta))
|
|
|
|
matrixImage(fit.base$Delta, main = expression(hat(Delta)), sub = "base")
|
|
|
|
matrixImage(fit.momentum$Delta, main = expression(hat(Delta)), sub = "momentum")
|
|
|
|
matrixImage(kronecker(fit.approx$Delta.1, fit.approx$Delta.2), main = expression(hat(Delta)), sub = "approx")
|
|
|
|
|
|
|
|
par(mfrow = c(2, 2))
|
|
|
|
matrixImage(Delta.1, main = expression(Delta[1]))
|
|
|
|
matrixImage(fit.approx$Delta.1, main = expression(hat(Delta)[1]), sub = "approx")
|
|
|
|
matrixImage(Delta.2, main = expression(Delta[2]))
|
|
|
|
matrixImage(fit.approx$Delta.2, main = expression(hat(Delta)[2]), sub = "approx")
|
|
|
|
|
|
|
|
par(mfrow = c(2, 2))
|
|
|
|
matrixImage(alpha.true, main = expression(alpha))
|
|
|
|
matrixImage(fit.base$alpha, main = expression(hat(alpha)), sub = "base")
|
|
|
|
matrixImage(fit.momentum$alpha, main = expression(hat(alpha)), sub = "momentum")
|
|
|
|
matrixImage(fit.approx$alpha, main = expression(hat(alpha)), sub = "approx")
|
|
|
|
|
|
|
|
par(mfrow = c(2, 2))
|
|
|
|
matrixImage(beta.true, main = expression(beta))
|
|
|
|
matrixImage(fit.base$beta, main = expression(hat(beta)), sub = "base")
|
|
|
|
matrixImage(fit.momentum$beta, main = expression(hat(beta)), sub = "momentum")
|
|
|
|
matrixImage(fit.approx$beta, main = expression(hat(beta)), sub = "approx")
|
|
|
|
|
|
|
|
|
2022-05-05 11:35:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
### EEG ###
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
suppressPackageStartupMessages({
|
|
|
|
library(pROC)
|
|
|
|
})
|
|
|
|
|
|
|
|
# acc: Accuracy. P(Yhat = Y). Estimated as: (TP+TN)/(P+N).
|
|
|
|
acc <- function(y_true, y_pred) mean(round(y_pred) == y_true)
|
|
|
|
# err: Error rate. P(Yhat != Y). Estimated as: (FP+FN)/(P+N).
|
|
|
|
err <- function(y_true, y_pred) mean(round(y_pred) != y_true)
|
|
|
|
# fpr: False positive rate. P(Yhat = + | Y = -). aliases: Fallout.
|
|
|
|
fpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 0])
|
|
|
|
# tpr: True positive rate. P(Yhat = + | Y = +). aliases: Sensitivity, Recall.
|
|
|
|
tpr <- function(y_true, y_pred) mean((round(y_pred) == 1)[y_true == 1])
|
|
|
|
# fnr: False negative rate. P(Yhat = - | Y = +). aliases: Miss.
|
|
|
|
fnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 1])
|
|
|
|
# tnr: True negative rate. P(Yhat = - | Y = -).
|
|
|
|
tnr <- function(y_true, y_pred) mean((round(y_pred) == 0)[y_true == 0])
|
|
|
|
|
|
|
|
# Load EEG dataset
|
|
|
|
dataset <- readRDS('eeg_analysis/eeg_data.rds')
|
|
|
|
|
|
|
|
eeg_cross_validation <- function(nrFolds = 10L) {
|
|
|
|
# Set dimenional parameters.
|
|
|
|
n <- nrow(dataset) # sample size (nr. of people)
|
|
|
|
p <- 64L # nr. of predictors (count of sensorce)
|
|
|
|
t <- 256L # nr. of time points (measurements)
|
|
|
|
|
|
|
|
# Extract dimension names from X.
|
|
|
|
nNames <- dataset$PersonID
|
|
|
|
tNames <- as.character(seq(t))
|
|
|
|
pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]
|
|
|
|
|
|
|
|
# Split into X-y.
|
|
|
|
X <- as.matrix(dataset[, -(1:2)])
|
|
|
|
y <- dataset$Case_Control
|
|
|
|
# Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
|
|
|
|
# (Each of the n rows in X iterate over the time bevore switching sensorce.)
|
|
|
|
dim(X) <- c(n, t, p)
|
|
|
|
dimnames(X) <- list(nNames, tNames, pNames)
|
|
|
|
|
|
|
|
# Setup Cross-Validation result
|
|
|
|
CV <- data.frame(
|
|
|
|
fold = (seq_len(n) %% nrFolds) + 1L,
|
|
|
|
y_true = y,
|
|
|
|
y_pred = NA
|
|
|
|
)
|
|
|
|
|
|
|
|
#
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#' @param ppc Number of "p"redictor "p"rincipal "c"omponents.
|
|
|
|
#' @param tpc Number of "t"ime "p"rincipal "c"omponents.
|
|
|
|
egg_analysis_reduced <- function(methods, ppc, tpc) {
|
|
|
|
# Set dimenional parameters.
|
|
|
|
n <- nrow(dataset) # sample size (nr. of people)
|
|
|
|
p <- 64L # nr. of predictors (count of sensorce)
|
|
|
|
t <- 256L # nr. of time points (measurements)
|
|
|
|
|
|
|
|
# Extract dimension names from X.
|
|
|
|
nNames <- dataset$PersonID
|
|
|
|
tNames <- as.character(seq(t))
|
|
|
|
pNames <- unlist(strsplit(colnames(dataset)[2 + t * seq(p)], '_'))[c(T, F)]
|
|
|
|
|
|
|
|
# Split into X-y.
|
|
|
|
X <- as.matrix(dataset[, -(1:2)])
|
|
|
|
y <- dataset$Case_Control
|
|
|
|
# Reshape X as 3D tenros of shape (n, t, p) aka. samples, timesteps, predictors.
|
|
|
|
# (Each of the n rows in X iterate over the time bevore switching sensorce.)
|
|
|
|
X <- array(X, dim = c(n, t, p),
|
|
|
|
dimnames = list(nNames, tNames, pNames))
|
|
|
|
# Reorder axis to (p, t, n) = (predictors, timesteps, samples).
|
|
|
|
X <- aperm(X, c(3, 2, 1))
|
|
|
|
|
|
|
|
# Compute Mean of X.
|
|
|
|
X_mean <- apply(X, c(1, 2), mean)
|
|
|
|
X_center <- X - as.vector(X_mean)
|
|
|
|
|
|
|
|
# Compute "left" and "right" cov-matrices.
|
|
|
|
Sigma_t <- matrix(apply(apply(X_center, 3, crossprod), 1, mean), t, t)
|
|
|
|
Sigma_p <- matrix(apply(apply(X_center, 3, tcrossprod), 1, mean), p, p)
|
|
|
|
# Get "left", "right" principal components.
|
|
|
|
V_p <- svd(Sigma_p, ppc, 0L)$u
|
|
|
|
V_t <- svd(Sigma_t, tpc, 0L)$u
|
|
|
|
|
|
|
|
# Reduce dimension.
|
|
|
|
X_reduced <- apply(X_center, 3, function(x) crossprod(V_p, x %*% V_t))
|
|
|
|
dim(X_reduced) <- c(ppc, tpc, n)
|
|
|
|
|
|
|
|
# Vectorize to shape of (predictors * timesteps, samples) and transpose to
|
|
|
|
# (samples, predictors * timesteps).
|
|
|
|
X_vec <- t(matrix(X_reduced, ppc * tpc, n))
|
|
|
|
|
|
|
|
loo.cv <- expand.grid(method = names(methods), fold = 1:n)
|
|
|
|
loo.cv$y_true <- y[loo.cv$fold]
|
|
|
|
loo.cv$y_pred <- NA
|
|
|
|
|
|
|
|
# Performe LOO cross-validation for each method.
|
|
|
|
for (i in 1L:n) {
|
|
|
|
# Print progress.
|
|
|
|
cat(sprintf("\rCross-Validation (p-PC: %d, t-PC: %d): %4d/%d",
|
|
|
|
ppc, tpc, i, n))
|
|
|
|
# Leave Out the i-th element.
|
|
|
|
X_train <- X_vec[-i, ]
|
|
|
|
X_test <- X_vec[i, ]
|
|
|
|
y_train <- y[-i]
|
|
|
|
# Center y.
|
|
|
|
y_train <- scale(y_train, center = TRUE, scale = FALSE)
|
|
|
|
|
|
|
|
# For each method.
|
|
|
|
for (method.name in names(methods)) {
|
|
|
|
method <- methods[[method.name]]
|
|
|
|
# Compute reduction using current method under common API.
|
|
|
|
sdr <- method(X_train, y_train, ppc, tpc)
|
|
|
|
B <- kronecker(sdr$alpha, sdr$beta)
|
|
|
|
# Fit a linear model (which ensures a common sdr direction if possible).
|
|
|
|
model <- glm(y ~ x, family = binomial(link = "logit"),
|
|
|
|
data = data.frame(y = y[-i], x = X_train %*% B))
|
|
|
|
# Predict out of sample and store in LOO CV data.frame.
|
|
|
|
y_pred <- predict(model, data.frame(x = X_test %*% B), type = "response")
|
|
|
|
loo.cv[loo.cv$method == method.name & loo.cv$fold == i, 'y_pred'] <- y_pred
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (method.name in names(methods)) {
|
|
|
|
labels <- loo.cv[loo.cv$method == method.name, 'y_true']
|
|
|
|
predictions <- loo.cv[loo.cv$method == method.name, 'y_pred']
|
|
|
|
ROC <- roc(unlist(labels), unlist(predictions), quiet = TRUE)
|
|
|
|
# Combined accuracy, error, ...
|
|
|
|
cat("\nMethod: ", method.name, "\n",
|
|
|
|
"acc: ", acc(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"err: ", err(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"fpr: ", fpr(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"tpr: ", tpr(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"fnr: ", fnr(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"tnr: ", tnr(unlist(labels), unlist(predictions)), "\n",
|
|
|
|
"auc: ", ROC$auc, "\n",
|
|
|
|
"auc sd: ", sqrt(var(ROC)), "\n",
|
|
|
|
sep = '')
|
|
|
|
}
|
|
|
|
|
|
|
|
loo.cv
|
|
|
|
}
|
|
|
|
|
|
|
|
methods <- list(
|
|
|
|
KPIR_LS = function(...) tensor_predictor(..., method = "KPIR_LS"),
|
|
|
|
KPIR_MLE = function(...) tensor_predictor(..., method = "KPIR_MLE"),
|
|
|
|
KPFC1 = function(...) tensor_predictor(..., method = "KPFC1"),
|
|
|
|
KPFC2 = function(...) tensor_predictor(..., method = "KPFC2"),
|
|
|
|
LSIR = LSIR
|
|
|
|
)
|
|
|
|
|
|
|
|
# ppc, tpc
|
|
|
|
# ------------
|
|
|
|
params <- list( c( 4, 3)
|
|
|
|
, c( 15, 15)
|
|
|
|
, c( 30, 20)
|
|
|
|
)
|
|
|
|
|
|
|
|
for (param in params) {
|
|
|
|
c(ppc, tpc) %<-% param
|
|
|
|
sim <- egg_analysis_reduced(methods, ppc, tpc)
|
|
|
|
|
|
|
|
attr(sim, 'param') <- c(ppc = ppc, tpc = tpc)
|
|
|
|
|
|
|
|
saveRDS(sim, file = sprintf('eeg_analysis_reduced_%d_%d.rds', ppc, tpc))
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# plot.hist(hist, "loss",
|
|
|
|
# title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
|
|
|
# l(hat(alpha), hat(beta)))),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(l(hat(alpha), hat(beta)))
|
|
|
|
# )
|
|
|
|
# plot.stats(hist, "loss",
|
|
|
|
# title = bquote(paste("Optimization Objective: negative log-likelihood ",
|
|
|
|
# l(hat(alpha), hat(beta)))),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(l(hat(alpha), hat(beta)))
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_loss_stat_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
|
|
|
# geom_line(aes(y = dist)) +
|
|
|
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist)),
|
|
|
|
# aggregate(sub, list(method, repetition), tail, 1)
|
|
|
|
# ), aes(y = dist)) +
|
|
|
|
# labs(
|
|
|
|
# title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_dist_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, y = dist, color = method, group = method)) +
|
|
|
|
# geom_ribbon(aes(color = NULL, fill = method), alpha = 0.2,
|
|
|
|
# stat = "summary", fun.min = "min", fun.max = "max", na.rm = TRUE) +
|
|
|
|
# geom_ribbon(aes(color = NULL, fill = method), alpha = 0.4,
|
|
|
|
# stat = "summary", fun.min = function(y) quantile(y, 0.25),
|
|
|
|
# fun.max = function(y) quantile(y, 0.75), na.rm = TRUE) +
|
|
|
|
# geom_line(stat = "summary", fun = "mean", na.rm = TRUE) +
|
|
|
|
# labs(
|
|
|
|
# title = bquote(paste("Distance of estimate ", hat(B), " to true ", B == alpha %*% beta)),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(B * B^T - hat(B) * hat(B)^T)),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_dist_stat_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
|
|
|
# geom_line(aes(y = dist.alpha)) +
|
|
|
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist.alpha)),
|
|
|
|
# aggregate(sub, list(method, repetition), tail, 1)
|
|
|
|
# ), aes(y = dist.alpha)) +
|
|
|
|
# labs(
|
|
|
|
# title = bquote(paste("Distance of estimate ", hat(alpha), " to true ", alpha)),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(alpha * alpha^T - hat(alpha) * hat(alpha)^T)),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_dist_alpha_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
|
|
|
# geom_line(aes(y = dist.beta)) +
|
|
|
|
# geom_point(data = with(sub <- subset(hist, !is.na(dist.beta)),
|
|
|
|
# aggregate(sub, list(method, repetition), tail, 1)
|
|
|
|
# ), aes(y = dist.beta)) +
|
|
|
|
# labs(
|
|
|
|
# title = bquote(paste("Distance of estimate ", hat(beta), " to true ", beta)),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(beta * beta^T - hat(beta) * hat(beta)^T)),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_dist_beta_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
|
|
|
# geom_line(aes(y = norm.alpha)) +
|
|
|
|
# geom_point(data = with(sub <- subset(hist, !is.na(norm.alpha)),
|
|
|
|
# aggregate(sub, list(method, repetition), tail, 1)
|
|
|
|
# ), aes(y = norm.alpha)) +
|
|
|
|
# labs(
|
|
|
|
# title = expression(paste("Norm of ", hat(alpha))),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(hat(alpha))[F]),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_norm_alpha_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
# ggplot(hist, aes(x = iter, color = method, group = interaction(method, repetition))) +
|
|
|
|
# geom_line(aes(y = norm.beta)) +
|
|
|
|
# geom_point(data = with(sub <- subset(hist, !is.na(norm.beta)),
|
|
|
|
# aggregate(sub, list(method, repetition), tail, 1)
|
|
|
|
# ), aes(y = norm.beta)) +
|
|
|
|
# labs(
|
|
|
|
# title = expression(paste("Norm of ", hat(beta))),
|
|
|
|
# subtitle = bquote(paste(Delta[i][j] == 0.25, " * ", 0.5^abs(i - j), ", ",
|
|
|
|
# "20 repetitions, ", n == .(n), ", ",
|
|
|
|
# p == .(p), ", ", q == .(q), ", ", k == .(k), ", ", r == .(r))),
|
|
|
|
# x = "nr. of iterations",
|
|
|
|
# y = expression(abs(hat(beta))[F]),
|
|
|
|
# color = "method"
|
|
|
|
# ) +
|
|
|
|
# theme(legend.position = "bottom")
|
|
|
|
|
|
|
|
# dev.print(png, file = sprintf("sim01_norm_beta_%s.png", datetime),
|
|
|
|
# width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2022-04-29 16:37:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# local({
|
|
|
|
# par(mfrow = c(2, 3), oma = c(2, 1, 1, 1), mar = c(3.1, 2.1, 2.1, 1.1), lwd = 2)
|
|
|
|
# plot(c(1, max.iter), range(c(hist.base$loss, hist.new$loss, hist.momentum$loss, hist.kron$loss), na.rm = TRUE),
|
|
|
|
# type = "n", log = "x", main = "loss")
|
|
|
|
# lines( hist.base$loss, col = 2)
|
|
|
|
# lines( hist.new$loss, col = 3)
|
|
|
|
# lines(hist.momentum$loss, col = 4)
|
|
|
|
# lines( hist.kron$loss, col = 5)
|
|
|
|
# yrange <- range(c(hist.base$step.size, hist.new$step.size, hist.momentum$step.size, hist.kron$step.size),
|
|
|
|
# na.rm = TRUE)
|
|
|
|
# plot(c(1, max.iter), yrange,
|
|
|
|
# type = "n", log = "x", main = "step.size")
|
|
|
|
# lines( hist.base$step.size, col = 2)
|
|
|
|
# lines( hist.new$step.size, col = 3)
|
|
|
|
# lines(hist.momentum$step.size, col = 4)
|
|
|
|
# lines( hist.kron$step.size, col = 5)
|
|
|
|
# # lines( hist.base$step.size, col = 4) # there is no step.size
|
|
|
|
# plot(0, 0, type = "l", bty = "n", xaxt = "n", yaxt = "n")
|
|
|
|
# legend("topleft", legend = c("Base", "GD", "GD + Momentum", "Kron + GD + Momentum"), col = 2:5,
|
|
|
|
# lwd = par("lwd"), xpd = TRUE, horiz = FALSE, cex = 1.2, bty = "n",
|
|
|
|
# x.intersp = 1, y.intersp = 1.5)
|
|
|
|
# # xpd = TRUE makes the legend plot to the figure
|
|
|
|
# plot(c(1, max.iter), range(c(hist.base$dist, hist.new$dist, hist.momentum$dist, hist.kron$dist), na.rm = TRUE),
|
|
|
|
# type = "n", log = "x", main = "dist")
|
|
|
|
# lines( hist.base$dist, col = 2)
|
|
|
|
# lines( hist.new$dist, col = 3)
|
|
|
|
# lines(hist.momentum$dist, col = 4)
|
|
|
|
# lines( hist.kron$dist, col = 5)
|
|
|
|
# plot(c(1, max.iter), range(c(hist.base$dist.alpha, hist.new$dist.alpha, hist.momentum$dist.alpha, hist.kron$dist.alpha), na.rm = TRUE),
|
|
|
|
# type = "n", log = "x", main = "dist.alpha")
|
|
|
|
# lines( hist.base$dist.alpha, col = 2)
|
|
|
|
# lines( hist.new$dist.alpha, col = 3)
|
|
|
|
# lines(hist.momentum$dist.alpha, col = 4)
|
|
|
|
# lines( hist.kron$dist.alpha, col = 5)
|
|
|
|
# plot(c(1, max.iter), range(c(hist.base$dist.beta, hist.new$dist.beta, hist.momentum$dist.beta, hist.kron$dist.beta), na.rm = TRUE),
|
|
|
|
# type = "n", log = "x", main = "dist.beta")
|
|
|
|
# lines( hist.base$dist.beta, col = 2)
|
|
|
|
# lines( hist.new$dist.beta, col = 3)
|
|
|
|
# lines(hist.momentum$dist.beta, col = 4)
|
|
|
|
# lines( hist.kron$dist.beta, col = 5)
|
|
|
|
# # par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
|
|
|
|
# # plot(0, 0, type = 'l', bty = 'n', xaxt = 'n', yaxt = 'n')
|
|
|
|
# # legend('bottom', legend = c('GD', 'GD + Nesterov Momentum', 'Alternating'), col = 2:4,
|
|
|
|
# # lwd = 5, xpd = TRUE, horiz = TRUE, cex = 1, seg.len = 1, bty = 'n')
|
|
|
|
# # # xpd = TRUE makes the legend plot to the figure
|
|
|
|
# })
|
|
|
|
# dev.print(png, file = "loss.png", width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
|
|
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
|
|
|
# par(mfrow = c(2, 4))
|
|
|
|
# a2 <- sign(sum(sign(a1 * a2))) * a2
|
|
|
|
# a3 <- sign(sum(sign(a1 * a3))) * a3
|
|
|
|
# a4 <- sign(sum(sign(a1 * a4))) * a4
|
|
|
|
# b2 <- sign(sum(sign(b1 * b2))) * b2
|
|
|
|
# b3 <- sign(sum(sign(b1 * b3))) * b3
|
|
|
|
# b4 <- sign(sum(sign(b1 * b4))) * b4
|
|
|
|
|
|
|
|
# matrixImage(a1, main = expression(alpha))
|
|
|
|
# matrixImage(a2, main = expression(paste(hat(alpha)["Base"])))
|
|
|
|
# matrixImage(a3, main = expression(paste(hat(alpha)["GD"])))
|
|
|
|
# matrixImage(a4, main = expression(paste(hat(alpha)["GD+Nest"])))
|
|
|
|
# matrixImage(b1, main = expression(beta))
|
|
|
|
# matrixImage(b2, main = expression(paste(hat(beta)["Base"])))
|
|
|
|
# matrixImage(b3, main = expression(paste(hat(beta)["GD"])))
|
|
|
|
# matrixImage(b4, main = expression(paste(hat(beta)["GD+Nest"])))
|
|
|
|
# })
|
|
|
|
# dev.print(png, file = "estimates.png", width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
# with(list(d1 = Delta, d2 = fit.base$Delta, d3 = fit.new$Delta, d4 = fit.momentum$Delta), {
|
|
|
|
# par(mfrow = c(2, 2))
|
|
|
|
|
|
|
|
# matrixImage(d1, main = expression(Delta))
|
|
|
|
# matrixImage(d2, main = expression(hat(Delta)["Base"]))
|
|
|
|
# matrixImage(d3, main = expression(hat(Delta)["GD"]))
|
|
|
|
# matrixImage(d4, main = expression(hat(Delta)["GD+Nest"]))
|
|
|
|
# })
|
|
|
|
# dev.print(png, file = "Delta.png", width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
|
|
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
|
|
|
# par(mfrow = c(2, 2))
|
|
|
|
|
|
|
|
# matrixImage(kronecker(a1, b1), main = expression(B))
|
|
|
|
# matrixImage(kronecker(a2, b2), main = expression(hat(B)["Base"]))
|
|
|
|
# matrixImage(kronecker(a3, b3), main = expression(hat(B)["GD"]))
|
|
|
|
# matrixImage(kronecker(a4, b4), main = expression(hat(B)["GD+Nest"]))
|
|
|
|
# })
|
|
|
|
# dev.print(png, file = "B.png", width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
# with(list(a1 = alpha, a2 = fit.base$alpha, a3 = fit.new$alpha, a4 = fit.momentum$alpha,
|
|
|
|
# b1 = beta, b2 = fit.base$beta, b3 = fit.new$beta, b4 = fit.momentum$beta), {
|
|
|
|
# par(mfrow = c(3, 1), lwd = 1)
|
|
|
|
|
|
|
|
# d2 <- kronecker(a1, b1) - kronecker(a2, b2)
|
|
|
|
# d3 <- kronecker(a1, b1) - kronecker(a3, b3)
|
|
|
|
# d4 <- kronecker(a1, b1) - kronecker(a4, b4)
|
|
|
|
# xlim <- c(-1, 1) * max(abs(c(d2, d3, d4)))
|
|
|
|
# breaks <- seq(xlim[1], xlim[2], len = 41)
|
|
|
|
|
|
|
|
# hist(d2, main = expression(paste(base, (B - hat(B))[i])),
|
|
|
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
|
|
|
# lines(density(d2), col = 2)
|
|
|
|
# abline(v = range(d2), lty = 2)
|
|
|
|
# hist(d3, main = expression(paste(GD, (B - hat(B))[i])),
|
|
|
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
|
|
|
# lines(density(d3), col = 3)
|
|
|
|
# abline(v = range(d3), lty = 2)
|
|
|
|
# hist(d4, main = expression(paste(GD + Nest, (B - hat(B))[i])),
|
|
|
|
# breaks = breaks, xlim = xlim, freq = FALSE)
|
|
|
|
# lines(density(d4), col = 4)
|
|
|
|
# abline(v = range(d4), lty = 2)
|
|
|
|
# })
|
|
|
|
# dev.print(png, file = "hist.png", width = 768, height = 768, res = 125)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# options(width = 300)
|
|
|
|
# print(pr <- prof.tree::prof.tree("./Rprof.out"), limit = NULL
|
|
|
|
# , pruneFun = function(x) x$percent > 0.01)
|
|
|
|
|
|
|
|
# par(mfrow = c(2, 2))
|
|
|
|
# matrixImage(alpha, main = "alpha")
|
|
|
|
# matrixImage(fit$alpha, main = "fit$alpha")
|
|
|
|
# matrixImage(beta, main = "beta")
|
|
|
|
# matrixImage(fit$beta, main = "fit$beta")
|
|
|
|
|
|
|
|
# if (diff(dim(alpha) * dim(beta)) > 0) {
|
|
|
|
# par(mfrow = c(2, 1))
|
|
|
|
# } else {
|
|
|
|
# par(mfrow = c(1, 2))
|
|
|
|
# }
|
|
|
|
# matrixImage(kronecker(alpha, beta), main = "kronecker(alpha, beta)")
|
|
|
|
# matrixImage(kronecker(fit$alpha, fit$beta), main = "kronecker(fit$alpha, fit$beta)")
|
|
|
|
|
|
|
|
# matrixImage(Delta, main = "Delta")
|
|
|
|
# matrixImage(fit$Delta, main = "fit$Delta")
|
|
|
|
|
|
|
|
|
|
|
|
# local({
|
|
|
|
# a <- (-1 * (sum(sign(fit$alpha) * sign(alpha)) < 0)) * fit$alpha / mean(fit$alpha^2)
|
|
|
|
# b <- alpha / mean(alpha^2)
|
|
|
|
|
|
|
|
# norm(a - b, "F")
|
|
|
|
# })
|
|
|
|
# local({
|
|
|
|
# a <- (-1 * (sum(sign(fit$beta) * sign(beta)) < 0)) * fit$beta / mean(fit$beta^2)
|
|
|
|
# b <- beta / mean(beta^2)
|
|
|
|
|
|
|
|
# norm(a - b, "F")
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
# # Which Sequence?
|
|
|
|
# x <- y <- 1
|
|
|
|
# replicate(40, x <<- (y <<- x + y) - x)
|
|
|
|
|
|
|
|
|
|
|
|
# # Face-Splitting Product
|
|
|
|
# n <- 100
|
|
|
|
# p <- 3
|
|
|
|
# q <- 500
|
|
|
|
# A <- matrix(rnorm(n * p), n)
|
|
|
|
# B <- matrix(rnorm(n * q), n)
|
|
|
|
|
|
|
|
# faceSplit <- function(A, B) {
|
|
|
|
# C <- vapply(seq_len(ncol(A)), function(i) A[, i] * B, B)
|
|
|
|
# dim(C) <- c(nrow(A), ncol(A) * ncol(B))
|
|
|
|
# C
|
|
|
|
# }
|
|
|
|
|
|
|
|
# all.equal(
|
|
|
|
# tkhatriRao(A, B),
|
|
|
|
# faceSplit(A, B)
|
|
|
|
# )
|
|
|
|
# microbenchmark::microbenchmark(
|
|
|
|
# tkhatriRao(A, B),
|
|
|
|
# faceSplit(A, B)
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# dist.kron <- function(a0, b0, a1, b1) {
|
|
|
|
# sqrt(sum(a0^2) * sum(b0^2) -
|
|
|
|
# 2 * sum(diag(crossprod(a0, a1))) * sum(diag(crossprod(b0, b1))) +
|
|
|
|
# sum(a1^2) * sum(b1^2))
|
|
|
|
# }
|
|
|
|
|
|
|
|
|
|
|
|
# alpha <- matrix(rnorm(q * r), q)
|
|
|
|
# beta <- matrix(rnorm(p * k), p)
|
|
|
|
# alpha.true <- matrix(rnorm(q * r), q)
|
|
|
|
# beta.true <- matrix(rnorm(p * k), p)
|
|
|
|
# all.equal(
|
|
|
|
# dist.kron(alpha, beta, alpha.true, beta.true),
|
|
|
|
# norm(kronecker(alpha, beta) - kronecker(alpha.true, beta.true), "F")
|
|
|
|
# )
|
|
|
|
|
|
|
|
# A <- matrix(rnorm(p^2), p)
|
|
|
|
# B <- matrix(rnorm(p^2), p)
|
|
|
|
|
|
|
|
# tr <- function(A) sum(diag(A))
|
|
|
|
# tr(crossprod(A, B))
|
|
|
|
# tr(tcrossprod(B, A))
|
|
|
|
|
|
|
|
# tr(crossprod(A, A))
|
|
|
|
# tr(tcrossprod(A, A))
|
|
|
|
# sum(A^2)
|
|
|
|
|
|
|
|
# alpha <- matrix(rnorm(q * r), q)
|
|
|
|
# beta <- matrix(rnorm(p * k), p)
|
|
|
|
|
|
|
|
# norm(kronecker(alpha, beta), "F")^2
|
|
|
|
# norm(alpha, "F")^2 * norm(beta, "F")^2
|
|
|
|
# tr(crossprod(kronecker(alpha, beta)))
|
|
|
|
# tr(tcrossprod(kronecker(alpha, beta)))
|
|
|
|
# tr(crossprod(kronecker(t(alpha), t(beta))))
|
|
|
|
# tr(crossprod(alpha)) * tr(crossprod(beta))
|
|
|
|
# tr(tcrossprod(alpha)) * tr(tcrossprod(beta))
|
|
|
|
# tr(crossprod(alpha)) * tr(tcrossprod(beta))
|
|
|
|
# sum(alpha^2) * sum(beta^2)
|
|
|
|
|
|
|
|
# alpha <- matrix(rnorm(q * r), q)
|
|
|
|
# beta <- matrix(rnorm(p * k), p)
|
|
|
|
# alpha.true <- matrix(rnorm(q * r), q)
|
|
|
|
# beta.true <- matrix(rnorm(p * k), p)
|
|
|
|
# microbenchmark::microbenchmark(
|
|
|
|
# norm(kronecker(alpha, beta), "F")^2,
|
|
|
|
# norm(alpha, "F")^2 * norm(beta, "F")^2,
|
|
|
|
# tr(crossprod(kronecker(alpha, beta))),
|
|
|
|
# tr(tcrossprod(kronecker(alpha, beta))),
|
|
|
|
# tr(crossprod(kronecker(t(alpha), t(beta)))),
|
|
|
|
# tr(crossprod(alpha)) * tr(crossprod(beta)),
|
|
|
|
# tr(tcrossprod(alpha)) * tr(tcrossprod(beta)),
|
|
|
|
# tr(crossprod(alpha)) * tr(tcrossprod(beta)),
|
|
|
|
# sum(alpha^2) * sum(beta^2),
|
|
|
|
|
|
|
|
# setup = {
|
|
|
|
# p <- sample(1:10, 1)
|
|
|
|
# q <- sample(1:10, 1)
|
|
|
|
# k <- sample(1:10, 1)
|
|
|
|
# r <- sample(1:10, 1)
|
|
|
|
# assign("alpha", matrix(rnorm(q * r), q), .GlobalEnv)
|
|
|
|
# assign("beta", matrix(rnorm(p * k), p), .GlobalEnv)
|
|
|
|
# assign("alpha.true", matrix(rnorm(q * r), q), .GlobalEnv)
|
|
|
|
# assign("beta.true", matrix(rnorm(p * k), p), .GlobalEnv)
|
|
|
|
# }
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# p <- sample(1:15, 1) # 11
|
|
|
|
# q <- sample(1:15, 1) # 3
|
|
|
|
# k <- sample(1:15, 1) # 7
|
|
|
|
# r <- sample(1:15, 1) # 5
|
|
|
|
# A <- matrix(rnorm(q * r), q)
|
|
|
|
# B <- matrix(rnorm(p * k), p)
|
|
|
|
# a <- matrix(rnorm(q * r), q)
|
|
|
|
# b <- matrix(rnorm(p * k), p)
|
|
|
|
|
|
|
|
# all.equal(
|
|
|
|
# kronecker(A + a, B + b),
|
|
|
|
# kronecker(A, B) + kronecker(A, b) + kronecker(a, B) + kronecker(a, b)
|
|
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
# p <- 200L
|
|
|
|
# n <- 100L
|
|
|
|
# R <- matrix(rnorm(n * p), n)
|
|
|
|
# A <- matrix(rnorm(p^2), p) # Base Matrix
|
|
|
|
# B <- A + 0.01 * matrix(rnorm(p^2), p) # Distortion / Update of A
|
|
|
|
# A.inv <- solve(A)
|
|
|
|
|
|
|
|
# microbenchmark::microbenchmark(
|
|
|
|
# solve = R %*% solve(B),
|
|
|
|
# neumann.raw = R %*% (A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv),
|
|
|
|
# neumann.fun = {
|
|
|
|
# AD <- A.inv %*% (A - B)
|
|
|
|
# res <- A.inv + AD %*% A.inv
|
|
|
|
# res <- A.inv + AD %*% res
|
|
|
|
# R %*% res
|
|
|
|
# }
|
|
|
|
# )
|
|
|
|
|
|
|
|
# all.equal(
|
|
|
|
# A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
|
|
|
|
# {
|
|
|
|
# DA <- (A - B) %*% A.inv
|
|
|
|
# res <- A.inv + A.inv %*% DA
|
|
|
|
# res <- A.inv + res %*% DA
|
|
|
|
# res
|
|
|
|
# }
|
|
|
|
# )
|
|
|
|
|
|
|
|
# all.equal(
|
|
|
|
# A.inv + A.inv %*% (A - B) %*% A.inv + A.inv %*% (A - B) %*% A.inv %*% (A - B) %*% A.inv,
|
|
|
|
# {
|
|
|
|
# AD <- A.inv %*% (A - B)
|
|
|
|
# res <- A.inv + AD %*% A.inv
|
|
|
|
# res <- A.inv + AD %*% res
|
|
|
|
# res
|
|
|
|
# }
|
|
|
|
# )
|
|
|
|
|
|
|
|
# #####
|
|
|
|
# sym <- function(A) A + t(A)
|
|
|
|
# n <- 101
|
|
|
|
# p <- 7
|
|
|
|
# q <- 11
|
|
|
|
# r <- 3
|
|
|
|
# k <- 5
|
|
|
|
# R <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
|
|
|
|
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
|
|
|
|
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
|
|
|
|
# beta <- array(rnorm(p * k), dim = c(p = p, k = k))
|
|
|
|
# Delta.1 <- sym(matrix(rnorm(q * q), q, q))
|
|
|
|
# dim(Delta.1) <- c(q = q, q = q)
|
|
|
|
# Delta.2 <- sym(matrix(rnorm(p * p), p, p))
|
|
|
|
# dim(Delta.2) <- c(p = p, p = p)
|
|
|
|
|
|
|
|
# Delta <- kronecker(Delta.1, Delta.2)
|
|
|
|
|
|
|
|
# grad.alpha.1 <- local({
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
|
|
|
# dim(.R) <- c(q, p, n)
|
|
|
|
# .F <- sapply(seq_len(n), function(i) beta %*% F[i, , ])
|
|
|
|
# dim(.F) <- c(p, r, n)
|
|
|
|
|
|
|
|
# .C <- sapply(seq_len(n), function(i) .R[i, , ] %*% .F[i, , ])
|
|
|
|
# dim(.C) <- c(n, q, r)
|
|
|
|
# colSums(.C)
|
|
|
|
# })
|
|
|
|
# grad.alpha.2 <- local({
|
|
|
|
# # Delta.1^-1 R' Delta.2^-1
|
|
|
|
# .R <- aperm(R, c(2, 1, 3))
|
|
|
|
# dim(.R) <- c(q, n * p)
|
|
|
|
# .R <- solve(Delta.1) %*% .R
|
|
|
|
# dim(.R) <- c(q, n, p)
|
|
|
|
# .R <- aperm(.R, c(3, 2, 1))
|
|
|
|
# dim(.R) <- c(p, n * q)
|
|
|
|
# .R <- solve(Delta.2) %*% .R
|
|
|
|
# dim(.R) <- c(p, n, q)
|
|
|
|
# .R <- aperm(.R, c(2, 3, 1)) # n x q x p
|
|
|
|
|
|
|
|
# # beta F
|
|
|
|
# .F <- aperm(F, c(2, 1, 3))
|
|
|
|
# dim(.F) <- c(k, n * r)
|
|
|
|
# .F <- beta %*% .F
|
|
|
|
# dim(.F) <- c(p, n, r)
|
|
|
|
# .F <- aperm(.F, c(2, 1, 3)) # n x p x r
|
|
|
|
|
|
|
|
# # (Delta.1^-1 R' Delta.2^-1) (beta F)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# dim(.F) <- c(n * p, r)
|
|
|
|
# crossprod(.R, .F)
|
|
|
|
# })
|
|
|
|
|
|
|
|
# all.equal(
|
|
|
|
# grad.alpha.1,
|
|
|
|
# grad.alpha.2
|
|
|
|
# )
|
|
|
|
|
|
|
|
# all.equal({
|
|
|
|
# .R <- matrix(0, q, p)
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .R <- .R + Di.1 %*% t(R[i, , ]) %*% Di.2
|
|
|
|
# }
|
|
|
|
# .R
|
|
|
|
# }, {
|
|
|
|
# .R <- R
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
|
|
|
# dim(.R) <- c(q, p, n)
|
|
|
|
# .R <- aperm(.R, c(3, 1, 2))
|
|
|
|
# colSums(.R)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# .R <- sapply(seq_len(n), function(i) tcrossprod(Di.1, R[i, , ]) %*% Di.2)
|
|
|
|
# dim(.R) <- c(q, p, n)
|
|
|
|
# .R <- aperm(.R, c(3, 1, 2))
|
|
|
|
# .R
|
|
|
|
# }, {
|
|
|
|
# .R <- R
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# .R <- .R %*% solve(Delta.1)
|
|
|
|
# dim(.R) <- c(n, p, q)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# .R <- .R %*% solve(Delta.2)
|
|
|
|
# dim(.R) <- c(n, q, p)
|
|
|
|
# .R
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .F <- matrix(0, p, r)
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .F <- .F + beta %*% F[i, , ]
|
|
|
|
# }
|
|
|
|
# .F
|
|
|
|
# }, {
|
|
|
|
# .F <- apply(F, 1, function(Fi) beta %*% Fi)
|
|
|
|
# dim(.F) <- c(p, r, n)
|
|
|
|
# .F <- aperm(.F, c(3, 1, 2))
|
|
|
|
# colSums(.F)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .F <- apply(F, 1, function(Fi) beta %*% Fi)
|
|
|
|
# dim(.F) <- c(p, r, n)
|
|
|
|
# .F <- aperm(.F, c(3, 1, 2))
|
|
|
|
# colSums(.F)
|
|
|
|
# }, {
|
|
|
|
# .F <- aperm(F, c(1, 3, 2))
|
|
|
|
# dim(.F) <- c(n * r, k)
|
|
|
|
# .F <- tcrossprod(.F, beta)
|
|
|
|
# dim(.F) <- c(n, r, p)
|
|
|
|
# t(colSums(.F))
|
|
|
|
# })
|
|
|
|
|
|
|
|
# all.equal({
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# grad.alpha <- 0
|
|
|
|
# grad.beta <- 0
|
|
|
|
# dim(R) <- c(n, p, q)
|
|
|
|
# dim(F) <- c(n, k, r)
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# grad.alpha <- grad.alpha + (
|
|
|
|
# Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
|
|
|
# )
|
|
|
|
# grad.beta <- grad.beta + (
|
|
|
|
# Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
|
|
|
# )
|
|
|
|
# }
|
|
|
|
|
|
|
|
# g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
|
|
|
# }, {
|
|
|
|
# # Note that the order is important since for grad.beta the residuals do NOT
|
|
|
|
# # need to be transposes.
|
|
|
|
|
|
|
|
# # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
|
|
|
|
# dim(R) <- c(n * p, q)
|
|
|
|
# .R <- R %*% solve(Delta.1)
|
|
|
|
# dim(.R) <- c(n, p, q)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# .R <- .R %*% solve(Delta.2)
|
|
|
|
# dim(.R) <- c(n, q, p)
|
|
|
|
|
|
|
|
# # gradient with respect to beta
|
|
|
|
# # Responces times beta (alpha f_i')
|
|
|
|
# dim(F) <- c(n * k, r)
|
|
|
|
# .F <- tcrossprod(F, alpha)
|
|
|
|
# dim(.F) <- c(n, k, q)
|
|
|
|
# .F <- aperm(.F, c(1, 3, 2))
|
|
|
|
# # Matricize
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# dim(.F) <- c(n * q, k)
|
|
|
|
# grad.beta <- crossprod(.R, .F)
|
|
|
|
|
|
|
|
# # gradient with respect to beta
|
|
|
|
# # Responces times alpha
|
|
|
|
# dim(F) <- c(n, k, r)
|
|
|
|
# .F <- aperm(F, c(1, 3, 2))
|
|
|
|
# dim(.F) <- c(n * r, k)
|
|
|
|
# .F <- tcrossprod(.F, beta)
|
|
|
|
# dim(.F) <- c(n, r, p)
|
|
|
|
# .F <- aperm(.F, c(1, 3, 2))
|
|
|
|
# # Transpose stand. residuals
|
|
|
|
# dim(.R) <- c(n, q, p)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# # Matricize
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# dim(.F) <- c(n * p, r)
|
|
|
|
# grad.alpha <- crossprod(.R, .F)
|
|
|
|
|
|
|
|
# g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
# microbenchmark::microbenchmark(R1 = {
|
|
|
|
# Di.1 <- solve(Delta.1)
|
|
|
|
# Di.2 <- solve(Delta.2)
|
|
|
|
# grad.alpha <- 0 # matrix(0, q, r)
|
|
|
|
# grad.beta <- 0 # matrix(0, p, k)
|
|
|
|
# dim(R) <- c(n, p, q)
|
|
|
|
# dim(F) <- c(n, k, r)
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# grad.alpha <- grad.alpha + (
|
|
|
|
# Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
|
|
|
# )
|
|
|
|
# grad.beta <- grad.beta + (
|
|
|
|
# Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
|
|
|
# )
|
|
|
|
# }
|
|
|
|
|
|
|
|
# g1 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
|
|
|
# }, R3 = {
|
|
|
|
# # Note that the order is important since for grad.beta the residuals do NOT
|
|
|
|
# # need to be transposes.
|
|
|
|
|
|
|
|
# # left/right standardized residuals Delta_1^-1 R_i' Delta_2^-1 for i in 1:n
|
|
|
|
# dim(R) <- c(n * p, q)
|
|
|
|
# .R <- R %*% solve(Delta.1)
|
|
|
|
# dim(.R) <- c(n, p, q)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# .R <- .R %*% solve(Delta.2)
|
|
|
|
# dim(.R) <- c(n, q, p)
|
|
|
|
|
|
|
|
# # gradient with respect to beta
|
|
|
|
# # Responces times beta (alpha f_i')
|
|
|
|
# dim(F) <- c(n * k, r)
|
|
|
|
# .F <- tcrossprod(F, alpha)
|
|
|
|
# dim(.F) <- c(n, k, q)
|
|
|
|
# .F <- aperm(.F, c(1, 3, 2))
|
|
|
|
# # Matricize
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# dim(.F) <- c(n * q, k)
|
|
|
|
# grad.beta <- crossprod(.R, .F)
|
|
|
|
|
|
|
|
# # gradient with respect to beta
|
|
|
|
# # Responces times alpha
|
|
|
|
# dim(F) <- c(n, k, r)
|
|
|
|
# .F <- aperm(F, c(1, 3, 2))
|
|
|
|
# dim(.F) <- c(n * r, k)
|
|
|
|
# .F <- tcrossprod(.F, beta)
|
|
|
|
# dim(.F) <- c(n, r, p)
|
|
|
|
# .F <- aperm(.F, c(1, 3, 2))
|
|
|
|
# # Transpose stand. residuals
|
|
|
|
# dim(.R) <- c(n, q, p)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# # Matricize
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# dim(.F) <- c(n * p, r)
|
|
|
|
# grad.alpha <- crossprod(.R, .F)
|
|
|
|
|
|
|
|
# g2 <- c(dim(grad.alpha), dim(grad.beta), grad.alpha, grad.beta)
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
# n <- 100
|
|
|
|
# p <- 7
|
|
|
|
# q <- 11
|
|
|
|
# k <- 3
|
|
|
|
# r <- 5
|
|
|
|
|
|
|
|
# X <- array(rnorm(n * p * q), dim = c(n = n, p = p, q = q))
|
|
|
|
# F <- array(rnorm(n * k * r), dim = c(n = n, k = k, r = r))
|
|
|
|
# alpha <- array(rnorm(q * r), dim = c(q = q, r = r))
|
|
|
|
# beta <- array(rnorm(p * k), dim = c(p = p, k = k))
|
|
|
|
|
|
|
|
# all.equal({
|
|
|
|
# R <- array(NA, dim = c(n, p, q))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
|
|
|
|
# }
|
|
|
|
# R
|
|
|
|
# }, {
|
|
|
|
# X - (F %x_3% alpha %x_2% beta)
|
|
|
|
# }, check.attributes = FALSE)
|
|
|
|
|
|
|
|
# microbenchmark::microbenchmark(base = {
|
|
|
|
# R <- array(NA, dim = c(n, p, q))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# R[i, , ] <- X[i, , ] - beta %*% F[i, , ] %*% t(alpha)
|
|
|
|
# }
|
|
|
|
# R
|
|
|
|
# }, ttm = {
|
|
|
|
# X - (F %x_3% alpha %x_2% beta)
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# n <- 100; p <- 7; q <- 11; k <- 3; r <- 5
|
|
|
|
# sym <- function(x) t(x) + x
|
|
|
|
# Di.1 <- sym(matrix(rnorm(q^2), q, q))
|
|
|
|
# Di.2 <- sym(matrix(rnorm(p^2), p, p))
|
|
|
|
# R <- array(rnorm(n, p, q), dim = c(n, p, q))
|
|
|
|
# F <- array(rnorm(n, k, r), dim = c(n, k, r))
|
|
|
|
# alpha <- matrix(rnorm(q * r), q, r)
|
|
|
|
# beta <- matrix(rnorm(p * k), p, k)
|
|
|
|
|
|
|
|
# all.equal({
|
|
|
|
# .R <- array(NA, dim = c(n, p, q))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .R[i, , ] <- Di.2 %*% R[i, , ] %*% Di.1
|
|
|
|
# }
|
|
|
|
# .R
|
|
|
|
# }, {
|
|
|
|
# R %x_3% Di.1 %x_2% Di.2
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Rt <- array(NA, dim = c(n, q, p))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Rt[i, , ] <- Di.1 %*% t(R[i, , ]) %*% Di.2
|
|
|
|
# }
|
|
|
|
# .Rt
|
|
|
|
# }, {
|
|
|
|
# .Rt <- R %x_3% Di.1 %x_2% Di.2
|
|
|
|
# aperm(.Rt, c(1, 3, 2))
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Fa <- array(NA, dim = c(n, q, k))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Fa[i, , ] <- alpha %*% t(F[i, , ])
|
|
|
|
# }
|
|
|
|
# .Fa
|
|
|
|
# }, {
|
|
|
|
# aperm(F %x_3% alpha, c(1, 3, 2))
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Fb <- array(NA, dim = c(n, p, r))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Fb[i, , ] <- beta %*% F[i, , ]
|
|
|
|
# }
|
|
|
|
# .Fb
|
|
|
|
# }, {
|
|
|
|
# F %x_2% beta
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .F <- array(NA, dim = c(n, p, q))
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .F[i, , ] <- beta %*% F[i, , ] %*% t(alpha)
|
|
|
|
# }
|
|
|
|
# .F
|
|
|
|
# }, {
|
|
|
|
# F %x_3% alpha %x_2% beta
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Ga <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
|
|
|
# }
|
|
|
|
# .Ga
|
|
|
|
# }, {
|
|
|
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# .Fb <- F %x_2% beta
|
|
|
|
# dim(.Fb) <- c(n * p, r)
|
|
|
|
# crossprod(.R, .Fb)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Gb <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
|
|
|
# }
|
|
|
|
# .Gb
|
|
|
|
# }, {
|
|
|
|
# .Rt <- aperm(R %x_3% Di.1 %x_2% Di.2, c(1, 3, 2))
|
|
|
|
# dim(.Rt) <- c(n * q, p)
|
|
|
|
# .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))
|
|
|
|
# dim(.Fa) <- c(n * q, k)
|
|
|
|
# crossprod(.Rt, .Fa)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Ga <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
|
|
|
# }
|
|
|
|
# .Gb <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
|
|
|
# }
|
|
|
|
# c(.Ga, .Gb)
|
|
|
|
# }, {
|
|
|
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
|
|
|
# .Fb <- F %x_2% beta
|
|
|
|
# .Fa <- aperm(F %x_3% alpha, c(1, 3, 2))
|
|
|
|
|
|
|
|
# dim(.R) <- c(n * p, q)
|
|
|
|
# dim(.Fb) <- c(n * p, r)
|
|
|
|
# .Ga <- crossprod(.R, .Fb)
|
|
|
|
|
|
|
|
# dim(.R) <- c(n, p, q)
|
|
|
|
# .R <- aperm(.R, c(1, 3, 2))
|
|
|
|
# dim(.R) <- c(n * q, p)
|
|
|
|
# dim(.Fa) <- c(n * q, k)
|
|
|
|
# .Gb <- crossprod(.R, .Fa)
|
|
|
|
|
|
|
|
# c(.Ga, .Gb)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# .Ga <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Ga <- .Ga + Di.1 %*% t(R[i, , ]) %*% Di.2 %*% beta %*% F[i, , ]
|
|
|
|
# }
|
|
|
|
# .Gb <- 0
|
|
|
|
# for (i in 1:n) {
|
|
|
|
# .Gb <- .Gb + Di.2 %*% R[i, , ] %*% Di.1 %*% alpha %*% t(F[i, , ])
|
|
|
|
# }
|
|
|
|
# c(.Ga, .Gb)
|
|
|
|
# }, {
|
|
|
|
# .R <- R %x_3% Di.1 %x_2% Di.2
|
|
|
|
|
|
|
|
# .Ga <- tcrossprod(mat(.R, 3), mat(F %x_2% beta, 3))
|
|
|
|
# .Gb <- tcrossprod(mat(.R, 2), mat(F %x_3% alpha, 2))
|
|
|
|
|
|
|
|
# c(.Ga, .Gb)
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# n <- 101; p <- 5; q <- 7
|
|
|
|
|
|
|
|
# sym <- function(x) crossprod(x)
|
|
|
|
# D1 <- sym(matrix(rnorm(q^2), q, q))
|
|
|
|
# D2 <- sym(matrix(rnorm(p^2), p, p))
|
|
|
|
|
|
|
|
# X <- tensorPredictors:::rmvnorm(n, sigma = kronecker(D1, D2))
|
|
|
|
# dim(X) <- c(n, p, q)
|
|
|
|
|
|
|
|
# D1.hat <- tcrossprod(mat(X, 3)) / n
|
|
|
|
# D2.hat <- tcrossprod(mat(X, 2)) / n
|
|
|
|
|
|
|
|
# local({
|
|
|
|
# par(mfrow = c(2, 2))
|
|
|
|
# matrixImage(D1, main = "D1")
|
|
|
|
# matrixImage(D1.hat, main = "D1.hat")
|
|
|
|
# matrixImage(D2, main = "D2")
|
|
|
|
# matrixImage(D2.hat, main = "D2.hat")
|
|
|
|
# })
|
|
|
|
|
|
|
|
# sum(X^2) / n
|
|
|
|
# sum(diag(D1.hat))
|
|
|
|
# sum(diag(D2.hat))
|
|
|
|
# sum(diag(kronecker(D1, D2)))
|
|
|
|
# sum(diag(kronecker(D1.hat / sqrt(sum(diag(D1.hat))),
|
|
|
|
# D2.hat / sqrt(sum(diag(D1.hat))))))
|
|
|
|
|
|
|
|
# all.equal({
|
|
|
|
# mat(X, 1) %*% kronecker(D1.hat, D2.hat)
|
|
|
|
# }, {
|
|
|
|
# mat(X %x_3% D1.hat %x_2% D2.hat, 1)
|
|
|
|
# })
|
|
|
|
# all.equal({
|
|
|
|
# C <- mat(X, 1) %*% kronecker(D1.hat, D2.hat) * (n / sum(X^2))
|
|
|
|
# dim(C) <- c(n, p, q)
|
|
|
|
# C
|
|
|
|
# }, {
|
|
|
|
# (X %x_3% D1.hat %x_2% D2.hat) / sum(diag(D1.hat))
|
|
|
|
# })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# D.1 <- tcrossprod(mat(X, 3))
|
|
|
|
# D.2 <- tcrossprod(mat(X, 2))
|
|
|
|
# tr <- sum(diag(D.1))
|
|
|
|
# D.1 <- D.1 / sqrt(n * tr)
|
|
|
|
# D.2 <- D.2 / sqrt(n * tr)
|
|
|
|
|
|
|
|
# sum(diag(kronecker(D1, D2)))
|
|
|
|
# sum(diag(kronecker(D.1, D.2)))
|
|
|
|
# det(kronecker(D1, D2))
|
|
|
|
# det(kronecker(D.1, D.2))
|
|
|
|
# det(D.1)^p * det(D.2)^q
|
|
|
|
|
|
|
|
# log(det(kronecker(D.1, D.2)))
|
|
|
|
# p * log(det(D.1)) + q * log(det(D.2))
|
2022-05-05 11:35:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
|
|
|
|
d + stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)
|
|
|
|
|
|
|
|
# Orientation follows the discrete axis
|
|
|
|
ggplot(mtcars, aes(mpg, factor(cyl))) +
|
|
|
|
geom_point() +
|
|
|
|
stat_summary(fun.data = "mean_cl_boot", colour = "red", size = 2)
|
|
|
|
|
|
|
|
# You can supply individual functions to summarise the value at
|
|
|
|
# each x:
|
|
|
|
d + stat_summary(fun = "median", colour = "red", size = 2, geom = "point")
|
|
|
|
d + stat_summary(fun = "mean", colour = "red", size = 2, geom = "point")
|
|
|
|
d + aes(colour = factor(vs)) + stat_summary(fun = mean, geom="line")
|
|
|
|
|
|
|
|
d + stat_summary(fun = mean, fun.min = min, fun.max = max, colour = "red")
|
|
|
|
|
|
|
|
d <- ggplot(diamonds, aes(cut))
|
|
|
|
d + geom_bar()
|
|
|
|
d + stat_summary(aes(y = price), fun = "mean", geom = "bar")
|
|
|
|
|
|
|
|
# Orientation of stat_summary_bin is ambiguous and must be specified directly
|
|
|
|
ggplot(diamonds, aes(carat, price)) +
|
|
|
|
stat_summary_bin(fun = "mean", geom = "bar", orientation = 'y')
|
|
|
|
|
|
|
|
|
|
|
|
# Don't use ylim to zoom into a summary plot - this throws the
|
|
|
|
# data away
|
|
|
|
p <- ggplot(mtcars, aes(cyl, mpg)) +
|
|
|
|
stat_summary(fun = "mean", geom = "point")
|
|
|
|
p
|
|
|
|
p + ylim(15, 30)
|
|
|
|
# Instead use coord_cartesian
|
|
|
|
p + coord_cartesian(ylim = c(15, 30))
|
|
|
|
|
|
|
|
# A set of useful summary functions is provided from the Hmisc package:
|
|
|
|
stat_sum_df <- function(fun, geom="crossbar", ...) {
|
|
|
|
stat_summary(fun.data = fun, colour = "red", geom = geom, width = 0.2, ...)
|
|
|
|
}
|
|
|
|
d <- ggplot(mtcars, aes(cyl, mpg)) + geom_point()
|
|
|
|
# The crossbar geom needs grouping to be specified when used with
|
|
|
|
# a continuous x axis.
|
|
|
|
d + stat_sum_df("mean_cl_boot", mapping = aes(group = cyl))
|
|
|
|
d + stat_sum_df("mean_sdl", mapping = aes(group = cyl))
|
|
|
|
d + stat_sum_df("mean_sdl", fun.args = list(mult = 1), mapping = aes(group = cyl))
|
|
|
|
d + stat_sum_df("median_hilow", mapping = aes(group = cyl))
|
|
|
|
|
|
|
|
# An example with highly skewed distributions:
|
|
|
|
if (require("ggplot2movies")) {
|
|
|
|
set.seed(596)
|
|
|
|
mov <- movies[sample(nrow(movies), 1000), ]
|
|
|
|
m2 <-
|
|
|
|
ggplot(mov, aes(x = factor(round(rating)), y = votes)) +
|
|
|
|
geom_point()
|
|
|
|
m2 <-
|
|
|
|
m2 +
|
|
|
|
stat_summary(
|
|
|
|
fun.data = "mean_cl_boot",
|
|
|
|
geom = "crossbar",
|
|
|
|
colour = "red", width = 0.3
|
|
|
|
) +
|
|
|
|
xlab("rating")
|
|
|
|
m2
|
|
|
|
# Notice how the overplotting skews off visual perception of the mean
|
|
|
|
# supplementing the raw data with summary statistics is _very_ important
|
|
|
|
|
|
|
|
# Next, we'll look at votes on a log scale.
|
|
|
|
|
|
|
|
# Transforming the scale means the data are transformed
|
|
|
|
# first, after which statistics are computed:
|
|
|
|
m2 + scale_y_log10()
|
|
|
|
# Transforming the coordinate system occurs after the
|
|
|
|
# statistic has been computed. This means we're calculating the summary on the raw data
|
|
|
|
# and stretching the geoms onto the log scale. Compare the widths of the
|
|
|
|
# standard errors.
|
|
|
|
m2 + coord_trans(y="log10")
|
|
|
|
}
|