wip: poi, add: projection dist (sin of max principal angle)

Daniel Kapla 2021-11-16 12:01:12 +01:00
parent b0151dfafb
commit 9123baccdd
6 changed files with 306 additions and 117 deletions

View File

@ -0,0 +1,111 @@
################################################################################
### LDA (sparse Linear Discriminant Analysis) ###
################################################################################
devtools::load_all('tensorPredictors/')
C <- function(rho, p) {
res <- matrix(rho, p, p)
diag(res) <- 1
res
}
R <- function(rho, p) {
rho^abs(outer(1:p, 1:p, `-`))
}
dataset <- function(nr) {
K <- 3 # Nr. Groups
n.i <- 30 # Sample group size for each of the K groups
n <- K * n.i # Sample size
p <- 200 # Nr. of predictors
# Generate test data
V <- cbind(matrix(c(
2, 1, 2, 1, 2,
1,-1, 1,-1, 1,
0, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
W <- cbind(matrix(c(
-1, 1, 1, 1, 1,
1,-1, 1,-1, 1,
1, 1,-1, 1, 0
), 3, 5, byrow = TRUE),
matrix(0, 3, p - 5)
)
if (nr == 1) { # Model 1
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, mu = rep(0, p)) + V[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 2) { # Model 2
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 3) { # Model 3
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ]
B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
} else if (nr == 4) { # Model 4
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else if (nr == 5) { # Model 5
K <- 4
n <- K * n.i
W.tilde <- 2 * rbind(W, colMeans(W))
mu.tilde <- W.tilde %*% C(0.5, p)
y <- factor(rep(1:K, each = n.i))
X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ]
B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
} else {
stop("Unknown model nr.")
}
list(X = X, y = y, B = qr.Q(qr(B)))
}
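# NOTE: the data generation above assumes an `rmvnorm(n, mu, sigma)` sampler with
# these argument names (presumably exported by tensorPredictors via load_all());
# base R provides none. A minimal Cholesky-based stand-in, used only if no such
# function is available, could look like:
if (!exists("rmvnorm")) {
    rmvnorm <- function(n, mu = rep(0, ncol(sigma)), sigma = diag(length(mu))) {
        p <- length(mu)
        # standard normal draws times chol(sigma), shifted by the mean
        matrix(rnorm(n * p), n, p) %*% chol(sigma) + matrix(mu, n, p, byrow = TRUE)
    }
}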
# # Model 1
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C'))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C'))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE))
# })
# fit <- with(dataset(1), {
# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE))
# })
# head(fit$vectors, 10)
count <- 0
nr.reps <- 100
sim <- replicate(nr.reps, {
res <- double(0)
for (model.nr in 1:5) {
for (method in c('POI-C', 'FastPOI-C')) {
for (use.C in c(FALSE, TRUE)) {
dist <- with(dataset(model.nr), {
fit <- with(GEP(X, y, 'lda'), {
POI(lhs, rhs, ncol(B), method = method, use.C = use.C)
})
# dist.subspace(B, fit$vectors, is.ortho = FALSE, normalize = TRUE)
dist.projection(B, fit$vectors)
})
names(dist) <- paste('M', model.nr, '-', method, '-', use.C)
res <- c(res, dist)
}
}
}
cat("Counter", (count <<- count + 1), "/", nr.reps, "\n")
res
})
(stats <- as.matrix(rowMeans(sim)))
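# Each row of `sim` is one model / method / use.C configuration; besides the mean
# projection distance, the spread over the replications may be of interest:
(stats.spread <- cbind(mean = rowMeans(sim), sd = apply(sim, 1, sd)))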

View File

@ -14,7 +14,7 @@
#' Reduction and Variable Selection" By Xin Chen, Changliang Zou and
#' R. Dennis Cook.
#'
-GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
+GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ...,
nr.slices = 10, ensamble = list(abs, identity, function(x) x^2)
) {
method <- match.arg(method)
@ -52,6 +52,21 @@ GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ...,
})))
# Sample covariance
rhs <- cov(X)
+} else if (method == 'lda') {
+# TODO: check this properly (the implementation could be improved and there
+# may be theoretical inaccuracies)
+y <- as.factor(y)
+# group means
+mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
+# between group covariance Sigma.B = Cov(E[X | y])
+lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)
+# within group covariance (Sigma.T = Sigma.B + Sigma.W)
+# with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X)
+rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs
} else {
stop('Not implemented!')
}
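# Sanity check for the 'lda' case above: with the biased (1/n) scaling and equal
# group sizes, the law of total covariance Cov(X) = Cov(E[X | y]) + E(Cov(X | y))
# holds exactly, so lhs + rhs recovers the (biased) total covariance. Illustrative
# snippet in plain R (names chosen for the example only):
set.seed(1)
K <- 3; n.i <- 10; p <- 4
y <- factor(rep(1:K, each = n.i))
X <- matrix(rnorm(K * n.i * p), K * n.i, p) + 2 * matrix(rnorm(K * p), K, p)[y, ]
mu <- as.matrix(aggregate(X, list(y), mean)[, -1])
Sigma.B <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu)        # Cov(E[X | y]), biased
Sigma.T <- ((nrow(X) - 1) / nrow(X)) * cov(X)           # Cov(X), biased
Sigma.W <- Reduce(`+`, lapply(levels(y), function(g) {  # E(Cov(X | y)), biased
    Xg <- X[y == g, , drop = FALSE]
    ((nrow(Xg) - 1) / nrow(Xg)) * cov(Xg)
})) / nlevels(y)
all.equal(Sigma.T, unname(Sigma.B + Sigma.W))           # TRUE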

View File

@ -1,13 +1,13 @@
solve.gep <- function(A, B, d = nrow(A)) {
isrB <- matpow(B, -0.5)
-if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) {
+if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) {
eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d)
} else {
eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
}
-list(vectors = isrB %*% eig$vectors, values = eig$values)
+list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values)
}
POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) {
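# The reduction used by solve.gep: with isrB = B^{-1/2}, the GEP A u = lambda B u
# becomes the ordinary symmetric eigenproblem (isrB A isrB) v = lambda v with
# u = isrB v. Illustrative check in plain R (eigen() in place of matpow()/RSpectra):
set.seed(2)
p <- 5; d <- 2
A <- crossprod(matrix(rnorm(p * p), p))             # symmetric p.s.d. lhs
B <- crossprod(matrix(rnorm(p * p), p)) + diag(p)   # symmetric p.d. rhs
eB <- eigen(B, symmetric = TRUE)
isrB <- eB$vectors %*% diag(1 / sqrt(eB$values)) %*% t(eB$vectors)  # B^{-1/2}
eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE)
U <- isrB %*% eig$vectors[, 1:d]                    # back-transformed eigenvectors
lambda <- eig$values[1:d]
all.equal(A %*% U, B %*% U %*% diag(lambda))        # A u = lambda B u, column-wise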
@ -37,16 +37,28 @@ POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C',
#' Penalized Orthogonal Iteration.
#'
-#' @param lambda Default: 0.75 * lambda_max for FastPOI-C method.
-#'
-#' @note use.C required 'poi.so' beeing dynamicaly loaded.
-#'      dyn.load('../tensor_predictors/poi.so')
+#' @param A left hand side of the GEP
+#' @param B right hand side of the GEP
+#' @param d number of eigenvectors/-values to be computed, corresponding to the
+#'      largest \eqn{d} eigenvalues of the penalized GEP
+#' @param sparsity scaling of the maximal penalty term in [0, 1), where 0 corresponds to
+#'      no penalization and 1 leads to the trivial solution (default: 1 / 2)
+#' @param method either \code{"POI-C"} or \code{"FastPOI-C"}, where
+#'      POI-C: Penalized Orthogonal Iteration with coordinate-wise Lasso penalty,
+#'      FastPOI-C: Fast POI with coordinate-wise Lasso penalty,
+#'      and the coordinate-wise Lasso is a group Lasso penalty.
+#' @param iter.outer maximum number of orthogonal iterations (ignored by the Fast
+#'      methods)
+#' @param iter.inner maximum number of inner iterations
+#' @param tol numerical tolerance; absolute values smaller than \code{tol} are
+#'      treated as 0
#'
#' @export
POI <- function(A, B, d = 1L, sparsity = 0.5,
-method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the the lasso loss too
+method = c('POI-C', 'FastPOI-C'), # TODO: maybe implement the Lasso penalty too
iter.outer = 100L, iter.inner = 500L,
-tol = sqrt(.Machine$double.eps)
+tol = sqrt(.Machine$double.eps),
+use.C = FALSE
) {
method <- match.arg(method)
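# The "coordinate-wise Lasso" above acts row-wise on Z as a group (Euclidean norm)
# soft-thresholding: with a_g = Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ] the
# update is Z[g, ] <- a_g * max(0, 1 - lambda / ||a_g||) / B[g, g]. A standalone
# sketch of one such (non-cyclic, all rows at once) update step; the function name
# is purely illustrative and not part of the package:
poi.row.update <- function(Z, Delta, B, lambda) {
    traces <- Delta - B %*% Z + diag(B) * Z
    traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
}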
@ -78,29 +90,36 @@ POI <- function(A, B, d = 1L, sparsity = 0.5,
} else {
Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
}
-Q <- Delta
-Z <- matrix(0, nrow(Q), ncol(Q))
-# Outer loop (iteration)
+# In case of fast POI, only one iteration
-for (i in seq_len(iter.outer)) {
+if (startsWith(method, 'Fast')) {
-Q.last <- Q # for break condition
+# Step 1: Optimize (inner loop, a.k.a. coordinate-wise penalization)
+if (use.C) {
+Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
+as.integer(iter.inner), tol,
+PACKAGE = 'tensorPredictors')
+} else {
+# Initial value
+Z <- Delta
-# Step 1: Solve B Z_i = A Q_{i-1} for Z_i
+# Note, the R implementation does NOT use a cyclic update; instead it
-Delta <- crossprod(A, Q)
+# performs the coordinate-wise penalization in parallel
-# Inner Loop
for (j in seq_len(iter.inner)) {
Z.last <- Z # for break condition
+# TODO: it seems (in general) that the cyclic update is actually needed!
traces <- Delta - B %*% Z + diag(B) * Z
Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
-# Inner break condition
+# Inner break condition (second condition safeguards against divergence)
-if (norm(Z.last - Z, 'F') < tol) {
+diff <- norm(Z.last - Z, 'F')
+if (diff < tol || 1 < tol * diff) {
break
}
}
+}
-# Step 2: QR decomposition of Z_i = Q_i R_i.
+# Step 2: QR decomposition (same as below)
if (d == 1L) {
Z.norm <- norm(Z, 'F')
if (Z.norm < tol) {
@ -120,94 +139,92 @@ POI <- function(A, B, d = 1L, sparsity = 0.5,
Q <- qr.Q(qr(Z))
}
}
+} else { # POI-C
+Q <- Delta
+Z <- matrix(0, nrow(Q), ncol(Q))
-# In case of fast POI, only one iteration
+# Outer loop (iteration)
-if (startsWith(method, 'Fast')) {
+for (i in seq_len(iter.outer)) {
-break
+Q.last <- Q # for break condition
-}
-if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) {
+# Step 1: Solve B Z_i = A Q_{i-1} for Z_i
+Delta <- crossprod(A, Q)
+# Inner Loop
+if (use.C) {
+Z <- .Call('FastPOI_C_sub', B, Delta, lambda,
+as.integer(iter.inner), tol,
+PACKAGE = 'tensorPredictors')
+} else {
+# Note, the R implementation does NOT use a cyclic update; instead it
+# performs the coordinate-wise penalization in parallel
+for (j in seq_len(iter.inner)) {
+Z.last <- Z # for break condition
+# TODO: it seems (in general) that the cyclic update is actually needed!
+traces <- Delta - B %*% Z + diag(B) * Z
+Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B))
+# Inner break condition (second condition safeguards against divergence)
+diff <- norm(Z.last - Z, 'F')
+if (diff < tol || 1 < tol * diff) {
break
}
}
+}
-# TODO: Finish with transformation to original solution U of
+# Step 2: QR decomposition of Z_i = Q_i R_i.
-# A U = B U Lambda.
+if (d == 1L) {
+Z.norm <- norm(Z, 'F')
+if (Z.norm < tol) {
+Q <- matrix(0, p, d)
+} else {
+Q <- Z / Z.norm
+}
+} else {
+# Detect zero columns.
+zero.col <- colSums(abs(Z)) < tol
+if (all(zero.col)) {
+Q <- matrix(0, nrow(Z), ncol(Z))
+} else if (any(zero.col)) {
+Q <- matrix(0, nrow(Z), ncol(Z))
+Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col]))
+} else {
+Q <- qr.Q(qr(Z))
+}
+}
+# Outer break condition || Q Q' - Q.last Q.last' || < tol
+# The used form is equivalent but much faster for d << p.
+# The following holds in general for two matrices A, B of dim p x d:
+#     || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2
+# for the Frobenius norm ||.||. The computational cost of the left side
+# is O(p^2 d) while the right side has O(p d^2).
+tr <- sum(crossprod(Q)^2) -
+2 * sum(crossprod(Q, Q.last)^2) +
+sum(crossprod(Q.last)^2)
+if (sqrt(max(0, tr)) < tol) {
+break
+}
+}
+}
+# Reconstruct the solution of the original GEP by solving
+#     (Q' A Q) T = (Q' B Q) T D
+# for T and D, which gives the solution U = Q T and Lambda = D.
+if (1 < d) {
+eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q)
+vectors <- Q %*% eig$vectors
+values <- eig$values
+} else {
+vectors <- Q
+values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q))
+}
structure(list(
-Z = Z, Q = Q,
+vectors = vectors,
+values = values,
lambda = lambda,
call = match.call()
), class = c("tensor_predictor", "POI"))
}
# POI.bak <- function(A, B, d,
# lambda = 0.75 * sqrt(max(rowSums(Delta^2))),
# update.tol = 1e-3,
# tol = 100 * .Machine$double.eps,
# maxit = 400L,
# # maxit.outer = maxit,
# maxit.inner = maxit,
# use.C = FALSE,
# method = 'FastPOI-C') {
# # TODO:
# stopifnot(method == 'FastPOI-C')
# if (requireNamespace("RSpectra", quietly = TRUE)) {
# Delta <- RSpectra::eigs_sym(A, d)$vectors
# } else {
# Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE]
# }
# # Set initial value.
# Z <- Delta
# # Step 1: Optimization.
# # The "inner" optimization loop, aka repeated coordinate optimization.
# if (use.C) {
# Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner),
# PACKAGE = 'tensorPredictors')
# } else {
# p <- nrow(Z)
# for (iter.inner in 1:maxit.inner) {
# Zold <- Z
# for (g in 1:p) {
# a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ]
# a_norm <- sqrt(sum(a^2))
# if (a_norm > lambda) {
# Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g])
# } else {
# Z[g, ] <- 0
# }
# }
# if (norm(Z - Zold, 'F') < update.tol) {
# break
# }
# }
# }
# # Step 2: QR decomposition.
# if (d == 1L) {
# Z_norm <- sqrt(sum(Z^2))
# if (Z_norm < tol) {
# Q <- matrix(0, p, d)
# } else {
# Q <- Z / Z_norm
# }
# } else {
# # Detect zero columns.
# zeroColumn <- colSums(abs(Z)) < tol
# if (all(zeroColumn)) {
# Q <- matrix(0, p, d)
# } else if (any(zeroColumn)) {
# Q <- matrix(0, p, d)
# Q[, !zeroColumn] <- qr.Q(qr(Z))
# } else {
# Q <- qr.Q(qr(Z))
# }
# }
# list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner,
# lambda = lambda)
# }
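# Numerical check of the Frobenius norm identity behind the outer break condition
# of the POI-C branch above,
#     || Q Q' - Q.last Q.last' ||^2 = || Q'Q ||^2 - 2 || Q'Q.last ||^2 + || Q.last'Q.last ||^2,
# which avoids forming the p x p projector difference (illustrative snippet only):
set.seed(3)
p <- 100; d <- 3
Q1 <- qr.Q(qr(matrix(rnorm(p * d), p, d)))
Q2 <- qr.Q(qr(matrix(rnorm(p * d), p, d)))
lhs <- norm(tcrossprod(Q1) - tcrossprod(Q2), 'F')^2
rhs <- sum(crossprod(Q1)^2) - 2 * sum(crossprod(Q1, Q2)^2) + sum(crossprod(Q2)^2)
all.equal(lhs, rhs)    # TRUE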

View File

@ -0,0 +1,27 @@
#' Projection Distance of two matrices
#'
#' Defined as the sine of the maximum principal angle between the column spaces
#' of the matrices,
#'      max{ sin(theta_i) : i = 1, ..., min(d1, d2) }.
#'
#' @param A,B matrices of size \eqn{p\times d_1} and \eqn{p\times d_2}.
#' @param is.ortho boolean, if TRUE the columns of \code{A} and \code{B} are
#'      assumed to be orthonormal and the QR decomposition is skipped.
#' @param tol tolerance for the rank decision in the QR decomposition.
#'
#' @export
dist.projection <- function(A, B, is.ortho = FALSE,
tol = sqrt(.Machine$double.eps)
) {
if (!is.ortho) {
qrA <- qr(A, tol)
A <- qr.Q(qrA)[, seq_len(qrA$rank), drop = FALSE]
qrB <- qr(B, tol)
B <- qr.Q(qrB)[, seq_len(qrB$rank), drop = FALSE]
}
if (ncol(A) == 0L && ncol(B) == 0L) {
0
} else if (ncol(A) == 0L || ncol(B) == 0L) {
1
} else {
sin(acos(min(c(La.svd(crossprod(A, B), 0, 0)$d, 1))))
}
}
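# Illustration: the projection distance is 0 for identical column spaces, 1 for
# orthogonal ones, and the sine of the largest principal angle in between.
p <- 5
A <- diag(p)[, 1:2]                                   # span{e1, e2}
theta <- pi / 6
B <- cbind(cos(theta) * diag(p)[, 1] + sin(theta) * diag(p)[, 3], diag(p)[, 2])
dist.projection(A, A)                                 # 0
dist.projection(A, diag(p)[, 4:5])                    # 1
dist.projection(A, B)                                 # sin(pi / 6) = 0.5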

View File

@ -14,17 +14,29 @@
#'   subspaces of different dimensions" <arXiv:1407.0900>
#'
#' @export
-dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) {
+dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE,
+tol = sqrt(.Machine$double.eps)
+) {
if (!is.matrix(A)) A <- as.matrix(A)
if (!is.matrix(B)) B <- as.matrix(B)
-if (is.ortho) {
+if (!is.ortho) {
+qrA <- qr(A, tol)
+if (qrA$rank < ncol(A)) {
+A <- qr.Q(qrA)[, abs(diag(qr.R(qrA))) > tol, drop = FALSE]
+} else {
+A <- qr.Q(qrA)
+}
+qrB <- qr(B, tol)
+if (qrB$rank < ncol(B)) {
+B <- qr.Q(qrB)[, abs(diag(qr.R(qrB))) > tol, drop = FALSE]
+} else {
+B <- qr.Q(qrB)
+}
+}
PA <- tcrossprod(A, A)
PB <- tcrossprod(B, B)
-} else {
-PA <- A %*% solve(t(A) %*% A, t(A))
-PB <- B %*% solve(t(B) %*% B, t(B))
-}
if (normalize) {
rankSum <- ncol(A) + ncol(B)
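# For full column rank inputs, the QR route above yields the same projector as the
# previous normal-equations form A (A'A)^{-1} A', while additionally tolerating
# rank deficiency; quick equivalence check (illustrative only):
set.seed(4)
A <- matrix(rnorm(5 * 3), 5, 3)                            # full rank, not orthonormal
Q <- qr.Q(qr(A))                                           # orthonormal basis of span(A)
all.equal(tcrossprod(Q), A %*% solve(crossprod(A), t(A)))  # TRUE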

View File

@ -1,10 +1,18 @@
#include <math.h>
+/**
+ *
+ * NOTE: CURRENTLY NOT IN USE!
+ *
+ */
#include <R.h>
#include <Rinternals.h>
/* invoked by .Call */
-extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, SEXP in_maxit) {
+extern SEXP FastPOI_C_sub(SEXP in_B, SEXP in_Delta,
+SEXP in_lambda, SEXP in_maxit, SEXP in_tol
+) {
int i, j, k, g;
int p = nrows(in_Delta);
@ -16,10 +24,10 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
double* Zold = (double*)R_alloc(p * d, sizeof(double));
double* Delta = REAL(in_Delta);
double* a = (double*)R_alloc(d, sizeof(double));
-double* A = REAL(in_A);
double* B = REAL(in_B);
double a_norm;
double lambda = asReal(in_lambda);
+double tol = asReal(in_tol);
double scale;
double res;
@ -55,7 +63,6 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
for (j = 0; j < d; ++j) {
Z[j * p + g] = scale * a[j];
}
-}
// Copy Z to Zold and check break condition.
@ -64,7 +71,7 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S
res += (Z[j] - Zold[j]) * (Z[j] - Zold[j]);
Zold[j] = Z[j];
}
-if (res < 1e-6) {
+if (res < tol) {
break;
}