diff --git a/simulations/simulation_poi.R b/simulations/simulation_poi.R new file mode 100644 index 0000000..49c3873 --- /dev/null +++ b/simulations/simulation_poi.R @@ -0,0 +1,111 @@ + +################################################################################ +### LDA (sparse Linear Discrimina Analysis) ### +################################################################################ +devtools::load_all('tensorPredictors/') + +C <- function(rho, p) { + res <- matrix(rho, p, p) + diag(res) <- 1 + res +} +R <- function(rho, p) { + rho^abs(outer(1:p, 1:p, `-`)) +} + +dataset <- function(nr) { + K <- 3 # Nr. Groups + n.i <- 30 # Sample group size for each of the K groups + n <- K * n.i # Sample size + p <- 200 # Nr. of predictors + + # Generate test data + V <- cbind(matrix(c( + 2, 1, 2, 1, 2, + 1,-1, 1,-1, 1, + 0, 1,-1, 1, 0 + ), 3, 5, byrow = TRUE), + matrix(0, 3, p - 5) + ) + W <- cbind(matrix(c( + -1, 1, 1, 1, 1, + 1,-1, 1,-1, 1, + 1, 1,-1, 1, 0 + ), 3, 5, byrow = TRUE), + matrix(0, 3, p - 5) + ) + + if (nr == 1) { # Model 1 + y <- factor(rep(1:K, each = n.i)) + X <- rmvnorm(n, mu = rep(0, p)) + V[y, ] + B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ]) + } else if (nr == 2) { # Model 2 + y <- factor(rep(1:K, each = n.i)) + X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ] + B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ]) + } else if (nr == 3) { # Model 3 + y <- factor(rep(1:K, each = n.i)) + X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ] + B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ]) + } else if (nr == 4) { # Model 4 + y <- factor(rep(1:K, each = n.i)) + X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ] + B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ]) + } else if (nr == 5) { # Model 5 + K <- 4 + n <- K * n.i + + W.tilde <- 2 * rbind(W, colMeans(W)) + mu.tilde <- W.tilde %*% C(0.5, p) + + y <- factor(rep(1:K, each = n.i)) + X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ] + + B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ]) + } else { + stop("Unknown model nr.") + } + + list(X = X, y = y, B = qr.Q(qr(B))) +} + +# # Model 1 +# fit <- with(dataset(1), { +# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C')) +# }) +# fit <- with(dataset(1), { +# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C')) +# }) +# fit <- with(dataset(1), { +# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE)) +# }) +# fit <- with(dataset(1), { +# with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE)) +# }) + +# head(fit$vectors, 10) + +count <- 0 +nr.reps <- 100 +sim <- replicate(nr.reps, { + res <- double(0) + for (model.nr in 1:5) { + for (method in c('POI-C', 'FastPOI-C')) { + for (use.C in c(FALSE, TRUE)) { + dist <- with(dataset(model.nr), { + fit <- with(GEP(X, y, 'lda'), { + POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = use.C) + }) + # dist.subspace(B, fit$vectors, is.ortho = FALSE, normalize = TRUE) + dist.projection(B, fit$vectors) + }) + names(dist) <- paste('M', model.nr, '-', method, '-', use.C) + res <- c(res, dist) + } + } + } + cat("Counter", (count <<- count + 1), "/", nr.reps, "\n") + res +}) + +(stats <- as.matrix(rowMeans(sim))) diff --git a/tensorPredictors/R/GEP.R b/tensorPredictors/R/GEP.R index a0639c5..c079bcf 100644 --- a/tensorPredictors/R/GEP.R +++ b/tensorPredictors/R/GEP.R @@ -14,7 +14,7 @@ #' Reduction and Variable Selection" By Xin Chen, Changliang Zou and #' R. Dennis Cook. #' -GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ..., +GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'lda'), ..., nr.slices = 10, ensamble = list(abs, identity, function(x) x^2) ) { method <- match.arg(method) @@ -52,6 +52,21 @@ GEP <- function(X, y, method = c('pfc', 'pca', 'sir', 'save'), ..., }))) # Sample covariance rhs <- cov(X) + } else if (method == 'lda') { + # TODO: check this properly!!! (Maybe a bit better implementation and/or + # some theoretical inaccuracies) + + y <- as.factor(y) + + # group means + mu <- as.matrix(aggregate(X, list(y), mean)[, -1]) + + # between group covariance Sigma.B = Cov(E[X | y]) + lhs <- ((nrow(mu) - 1) / nrow(mu)) * cov(mu) + + # within group covariance (Sigma.T = Sigma.B + Sigma.W) + # with Sigma.W = E(Cov(X | y)) and Sigma.T = Cov(X) + rhs <- (((nrow(X) - 1) / nrow(X)) * cov(X)) - lhs } else { stop('Not implemented!') } diff --git a/tensorPredictors/R/POI.R b/tensorPredictors/R/POI.R index 8c73e79..ef5235b 100644 --- a/tensorPredictors/R/POI.R +++ b/tensorPredictors/R/POI.R @@ -1,13 +1,13 @@ solve.gep <- function(A, B, d = nrow(A)) { isrB <- matpow(B, -0.5) - if (requireNamespace("RSpectra", quietly = TRUE) && d < nrow(A)) { + if ((d < nrow(A)) && requireNamespace("RSpectra", quietly = TRUE)) { eig <- RSpectra::eigs_sym(isrB %*% A %*% isrB, d) } else { eig <- eigen(isrB %*% A %*% isrB, symmetric = TRUE) } - list(vectors = isrB %*% eig$vectors, values = eig$values) + list(vectors = isrB %*% eig$vectors[, 1:d, drop = FALSE], values = eig$values) } POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', 'FastPOI-L')) { @@ -37,16 +37,28 @@ POI.lambda.max <- function(A, d = 1L, method = c('POI-C', 'POI-L', 'FastPOI-C', #' Penalysed Orthogonal Iteration. #' -#' @param lambda Default: 0.75 * lambda_max for FastPOI-C method. -#' -#' @note use.C required 'poi.so' beeing dynamicaly loaded. -#' dyn.load('../tensor_predictors/poi.so') +#' @param A Left hand side of GEP +#' @param B right hand side of GEP +#' @param d number of eigen-vectors, -values to be computed coresponding to the +#' largest \eqn{d} eigenvalues of the penalized GEP +#' @param sparsity scaling for max penalty term in [0, 1) where 0 corresponds to +#' no penalization and 1 leads to the trivial solution. (default: 1 / 2) +#' @param method ether \code{"POI-C"} or \code{"FastPOI-C"} where +#' POI-C: Penalized Orthogonal Iteration with Coordinate-wise Lasso penalty +#' FastPOI-C: Fast POI with Coordinate-wise Lasso penalty +#' where the Coordinate-wise Lasso is a group Lasso penalty. +#' @param iter.outer maximum number of orthogonal iterations (ignored by Fast +#' methods) +#' @param iter.inner maximum number of inner iterations +#' @param tol numerical tolerance. Absolute values smaller than \code{tol} are +#' treated as 0. #' #' @export POI <- function(A, B, d = 1L, sparsity = 0.5, - method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement the the lasso loss too + method = c('POI-C', 'FastPOI-C'), # TODO: Maybe implement Lasso penalty too iter.outer = 100L, iter.inner = 500L, - tol = sqrt(.Machine$double.eps) + tol = sqrt(.Machine$double.eps), + use.C = FALSE ) { method <- match.arg(method) @@ -78,29 +90,36 @@ POI <- function(A, B, d = 1L, sparsity = 0.5, } else { Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE] } - Q <- Delta - Z <- matrix(0, nrow(Q), ncol(Q)) - # Outer loop (iteration) - for (i in seq_len(iter.outer)) { - Q.last <- Q # for break condition + # In case of fast POI, only one iteration + if (startsWith(method, 'Fast')) { + # Step 1: Optimize (inner loop, a.k.a. coordinate wise penalization) + if (use.C) { + Z <- .Call('FastPOI_C_sub', B, Delta, lambda, + as.integer(iter.inner), tol, + PACKAGE = 'tensorPredictors') + } else { + # Initial value + Z <- Delta - # Step 1: Solve B Z_i = A Q_{i-1} for Z_i - Delta <- crossprod(A, Q) - # Inner Loop - for (j in seq_len(iter.inner)) { - Z.last <- Z # for break condition + # Note, the R implementation does NOT use a cyclic update instead + # performs coordinate penalization in parallel + for (j in seq_len(iter.inner)) { + Z.last <- Z # for break condition - traces <- Delta - B %*% Z + diag(B) * Z - Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) + # TODO: it seems (in general) that the cyclic update is actually needed! + traces <- Delta - B %*% Z + diag(B) * Z + Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) - # Inner break condition - if (norm(Z.last - Z, 'F') < tol) { - break + # Inner break condition (second condition safeguards against devergence) + diff <- norm(Z.last - Z, 'F') + if (diff < tol || 1 < tol * diff) { + break + } } } - # Step 2: QR decomposition of Z_i = Q_i R_i. + # Step 2: QR decomposition (same as below) if (d == 1L) { Z.norm <- norm(Z, 'F') if (Z.norm < tol) { @@ -120,94 +139,92 @@ POI <- function(A, B, d = 1L, sparsity = 0.5, Q <- qr.Q(qr(Z)) } } + } else { # POI-C + Q <- Delta + Z <- matrix(0, nrow(Q), ncol(Q)) - # In case of fast POI, only one iteration - if (startsWith(method, 'Fast')) { - break - } - if (norm(tcrossprod(Q, Q) - tcrossprod(Q.last, Q.last), 'F') < tol) { - break + # Outer loop (iteration) + for (i in seq_len(iter.outer)) { + Q.last <- Q # for break condition + + # Step 1: Solve B Z_i = A Q_{i-1} for Z_i + Delta <- crossprod(A, Q) + + # Inner Loop + if (use.C) { + Z <- .Call('FastPOI_C_sub', B, Delta, lambda, + as.integer(iter.inner), tol, + PACKAGE = 'tensorPredictors') + } else { + # Note, the R implementation does NOT use a cyclic update instead + # performs coordinate penalization in parallel + for (j in seq_len(iter.inner)) { + Z.last <- Z # for break condition + + # TODO: it seems (in general) that the cyclic update is actually needed! + traces <- Delta - B %*% Z + diag(B) * Z + Z <- traces * (pmax(1 - lambda / sqrt(rowSums(traces^2)), 0) / diag(B)) + + # Inner break condition (second condition safeguards against devergence) + diff <- norm(Z.last - Z, 'F') + if (diff < tol || 1 < tol * diff) { + break + } + } + } + + # Step 2: QR decomposition of Z_i = Q_i R_i. + if (d == 1L) { + Z.norm <- norm(Z, 'F') + if (Z.norm < tol) { + Q <- matrix(0, p, d) + } else { + Q <- Z / Z.norm + } + } else { + # Detect zero columns. + zero.col <- colSums(abs(Z)) < tol + if (all(zero.col)) { + Q <- matrix(0, nrow(Z), ncol(Z)) + } else if (any(zero.col)) { + Q <- matrix(0, nrow(Z), ncol(Z)) + Q[, !zero.col] <- qr.Q(qr(Z[, !zero.col])) + } else { + Q <- qr.Q(qr(Z)) + } + } + + # Outer break condition || Q Q' - Q.last Q.last' || < tol + # The used form is equivalent but much faster for d << p + # The following holds in general for two matrices A, B of dim p x d + # || A A' - B B' ||^2 = || A' A ||^2 - 2 || A' B ||^2 + || B' B ||^2 + # for the Frobenius norm ||.||. The computational cost of the left side + # is O(p^2 d) while the right side has O(p d^2). + tr <- sum(crossprod(Q)^2) - + 2 * sum(crossprod(Q, Q.last)^2) + + sum(crossprod(Q.last)^2) + if (sqrt(max(0, tr)) < tol) { + break + } } } - # TODO: Finish with transformation to original solution U of - # A U = B U Lambda. + # Reconstruct solution of the original GEP by solving + # (Q' A Q) T = (Q' B Q) T D + # for T and D which gives the solution U = Q T and Lambda = D. + if (1 < d) { + eig <- solve.gep(crossprod(Q, A) %*% Q, crossprod(Q, B) %*% Q) + vectors <- Q %*% eig$vectors + values <- eig$values + } else { + vectors <- Q + values <- c((crossprod(Q, A) %*% Q) / (crossprod(Q, B) %*% Q)) + } structure(list( - Z = Z, Q = Q, + vectors = vectors, + values = values, lambda = lambda, call = match.call() ), class = c("tensor_predictor", "POI")) } - - -# POI.bak <- function(A, B, d, -# lambda = 0.75 * sqrt(max(rowSums(Delta^2))), -# update.tol = 1e-3, -# tol = 100 * .Machine$double.eps, -# maxit = 400L, -# # maxit.outer = maxit, -# maxit.inner = maxit, -# use.C = FALSE, -# method = 'FastPOI-C') { - -# # TODO: -# stopifnot(method == 'FastPOI-C') - -# if (requireNamespace("RSpectra", quietly = TRUE)) { -# Delta <- RSpectra::eigs_sym(A, d)$vectors -# } else { -# Delta <- eigen(A, symmetric = TRUE)$vectors[, 1:d, drop = FALSE] -# } - -# # Set initial value. -# Z <- Delta - -# # Step 1: Optimization. -# # The "inner" optimization loop, aka repeated coordinate optimization. -# if (use.C) { -# Z <- .Call('FastPOI_C_sub', A, B, Delta, lambda, as.integer(maxit.inner), -# PACKAGE = 'tensorPredictors') -# } else { -# p <- nrow(Z) -# for (iter.inner in 1:maxit.inner) { -# Zold <- Z -# for (g in 1:p) { -# a <- Delta[g, ] - B[g, ] %*% Z + B[g, g] * Z[g, ] -# a_norm <- sqrt(sum(a^2)) -# if (a_norm > lambda) { -# Z[g, ] <- a * ((1 - lambda / a_norm) / B[g, g]) -# } else { -# Z[g, ] <- 0 -# } -# } -# if (norm(Z - Zold, 'F') < update.tol) { -# break -# } -# } -# } - -# # Step 2: QR decomposition. -# if (d == 1L) { -# Z_norm <- sqrt(sum(Z^2)) -# if (Z_norm < tol) { -# Q <- matrix(0, p, d) -# } else { -# Q <- Z / Z_norm -# } -# } else { -# # Detect zero columns. -# zeroColumn <- colSums(abs(Z)) < tol -# if (all(zeroColumn)) { -# Q <- matrix(0, p, d) -# } else if (any(zeroColumn)) { -# Q <- matrix(0, p, d) -# Q[, !zeroColumn] <- qr.Q(qr(Z)) -# } else { -# Q <- qr.Q(qr(Z)) -# } -# } - -# list(Z = Z, Q = Q, iter.inner = if (use.C) NA else iter.inner, -# lambda = lambda) -# } diff --git a/tensorPredictors/R/dist_projection.R b/tensorPredictors/R/dist_projection.R new file mode 100644 index 0000000..7a5811d --- /dev/null +++ b/tensorPredictors/R/dist_projection.R @@ -0,0 +1,27 @@ +#' Porjection Distance of two matrices +#' +#' Defined as sine of the maximum principal angle between the column spaces +#' of the matrices +#' max{ sin theta_i, i = 1, ..., min(d1, d2) } +#' +#' @param A,B matrices of size \eqn{p\times d_1} and \eqn{p\times d_2}. +#' +#' @export +dist.projection <- function(A, B, is.ortho = FALSE, + tol = sqrt(.Machine$double.eps) +) { + if (!is.ortho) { + qrA <- qr(A, tol) + A <- qr.Q(qrA)[, seq_len(qrA$rank), drop = FALSE] + qrB <- qr(B, tol) + B <- qr.Q(qrB)[, seq_len(qrB$rank), drop = FALSE] + } + + if (ncol(A) == 0L && ncol(B) == 0L) { + 0 + } else if (ncol(A) == 0L || ncol(B) == 0L) { + 1 + } else { + sin(acos(min(c(La.svd(crossprod(A, B), 0, 0)$d, 1)))) + } +} diff --git a/tensorPredictors/R/dist_subspace.R b/tensorPredictors/R/dist_subspace.R index cb4dc6c..19d23f8 100644 --- a/tensorPredictors/R/dist_subspace.R +++ b/tensorPredictors/R/dist_subspace.R @@ -14,18 +14,30 @@ #' subspaces of different dimensions" #' #' @export -dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE) { +dist.subspace <- function (A, B, is.ortho = FALSE, normalize = FALSE, + tol = sqrt(.Machine$double.eps) +) { if (!is.matrix(A)) A <- as.matrix(A) if (!is.matrix(B)) B <- as.matrix(B) - if (is.ortho) { - PA <- tcrossprod(A, A) - PB <- tcrossprod(B, B) - } else { - PA <- A %*% solve(t(A) %*% A, t(A)) - PB <- B %*% solve(t(B) %*% B, t(B)) + if (!is.ortho) { + qrA <- qr(A, tol) + if (qrA$rank < ncol(A)) { + A <- qr.Q(qrA)[, abs(diag(qr.R(qrA))) > tol, drop = FALSE] + } else { + A <- qr.Q(qrA) + } + qrB <- qr(B, tol) + if (qrB$rank < ncol(B)) { + B <- qr.Q(qrB)[, abs(diag(qr.R(qrB))) > tol, drop = FALSE] + } else { + B <- qr.Q(qrB) + } } + PA <- tcrossprod(A, A) + PB <- tcrossprod(B, B) + if (normalize) { rankSum <- ncol(A) + ncol(B) c <- 1 / sqrt(min(rankSum, 2 * nrow(A) - rankSum)) diff --git a/tensorPredictors/src/poi.c b/tensorPredictors/src/poi.c index 30fa8a9..de3dbc1 100644 --- a/tensorPredictors/src/poi.c +++ b/tensorPredictors/src/poi.c @@ -1,10 +1,18 @@ #include +/** + * + * NOTE: CURRENTLY NOT IN USE! + * + */ + #include #include /* invoced by .Call */ -extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, SEXP in_maxit) { +extern SEXP FastPOI_C_sub(SEXP in_B, SEXP in_Delta, + SEXP in_lambda, SEXP in_maxit, SEXP in_tol +) { int i, j, k, g; int p = nrows(in_Delta); @@ -16,10 +24,10 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S double* Zold = (double*)R_alloc(p * d, sizeof(double)); double* Delta = REAL(in_Delta); double* a = (double*)R_alloc(d, sizeof(double)); - double* A = REAL(in_A); double* B = REAL(in_B); double a_norm; double lambda = asReal(in_lambda); + double tol = asReal(in_tol); double scale; double res; @@ -55,7 +63,6 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S for (j = 0; j < d; ++j) { Z[j * p + g] = scale * a[j]; } - } // Copy Z to Zold and check break condition. @@ -64,7 +71,7 @@ extern SEXP FastPOI_C_sub(SEXP in_A, SEXP in_B, SEXP in_Delta, SEXP in_lambda, S res += (Z[j] - Zold[j]) * (Z[j] - Zold[j]); Zold[j] = Z[j]; } - if (res < 1e-6) { + if (res < tol) { break; }