tensor_predictors/simulations/simulation_poi.R

################################################################################
###                          Sparce SIR against SIR                          ###
################################################################################
devtools::load_all('tensorPredictors/')
library(dr)

n <- 100
p <- 10

X <- rmvnorm(n, sigma = 0.5^abs(outer(1:p, 1:p, `-`)))
y <- rowSums(X[, 1:3]) + rnorm(n, 0.5)
B <- as.matrix(1:p <= 3) / sqrt(3)

dr.sir <- dr(y ~ X, method = 'sir', numdir = ncol(B))
B.sir <- dr.sir$evectors[, seq_len(ncol(B)), drop = FALSE]

dist.projection(B, B.sir)

B.poi <- with(GEP(X, y, 'sir'), {
    POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE)$vectors
})

dist.projection(B, B.poi)

################################################################################
###                 LDA (sparse Linear Discrimina Analysis)                  ###
################################################################################
devtools::load_all('tensorPredictors/')

C <- function(rho, p) {
    res <- matrix(rho, p, p)
    diag(res) <- 1
    res
}
R <- function(rho, p) {
    rho^abs(outer(1:p, 1:p, `-`))
}

dataset <- function(nr) {
    K <- 3          # Nr. Groups
    n.i <- 30       # Sample group size for each of the K groups
    n <- K * n.i    # Sample size
    p <- 200        # Nr. of predictors

    # Generate test data
    V <- cbind(matrix(c(
        2, 1, 2, 1, 2,
        1,-1, 1,-1, 1,
        0, 1,-1, 1, 0
    ), 3, 5, byrow = TRUE),
        matrix(0, 3, p - 5)
    )
    W <- cbind(matrix(c(
    -1, 1, 1, 1, 1,
        1,-1, 1,-1, 1,
        1, 1,-1, 1, 0
    ), 3, 5, byrow = TRUE),
        matrix(0, 3, p - 5)
    )

    if (nr == 1) {          # Model 1
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, mu = rep(0, p)) + V[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 2) {   # Model 2
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + (V %*% C(0.5, p))[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 3) {   # Model 3
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = R(0.5, p)) + (V %*% R(0.5, p))[y, ]
        B <- cbind(V[1, ] - V[2, ], V[2, ] - V[3, ])
    } else if (nr == 4) {   # Model 4
        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + (W %*% C(0.5, p))[y, ]
        B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
    } else if (nr == 5) {   # Model 5
        K <- 4
        n <- K * n.i

        W.tilde <- 2 * rbind(W, colMeans(W))
        mu.tilde <- W.tilde %*% C(0.5, p)

        y <- factor(rep(1:K, each = n.i))
        X <- rmvnorm(n, sigma = C(0.5, p)) + mu.tilde[y, ]

        B <- cbind(W[1, ] - W[2, ], W[2, ] - W[3, ])
    } else {
        stop("Unknown model nr.")
    }

    list(X = X, y = y, B = qr.Q(qr(B)))
}

# Model 1
fit <- with(dataset(1), {
    with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C'))
})
fit <- with(dataset(1), {
    with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C'))
})
fit <- with(dataset(1), {
    with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = TRUE))
})
fit <- with(dataset(1), {
    with(GEP(X, y, 'lda'), POI(lhs, rhs, ncol(B), method = 'FastPOI-C', use.C = TRUE))
})

# head(fit$vectors, 10)

count <- 0
nr.reps <- 100
sim <- replicate(nr.reps, {
    res <- double(0)
    for (model.nr in 1:5) {
        with(dataset(model.nr), {
            for (method in c('POI-C', 'FastPOI-C')) {
                for (use.C in c(FALSE, TRUE)) {
                    fit <- with(GEP(X, y, 'lda'), {
                        POI(lhs, rhs, ncol(B), method = 'POI-C', use.C = use.C)
                    })
                    dist <- dist.projection(B, fit$vectors)
                    names(dist) <- paste('M', model.nr, '-', method, '-', use.C)
                    res <<- c(res, dist)
                }
            }
            fit <- with(GEP(X, y, 'lda'), {
                solve.gep(lhs, rhs, ncol(B))
            })
            dist <- dist.projection(B, fit$vectors)
            names(dist) <- paste('M', model.nr, '- solve -', use.C)
            res <<- c(res, dist)
        })
    }
    cat("Counter", (count <<- count + 1), "/", nr.reps, "\n")
    res
})

(stats <- as.matrix(rowMeans(sim)))