#' Conditional Variance Estimator (CVE) Package.
#'
#' Conditional Variance Estimation (CVE) is a novel sufficient dimension
#' reduction (SDR) method for regressions satisfying \eqn{E(Y|X) = E(Y|B'X)},
#' where \eqn{B'X} is a lower dimensional projection of the predictors. CVE,
#' similarly to its main competitor, the mean average variance estimation
#' (MAVE), is not based on inverse regression, and does not require the
#' restrictive linearity and constant variance conditions of moment based SDR
#' methods. CVE is data-driven and applies to additive error regressions with
#' continuous predictors and link function. The effectiveness and accuracy of 
#' CVE compared to MAVE and other SDR techniques is demonstrated in simulation
#' studies. CVE is shown to outperform MAVE in some model set-ups, while it
#' remains largely on par under most others.
#'
#' Let \eqn{Y} be real denotes a univariate response and \eqn{X} a real
#' \eqn{p}-dimensional covariate vector. We assume that the dependence of
#' \eqn{Y} and \eqn{X} is modelled by
#' \deqn{Y = g(B'X) + \epsilon}
#' where \eqn{X} is independent of \eqn{\epsilon} with positive definite
#' variance-covariance matrix \eqn{Var(X) = \Sigma_X}. \eqn{\epsilon} is a mean
#' zero random variable with finite \eqn{Var(\epsilon) = E(\epsilon^2)}, \eqn{g}
#' is an unknown, continuous non-constant function,
#' and \eqn{B = (b_1, ..., b_k)} is
#' a real \eqn{p \times k}{p x k} of rank \eqn{k <= p}{k \leq p}. 
#' Without loss of generality \eqn{B} is assumed to be orthonormal.
#'
#' @author Daniel Kapla, Lukas Fertl, Bura Efstathia
#' @references Fertl Lukas, Bura Efstathia. Conditional Variance Estimation for
#' Sufficient Dimension Reduction, 2019
#'
#' @importFrom stats model.frame
#' @docType package
#' @useDynLib CVE, .registration = TRUE
"_PACKAGE"

#' Conditional Variance Estimator (CVE).
#'
#' TODO: reuse of package description and details!!!!
#'
#' @param formula an object of class \code{"formula"} which is a symbolic
#' description of the model to be fitted.
#' @param data an optional data frame, containing the data for the formula if
#' supplied.
#' @param method specifies the CVE method variation as one of
#' \itemize{
#'      \item "simple" exact implementation as described in the paper listed 
#'          below.
#'      \item "weighted" variation with addaptive weighting of slices.
#' }
#' @param ... Parameters passed on to \code{cve.call}.
#'
#' @return dr is a S3 object of class \code{cve} with named properties:
#' \itemize{
#'    \item X: Original training data,
#'    \item Y: Responce of original training data,
#'    \item method: Name of used method,
#'    \item call: The method call
#' }
#' as well as indexed entries \code{dr[[k]]} storing the k-dimensional SDR
#' projection matrices.
#'
#' @examples
#' library(CVE)
#'
#' # create dataset
#' n <- 200
#' p <- 12
#' X <- matrix(rnorm(n * p), n, p)
#' B <- cbind(c(1, rep(0, p - 1)), c(0, 1, rep(0, p - 2)))
#' Y <- X %*% B
#' Y <- Y[, 1L]^2 + Y[, 2L]^2 + rnorm(n, 0, 0.3)
#'
#' # Call the CVE method.
#' dr <- cve(Y ~ X)
#' (B <- basis(dr, 2))
#'
#' @seealso For a detailed description of \code{formula} see
#'      [\code{\link{formula}}].
#' @export
cve <- function(formula, data, method = "simple", max.dim = 10L, ...) {
    # check for type of `data` if supplied and set default
    if (missing(data)) {
        data <- environment(formula)
    } else if (!is.data.frame(data)) {
        stop("Parameter 'data' must be a 'data.frame' or missing.")
    }

    # extract `X`, `Y` from `formula` with `data`
    model <- stats::model.frame(formula, data)
    X <- as.matrix(model[ ,-1L, drop = FALSE])
    Y <- as.double(model[ , 1L])

    # pass extracted data on to [cve.call()]
    dr <- cve.call(X, Y, method = method, max.dim = max.dim, ...)

    # overwrite `call` property from [cve.call()]
    dr$call <- match.call()
    return(dr)
}

#' @param nObs parameter for choosing bandwidth \code{h} using
#'   \code{\link{estimate.bandwidth}} (ignored if \code{h} is supplied).
#' @param X data matrix with samples in its rows.
#' @param Y Responses (1 dimensional).
#' @param k Dimension of lower dimensional projection, if \code{k} is given
#'      only the specified dimension \code{B} matrix is estimated.
#' @param min.dim lower bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param max.dim upper bounds for \code{k}, (ignored if \code{k} is supplied).
#' @param tau Initial step-size.
#' @param tol Tolerance for break condition.
#' @param epochs maximum number of optimization steps.
#' @param attempts number of arbitrary different starting points.
#' @param logger a logger function (only for advanced user, significantly slows
#'      down the computation).
#'
#' @return dr is a list which contains:
#' \itemize{
#' \item dir: dir[[d]] is the central space with d-dimension
#'         d = 1, 2, ..., p reduced direction of different dimensions
#' \item y: the value of response
#' \item idx: the index of variables which survives after screening
#' \item max.dim: the largest dimensions of CS or CMS which have been calculated in mave function
#' \item ky: parameter used for DIM for selection
#' \item x: the original training data
#' }
#'
#' @rdname cve
#' @export
cve.call <- function(X, Y, method = "simple",
                     nObs = sqrt(nrow(X)), h = NULL,
                     min.dim = 1L, max.dim = 10L, k = NULL,
                     momentum = 0.0, tau = 1.0, tol = 1e-3,
                     slack = 0.0, gamma = 0.5,
                     V.init = NULL,
                     epochs = 50L, attempts = 10L,
                     logger = NULL) {
    # get method bitmask
    methods <- list(
        "simple" = 0L,
        "weighted" = 1L
    )
    method <- tolower(method)
    if (!(method %in% names(methods))) {
        stop('Got unknown method.')
    }
    method_bitmask <- methods[[method]]

    # parameter checking
    if (!is.numeric(momentum) || length(momentum) > 1L) {
        stop("Momentum must be a number.")
    }
    if (!is.double(momentum)) {
        momentum <- as.double(momentum)
    }
    if (momentum < 0.0 || momentum >= 1.0) {
        stop("Momentum must be in [0, 1).")
    }

    if (!(is.matrix(X) && is.numeric(X))) {
        stop("Parameter 'X' should be a numeric matrices.")
    }
    if (!is.numeric(Y)) {
        stop("Parameter 'Y' must be numeric.")
    }
    if (is.matrix(Y) || !is.double(Y)) {
        Y <- as.double(Y)
    }
    if (nrow(X) != length(Y)) {
        stop("Rows of 'X' and 'Y' elements are not compatible.")
    }
    if (ncol(X) < 2) {
        stop("'X' is one dimensional, no need for dimension reduction.")
    }

    if (!is.null(V.init)) {
        if (!is.matrix(V.init)) {
            stop("'V.init' must be a matrix.")
        }
        if (!all.equal(crossprod(V.init), diag(1, ncol(V.init)))) {
            stop("'V.init' must be semi-orthogonal.")
        }
        if (ncol(X) != nrow(V.init) || ncol(X) <= ncol(V.init)) {
            stop("Dimension missmatch of 'V.init' and 'X'")
        }
        min.dim <- max.dim <- ncol(X) - ncol(V.init)
        attempts <- 0L
    } else if (missing(k) || is.null(k)) {
        min.dim <- as.integer(min.dim)
        max.dim <- as.integer(min(max.dim, ncol(X) - 1L))
    } else {
        min.dim <- max.dim <- as.integer(k)
    }
    if (min.dim > max.dim) {
        stop("'min.dim' bigger 'max.dim'.")
    }
    if (max.dim >= ncol(X)) {
        stop("'max.dim' (or 'k') must be smaller than 'ncol(X)'.")
    }

    if (missing(h) || is.null(h)) {
        estimate <- TRUE
    } else if (is.function(h)) {
        estimate <- TRUE
        estimate.bandwidth <- h
    } else if (is.numeric(h) && h > 0.0) {
        estimate <- FALSE
        h <- as.double(h)
    } else {
        stop("Bandwidth 'h' must be positive numeric.")
    }

    if (!is.numeric(tau) || length(tau) > 1L || tau <= 0.0) {
        stop("Initial step-width 'tau' must be positive number.")
    } else {
        tau <- as.double(tau)
    }
    if (!is.numeric(tol) || length(tol) > 1L || tol < 0.0) {
        stop("Break condition tolerance 'tol' must be not negative number.")
    } else {
        tol <- as.double(tol)
    }
    if (!is.numeric(slack) || length(slack) > 1L || slack < 0.0) {
        stop("Break condition slack 'slack' must be not negative number.")
    } else {
        slack <- as.double(slack)
    }
    if (!is.numeric(gamma) || length(gamma) > 1L || gamma <= 0.0 || gamma >= 1.0) {
        stop("Stepsize reduction 'gamma' must be between 0 and 1.")
    } else {
        gamma <- as.double(gamma)
    }

    if (!is.numeric(epochs) || length(epochs) > 1L) {
        stop("Parameter 'epochs' must be positive integer.")
    } else if (!is.integer(epochs)) {
        epochs <- as.integer(epochs)
    }
    if (epochs < 1L) {
        stop("Parameter 'epochs' must be at least 1L.")
    }

    if (is.null(V.init)) {
        if (!is.numeric(attempts) || length(attempts) > 1L) {
            stop("Parameter 'attempts' must be positive integer.")
        } else if (!is.integer(attempts)) {
            attempts <- as.integer(attempts)
        }
        if (attempts < 1L) {
            stop("Parameter 'attempts' must be at least 1L.")
        }
    }

    if (is.function(logger)) {
        loggerEnv <- environment(logger)
    } else {
        loggerEnv <- NULL
    }

    # Call specified method.
    method <- tolower(method)
    call <- match.call()
    dr <- list()
    dr$res <- list()
    for (k in min.dim:max.dim) {

        if (estimate) {
            h <- estimate.bandwidth(X, k, nObs)
        }

        dr.k <- .Call('cve', PACKAGE = 'CVE',
                      X, Y, k, h,
                      method_bitmask,
                      V.init,
                      momentum, tau, tol,
                      slack, gamma,
                      epochs, attempts,
                      logger, loggerEnv)

        dr.k$B <- null(dr.k$V)
        dr.k$loss <- mean(dr.k$L)
        dr.k$h <- h
        dr.k$k <- k
        class(dr.k) <- "cve.k"
        dr$res[[as.character(k)]] <- dr.k
    }

    # augment result information
    dr$X <- X
    dr$Y <- Y
    dr$method <- method
    dr$call <- call
    class(dr) <- "cve"
    return(dr)
}

#' Loss distribution elbow plot.
#'
#' Boxplots of the loss from \code{min.dim} to \code{max.dim} \code{k} values.
#'
#' @param x Object of class \code{"cve"} (result of [\code{\link{cve}}]).
#' @param ... Pass through parameters to [\code{\link{plot}}] and
#'      [\code{\link{lines}}]
#'
#' @seealso see \code{\link{par}} for graphical parameters to pass through
#'      as well as \code{\link{plot}}, the standard plot utility.
#' @method plot cve
#' @importFrom graphics plot lines points
#' @export
plot.cve <- function(x, ...) {
    L <- c()
    k <- c()
    for (dr.k in x$res) {
        if (class(dr.k) == 'cve.k') {
            k <- c(k, as.character(dr.k$k))
            L <- c(L, dr.k$L)
        }
    }
    L <- matrix(L, ncol = length(k)) / var(x$Y)
    boxplot(L, main = "elbow plot",
            xlab = "SDR dimension",
            ylab = "Sample loss distribution",
            names = k)
}

#' Prints a summary of a \code{cve} result.
#' @param object Instance of 'cve' as returned by \code{cve}.
#' @method summary cve
#' @export
summary.cve <- function(object, ...) {
    cat('Summary of CVE result - Method: "', object$method, '"\n',
        '\n',
        'Dataset size:   ', nrow(object$X), '\n',
        'Data Dimension: ', ncol(object$X), '\n',
        # 'SDR Dimension:  ', object$k, '\n',
        # 'loss:           ', object$loss, '\n',
        '\n',
        'Called via:\n',
        '    ',
        sep='')
    print(object$call)

    L <- c()
    k <- c()
    for (dr.k in object$res) {
        if (class(dr.k) == 'cve.k') {
            k <- c(k, as.character(dr.k$k))
            L <- c(L, dr.k$L)
        }
    }
    L <- matrix(L, ncol = length(k))
    S <- apply(L, 2, summary)
    colnames(S) <- k
    cat('\n')
    print(S)
}

#' @export
directions <- function(dr, k) {
    UseMethod("directions")
}

#' Computes projected training data \code{X} for given dimension `k`.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' @param k SDR dimension to use for projection.
#'
#' @method directions cve
#' @aliases directions directions.cve
#' @export
directions.cve <- function(dr, k) {
    if (!(k %in% names(dr$res))) {
        stop("SDR directions for requested dimension `k` not computed.")
    }
    return(dr$X %*% dr$res[[as.character(k)]]$B)
}

#' @export
basis <- function(dr, k) {
    UseMethod("basis")
}

#' Gets estimated SDR basis.
#'
#' @param dr Instance of 'cve' as returned by \code{cve}.
#' @param k SDR dimension of requested basis, if not given a list of all
#'  computed basis is returned.
#'
#' @return List of basis matrices, or the SDR basis for supplied dimension `k`.
#'
#' @method basis cve
#' @aliases basis basis.cve
#' @export
basis.cve <- function(dr, k) {
    if (missing(k)) {
        Bs <- list()
        for (k in names(dr$res)) {
            Bs[[k]] <- dr$res[[k]]$B
        }
        return(Bs)
    } else if (k %in% names(dr$res)) {
        return(dr$res[[as.character(k)]]$B)
    } else {
        stop("Requested dimenion `k` not computed.")
    }
}


#' Predict method for CVE Fits.
#'
#' Predict responces using reduced data with \code{\link{mars}}.
#'
#' @param object instance of class \code{cve} (result of \code{cve},
#'  \code{cve.call}).
#' @param X.new Matrix of the new data to be predicted.
#' @param dim dimension of SDR space to be used for data projecition.
#' @param ... further arguments passed to \code{\link{mars}}.
#'
#' @return prediced response of data \code{X.new}.
#'
#' @seealso \code{\link{cve}}, \code{\link{cve.call}} or \pkg{\link{mars}}.
#'
#' @examples
#'  TODO:
#'
#' @aliases predict.cve
#' @rdname predict.cve
#'
#' @method predict cve
#' @export
predict.cve <- function(object, X.new, dim = NULL, ...) {
    library(mda)

    if (!is.matrix(X.new)) {
        X.new <- matrix(X.new, nrow = 1L)
    }

    B <- dr$res[[as.character(dim)]]$B

    model <- mars(object$X %*% B, object$Y)
    predict(model, X.new %*% B)
}

#' @export
predict.dim <- function(dr) {
    UseMethod("predict.dim")
}

#' @method predict.dim cve
#' @export
predict.dim.cve <- function(dr) {
    library(mda)

    # Get centered training data and dimensions
    X <- scale(dr$X, center = TRUE, scale = FALSE)
    n <- nrow(dr$X) # umber of training data samples
    Sigma <- (1 / n) * crossprod(X, X)
    eig <- eigen(Sigma)
    Sigma_root <- eig$vectors %*% tcrossprod(diag(sqrt(eig$values)), eig$vectors)
    X <- X %*% solve(Sigma_root)

    pred <- matrix(0, n, length(dr$res))
    colnames(pred) <- names(dr$res)
    for (dr.k in dr$res) {
        # get "name" of current dimension
        k <- as.character(dr.k$k)
        # Project dataset with current SDR basis
        X.proj <- X %*% dr.k$B

        for (i in 1:n) {
            model <- mars(X.proj[-i, ], dr$Y[-i])
            pred[i, k] <- predict(model, X.proj[i, , drop = F])
        }

    }
    MSE <- colMeans((pred - dr$Y)^2)

    return(list(
        MSE = MSE,
        k = as.integer(names(which.min(MSE)))
    ))
}