library(ISLR) # for Hitters dataset library(MAVE) library(CVE) # Set global parameters. seed <- 21 max.dim <- 5L attempts <- 25L max.iter <- 100L # momentum <- 0.0 # Prepair data for analysis. cols <- c("Salary", "AtBat", "Hits", "HmRun", "Runs", "RBI", "Walks", "Years", "CAtBat","CHits", "CHmRun","CRuns", "CRBI", "CWalks", "PutOuts", "Assists", "Errors") outliers <- c(92, # Gary Pettis 120, # John Moses 173, # Milt Thompson 189, # Rick Burleson 220, # Scott Fletcher 230, # Tom Foley 241) # Terry Puhl # Subselect as a matrix without outliers as well as reordered # and filtered columns. ds <- na.omit(Hitters[, cols])[-outliers, ] ds$Salary <- log(ds$Salary) ds <- scale(ds, center = TRUE, scale = TRUE) # Split into data and responce. X <- as.matrix(ds[, colnames(ds) != "Salary"]) Y <- as.matrix(ds[, "Salary"]) pdf("results/hitters.pdf", width = 15, height = 5) layout(matrix(c(1, 2), nrow = 1)) cat("Seed: ", seed, "\n\n") ################################################################################ ### meanMAVE ### ################################################################################ set.seed(seed) dr <- mave(Y ~ X, method = "meanMAVE", max.dim = max.dim) dim <- which.min(mave.dim(dr)$cv) B <- coef(dr, 2L) # train linear model on reduced data data <- as.data.frame(cbind(Y, X %*% B)) colnames(data) <- c("Y", "dir1", "dir2") model <- lm(Y ~ dir1 + I(dir1^2) + dir2, data = data) # Print and log. cat("### meanMAVE\n\nest.dim: ", dim, '\n\n') summary(model) plot(x = data[, "dir1"], y = data[, "Y"], main = "meanMave", xlab = "dir1", ylab = "Y") plot(x = data[, "dir2"], y = data[, "Y"], main = "meanMave", xlab = "dir2", ylab = "Y") ################################################################################ ### CVE ### ################################################################################ set.seed(seed) dr <- cve(Y ~ X, max.dim = max.dim, max.iter = max.iter, attempts = attempts) dim <- predict_dim(dr, method = "cv")$k B <- coef(dr, 2L) # Determine 1st direction (cause CVE does not care for column order). if (which.max(abs(t(coef(dr, 1L)) %*% B)) == 2) { B <- B[, c(2, 1)] # switch directions } # train linear model on reduced data data <- as.data.frame(cbind(Y, X %*% B)) colnames(data) <- c("Y", "dir1", "dir2") model <- lm(Y ~ dir1 + I(dir1^2) + dir2, data = data) # Print and log. cat("### CVE\n\nest.dim: ", dim, '\n\n') summary(model) plot(x = data[, "dir1"], y = data[, "Y"], main = "CVE", xlab = "dir1", ylab = "Y") plot(x = data[, "dir2"], y = data[, "Y"], main = "CVE", xlab = "dir2", ylab = "Y") dev.off()