NNSDR/real_data/kc_house_data.R

87 lines
3.2 KiB
R

#!/usr/bin/env Rscript
## data source: the `MAVE` R-package
library(MAVE)
library(CVarE)
Sys.setenv(TF_CPP_MIN_LOG_LEVEL = "3") # Suppress `tensorflow` notes/warnings
suppressPackageStartupMessages({
library(NNSDR)
})
## Configuration
d <- 1L # reduction dimension
# Training epochs, forwarded unchanged to `nn$fit(epochs = ...)` below.
# NOTE(review): presumably one entry per training phase of `nnsdr` - confirm.
epochs <- c(50L, 100L)
dropped <- c('id', 'date', 'zipcode') #, 'sqft_basement') # columns to be dropped

## Loading the "House Price in King County, USA" data set provided by MAVE
data('kc_house_data')
# Keep every column that is not explicitly listed in `dropped`.
ds <- kc_house_data[, !(names(kc_house_data) %in% dropped)]

## Build Dimension Reduction Neural Network model (matching the data)
nn <- nnsdr$new(
    # All remaining columns except one (the response `price`) are predictors.
    input_shapes = list(x = ncol(ds) - 1L),
    d = d, # Reduction dimension
    hidden_units = 512L,
    activation = 'relu'
)

## Open simulation log file, write simulation configuration and header
log.path <- "results/kc_house_data.csv"
# `file()` cannot create missing directories, so make sure the target exists.
dir.create(dirname(log.path), showWarnings = FALSE, recursive = TRUE)
log <- file(log.path, "w", blocking = FALSE)
# The configuration is written as `#`-prefixed lines ahead of the CSV column
# header. No `append` argument is needed: it is ignored for connections.
cat('# d = ', d, '\n# epochs = ', epochs[1], ',', epochs[2], '\n',
    '# dropped = ', paste(dropped, collapse = ', '), '\n',
    'method,fold,mse,var(Y.test),time.user,time.system,time.elapsed\n',
    sep = '', file = log)
## K-Fold Cross Validation
K <- 10L
# Round-robin fold assignment: row `r` belongs to fold `((r - 1) %% K) + 1`.
# This is exactly the split obtained by indexing the rows with a length-`K`
# logical vector (which R silently recycles), only spelled out explicitly.
fold.id <- rep_len(seq_len(K), nrow(ds))
# Column mask separating the predictors from the response `price`.
is.predictor <- names(ds) != 'price'

# Writes one CSV result row (method, fold, mse, var(Y.test), timings) to `con`.
#   con      - open, writable connection
#   method   - method label, written in double quotes
#   fold     - index of the current cross validation fold
#   mse      - mean squared prediction error on the test fold (may be NA)
#   var.test - variance of the test fold response
#   time     - result of `system.time()` for the model fit
write_result_row <- function(con, method, fold, mse, var.test, time) {
    cat('"', method, '",', fold, ',', mse, ',', var.test, ',',
        time[['user.self']], ',', time[['sys.self']], ',', time[['elapsed']],
        '\n', sep = '', file = con)
}

# The `finally` clause guarantees the log is flushed and closed even if one of
# the fits fails, so results of already completed folds are not lost.
tryCatch({
    for (i in seq_len(K)) {
        in.test <- fold.id == i
        ds.train <- ds[!in.test, ]
        ds.test <- ds[in.test, , drop = FALSE]
        X.train <- as.matrix(ds.train[, is.predictor])
        Y.train <- as.matrix(ds.train[, 'price'])
        X.test <- as.matrix(ds.test[, is.predictor])
        Y.test <- as.matrix(ds.test[, 'price'])
        # `var` of a one column matrix is a 1 x 1 matrix, `c` drops it to a scalar.
        var.test <- c(var(Y.test))

        ## Fit `DR` Neural Network model
        time <- system.time(
            nn$fit(X.train, Y.train, epochs = epochs, initializer = 'fromOPG')
        )
        mse <- mean((nn$predict(X.test) - Y.test)^2)
        write_result_row(log, 'nn.ref', i, mse, var.test, time)

        ## `MAVE`
        time <- system.time(
            dr <- mave.compute(X.train, Y.train, method = 'meanMAVE', max.dim = d)
        )
        # Sometimes the `mda` package fails -> predict with NA/NaN/Inf value error.
        # Keep this best-effort (record NA), but report the reason on stderr
        # instead of swallowing it silently.
        mse <- tryCatch(
            mean((predict(dr, X.test, d) - Y.test)^2),
            error = function(err) {
                message('MAVE prediction failed in fold ', i, ': ',
                        conditionMessage(err))
                NA
            }
        )
        write_result_row(log, 'mave', i, mse, var.test, time)

        # Current implementation requires too much memory (CVarE v1.1). Run on `VSC`.
        # ## and CVE
        # X.scaled <- scale(X.train)
        # time <- system.time(dr <- cve.call(X.scaled, Y.train, k = d))
        # # Might have the same problem as MAVE since we use `mda` as well.
        # mse <- tryCatch({
        # Y.pred <- predict(dr, scale(X.test,
        # scale = attr(X.scaled, 'scaled:scale'),
        # center = attr(X.scaled, 'scaled:center')),
        # k = d)
        # mean((Y.pred - Y.test)^2)
        # },
        # error = function(err) NA)
        # cat('"cve",', i, ',', mse, ',', c(var(Y.test)), ',',
        # time['user.self'], ',', time['sys.self'], ',', time['elapsed'], '\n',
        # sep = '', file = log, append = TRUE)

        ## Reset model
        nn$reset()
    }
}, finally = {
    ## Finished (or aborted by an error), close simulation log file
    close(log)
})