# Preprocess the UCI EEG dataset ("eeg_full.tar"): read every trial of every
# subject, average the trials per condition, and store the result as a
# time x sensor x condition x subject tensor in two RDS files.

# List of all sensor names/positions (in channel order) to ensure all entries
# are placed at the same sensor x time position in the matrix layout
sensors <- c(
  "FP1", "FP2", "F7", "F8", "AF1", "AF2", "FZ", "F4", "F3", "FC6", "FC5",
  "FC2", "FC1", "T8", "T7", "CZ", "C3", "C4", "CP5", "CP6", "CP1", "CP2",
  "P3", "P4", "PZ", "P8", "P7", "PO2", "PO1", "O2", "O1", "X", "AF7", "AF8",
  "F5", "F6", "FT7", "FT8", "FPZ", "FC4", "FC3", "C6", "C5", "F2", "F1",
  "TP8", "TP7", "AFZ", "CP3", "CP4", "P5", "P6", "C1", "C2", "PO7", "PO8",
  "FCZ", "POZ", "OZ", "P2", "P1", "CPZ", "nd", "Y"
)

# Trial conditions in fixed order (used for factor levels and dimnames)
conditions <- c("S1 obj", "S2 match", "S2 nomatch")

# Read one gz-compressed trial file.
#
# Returns the 64 * 256 voltage readings as a numeric vector in standardized
# (sensor-major, time-minor) order, with the 4th meta line attached as the
# "meta" attribute — or NULL when the file holds no data or its contents are
# inconsistent (unknown sensor, missing/duplicated sensor-time entries).
read_trial <- function(trial) {
  conn <- gzfile(trial)
  # Destroy the connection on exit (also covers the early returns) to avoid
  # "closing unused connection" warnings at garbage collection time.
  on.exit(close(conn), add = TRUE)

  # Leading "# ..." meta data lines; fewer than 4 lines means the file does
  # NOT contain any data -> error, ignore trial.
  meta <- readLines(conn, 4)
  if (length(meta) < 4) {
    return(NULL)
  }

  # read.csv reopens the (now closed) connection from the start; the meta
  # lines are skipped again via comment.char.
  data <- read.csv(conn,
    header = FALSE, sep = " ", comment.char = "#",
    col.names = c("trial", "sensor", "time", "volts"),
    colClasses = c("integer", "character", "integer", "numeric")
  )

  # Target position of every row in the standardized layout:
  # (sensor index - 1) * 256 + time + 1, with time in 0..255. This ensures
  # the same placement of measurements in the matrix representation.
  data$sensor <- factor(data$sensor, levels = sensors)
  idx <- (as.integer(data$sensor) - 1L) * 256L + data$time + 1L

  # Reject trials with unknown sensors (NA index) or that do not cover every
  # sensor/time combination exactly once (guards against interleaved or
  # shifted data in the final array).
  if (length(idx) != 64L * 256L || anyNA(idx) ||
      any(sort(idx) != seq_along(idx))) {
    return(NULL)
  }

  # Scatter the measurements to their target positions. NOTE: indexing with
  # `data$volts[idx]` would apply the INVERSE permutation — identical only
  # when the file happens to be in standardized order already.
  volts <- numeric(64L * 256L)
  volts[idx] <- data$volts
  structure(volts, meta = meta[4])
}

tmpdir <- tempdir()
untar("eeg_full.tar", exdir = tmpdir) # uncompress
archive_files <- untar("eeg_full.tar", list = TRUE) # file names (only read)
# One list slot per subject, named by the 11-character subject ID
subjects <- `names<-`(
  vector("list", length(archive_files)),
  substr(archive_files, 1, 11)
)

for (i in seq_along(subjects)) {
  subject <- names(subjects)[i]

  # Decompress folder of trials for current subject
  untar(file.path(tmpdir, sprintf("%s.tar.gz", subject)), exdir = tmpdir)

  # Read all trial files of current subject (NULL marks an unreadable or
  # inconsistent file)
  X <- lapply(
    list.files(file.path(tmpdir, subject), full.names = TRUE),
    read_trial
  )

  # Count nr. of file errors (NULL results) and remove from data
  file_error_idx <- which(vapply(X, is.null, logical(1)))
  if (length(file_error_idx) > 0) {
    X[file_error_idx] <- NULL
  }

  # Check for error notification in meta data and drop those trials as well
  meta <- vapply(X, attr, character(1), "meta")
  notice_error_idx <- grep("err", meta)
  if (length(notice_error_idx) > 0) {
    X[notice_error_idx] <- NULL
    meta <- meta[-notice_error_idx]
  }

  # Extract trial condition from the remaining meta lines
  condition <- factor(
    sub(".*(S1 obj|S2 match|S2 nomatch).*", "\\1", meta),
    levels = conditions
  )

  if (length(X) > 0) {
    # Concatenate individual trials as columns of a matrix
    # (rows = standardized sensor x time order)
    X <- matrix(unlist(X), nrow = 64L * 256L)
    # Track for reporting the nr. of non-finite values
    nr_non_finite <- sum(!is.finite(X))
    # Mean over trials grouped by trial condition, concatenated per condition
    subjects[[subject]] <- unlist(lapply(conditions, function(cond) {
      rowMeans(X[, condition == cond, drop = FALSE], na.rm = TRUE)
    }))
  } else {
    # All trials dropped: fill with NA so the subject slot stays aligned in
    # the final 4D array (a NULL slot would silently shift all later
    # subjects' data in `unlist(subjects)`).
    nr_non_finite <- 0L
    subjects[[subject]] <- rep(NA_real_, 64L * 256L * length(conditions))
    warning(sprintf("Subject %s: all trials dropped", subject), call. = FALSE)
  }

  # Remove/Delete subject file and decompressed folder
  unlink(file.path(tmpdir, subject), recursive = TRUE)
  unlink(file.path(tmpdir, sprintf("%s.tar.gz", subject)))

  # Report progress
  drop_note <- if (length(file_error_idx) > 0 && length(notice_error_idx) > 0) {
    sprintf(
      ", %d file and %d notice errors (%s) -> trials dropped",
      length(file_error_idx), length(notice_error_idx), subject
    )
  } else if (length(file_error_idx) > 0) {
    sprintf(
      ", %d file errors (%s) -> trials dropped",
      length(file_error_idx), subject
    )
  } else if (length(notice_error_idx) > 0) {
    sprintf(
      ", %d notice errors (%s) -> trials dropped",
      length(notice_error_idx), subject
    )
  } else {
    ""
  }
  cat(sprintf(
    "%5d/%d - Nr. trials: %3d = %3d + %3d + %3d%s%s\n",
    i, length(subjects), length(condition),
    sum(condition == "S1 obj"),
    sum(condition == "S2 match"),
    sum(condition == "S2 nomatch"),
    if (nr_non_finite > 0) {
      sprintf(", Nr. non-finite: %d", nr_non_finite)
    } else {
      ""
    },
    drop_note
  ))
}

# Combine subjects in single 4D tensor (time x sensor x condition x subject)
X <- array(
  unlist(subjects),
  dim = c(time = 256, sensor = 64, condition = 3, subject = length(subjects)),
  dimnames = list(
    time = 1:256,
    sensor = sensors,
    condition = conditions,
    subject = names(subjects)
  )
)

# Extract alcoholic or control labels for each subject
# (4th character of the subject ID: "a" = alcoholic, "c" = control)
y <- factor(
  substr(names(subjects), 4, 4),
  levels = c("c", "a"),
  labels = c("control", "alcoholic")
)
names(y) <- names(subjects)

# Save full processed EEG dataset as R data files (full tensor and the
# "S1 obj" condition slice)
saveRDS(list(X = X, y = y), file = "eeg_data_3d.rds")
saveRDS(list(X = X[, , "S1 obj", ], y = y), file = "eeg_data_2d.rds")