149 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
			
		
		
	
	
			149 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			R
		
	
	
	
	
	
# List of all sensor names/positions (in chanel order) to ensure all entries
 | 
						|
# are places at the same sensor x time position in the matrix layout
 | 
						|
sensors <- c(
 | 
						|
    "FP1", "FP2", "F7",  "F8",  "AF1", "AF2", "FZ",  "F4",  "F3",  "FC6", "FC5",
 | 
						|
    "FC2", "FC1", "T8",  "T7",  "CZ",  "C3",  "C4",  "CP5", "CP6", "CP1", "CP2",
 | 
						|
    "P3",  "P4",  "PZ",  "P8",  "P7",  "PO2", "PO1", "O2",  "O1",  "X",   "AF7",
 | 
						|
    "AF8", "F5",  "F6",  "FT7", "FT8", "FPZ", "FC4", "FC3", "C6",  "C5",  "F2",
 | 
						|
    "F1",  "TP8", "TP7", "AFZ", "CP3", "CP4", "P5",  "P6",  "C1",  "C2",  "PO7",
 | 
						|
    "PO8", "FCZ", "POZ", "OZ",  "P2",  "P1",  "CPZ", "nd",  "Y"
 | 
						|
)
 | 
						|
 | 
						|
tmpdir <- tempdir()
 | 
						|
 | 
						|
untar("eeg_full.tar", exdir = tmpdir)           # uncompress
 | 
						|
subjects <- untar("eeg_full.tar", list = TRUE)  # file names (only read)
 | 
						|
subjects <- `names<-`(vector("list", length(subjects)), substr(subjects, 1, 11))
 | 
						|
 | 
						|
for (i in seq_along(subjects)) {
 | 
						|
    subject <- names(subjects)[i]
 | 
						|
 | 
						|
    # Decompressed folder of trials for current subject
 | 
						|
    untar(file.path(tmpdir, sprintf("%s.tar.gz", subject)), exdir = tmpdir)
 | 
						|
 | 
						|
    # Iterate all trial files of current subject
 | 
						|
    X <- lapply(list.files(file.path(tmpdir, subject), full.names = TRUE),
 | 
						|
        function(trial) {
 | 
						|
            # Read leading meta data lines and data from gz compressed CSV file
 | 
						|
            conn <- gzfile(trial)
 | 
						|
            meta <- readLines(conn, 4)
 | 
						|
            # If there are less than 4 entries in meta, the file does NOT contain
 | 
						|
            # any data -> error, ignore trial
 | 
						|
            if (length(meta) < 4) {
 | 
						|
                return(NULL)
 | 
						|
            }
 | 
						|
            # Read data (measurements)
 | 
						|
            data <- read.csv(conn,
 | 
						|
                header = FALSE, sep = " ", comment.char = "#",
 | 
						|
                col.names = c("trial", "sensor", "time", "volts"),
 | 
						|
                colClasses = c("integer", "character", "integer", "numeric")
 | 
						|
            )
 | 
						|
 | 
						|
            # Compute index of every entry (ensures same
 | 
						|
            # placement of measurements in its matrix representation)
 | 
						|
            data$sensor <- factor(data$sensor, levels = sensors)
 | 
						|
            idx <- (as.integer(data$sensor) - 1L) * 256L + data$time + 1L
 | 
						|
            # Check if every sensor at every time point is present to ensure
 | 
						|
            # no interleaving or shifted data in final 3D array
 | 
						|
            if ((length(idx) != 64 * 256) || any(sort(idx) != seq_along(idx))) {
 | 
						|
                return(NULL)
 | 
						|
            }
 | 
						|
 | 
						|
            # Return measurements in standardized order and attach meta info
 | 
						|
            structure(data$volts[idx], meta = meta[4])
 | 
						|
        }
 | 
						|
    )
 | 
						|
 | 
						|
    # Count nr. of errors (return NULL) and remove from data
 | 
						|
    file_error_idx <- which(sapply(X, is.null))
 | 
						|
    if (length(file_error_idx)) {
 | 
						|
        X[file_error_idx] <- NULL
 | 
						|
    }
 | 
						|
 | 
						|
    # Extract meta information
 | 
						|
    meta <- sapply(X, attr, "meta")
 | 
						|
 | 
						|
    # Check for error notification in meta data and drop them as well
 | 
						|
    notice_error_idx <- grep(".*err.*", meta)
 | 
						|
    if (length(notice_error_idx)) {
 | 
						|
        X[notice_error_idx] <- NULL
 | 
						|
        meta <- meta[-notice_error_idx]
 | 
						|
    }
 | 
						|
 | 
						|
    # Split into trial condition and trial nr.
 | 
						|
    condition <- factor(
 | 
						|
        sub(".*(S1 obj|S2 match|S2 nomatch).*", meta, replacement = "\\1"),
 | 
						|
        levels = c("S1 obj", "S2 match", "S2 nomatch")
 | 
						|
    )
 | 
						|
 | 
						|
    # Concatinate individual trials in a 3D array (based on standardized order)
 | 
						|
    X <- matrix(unlist(X), 256 * 64)
 | 
						|
 | 
						|
    # Track for reporting the nr. of non-finite values
 | 
						|
    nr_non_finite <- sum(!is.finite(X))
 | 
						|
 | 
						|
    # Compute mean over trials grouped by trial condition
 | 
						|
    X <- c(
 | 
						|
        rowMeans(X[, condition == "S1 obj",     drop = FALSE], na.rm = TRUE),
 | 
						|
        rowMeans(X[, condition == "S2 match",   drop = FALSE], na.rm = TRUE),
 | 
						|
        rowMeans(X[, condition == "S2 nomatch", drop = FALSE], na.rm = TRUE)
 | 
						|
    )
 | 
						|
 | 
						|
    # store mean of trials of current subject in list of subjects
 | 
						|
    subjects[[subject]] <- X
 | 
						|
 | 
						|
    # Remove/Delete subject file and decompressed folder
 | 
						|
    unlink(file.path(tmpdir, subject), recursive = TRUE)
 | 
						|
    unlink(file.path(tmpdir, sprintf("%s.tar.gz", subject)))
 | 
						|
 | 
						|
    # Report progress
 | 
						|
    cat(sprintf(
 | 
						|
        "%5d/%d - Nr. trials: %3d = %3d + %3d + %3d%s%s\n",
 | 
						|
        i, length(subjects),
 | 
						|
        length(condition),
 | 
						|
        sum(condition == "S1 obj"),
 | 
						|
        sum(condition == "S2 match"),
 | 
						|
        sum(condition == "S2 nomatch"),
 | 
						|
        if (nr_non_finite) sprintf(", Nr. non-finite: %d", nr_non_finite) else "",
 | 
						|
        if (length(file_error_idx) && length(notice_error_idx)) {
 | 
						|
            sprintf(", %d file and %d notice errors (%s) -> trials dropped",
 | 
						|
                length(file_error_idx), length(notice_error_idx), subject
 | 
						|
            )
 | 
						|
        } else if (length(file_error_idx)) {
 | 
						|
            sprintf(", %d file errors (%s) -> trials dropped",
 | 
						|
                length(file_error_idx), subject
 | 
						|
            )
 | 
						|
        } else if (length(notice_error_idx)) {
 | 
						|
            sprintf(", %d notice errors (%s) -> trials dropped",
 | 
						|
                length(notice_error_idx), subject
 | 
						|
            )
 | 
						|
        } else {
 | 
						|
            ""
 | 
						|
        }
 | 
						|
    ))
 | 
						|
}
 | 
						|
 | 
						|
# Combine subjects in single 4D tensor (time x sensor x condition x subject)
 | 
						|
X <- array(
 | 
						|
    unlist(subjects),
 | 
						|
    dim = c(time = 256, sensor = 64, condition = 3, subject = length(subjects)),
 | 
						|
    dimnames = list(
 | 
						|
        time = 1:256,
 | 
						|
        sensor = sensors,
 | 
						|
        condition = c("S1 obj", "S2 match", "S2 nomatch"),
 | 
						|
        subject = names(subjects)
 | 
						|
    )
 | 
						|
)
 | 
						|
 | 
						|
# Extract alcoholic or control labels for each subject
 | 
						|
y <- factor(
 | 
						|
    substr(names(subjects), 4, 4),
 | 
						|
    levels = c("c", "a"),
 | 
						|
    labels = c("control", "alcoholic")
 | 
						|
)
 | 
						|
names(y) <- names(subjects)
 | 
						|
 | 
						|
# Save full processed EEG dataset as R data file
 | 
						|
saveRDS(list(X = X, y = y), file = "eeg_data_3d.rds")
 | 
						|
saveRDS(list(X = X[, , "S1 obj", ], y = y), file = "eeg_data_2d.rds")
 |