From e1c0bc1b6ab2dd9d030bedd793084ff7e05d0fa3 Mon Sep 17 00:00:00 2001 From: Raphael Winkelmann Date: Mon, 2 Nov 2020 18:21:00 +0100 Subject: [PATCH] more metadata stuff --- R/emuR-database.flatfiledata.R | 45 +++++++++++++------ .../test_emuR-database.flatfiledata.R | 10 ++--- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/R/emuR-database.flatfiledata.R b/R/emuR-database.flatfiledata.R index c5779313..9730abeb 100644 --- a/R/emuR-database.flatfiledata.R +++ b/R/emuR-database.flatfiledata.R @@ -2,19 +2,24 @@ read_and_join_key_value_tsv <- function(emuDBhandle, file, x, bundleName, - sessionName){ + sessionName, + delim = "\t"){ if(file.exists(file)){ - key_value_tsv = readr::read_tsv(file, col_types = readr::cols()) + browser() + key_value_tsv = readr::read_delim(file, + col_types = readr::cols(), + delim) if(all(names(key_value_tsv) == c("key", "value"))){ key_value_tsv_pivoted = tidyr::pivot_wider(key_value_tsv, names_from = "key", values_from = "value") if(missing(bundleName) && missing(sessionName)){ res = dplyr::full_join(x, key_value_tsv_pivoted, by = character()) } else if(missing(bundleName) && !missing(sessionName)) { # join by session - browser() key_value_tsv_pivoted$session = sessionName res = dplyr::left_join(x, key_value_tsv_pivoted, by = "session") - } + } else if(!missing(bundleName) && !missing(sessionName)){ + stop("handeling bundle key value pairs not implemented yet!") + } } else { stop(path2tsv, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!") } @@ -38,7 +43,18 @@ read_and_join_long_tsv <- function(emuDBhandle, file, x){ } } -##' Join flat file .tsv data to x +##' Join flat file data (UTF-8 .tsv/.csv files) to x +##' +##' Function to join flat file data that is present within +##' the directories of an emuDB to a tibble/data.frame object +##' usually either produced by \link{query} or \link{get_trackdata}. As +##' it uses the "session" and "bundle" columns to perform the joins these +##' have to be present in x. +##' +##' This function recognizes 2 types flat files files: +##' \itemize{ +##' \item *_keyValue files +##' } ##' ##' @param emuDBhandle emuDB handle object (see \link{load_emuDB}) ##' @param sessionPattern A regular expression pattern matching session names to be searched from the database @@ -53,10 +69,11 @@ read_and_join_long_tsv <- function(emuDBhandle, file, x){ ##' } ##' \code{session}, \code{b} ##' @export -join_tsvs <- function(emuDBhandle, - x, - sessionPattern = '.*', - bundlePattern = '.*'){ +join_flatFileData <- function(emuDBhandle, + x, + sessionPattern = '.*', + bundlePattern = '.*', + fileExtension = '.tsv'){ # gen. strat. move from bundles to session to emuDB level all_bundles = list_bundles(emuDBhandle) @@ -66,24 +83,24 @@ join_tsvs <- function(emuDBhandle, all_bundles = all_bundles[ grepl(pattern = sessionPattern, x = all_bundles, perl = T) & grepl(pattern = bundlePattern, x = all_bundles, perl = T) - ] + ] ############################## # handle emuDB level # get keyValue tsv file on emuDB level - path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_keyValue.", "tsv")) + path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_keyValue", fileExtension)) x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x) - + # get long tsv file on emuDB level - path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_long.", "tsv")) + path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_long", fileExtension)) x = read_and_join_long_tsv(emuDBhandle, file = path2tsv, x) ############################## # handle session level for(session_name in unique(all_bundles$session)){ # get keyValue tsv file on session level - path2tsv = file.path(emuDBhandle$basePath, paste0(session_name, session.suffix), paste0(session_name, "_keyValue.", "tsv")) + path2tsv = file.path(emuDBhandle$basePath, paste0(session_name, session.suffix), paste0(session_name, "_keyValue", fileExtension)) x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x, sessionName = session_name) } diff --git a/tests/testthat/test_emuR-database.flatfiledata.R b/tests/testthat/test_emuR-database.flatfiledata.R index 8d751a43..61e02c5d 100644 --- a/tests/testthat/test_emuR-database.flatfiledata.R +++ b/tests/testthat/test_emuR-database.flatfiledata.R @@ -31,7 +31,7 @@ db = load_emuDB(path2db, inMemoryCache = internalVars$testingVars$inMemoryCache, verbose = F) -test_that("join_tsvs works on emuDB level", { +test_that("join_flatFileData works on emuDB level", { # key value emuDB data flat_data = tibble::tibble(key = c("location", "institution"), value = c("Muenchen", "IPS")) @@ -40,7 +40,7 @@ test_that("join_tsvs works on emuDB level", { sl = query(db, "Phonetic == S") - sl_joined = join_tsvs(db, sl) + sl_joined = join_flatFileData(db, sl) expect_true(all(c("location", "institution") %in% names(sl_joined))) @@ -56,11 +56,11 @@ test_that("join_tsvs works on emuDB level", { readr::write_tsv(x = long_data, file = file.path(db$basePath, paste0(db$dbName, "_long.", "tsv"))) - sl_joined = join_tsvs(db, sl) + sl_joined = join_flatFileData(db, sl) expect_equal(length(which(is.na(sl_joined$eyecolor))), 7) }) -test_that("join_tsvs works on session level", { +test_that("join_flatFileData works on session level", { # key value session data flat_data = tibble::tibble(key = c("location", "fudge", "speed"), value = c("Muenchen", "yummy", "fast")) @@ -69,7 +69,7 @@ test_that("join_tsvs works on session level", { sl = query(db, "Phonetic == S") - sl_joined = join_tsvs(db, sl) + sl_joined = join_flatFileData(db, sl) expect_true(all(c("location.x", "location.y") %in% names(sl_joined)))