Skip to content

Commit

Permalink
more metadata stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
raphywink committed Nov 2, 2020
1 parent 368782a commit e1c0bc1
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
45 changes: 31 additions & 14 deletions R/emuR-database.flatfiledata.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,24 @@ read_and_join_key_value_tsv <- function(emuDBhandle,
file,
x,
bundleName,
sessionName){
sessionName,
delim = "\t"){
if(file.exists(file)){
key_value_tsv = readr::read_tsv(file, col_types = readr::cols())
browser()
key_value_tsv = readr::read_delim(file,
col_types = readr::cols(),
delim)
if(all(names(key_value_tsv) == c("key", "value"))){
key_value_tsv_pivoted = tidyr::pivot_wider(key_value_tsv, names_from = "key", values_from = "value")
if(missing(bundleName) && missing(sessionName)){
res = dplyr::full_join(x, key_value_tsv_pivoted, by = character())
} else if(missing(bundleName) && !missing(sessionName)) {
# join by session
browser()
key_value_tsv_pivoted$session = sessionName
res = dplyr::left_join(x, key_value_tsv_pivoted, by = "session")
}
} else if(!missing(bundleName) && !missing(sessionName)){
stop("handeling bundle key value pairs not implemented yet!")
}
} else {
stop(path2tsv, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!")
}
Expand All @@ -38,7 +43,18 @@ read_and_join_long_tsv <- function(emuDBhandle, file, x){
}
}

##' Join flat file .tsv data to x
##' Join flat file data (UTF-8 .tsv/.csv files) to x
##'
##' Function to join flat file data that is present within
##' the directories of an emuDB to a tibble/data.frame object,
##' usually produced by either \link{query} or \link{get_trackdata}. As
##' it uses the "session" and "bundle" columns to perform the joins, these
##' columns have to be present in x.
##'
##' This function recognizes 2 types of flat files:
##' \itemize{
##' \item *_keyValue files
##' }
##'
##' @param emuDBhandle emuDB handle object (see \link{load_emuDB})
##' @param sessionPattern A regular expression pattern matching session names to be searched from the database
Expand All @@ -53,10 +69,11 @@ read_and_join_long_tsv <- function(emuDBhandle, file, x){
##' }
##' \code{session}, \code{b}
##' @export
join_tsvs <- function(emuDBhandle,
x,
sessionPattern = '.*',
bundlePattern = '.*'){
join_flatFileData <- function(emuDBhandle,
x,
sessionPattern = '.*',
bundlePattern = '.*',
fileExtension = '.tsv'){

# gen. strat. move from bundles to session to emuDB level
all_bundles = list_bundles(emuDBhandle)
Expand All @@ -66,24 +83,24 @@ join_tsvs <- function(emuDBhandle,
all_bundles = all_bundles[
grepl(pattern = sessionPattern, x = all_bundles, perl = T)
& grepl(pattern = bundlePattern, x = all_bundles, perl = T)
]
]

##############################
# handle emuDB level

# get keyValue tsv file on emuDB level
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_keyValue.", "tsv"))
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_keyValue", fileExtension))
x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x)

# get long tsv file on emuDB level
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_long.", "tsv"))
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_long", fileExtension))
x = read_and_join_long_tsv(emuDBhandle, file = path2tsv, x)

##############################
# handle session level
for(session_name in unique(all_bundles$session)){
# get keyValue tsv file on session level
path2tsv = file.path(emuDBhandle$basePath, paste0(session_name, session.suffix), paste0(session_name, "_keyValue.", "tsv"))
path2tsv = file.path(emuDBhandle$basePath, paste0(session_name, session.suffix), paste0(session_name, "_keyValue", fileExtension))
x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x, sessionName = session_name)
}

Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test_emuR-database.flatfiledata.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ db = load_emuDB(path2db,
inMemoryCache = internalVars$testingVars$inMemoryCache,
verbose = F)

test_that("join_tsvs works on emuDB level", {
test_that("join_flatFileData works on emuDB level", {

# key value emuDB data
flat_data = tibble::tibble(key = c("location", "institution"), value = c("Muenchen", "IPS"))
Expand All @@ -40,7 +40,7 @@ test_that("join_tsvs works on emuDB level", {

sl = query(db, "Phonetic == S")

sl_joined = join_tsvs(db, sl)
sl_joined = join_flatFileData(db, sl)

expect_true(all(c("location", "institution") %in% names(sl_joined)))

Expand All @@ -56,11 +56,11 @@ test_that("join_tsvs works on emuDB level", {

readr::write_tsv(x = long_data, file = file.path(db$basePath, paste0(db$dbName, "_long.", "tsv")))

sl_joined = join_tsvs(db, sl)
sl_joined = join_flatFileData(db, sl)
expect_equal(length(which(is.na(sl_joined$eyecolor))), 7)
})

test_that("join_tsvs works on session level", {
test_that("join_flatFileData works on session level", {

# key value session data
flat_data = tibble::tibble(key = c("location", "fudge", "speed"), value = c("Muenchen", "yummy", "fast"))
Expand All @@ -69,7 +69,7 @@ test_that("join_tsvs works on session level", {

sl = query(db, "Phonetic == S")

sl_joined = join_tsvs(db, sl)
sl_joined = join_flatFileData(db, sl)

expect_true(all(c("location.x", "location.y") %in% names(sl_joined)))

Expand Down

0 comments on commit e1c0bc1

Please sign in to comment.