Skip to content

Commit

Permalink
more flat data join stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
raphywink committed Oct 30, 2020
1 parent 7c3fc96 commit 368782a
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 57 deletions.
80 changes: 51 additions & 29 deletions R/emuR-database.flatfiledata.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,44 @@
##' List sample rates of media and annotation (_annot.json) files
read_and_join_key_value_tsv <- function(emuDBhandle,
file,
x,
bundleName,
sessionName){
if(file.exists(file)){
key_value_tsv = readr::read_tsv(file, col_types = readr::cols())
if(all(names(key_value_tsv) == c("key", "value"))){
key_value_tsv_pivoted = tidyr::pivot_wider(key_value_tsv, names_from = "key", values_from = "value")
if(missing(bundleName) && missing(sessionName)){
res = dplyr::full_join(x, key_value_tsv_pivoted, by = character())
} else if(missing(bundleName) && !missing(sessionName)) {
# join by session
browser()
key_value_tsv_pivoted$session = sessionName
res = dplyr::left_join(x, key_value_tsv_pivoted, by = "session")
}
} else {
stop(path2tsv, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!")
}
return(res)
}else{
return(x)
}
}

read_and_join_long_tsv <- function(emuDBhandle, file, x){
if(file.exists(file)){
long_tsv = readr::read_tsv(file, col_types = readr::cols())
if(all(c("session", "bundle") %in% names(long_tsv))){
res = dplyr::left_join(x, long_tsv, by = c("bundle", "session"))
} else {
stop(file, " doesn't only contain the columns 'session' and 'value'. Only these two columns are permitted!")
}
return(res)
} else {
return(x)
}
}

##' Join flat file .tsv data to x
##'
##' @param emuDBhandle emuDB handle object (see \link{load_emuDB})
##' @param sessionPattern A regular expression pattern matching session names to be searched from the database
Expand Down Expand Up @@ -28,44 +68,26 @@ join_tsvs <- function(emuDBhandle,
& grepl(pattern = bundlePattern, x = all_bundles, perl = T)
]


##############################
# handle session level

for(sessio_name in unique(all_bundles$session)){
# get keyValue tsv file on session level
path2tsv = file.path(emuDBhandle$basePath, sessio_name, paste0(sessio_name, "_keyValue.", "tsv"))
}

##############################
# handle emuDB level

# get keyValue tsv file on emuDB level
#browser()
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_keyValue.", "tsv"))
if(file.exists(path2tsv)){
key_value_tsv = readr::read_tsv(path2tsv, col_types = readr::cols())
if(all(names(key_value_tsv) == c("key", "value"))){
res = dplyr::full_join(x, key_value_tsv, by = character())
} else {
stop(path2tsv, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!")
}

}
x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x)

# get long tsv file on emuDB level
path2tsv = file.path(emuDBhandle$basePath, paste0(emuDBhandle$dbName, "_long.", "tsv"))
if(file.exists(path2tsv)){
long_tsv = readr::read_tsv(path2tsv, col_types = readr::cols())
if(all(c("session", "bundle") %in% names(long_tsv))){
res = dplyr::left_join(res, long_tsv, by = c("bundle", "session"))
} else {
stop(path2tsv, " doesn't only contain the columns 'key' and 'value'. Only these two columns are permitted!")
}
}
x = read_and_join_long_tsv(emuDBhandle, file = path2tsv, x)

##############################
# handle session level
for(session_name in unique(all_bundles$session)){
# get keyValue tsv file on session level
path2tsv = file.path(emuDBhandle$basePath, paste0(session_name, session.suffix), paste0(session_name, "_keyValue.", "tsv"))
x = read_and_join_key_value_tsv(emuDBhandle, file = path2tsv, x = x, sessionName = session_name)
}

return(res)
return(x)

}

Expand Down
88 changes: 60 additions & 28 deletions tests/testthat/test_emuR-database.flatfiledata.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,45 +16,37 @@ unlink(path2db, recursive = T)
file.copy(path2orig,
path2testData,
recursive = T)

# create a second session
dir.create(file.path(path2db, "0001_ses"))
file.copy(from = list.files(file.path(path2db, "0000_ses"), full.names = T),
to = file.path(path2db, "0001_ses"),
recursive = T)

# extract internalVars from environment .emuR_pkgEnv
internalVars = get("internalVars",
envir = .emuR_pkgEnv)

db = load_emuDB(path2db,
inMemoryCache = internalVars$testingVars$inMemoryCache,
verbose = F)


test_that("join_tsvs works on emuDB level", {

# key value emuDB data
flat_data = tibble::tibble(key = c("location of creation", "institution"), value = c("Muenchen", "IPS"))
flat_data = tibble::tibble(key = c("location", "institution"), value = c("Muenchen", "IPS"))

readr::write_tsv(x = flat_data, file = file.path(db$basePath, paste0(db$dbName, "_keyValue.", "tsv")))

sl = query(db, "Phonetic == S")

sl_joined = join_tsvs(db, sl)

expect_true(all(c("key", "value") %in% names(sl_joined)))

expect_equal(sl_joined$key, c("location of creation",
"institution",
"location of creation",
"institution",
"location of creation",
"institution",
"location of creation",
"institution",
"location of creation",
"institution"))

expect_equal(sl_joined$value, c("Muenchen",
"IPS",
"Muenchen",
"IPS",
"Muenchen",
"IPS",
"Muenchen",
"IPS",
"Muenchen",
"IPS"))
expect_true(all(c("location", "institution") %in% names(sl_joined)))

expect_equal(sl_joined$location, rep("Muenchen", 10))

expect_equal(sl_joined$institution, rep("IPS", 10))

# long emuDB data
# specify session and bundle
Expand All @@ -65,8 +57,48 @@ test_that("join_tsvs works on emuDB level", {
readr::write_tsv(x = long_data, file = file.path(db$basePath, paste0(db$dbName, "_long.", "tsv")))

sl_joined = join_tsvs(db, sl)
# 4 NAs
expect_equal(length(which(is.na(sl_joined$eyecolor))), 4)
expect_equal(length(which(is.na(sl_joined$eyecolor))), 7)
})

test_that("join_tsvs works on session level", {

# key value session data
flat_data = tibble::tibble(key = c("location", "fudge", "speed"), value = c("Muenchen", "yummy", "fast"))

readr::write_tsv(x = flat_data, file = file.path(db$basePath, "0000_ses", paste0("0000", "_keyValue.", "tsv")))

sl = query(db, "Phonetic == S")

sl_joined = join_tsvs(db, sl)

expect_true(all(c("location.x", "location.y") %in% names(sl_joined)))

expect_equal(length(which(is.na(sl_joined$location.x))), 5)
expect_equal(length(which(is.na(sl_joined$fudge))), 5)
expect_equal(length(which(is.na(sl_joined$speed))), 5)

})
#
# expect_equal(sl_joined$location, c("Muenchen",
# "Muenchen",
# "Muenchen",
# "Muenchen",
# "Muenchen"))
#
# expect_equal(sl_joined$institution, c("IPS",
# "IPS",
# "IPS",
# "IPS",
# "IPS"))
#
# # long emuDB data
# # specify session and bundle
# long_data = tibble::tibble(session = c("0000", "0000"),
# bundle = c("msajc003", "msajc012"),
# eyecolor = c("blue", "brown"))
#
# readr::write_tsv(x = long_data, file = file.path(db$basePath, paste0(db$dbName, "_long.", "tsv")))
#
# sl_joined = join_tsvs(db, sl)
# # 4 NAs
# expect_equal(length(which(is.na(sl_joined$eyecolor))), 2)
})

0 comments on commit 368782a

Please sign in to comment.