Skip to content

Commit

Permalink
fix: refactor get_keywords() #2
Browse files Browse the repository at this point in the history
  • Loading branch information
atn38 committed Apr 15, 2022
1 parent cfba02e commit 019eaed
Showing 1 changed file with 54 additions and 54 deletions.
108 changes: 54 additions & 54 deletions R/get_keywords.R
Original file line number Diff line number Diff line change
@@ -1,66 +1,66 @@

#' Title
#'
#' @param corpus
#'
#' @return
#' @export
#'
#'
#'
#'
#'
#'
#'
#'
#'
#'
#'
#'
#'

#' @examples
get_keywords <- function(corpus) {
vw_keywords <- data.frame()
keysetss <- list()

vw <- list()
for (i in seq_along(corpus)) {
pk <- get_pk(names(corpus)[[i]])
pk <- parse_packageId(names(corpus)[[i]])
scope <- pk[["scope"]]
id <- pk[["id"]]
rev <- pk[["rev"]]
klist <- list()
if ("keywordSet" %in% names(corpus[[i]][["dataset"]])) {
keysets <- handle_one(corpus[[i]][["dataset"]][["keywordSet"]])
klist <-
data.table::rbindlist(lapply(seq_along(keysets), function(x) parse_keywordset(keysets[[x]])), fill = TRUE)
klist$id <- id
klist$scope <- scope
klist$rev <- rev
klist <- subset(klist, select = c(4:6, 1:3))
}
vw[[i]] <- klist
}
return(data.table::rbindlist(vw))
}

keysets <- corpus[[i]][["dataset"]][["keywordSet"]]
if (!is.null(names(keysets))) keysets <- list(keysets)
for (j in seq_along(keysets)) {
thesaurus <- keysets[[j]][["keywordThesaurus"]]
if (is.null(thesaurus))
thesaurus <- "none"

keywords <- keysets[[j]][["keyword"]]
if (!is.null(names(keywords))) keywords <- list(keywords)
# return(keywords)
for (k in seq_along(keywords)){
keyk <- keywords[[k]]

# sometimes there are no keyword types and the keyword list is unnamed
if (is.null(names(keyk))) {
keyk <- list(keyword = keyk)
key_type <- NA
} else key_type <- keyk[["keywordType"]]


keydf <- data.frame(
scope = scope,
id = id,
rev = rev,

# subscript out of bounds here. not sure why.
keyword = keyk[["keyword"]],
keywordtype = key_type,
keyword_thesaurus = thesaurus,
stringsAsFactors = F
)
#' Title
#'
#' @param x
#'
#' @return
#'
#' @examples
parse_keywordset <- function(x) {
keywords <- handle_one(x[["keyword"]])
df <- data.table::rbindlist(lapply(seq_along(keywords), function(x) parse_keyword(keywords[[x]])))
df$thesaurus <- null2na(x[["keywordThesaurus"]])
return(df)
}

vw_keywords <- rbind(vw_keywords, keydf)
}
}
# keysetss <- c(keysetss, keysets)
#' Title
#'
#' @param x
#'
#' @return
#'
#' @examples
parse_keyword <- function(x) {
# sometimes there are no keyword types and the keyword list is unnamed
if (is.null(names(x))) {
x <- list(keyword = x)
key_type <- NA
} else key_type <- x[["keywordType"]]

}
return(vw_keywords)
data.frame(
# subscript out of bounds here. not sure why.
keyword = x[["keyword"]],
keywordtype = key_type,
stringsAsFactors = F
)
}

0 comments on commit 019eaed

Please sign in to comment.