Skip to content

Commit

Permalink
fix: get_attributes() now works and works pretty well! handles all po…
Browse files Browse the repository at this point in the history
…ssible numbers of entities as well and should keep IDs consistent. #5 #6 #3
  • Loading branch information
atn38 committed Apr 7, 2022
1 parent 508ad4d commit 89b72ca
Showing 1 changed file with 166 additions and 33 deletions.
199 changes: 166 additions & 33 deletions R/get_attributes.R
Original file line number Diff line number Diff line change
@@ -1,53 +1,186 @@







#' Title
#'
#' @param corpus
#'
#' @return
#' @export
#'
#'
#'
#'
#'
#'
#'
#'
#'
#'
#' only data table attributes for now

#' @examples
get_attributes <- function(corpus) {
vw_att <- data.frame()
vw_att <- list()

for (i in seq_along(corpus)) {
pk <- get_pk(names(corpus)[[i]])
eml <- corpus[[i]]
pk <- parse_packageId(names(corpus)[[i]])
scope <- pk[["scope"]]
id <- pk[["id"]]
rev <- pk[["rev"]]

dts <-
handle_one(purrr::compact(corpus[[i]][["dataset"]][["dataTable"]]))
ent_groups <-
purrr::compact(eml[["dataset"]][c("dataTable",
"spatialVector",
"spatialRaster",
"otherEntity",
"view")])
# exit if no entities found
if (is.null(ent_groups))
return()
groupdf <- list()
for (j in seq_along(ent_groups)) {
ents <- handle_one(ent_groups[[j]])

for (j in seq_along(dts)) {
dt <- dts[[j]]
att_list <- lapply(seq_along(ents), function(x) {
ent <- ents[[x]]
if ("attributeList" %in% names(ent)) {
attdf <-
parse_attributeList(x = ent[["attributeList"]])#,
#eml = corpus[[i]]))
n <-
nrow(attdf)
#print(paste(i, j, how_many))
#print(attdf$attributes)
info <- data.frame(
stringsAsFactors = F,
scope = rep(scope, n),
datasetid = rep(id, n),
rev = rep(rev, n),
entity = rep(paste0(j, x), n),
entitytype = paste0(names(ent_groups)[[j]])
)
return(cbind(info, attdf))
}
})
groupdf[[j]] <- data.table::rbindlist(att_list, fill = TRUE)
}
vw_att[[i]] <- data.table::rbindlist(groupdf, fill = TRUE)
}
return(data.table::rbindlist(vw_att, fill = TRUE))
}

if (!is.null(dt[["attributeList"]])) {
attdf <- EML::get_attributes(x = dt[["attributeList"]],
eml = corpus[[i]])
how_many <- nrow(attdf[["attributes"]])
# print(how_many)
attdf2 <- data.frame(
stringsAsFactors = F,
scope = rep(scope, how_many),
id = rep(id, how_many),
rev = rep(rev, how_many),
entityposition = rep(j, how_many)
)

# print(str(attdf[["attributes"]]))

attdf <- dplyr::bind_cols(attdf2,attdf[["attributes"]])

vw_att <- dplyr::bind_rows(vw_att, attdf)

#' Title
#'
#' @param eml
#' @param x (list) attributeList EML node
#'
#' @return
#'
#' @examples
parse_attributeList <- function(x, eml = NULL) {
attributeList <- x
## check to make sure input appears to be an attributeList
if (!("attribute" %in% names(attributeList)) &
is.null(attributeList$references)) {
stop(call. = FALSE,
"Input does not appear to be an attributeList.")
}
## if the attributeList is referenced, get reference
if (!is.null(attributeList$references)) {
if (is.null(eml)) {
warning(
"The attributeList entered is referenced somewhere else in the eml. ",
"No eml was entered to find the attributes. ",
"Please enter the eml to get better results."
)
eml <- x
}

all_attributeLists <- eml_get(eml, "attributeList")

for (attList in all_attributeLists) {
if (attList$id == attributeList$references) {
attributeList <- attList
break
}
}
}
attributes <- lapply(attributeList$attribute, parse_attribute)
attributes <- data.table::rbindlist(attributes, fill = T)
## remove non_fields in attributes
non_fields <- c("enforced",
"exclusive",
"order",
"references",
"scope",
"system",
"typeSystem",
"missingValueCode",
"missingValueCodeExplanation",
"propertyLabel",
"propertyURI",
"valueLabel",
"valueURI")
attributes <-
subset(attributes, select = !(names(attributes) %in% non_fields))
return(attributes)
}




#' Title
#'
#' @param x (list) attribute EML node
#'
#' @return
#'
#' @examples
parse_attribute <- function(x) {
## get full attribute list
att <- unlist(x, recursive = TRUE, use.names = TRUE)
measurementScale <- names(x$measurementScale)
domain <- names(x$measurementScale[[measurementScale]])

if (length(domain) == 1) {
## domain == "nonNumericDomain"
domain <-
names(x$measurementScale[[measurementScale]][[domain]])
}
domain <- domain[grepl("Domain", domain)]

if (measurementScale == "dateTime" & is.null(domain)) {
domain <- "dateTimeDomain"
}
return(vw_att)

att <-
c(att, measurementScale = measurementScale, domain = domain)

## separate factors
att <- att[!grepl("enumeratedDomain", names(att))]

## separate methods
att <- att[!grepl("methods", names(att))]

## Alter names to be consistent with other tools
names(att) <- gsub("missingValueCode.code",
"missingValueCode",
names(att),
fixed = TRUE)
names(att) <- gsub("standardUnit|customUnit",
"unit",
names(att))
## Alter names of annotation label fields for accessibility
names(att) <- gsub("annotation.valueURI.label",
"valueLabel",
names(att),
fixed = TRUE)
names(att) <- gsub("annotation.propertyURI.label",
"propertyLabel",
names(att),
fixed = TRUE)
names(att) <- gsub(".+\\.+",
"",
names(att))
att <- as.data.frame(t(att), stringsAsFactors = FALSE)
return(att)
}

0 comments on commit 89b72ca

Please sign in to comment.