# Post-patch contents of write_citation_pairs() in R/write_citation_pairs.R,
# from the commit "make the write citations function more robust".

#' Write citation pairs to a JSON file
#'
#' Requests the BibTeX record of every `article_id` from
#' `rcrossref::cr_cn()`, parses the records with `bib2df::bib2df()`, attaches
#' the corresponding `dataset_id`, renames the columns for database ingest and
#' writes the result to `path` with `jsonlite::write_json()`.
#'
#' A lookup that signals an error or a warning does not abort the run. That
#' row is replaced by a placeholder record whose fields read `ERROR` or
#' `WARNING`, whose title holds the condition message and whose DOI is the
#' requested `article_id`.
#'
#' @param citation_list A data.frame containing the columns `article_id`
#'   (passed to `rcrossref::cr_cn()` as `dois`) and `dataset_id`.
#' @param path File path handed to `jsonlite::write_json()`.
#'
#' @return The value of `jsonlite::write_json()`; the function is called for
#'   its side effect of writing `path`. An input without rows writes an empty
#'   JSON array.
write_citation_pairs <- function(citation_list, path) {
  required_cols <- c("article_id", "dataset_id")
  if (!all(required_cols %in% names(citation_list))) {
    # `call.` (not `.call`) is the argument that suppresses the call; the
    # misspelled form was pasted into the message as a leading "FALSE".
    stop(
      "citations_list data.frame does not contain variables article_id and/or dataset_id",
      call. = FALSE
    )
  }

  dois <- citation_list$article_id
  if (length(dois) == 0L) {
    # Nothing to look up: skip the Crossref and BibTeX round trip entirely.
    return(jsonlite::write_json(list(), path))
  }

  # Stand-in BibTeX record for a failed lookup, so the row still reaches the
  # output and can be spotted by its ERROR / WARNING fields.
  placeholder_entry <- function(tag, msg, doi) {
    paste0(
      "@article{", tag, ", title={", msg, "}, volume={", tag,
      "}, ISSN={", tag, "}, url={", tag, "}, DOI={", doi,
      "}, number={", tag, "}, journal={", tag, "}, publisher={", tag,
      "}, author={", tag, "}, year={1900}, month=dec, pages={", tag, "} }\n"
    )
  }

  # One request per DOI so a single bad identifier cannot fail the batch.
  fetch_entry <- function(doi) {
    tryCatch(
      trimws(rcrossref::cr_cn(dois = doi, format = "bibtex")),
      error = function(e) {
        placeholder_entry("ERROR", conditionMessage(e), doi)
      },
      warning = function(w) {
        placeholder_entry("WARNING", conditionMessage(w), doi)
      }
    )
  }
  entries <- lapply(dois, fetch_entry)

  # bib2df reads from a file, so stage the records in a tempfile that is
  # removed again when the function exits.
  bib_file <- tempfile()
  on.exit(unlink(bib_file), add = TRUE)
  writeLines(unlist(entries), bib_file)

  # import as a dataframe
  df <- bib2df::bib2df(bib_file)

  # dataset_id is matched to the parsed records purely by position, so the
  # two must have the same length.
  if (nrow(df) != length(dois)) {
    stop(
      "parsed ", nrow(df), " BibTeX records for ", length(dois),
      " citation pairs; cannot align dataset_id",
      call. = FALSE
    )
  }
  df$dataset_id <- citation_list$dataset_id

  # rename for database ingest; select() renames and orders in one step, and
  # string column names avoid the deprecated `.data$` form in tidyselect.
  cit_full <- df %>%
    dplyr::select(
      target_id = "dataset_id",
      source_id = "DOI",
      source_url = "URL",
      origin = "AUTHOR",
      title = "TITLE",
      publisher = "PUBLISHER",
      journal = "JOURNAL",
      volume = "VOLUME",
      page = "PAGES",
      year_of_publishing = "YEAR"
    ) %>%
    dplyr::mutate(
      id = NA,
      report = NA,
      metadata = NA,
      link_publication_date = Sys.Date()
    )

  jsonlite::write_json(cit_full, path)
}