Skip to content

Commit

Permalink
Merge pull request #287 from massimoaria/develop
Browse files Browse the repository at this point in the history
Develop to CRAN
  • Loading branch information
massimoaria authored Jan 13, 2023
2 parents 5e3c37c + 310c25d commit 75f663e
Show file tree
Hide file tree
Showing 33 changed files with 3,996 additions and 3,564 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
.txt
.bib
.Ruserdata
.DS_Store
VOSviewer.jar
network.net
Rubbish
desktop.ini
vignette.txt
inst/doc
inst/biblioshiny/__MACOSX
inst/biblioshiny/rsconnect
_gh-pages

6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: bibliometrix
Type: Package
Title: Comprehensive Science Mapping Analysis
Version: 4.0.2
Version: 4.1.0
Authors@R: c(
person(given = "Massimo",
family = "Aria",
Expand Down Expand Up @@ -48,7 +48,7 @@ Imports: stats,
shiny,
SnowballC,
stringdist,
stringr,
stringi,
tidyr,
tidytext
Suggests:
Expand All @@ -59,6 +59,6 @@ Suggests:
shinycssloaders,
visNetwork,
wordcloud2
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
NeedsCompilation: no
Config/testthat/edition: 3
5 changes: 2 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ export(localCitations)
export(lotka)
export(mergeDbSources)
export(metaTagExtraction)
export(missingData)
export(net2VOSviewer)
export(networkPlot)
export(networkStat)
Expand Down Expand Up @@ -56,6 +57,7 @@ import(readr)
import(readxl)
import(shiny)
import(stats)
import(stringi)
import(tidytext)
importFrom(DT,DTOutput)
importFrom(DT,datatable)
Expand Down Expand Up @@ -294,9 +296,6 @@ importFrom(rscopus,author_df_orig)
importFrom(rscopus,author_search)
importFrom(rscopus,get_complete_author_info)
importFrom(stringdist,stringdistmatrix)
importFrom(stringr,str_extract_all)
importFrom(stringr,str_locate_all)
importFrom(stringr,str_replace_all)
importFrom(tidyr,drop_na)
importFrom(tidyr,gather)
importFrom(tidyr,pivot_longer)
Expand Down
14 changes: 14 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
bibliometrix v4.1.0 (Release date: 2023-01-13)

Features:
* Added a new function missingData() to check the completeness of metadata included in a bibliographic data frame
* Biblioshiny: Added the ability to create an Excel report by adding, step by step, the results of different analyses
* Biblioshiny: Added a popup that returns the results of the metadata completeness check of imported collections
* Biblioshiny: Revamped interface with floating options menu and more space for graphical analysis results

Changes:
* Several bug fixes
* Computational speed improvements



bibliometrix v4.0.1 (Release date: 2022-09-16)

Features:
Expand Down
4 changes: 3 additions & 1 deletion R/bib2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ bib2df<-function(D, dbsource = "isi"){

if (dbsource == "isi") D <- gsub(" = \\{","={",D)

D <- gsub("\\\t","",gsub(" = \\{","=\\{",D)) # to work also with new scopus bib format

D[Papers] <- paste("Paper={",D[Papers],sep="")
#ii <- regexpr("\\{",D[Papers])

ind <- regexpr("=\\{",D) # sep among tags and contents
ind[Papers] <- 6

Expand Down
38 changes: 18 additions & 20 deletions R/conceptualStructure.R
Original file line number Diff line number Diff line change
Expand Up @@ -409,36 +409,34 @@ conceptualStructure<-function(M,field="ID", ngrams=1, method="MCA", quali.supp=N
b_doc_TC <- b_doc_TC + annotation_custom(logo, xmin = xl[1], xmax = xl[2], ymin = yl[1], ymax = yl[2])
##

params <- list(field = field,
ngrams = ngrams,
method=method,
quali.supp=quali.supp,
quanti.supp=quanti.supp,
minDegree=minDegree,
clust=clust,
k.max=k.max,
stemming = stemming,
labelsize=labelsize,
documents=documents,
graph=graph,
remove.terms = remove.terms,
synonyms = synonyms)

params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)

if (isTRUE(graph)){plot(b_doc_TC)}

semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord,
params=params)
graph_documents_Contrib=b_doc,graph_documents_TC=b_doc_TC,docCoord=docCoord)

}else{

semanticResults=list(net=CW,res=res.mca,km.res=km.res,graph_terms=b,graph_dendogram=b_dend,
graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL,
params=params)
graph_documents_Contrib=NULL,graph_documents_TC=NULL,docCoord=NULL)
}

params <- list(field = field,
ngrams = ngrams,
method=method,
quali.supp=quali.supp,
quanti.supp=quanti.supp,
minDegree=minDegree,
clust=clust,
k.max=k.max,
stemming = stemming,
labelsize=labelsize,
documents=documents,
graph=graph,
remove.terms = remove.terms,
synonyms = synonyms)

semanticResults$params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)


return(semanticResults)
Expand Down
19 changes: 17 additions & 2 deletions R/couplingMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,22 @@ couplingMap <- function(M, analysis = "documents", field="CR", n=500, label.term
row.names(df)=NULL
df <- df %>% rename(items = .data$words)

results=list(map=g, clusters=df, data=df_lab,nclust=dim(df)[1], NCS = D, net=Net)
params <- list(analysis = analysis,
field=field,
n=n,
minfreq=minfreq,
label.term=label.term,
ngrams=ngrams,
impact.measure=impact.measure,
stemming=stemming,
n.labels=n.labels,
size=size,
community.repulsion = community.repulsion,
repel=repel,
cluster=cluster)
params <- data.frame(params=names(unlist(params)),values=unlist(params), row.names = NULL)

results=list(map=g, clusters=df, data=df_lab,nclust=dim(df)[1], NCS = D, net=Net, params=params)
return(results)
}

Expand Down Expand Up @@ -315,7 +330,7 @@ labeling <- function(M, df_lab, term, n, n.labels, analysis, ngrams){

#clusters <- unique(df$Cluster)
#w <- character(length(clusters))

df$SR <- df[,1]
tab_global <- tableTag(df, term)
tab_global <- data.frame(label=names(tab_global),tot=as.numeric(tab_global), n=nrow(M),stringsAsFactors = FALSE)

Expand Down
9 changes: 7 additions & 2 deletions R/dimensions2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ dimensions2df <- function(file, format = "csv") {


postprocessingDim <- function(DATA) {
DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)
# DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)

## Converting original references in WOS format (AU, PY, SO, VOL, NUM, DOI)
if ("Cited.references" %in% names(DATA)) {
Expand Down Expand Up @@ -232,15 +232,20 @@ postprocessingDim <- function(DATA) {
if (("SO" %in% names(DATA)) & ("Anthology.title" %in% names(DATA))) {
ind <- which(is.na(DATA$SO) | DATA$SO=="")
DATA$SO[ind] <- DATA$Anthology.title[ind]
DATA$SO[DATA$SO==""] <- NA
DATA$SO[is.na(DATA$SO) | DATA$SO==""] <- "NA"
}

if (!("SO" %in% names(DATA))) {
DATA$SO <- "NA"
}

####
cat("\nCreating ISO Source names...")
DATA$JI <- sapply(DATA$SO, AbbrevTitle, USE.NAMES = FALSE)
DATA$J9 <- gsub("\\.","",DATA$JI)
####

DATA <- data.frame(lapply(DATA, toupper), stringsAsFactors = FALSE)

DATA$PY <- as.numeric(DATA$PY)

Expand Down
4 changes: 2 additions & 2 deletions R/histNetwork.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ scopus <- function(M, min.citations, sep, network, verbose){
nCum <- c(1, cumsum(n[-length(n)]))
CR <- paste(CR, collapse = " ")

L <- str_locate_all(CR, TIpost)

#L <- str_locate_all(CR, TIpost)
L <- stringi::stri_locate_all_regex(CR,TIpost, omit_no_match = TRUE)

LCS <- lengths(L) / 2

Expand Down
19 changes: 4 additions & 15 deletions R/histPlot.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =

switch(label,
title={
title <- strsplit(stringr::str_to_title(V(bsk.network)$title), " ")
title <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$title), " ")
V(bsk.network)$id <- unlist(lapply(title, function(l){
n <- floor(length(l)/2)
paste0(paste(l[1:n], collapse=" ", sep=""),"\n",paste(l[(n+1):length(l)], collapse=" ", sep=""))
}))
},
keywords={
kw <- strsplit(stringr::str_to_title(V(bsk.network)$keywords), ";")
kw <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$keywords), ";")
kw[is.na(kw)] <- "Not Available"
V(bsk.network)$id <- unlist(lapply(kw, function(l){
if (length(l)>1){
Expand All @@ -92,7 +92,7 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =
}))
},
keywordsplus={
kw <- strsplit(stringr::str_to_title(V(bsk.network)$keywordsplus), ";")
kw <- strsplit(stringi::stri_trans_totitle(V(bsk.network)$keywordsplus), ";")
kw[is.na(kw)] <- "Not Available"
V(bsk.network)$id <- unlist(lapply(kw, function(l){
if (length(l)>1){
Expand All @@ -107,24 +107,13 @@ histPlot<-function(histResults, n=20, size = 5, labelsize = 5, title_as_label =
}
)

# if (isTRUE(title_as_label)){
# title <- strsplit(stringr::str_to_title(V(bsk.network)$title), " ")
# V(bsk.network)$id <- unlist(lapply(title, function(l){
# n <- floor(length(l)/2)
# paste0(paste(l[1:n], collapse=" ", sep=""),"\n",paste(l[(n+1):length(l)], collapse=" ", sep=""))
# }))
# #V(bsk.network)$id <- tolower(paste(substr(V(bsk.network)$title,1,50),"...",sep=""))
# } else {
# V(bsk.network)$id <- tolower(unlist(RR))
# }

# Compute node degrees (#links) and use that to set node size:
deg <- LCS
V(bsk.network)$size <- size
#rep(size,length(V(bsk.network)))}

#Years=histResults$histData$Year[ind]
Years <- as.numeric(unlist(str_extract_all(unlist(RR),"[[:digit:]]{4}$")))
Years <- as.numeric(unlist(stringi::stri_extract_all_regex(unlist(RR),"[[:digit:]]{4}$")))
V(bsk.network)$years <- Years

# Remove loops
Expand Down
4 changes: 2 additions & 2 deletions R/keywordGrowth.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ KeywordGrowth <- function(M, Tag = "ID", sep = ";", top=10, cdf=TRUE, remove.ter
A <- A %>%
mutate(
# Tab = str_replace_all(Tab, paste(sold[[i]], collapse="|",sep=""),snew[i])
Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i])

#Tab= str_replace_all(Tab, str_replace_all(str_replace_all(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i]),
Tab= stringi::stri_replace_all_regex(Tab, stringi::stri_replace_all_regex(stringi::stri_replace_all_regex(paste(sold[[i]], collapse="|",sep=""),"\\(","\\\\("),"\\)","\\\\)"),snew[i])
)
}
}
Expand Down
69 changes: 69 additions & 0 deletions R/missingData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Completeness of bibliographic metadata
#'
#' It calculates the percentage of missing data in the metadata of a bibliographic data frame.
#'
#' Each metadata is assigned a status c("Excellent", "Good", "Acceptable", "Poor", "Critical", "Completely missing")
#' depending on the percentage of missing data. In particular, the column *status* classifies the percentage of missing
#' values in 6 categories: "Excellent" (0%), "Good" (0.01% to 10.00%), "Acceptable" (from 10.01% to 20.00%),
#' "Poor" (from 20.01% to 50.00%), "Critical" (from 50.01% to 99.99%), "Completely missing" (100%).
#'
#' A value is counted as missing when it is \code{NA}, an empty string, or one of the
#' placeholder strings \code{"NA"} and \code{"NA,0000,NA"}.
#'
#' The results of the function allow us to understand which analyses can be performed with bibliometrix
#' and which cannot based on the completeness (or status) of different metadata.
#' @param M is a bibliographic data frame obtained by \code{\link{convert2df}} function.
#'
#' @return The function \code{missingData} returns a list containing two objects:
#' \tabular{lll}{
#' \code{allTags}  \tab   \tab is a data frame including results for all original metadata tags from the collection
#' (columns \code{cols}, \code{missing_counts}, \code{missing_pct}, \code{status})\cr
#' \code{mandatoryTags}\tab    \tab is a data frame that includes only the tags needed for analysis with bibliometrix and biblioshiny
#' (columns \code{tag}, \code{description}, \code{missing_counts}, \code{missing_pct}, \code{status}),
#' sorted by \code{missing_pct} and then by \code{description}. Tags that are not columns of \code{M}
#' are reported as "Completely missing".}
#'
#' @examples
#' data(scientometrics, package = "bibliometrixData")
#' res <- missingData(scientometrics)
#' print(res$mandatoryTags)
#'
#' @export
#'
missingData <- function(M) {
  cols <- names(M)
  # Strings that count as "no value" in addition to real NA entries
  placeholders <- c("NA,0000,NA", "NA", "")

  # M[[x]] always yields the column itself (M[, x] yields a one-column tibble
  # when M is a tibble, which breaks the %in% test); vapply keeps the result an
  # integer vector even when M has no columns, where sapply would return list().
  missing_counts <- vapply(cols, function(x) {
    values <- M[[x]]
    sum(is.na(values) | values %in% placeholders)
  }, integer(1))
  missing_pct <- round(missing_counts / nrow(M) * 100, 2)
  df_all <- data.frame(cols, missing_counts, missing_pct, stringsAsFactors = FALSE)

  # Tags required by the analyses, with their human-readable descriptions
  tag <- c("AB", "AU", "C1", "CR", "DE", "DI", "DT", "ID",
           "LA", "NR", "PY", "RP", "SO", "TC", "TI", "WC")
  description <- c("Abstract", "Author", "Affiliation", "Cited References",
                   "Keywords", "DOI", "Document Type", "Keywords Plus",
                   "Language", "Number of Cited References", "Publication Year",
                   "Corresponding Author", "Journal", "Total Citation",
                   "Title", "Science Categories")

  df_all <- df_all %>%
    mutate(status = status(missing_pct)) %>%
    replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100))

  # The left join leaves NA for tags that are not columns of M; those are
  # reported as fully missing.
  df_tags <- data.frame(tag, description, stringsAsFactors = FALSE) %>%
    left_join(df_all, by = c("tag" = "cols")) %>%
    replace_na(replace = list(missing_counts = nrow(M), missing_pct = 100, status = "Completely missing")) %>%
    arrange(missing_pct, description)

  results <- list(allTags = df_all, mandatoryTags = df_tags)
  return(results)
}

# Map percentages of missing values to a completeness label.
# x: numeric vector of percentages. Returns a character vector of the same
# length; entries matching none of the rules below stay as "".
status <- function(x){
  # One logical mask per label, evaluated in the order they are applied.
  rules <- list(
    "Excellent"          = x == 0,
    "Good"               = x > 0 & x <= 10,
    "Acceptable"         = x > 10 & x <= 20,
    "Poor"               = x > 20 & x <= 50,
    "Critical"           = x > 50 & x < 100,
    "Completely missing" = is.na(x) | x == 100
  )

  labels <- character(length(x))
  for (label in names(rules)) {
    # which() drops NA entries of the mask, so NA inputs are only touched by
    # the final rule.
    labels[which(rules[[label]])] <- label
  }
  return(labels)
}

2 changes: 1 addition & 1 deletion R/rpys.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ y <- c(min(c(RPYS$Citations,RPYS$diffMedian)),min(c(RPYS$Citations,RPYS$diffMedi

g=ggplot(RPYS, aes(x=.data$Year ,y=.data$Citations,text=paste("Year: ",.data$Year,"\nN. of References: ",.data$Citations)))+
geom_line(aes(group="NA")) +
geom_area(aes(group="NA"),fill = 'grey90', alpha = .5) +
#geom_area(aes(group="NA"),fill = 'grey90', alpha = .5) +
#geom_hline(aes(yintercept=0, color = 'grey'))+
geom_line(aes(x=.data$Year,y=.data$diffMedian, color="firebrick", group="NA"))+
labs(x = 'Year'
Expand Down
3 changes: 2 additions & 1 deletion R/thematicEvolution.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#'
#' @param M is a bibliographic data frame obtained by the converting function \code{\link{convert2df}}.
#' @param field is a character object. It indicates the content field to use. Field can be one of c("ID","DE","TI","AB"). Default value is \code{field="ID"}.
#' @param years is a numeric vector of two or more unique cut points.
#' @param years is a numeric vector of one or more unique cut points.
#' @param n is numerical. It indicates the number of words to use in the network analysis
#' @param minFreq is numerical. It indicates the min frequency of words included in to a cluster.
#' @param ngrams is an integer between 1 and 4. It indicates the type of n-gram to extract from texts.
Expand Down Expand Up @@ -57,6 +57,7 @@ thematicEvolution <- function(M, field = "ID", years, n = 250, minFreq = 2, size
resk <- thematicMap(Mk, field = field, n = n, minfreq = minFreq, ngrams=ngrams,
stemming = stemming, size = size, n.labels = n.labels,
repel = repel, remove.terms = remove.terms, synonyms = synonyms, cluster=cluster)
resk$params <- resk$params %>% dplyr::filter(.data$params!="minfreq")
res[[k]] <- resk
net[[k]] <- resk$net
}
Expand Down
2 changes: 1 addition & 1 deletion R/toUpper.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Convert text to upper case using the English ("en") locale rules.
# D: the text to convert; it is passed unchanged to stringi::stri_trans_toupper().
# Returns the value of that call.
toUpper <- function(D){
  stringi::stri_trans_toupper(D, locale = "en")
}
Loading

0 comments on commit 75f663e

Please sign in to comment.