diff --git a/DESCRIPTION b/DESCRIPTION index 2cd97b8..41374a8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: hoaddata Type: Package Title: Data about hybrid open access journal publishing -Version: 0.2.98 +Version: 0.2.99 Authors@R: person("Najko", "Jahn", , "najko.jahn@sub.uni-goettingen.de", role = c("aut", "cre"), diff --git a/data/cc_articles.rda b/data/cc_articles.rda index bac20ae..c378a0b 100644 Binary files a/data/cc_articles.rda and b/data/cc_articles.rda differ diff --git a/data/cr_md.rda b/data/cr_md.rda index 56e9eb3..60141c0 100644 Binary files a/data/cr_md.rda and b/data/cr_md.rda differ diff --git a/data/cr_upw.rda b/data/cr_upw.rda index d463671..0a64a45 100644 Binary files a/data/cr_upw.rda and b/data/cr_upw.rda differ diff --git a/data/jct_oalex_venues.rda b/data/jct_oalex_venues.rda index 0e08665..ec9a9c2 100644 Binary files a/data/jct_oalex_venues.rda and b/data/jct_oalex_venues.rda differ diff --git a/data/jn_aff.rda b/data/jn_aff.rda index c3de4f4..6d31ba7 100644 Binary files a/data/jn_aff.rda and b/data/jn_aff.rda differ diff --git a/data/jn_ind.rda b/data/jn_ind.rda index 9dd9343..51b90f8 100644 Binary files a/data/jn_ind.rda and b/data/jn_ind.rda differ diff --git a/inst/sql/jct_inst_enriched.sql b/inst/sql/jct_inst_enriched.sql index 8f7daa5..b070763 100644 --- a/inst/sql/jct_inst_enriched.sql +++ b/inst/sql/jct_inst_enriched.sql @@ -1,49 +1,46 @@ --- This query is designed to identify institutions participating in transformative agreements. --- JCT does not cover all associated institutions, eg. university hospitals and MPG institutes. --- Here, we include associated institutions to ensure comprehensive coverage - --- Common Table Expression (CTE) - matching: -WITH matching AS ( - -- Part 1: Retrieve data from 'oalex_inst' to include associated institutions - ( - SELECT - esac_id, -- ESAC TA ID - oalex_inst.ror AS ror_main, -- ROR identifier for the main institution - inst.ror AS ror -- ROR identifier for associated institutions - FROM - `subugoe-collaborative.hoaddata.jct_inst` AS jct_inst - INNER JOIN - `subugoe-collaborative.openalex.institutions` as oalex_inst - ON - jct_inst.ror_id = oalex_inst.ror - INNER JOIN - UNNEST(oalex_inst.associated_institutions) as inst - ORDER BY - esac_id - ) +WITH + obtain_associated_ror_ids AS ( + -- Part 1: Retrieve data from OpenAlex institution table to include associated institutions + SELECT + esac_id, -- ESAC TA ID + jct_inst.ror_id AS ror_jct, -- ROR identifier from JCT + inst.ror AS ror_associated -- ROR identifier for associated institutions from OpenAlex + FROM + `subugoe-collaborative.hoaddata.jct_inst` AS jct_inst + LEFT JOIN + `subugoe-collaborative.openalex.institutions` AS oalex_inst + ON + jct_inst.ror_id = oalex_inst.ror + LEFT JOIN + UNNEST(oalex_inst.associated_institutions) AS inst + ORDER BY + esac_id + ), + create_matching_table AS ( + SELECT + esac_id, + 'ror_jct' AS ror_type, + ror_jct AS ror + FROM + obtain_associated_ror_ids UNION ALL - -- Part 2: Retrieve data from 'jct_inst' for the main institutions - SELECT - esac_id, -- ESAC TA ID - ror_id AS ror_main, -- ROR identifier for the main institution - ror_id AS ror -- ROR identifier for the main institution (no associated institutions) - FROM - `subugoe-collaborative.hoaddata.jct_inst` AS jct_inst - ORDER BY - esac_id, ror_main -) - --- Main Query: --- Select data from the 'matching' CTE and join it with 'jct_inst' to retrieve additional details. -SELECT - DISTINCT matching.*, -- Data from the 'matching' CTE - start_date, -- Start date of participation in transformative agreement - end_date -- End date of participation in transformative agreement -FROM - matching -INNER JOIN - `subugoe-collaborative.hoaddata.jct_inst` AS jct_inst -ON - matching.esac_id = jct_inst.esac_id -ORDER BY - esac_id, ror_main + SELECT + esac_id, + 'ror_associated' AS ror_type, + ror_associated AS ror + FROM + obtain_associated_ror_ids + ) +SELECT + DISTINCT create_matching_table.*, + start_date, + end_date +FROM + create_matching_table +INNER JOIN + `subugoe-collaborative.hoaddata.jct_inst` AS jct_inst +ON + create_matching_table.esac_id = jct_inst.esac_id +ORDER BY + esac_id, + ror \ No newline at end of file diff --git a/inst/sql/ta_oa_inst.sql b/inst/sql/ta_oa_inst.sql index 7597662..1e3079e 100644 --- a/inst/sql/ta_oa_inst.sql +++ b/inst/sql/ta_oa_inst.sql @@ -1,72 +1,61 @@ --- Publication statistics for institutions participating in transformative agreements (TA) --- The resulting table allows us to determine the impact TAs have on the open access publication activity of participating institutions. +-- Combine ESAC TA data with institution information +WITH esac_journals AS ( + SELECT DISTINCT + hybrid_jns.issn_l AS matching_issn, + hybrid_jns.esac_id, + esac_publisher, + start_date, + EXTRACT(YEAR FROM start_date) AS start_year, + end_date, + EXTRACT(YEAR FROM end_date) AS end_year, + issn_l, + jct_inst.ror, + jct_inst.ror_type, + oalex.id AS oalex_inst_id + FROM `subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns + -- Join with participating institutions + INNER JOIN `subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst + ON jct_inst.esac_id = hybrid_jns.esac_id + -- Match with OpenAlex institutions + INNER JOIN `subugoe-collaborative.openalex.institutions` AS oalex + ON jct_inst.ror = oalex.ror +), -WITH - -- ESAC TA / institutions matching table - esac_journals AS ( - SELECT - DISTINCT hybrid_jns.issn_l AS matching_issn, -- Extracting ISSN - hybrid_jns.esac_id, -- ESAC ID of the TA - esac_publisher, -- Publisher information - start_date, -- Agreement start date - EXTRACT(YEAR FROM start_date) AS start_year, -- Extracting the year from start_date - end_date, -- Agreement end date - EXTRACT(YEAR FROM end_date) AS end_year, -- Extracting the year from end_date - issn_l, -- ISSN of the journal - jct_inst.ror, -- ROR ID of the institution - jct_inst.ror_main, -- Main ROR ID of the institution - oalex.id AS oalex_inst_id -- OpenAlex institution ID - FROM - `subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns - -- Join with participating institutions - INNER JOIN - `subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst - ON - jct_inst.esac_id = hybrid_jns.esac_id - -- OpenAlex / ROR Matching - INNER JOIN - `subugoe-collaborative.openalex.institutions` AS oalex - ON - jct_inst.ror = oalex.ror - ), +-- Gather publication data for institutions per year and link to TA +inst_per_year AS ( + SELECT DISTINCT + esac_journals.*, + oalex_inst.doi, + oalex_inst.cr_year, + oa.cc, + cr.issued, + esac_journals.ror as ror_matched, + esac_journals.ror_type as ror_type_, + -- Determine if publication is within TA date range + CASE + WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE + ELSE FALSE + END AS ta, + -- Check if publication has a CC license + CASE + WHEN oa.cc IS NOT NULL THEN TRUE + ELSE FALSE + END AS has_cc + FROM esac_journals + -- Join with OpenAlex institution data + INNER JOIN `subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst + ON esac_journals.oalex_inst_id = oalex_inst.id + AND oalex_inst.issn_l = esac_journals.matching_issn + -- Left join to include publications without CC licenses + LEFT JOIN `subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa + ON oalex_inst.doi = oa.doi + -- Join with Crossref data for publication dates + INNER JOIN `subugoe-collaborative.cr_instant.snapshot` AS cr + ON oalex_inst.doi = cr.doi +) - -- Publications per year, institution, and agreement - inst_per_year AS ( - SELECT - DISTINCT esac_journals.*, -- Include columns from the previous CTE - oalex_inst.doi, -- DOI of the publication - oalex_inst.cr_year, -- Publication year - cc, -- CC license information - cr.issued, -- Date of publication - esac_journals.ror AS ror_rel, -- ROR ID related to the journal - esac_journals.ror_main AS ror_id, -- Main ROR ID related to the journal - CASE - WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE -- Check if publication is within the agreement's date range - ELSE FALSE - END AS ta, -- Flag indicating if the publication is within the agreement - CASE - WHEN cc IS NOT NULL THEN TRUE -- Check if the publication has a CC license - ELSE FALSE - END AS has_cc -- Flag indicating if the publication has a CC license - FROM - esac_journals - INNER JOIN - `subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst - ON - esac_journals.oalex_inst_id = oalex_inst.id - AND oalex_inst.issn_l = esac_journals.matching_issn - LEFT JOIN - `subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa - ON - oalex_inst.doi = oa.doi - INNER JOIN - `subugoe-collaborative.cr_instant.snapshot` AS cr - ON - oalex_inst.doi = cr.doi - ) - --- Selecting relevant columns for the final result -SELECT +-- Final query to select relevant columns for analysis +SELECT DISTINCT doi, cr_year, matching_issn AS issn_l, @@ -74,11 +63,11 @@ SELECT esac_publisher, ta AS ta_active, cc, - ror_rel AS ror_matching, - ror_id AS ror_main -FROM - inst_per_year --- Ordering the result set + ror_matched as ror, + ror_type_ as ror_type +FROM inst_per_year +-- Order results by TA journal portfolio, publication year (descending) and DOI ORDER BY ta_journal_portfolio, - cr_year DESC; + cr_year DESC, + doi \ No newline at end of file