-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #76 from subugoe/ta_match
Refactor TA matching to avoid confusion about the main institution
- Loading branch information
Showing
9 changed files
with
108 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: hoaddata | ||
Type: Package | ||
Title: Data about hybrid open access journal publishing | ||
Version: 0.2.98 | ||
Version: 0.2.99 | ||
Authors@R: | ||
person("Najko", "Jahn", , "[email protected]", | ||
role = c("aut", "cre"), | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,49 +1,46 @@ | ||
-- This query is designed to identify institutions participating in transformative agreements. | ||
-- JCT does not cover all associated institutions, eg. university hospitals and MPG institutes. | ||
-- Here, we include associated institutions to ensure comprehensive coverage | ||
|
||
-- Common Table Expression (CTE) - matching: | ||
WITH matching AS ( | ||
-- Part 1: Retrieve data from 'oalex_inst' to include associated institutions | ||
( | ||
SELECT | ||
esac_id, -- ESAC TA ID | ||
oalex_inst.ror AS ror_main, -- ROR identifier for the main institution | ||
inst.ror AS ror -- ROR identifier for associated institutions | ||
FROM | ||
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst | ||
INNER JOIN | ||
`subugoe-collaborative.openalex.institutions` as oalex_inst | ||
ON | ||
jct_inst.ror_id = oalex_inst.ror | ||
INNER JOIN | ||
UNNEST(oalex_inst.associated_institutions) as inst | ||
ORDER BY | ||
esac_id | ||
) | ||
WITH | ||
obtain_associated_ror_ids AS ( | ||
-- Part 1: Retrieve data from OpenAlex institution table to include associated institutions | ||
SELECT | ||
esac_id, -- ESAC TA ID | ||
jct_inst.ror_id AS ror_jct, -- ROR identifier from JCT | ||
inst.ror AS ror_associated -- ROR identifier for associated institutions from OpenAlex | ||
FROM | ||
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst | ||
LEFT JOIN | ||
`subugoe-collaborative.openalex.institutions` AS oalex_inst | ||
ON | ||
jct_inst.ror_id = oalex_inst.ror | ||
LEFT JOIN | ||
UNNEST(oalex_inst.associated_institutions) AS inst | ||
ORDER BY | ||
esac_id | ||
), | ||
create_matching_table AS ( | ||
SELECT | ||
esac_id, | ||
'ror_jct' AS ror_type, | ||
ror_jct AS ror | ||
FROM | ||
obtain_associated_ror_ids | ||
UNION ALL | ||
-- Part 2: Retrieve data from 'jct_inst' for the main institutions | ||
SELECT | ||
esac_id, -- ESAC TA ID | ||
ror_id AS ror_main, -- ROR identifier for the main institution | ||
ror_id AS ror -- ROR identifier for the main institution (no associated institutions) | ||
FROM | ||
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst | ||
ORDER BY | ||
esac_id, ror_main | ||
) | ||
|
||
-- Main Query: | ||
-- Select data from the 'matching' CTE and join it with 'jct_inst' to retrieve additional details. | ||
SELECT | ||
DISTINCT matching.*, -- Data from the 'matching' CTE | ||
start_date, -- Start date of participation in transformative agreement | ||
end_date -- End date of participation in transformative agreement | ||
FROM | ||
matching | ||
INNER JOIN | ||
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst | ||
ON | ||
matching.esac_id = jct_inst.esac_id | ||
ORDER BY | ||
esac_id, ror_main | ||
SELECT | ||
esac_id, | ||
'ror_associated' AS ror_type, | ||
ror_associated AS ror | ||
FROM | ||
obtain_associated_ror_ids | ||
) | ||
SELECT | ||
DISTINCT create_matching_table.*, | ||
start_date, | ||
end_date | ||
FROM | ||
create_matching_table | ||
INNER JOIN | ||
`subugoe-collaborative.hoaddata.jct_inst` AS jct_inst | ||
ON | ||
create_matching_table.esac_id = jct_inst.esac_id | ||
ORDER BY | ||
esac_id, | ||
ror |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,84 +1,73 @@ | ||
-- Publication statistics for institutions participating in transformative agreements (TA) | ||
-- The resulting table allows us to determine the impact TAs have on the open access publication activity of participating institutions. | ||
-- Combine ESAC TA data with institution information | ||
WITH esac_journals AS ( | ||
SELECT DISTINCT | ||
hybrid_jns.issn_l AS matching_issn, | ||
hybrid_jns.esac_id, | ||
esac_publisher, | ||
start_date, | ||
EXTRACT(YEAR FROM start_date) AS start_year, | ||
end_date, | ||
EXTRACT(YEAR FROM end_date) AS end_year, | ||
issn_l, | ||
jct_inst.ror, | ||
jct_inst.ror_type, | ||
oalex.id AS oalex_inst_id | ||
FROM `subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns | ||
-- Join with participating institutions | ||
INNER JOIN `subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst | ||
ON jct_inst.esac_id = hybrid_jns.esac_id | ||
-- Match with OpenAlex institutions | ||
INNER JOIN `subugoe-collaborative.openalex.institutions` AS oalex | ||
ON jct_inst.ror = oalex.ror | ||
), | ||
|
||
WITH | ||
-- ESAC TA / institutions matching table | ||
esac_journals AS ( | ||
SELECT | ||
DISTINCT hybrid_jns.issn_l AS matching_issn, -- Extracting ISSN | ||
hybrid_jns.esac_id, -- ESAC ID of the TA | ||
esac_publisher, -- Publisher information | ||
start_date, -- Agreement start date | ||
EXTRACT(YEAR FROM start_date) AS start_year, -- Extracting the year from start_date | ||
end_date, -- Agreement end date | ||
EXTRACT(YEAR FROM end_date) AS end_year, -- Extracting the year from end_date | ||
issn_l, -- ISSN of the journal | ||
jct_inst.ror, -- ROR ID of the institution | ||
jct_inst.ror_main, -- Main ROR ID of the institution | ||
oalex.id AS oalex_inst_id -- OpenAlex institution ID | ||
FROM | ||
`subugoe-collaborative.hoaddata.jct_hybrid_jns` AS hybrid_jns | ||
-- Join with participating institutions | ||
INNER JOIN | ||
`subugoe-collaborative.hoaddata.jct_inst_enriched` AS jct_inst | ||
ON | ||
jct_inst.esac_id = hybrid_jns.esac_id | ||
-- OpenAlex / ROR Matching | ||
INNER JOIN | ||
`subugoe-collaborative.openalex.institutions` AS oalex | ||
ON | ||
jct_inst.ror = oalex.ror | ||
), | ||
-- Gather publication data for institutions per year and link to TA | ||
inst_per_year AS ( | ||
SELECT DISTINCT | ||
esac_journals.*, | ||
oalex_inst.doi, | ||
oalex_inst.cr_year, | ||
oa.cc, | ||
cr.issued, | ||
esac_journals.ror as ror_matched, | ||
esac_journals.ror_type as ror_type_, | ||
-- Determine if publication is within TA date range | ||
CASE | ||
WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE | ||
ELSE FALSE | ||
END AS ta, | ||
-- Check if publication has a CC license | ||
CASE | ||
WHEN oa.cc IS NOT NULL THEN TRUE | ||
ELSE FALSE | ||
END AS has_cc | ||
FROM esac_journals | ||
-- Join with OpenAlex institution data | ||
INNER JOIN `subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst | ||
ON esac_journals.oalex_inst_id = oalex_inst.id | ||
AND oalex_inst.issn_l = esac_journals.matching_issn | ||
-- Left join to include publications without CC licenses | ||
LEFT JOIN `subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa | ||
ON oalex_inst.doi = oa.doi | ||
-- Join with Crossref data for publication dates | ||
INNER JOIN `subugoe-collaborative.cr_instant.snapshot` AS cr | ||
ON oalex_inst.doi = cr.doi | ||
) | ||
|
||
-- Publications per year, institution, and agreement | ||
inst_per_year AS ( | ||
SELECT | ||
DISTINCT esac_journals.*, -- Include columns from the previous CTE | ||
oalex_inst.doi, -- DOI of the publication | ||
oalex_inst.cr_year, -- Publication year | ||
cc, -- CC license information | ||
cr.issued, -- Date of publication | ||
esac_journals.ror AS ror_rel, -- ROR ID related to the journal | ||
esac_journals.ror_main AS ror_id, -- Main ROR ID related to the journal | ||
CASE | ||
WHEN (DATE(cr.issued) BETWEEN DATE(start_date) AND DATE(end_date)) THEN TRUE -- Check if publication is within the agreement's date range | ||
ELSE FALSE | ||
END AS ta, -- Flag indicating if the publication is within the agreement | ||
CASE | ||
WHEN cc IS NOT NULL THEN TRUE -- Check if the publication has a CC license | ||
ELSE FALSE | ||
END AS has_cc -- Flag indicating if the publication has a CC license | ||
FROM | ||
esac_journals | ||
INNER JOIN | ||
`subugoe-collaborative.hoaddata.cr_openalex_inst_full` AS oalex_inst | ||
ON | ||
esac_journals.oalex_inst_id = oalex_inst.id | ||
AND oalex_inst.issn_l = esac_journals.matching_issn | ||
LEFT JOIN | ||
`subugoe-collaborative.hoaddata.cc_openalex_inst` AS oa | ||
ON | ||
oalex_inst.doi = oa.doi | ||
INNER JOIN | ||
`subugoe-collaborative.cr_instant.snapshot` AS cr | ||
ON | ||
oalex_inst.doi = cr.doi | ||
) | ||
|
||
-- Selecting relevant columns for the final result | ||
SELECT | ||
-- Final query to select relevant columns for analysis | ||
SELECT DISTINCT | ||
doi, | ||
cr_year, | ||
matching_issn AS issn_l, | ||
esac_id AS ta_journal_portfolio, | ||
esac_publisher, | ||
ta AS ta_active, | ||
cc, | ||
ror_rel AS ror_matching, | ||
ror_id AS ror_main | ||
FROM | ||
inst_per_year | ||
-- Ordering the result set | ||
ror_matched as ror, | ||
ror_type_ as ror_type | ||
FROM inst_per_year | ||
-- Order results by TA journal portfolio, publication year (descending) and DOI | ||
ORDER BY | ||
ta_journal_portfolio, | ||
cr_year DESC; | ||
cr_year DESC, | ||
doi |