Skip to content

Commit

Permalink
stats: generalize json
Browse files Browse the repository at this point in the history
  • Loading branch information
miku committed Feb 3, 2022
1 parent d5c7dec commit e2ee50b
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 15 deletions.
2 changes: 1 addition & 1 deletion python/.style.yapf
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

based_on_style = pep8
split_before_logical_operator = true
column_limit = 120
column_limit = 140
3 changes: 1 addition & 2 deletions python/labe/deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ def dump_deps_dot(task=None, file=sys.stdout):
print(textwrap.dedent("""
graph [fontname=helvetica];
node [shape=record fontname=helvetica];
"""),
file=file)
"""), file=file)
for k, vs in g.items():
for v in vs:
print(""" "{}" -> "{}"; """.format(k, v), file=file)
Expand Down
16 changes: 8 additions & 8 deletions python/labe/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,9 @@ class StatsReportData(Task):
def requires(self):
return {
"common": StatsCommonDOI(date=self.date),
# TODO: "slub" -> institutional ...
"common_slub": StatsCommonDOIForInstitution(date=self.date, institution=self.institution),
"common_institution": StatsCommonDOIForInstitution(date=self.date, institution=self.institution),
"index_unique": IndexMappedDOI(date=self.date),
"index_unique_slub": IndexMappedDOIForInstitution(date=self.date, institution=self.institution),
"index_unique_institution": IndexMappedDOIForInstitution(date=self.date, institution=self.institution),
"oci_inbound": OpenCitationsInboundStats(),
"oci_outbound": OpenCitationsOutboundStats(),
"oci_unique": OpenCitationsUniqueDOI(),
Expand All @@ -454,10 +453,12 @@ def run(self):
data = {
"version": "1",
"date": str(self.date),
"slub": {
"num_mapped_doi": sum(1 for _ in si.get("index_unique_slub").open()),
"num_common_doi": sum(1 for _ in si.get("common_slub").open()),
"ratio": (sum(1 for _ in si.get("common_slub").open()) / sum(1 for _ in si.get("oci_unique").open())),
"institution": {
self.institution: {
"num_mapped_doi": sum(1 for _ in si.get("index_unique_institution").open()),
"num_common_doi": sum(1 for _ in si.get("common_institution").open()),
"ratio": (sum(1 for _ in si.get("common_institution").open()) / sum(1 for _ in si.get("oci_unique").open())),
},
},
"index": {
"num_mapped_doi": sum(1 for _ in si.get("index_unique").open()),
Expand All @@ -475,7 +476,6 @@ def run(self):
json.dump(data, output)

def output(self):
# Returns a luigi.LocalTarget whose path is self.path(ext="json").
# TODO: exclude outputs from this task from cleanup.
return luigi.LocalTarget(path=self.path(ext="json"))

def on_success(self):
Expand Down
3 changes: 1 addition & 2 deletions python/labe/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def run(self):
url = self.open_citations_url()
output = shellout("""
curl --fail -sL "{url}" > {output}
""",
url=url)
""", url=url)

# Do a basic sanity check right here, e.g. in 12/2021 filesize was
# about 30GB; we fail if the file size seems too small.
Expand Down
3 changes: 1 addition & 2 deletions python/tests/test_oci.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ def test_get_redirct_url():

assert get_terminal_url("https://google.com") == "https://www.google.com/"
assert get_terminal_url("http://google.com") == "https://www.google.com/?gws_rd=ssl"
assert (get_terminal_url("https://doi.org/10.1111/icad.12417") ==
"https://onlinelibrary.wiley.com/doi/10.1111/icad.12417")
assert (get_terminal_url("https://doi.org/10.1111/icad.12417") == "https://onlinelibrary.wiley.com/doi/10.1111/icad.12417")


@pytest.mark.skipif(no_internet(), reason="no internet")
Expand Down

0 comments on commit e2ee50b

Please sign in to comment.