Skip to content

Commit

Permalink
Merge pull request #172 from NPLinker/fix_download_bug
Browse files Browse the repository at this point in the history
Fix download bugs
  • Loading branch information
CunliangGeng authored Oct 31, 2023
2 parents f577e59 + 34488a1 commit 6301b28
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 85 deletions.
5 changes: 2 additions & 3 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
Expand Down Expand Up @@ -179,15 +178,15 @@
APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "{}"
+ boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

- Copyright [2022] [Netherlands eScience Center, Wageningen University & Research ]
+ Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
4 changes: 2 additions & 2 deletions NOTICE
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
NPLinker
- Copyright [2022] The Netherlands eScience Center, Wageningen University & Research
+ Copyright 2022-2023 Netherlands eScience Center and Wageningen University & Research.

This product includes software developed at
- The Netherlands eScience Center (https://www.esciencecenter.nl/)
+ Netherlands eScience Center (https://www.esciencecenter.nl/)
5 changes: 2 additions & 3 deletions src/nplinker/pairedomics/podp_antismash_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
import re
import time
- from urllib.error import HTTPError
from bs4 import BeautifulSoup
from bs4 import NavigableString
from bs4 import Tag
Expand Down Expand Up @@ -203,7 +202,7 @@ def podp_download_and_extract_antismash_data(
if output_path.exists():
Path.touch(output_path / 'completed', exist_ok=True)

- except HTTPError:
+ except Exception:
gs_obj.bgc_path = ""

missing = len([gs for gs in gs_dict.values() if not gs.bgc_path])
Expand All @@ -214,7 +213,7 @@ def podp_download_and_extract_antismash_data(
GenomeStatus.to_json(gs_dict, gs_file)

if missing == len(genome_records):
- logger.warning('Failed to successfully retrieve ANY genome data!')
+ raise ValueError("No antiSMASH data found for any genome")


def get_best_available_genome_id(genome_id_data: dict[str, str]) -> str | None:
Expand Down
2 changes: 1 addition & 1 deletion src/nplinker/pairedomics/strain_mappings_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ def extract_mappings_ms_filename_spectrum_id(
`GNPSFileMappingLoader`: A class to load GNPS file mapping TSV file.
"""
loader = GNPSFileMappingLoader(tsv_file)
- return loader.mapping_reversed()
+ return loader.mapping_reversed


def get_mappings_strain_id_spectrum_id(
Expand Down
9 changes: 6 additions & 3 deletions src/nplinker/schemas/podp_adapted_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,20 @@
"GenBank_accession": {
"type": "string",
"title": "GenBank accession number",
- "description": "If the publicly available genome got a GenBank accession number assigned, e.g., <a href=\"https://www.ncbi.nlm.nih.gov/nuccore/AL645882\" target=\"_blank\" rel=\"noopener noreferrer\">AL645882</a>, please provide it here. The genome sequence must be submitted to GenBank/ENA/DDBJ (and an accession number must be received) before this form can be filled out. In case of a whole genome sequence, please use master records. At least one identifier must be entered."
+ "description": "If the publicly available genome got a GenBank accession number assigned, e.g., <a href=\"https://www.ncbi.nlm.nih.gov/nuccore/AL645882\" target=\"_blank\" rel=\"noopener noreferrer\">AL645882</a>, please provide it here. The genome sequence must be submitted to GenBank/ENA/DDBJ (and an accession number must be received) before this form can be filled out. In case of a whole genome sequence, please use master records. At least one identifier must be entered.",
+ "minLength": 1
},
"RefSeq_accession": {
"type": "string",
"title": "RefSeq accession number",
- "description": "For example: <a target=\"_blank\" rel=\"noopener noreferrer\" href=\"https://www.ncbi.nlm.nih.gov/nuccore/NC_003888.3\">NC_003888.3</a>"
+ "description": "For example: <a target=\"_blank\" rel=\"noopener noreferrer\" href=\"https://www.ncbi.nlm.nih.gov/nuccore/NC_003888.3\">NC_003888.3</a>",
+ "minLength": 1
},
"JGI_Genome_ID": {
"type": "string",
"title": "JGI IMG genome ID",
- "description": "For example: <a target=\"_blank\" rel=\"noopener noreferrer\" href=\"https://img.jgi.doe.gov/cgi-bin/m/main.cgi?section=TaxonDetail&page=taxonDetail&taxon_oid=641228474\">641228474</a>"
+ "description": "For example: <a target=\"_blank\" rel=\"noopener noreferrer\" href=\"https://img.jgi.doe.gov/cgi-bin/m/main.cgi?section=TaxonDetail&page=taxonDetail&taxon_oid=641228474\">641228474</a>",
+ "minLength": 1
}
}
},
Expand Down
3 changes: 2 additions & 1 deletion src/nplinker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def download_url(url: str,
url,
follow_redirects=allow_http_redirect) as response:
if not response.is_success:
+ fpath.unlink(missing_ok=True)
raise RuntimeError(
f"Failed to download url {url} with status code {response.status_code}"
)
Expand All @@ -182,7 +183,7 @@ def download_url(url: str,
unit_divisor=1024,
unit="B") as progress:
num_bytes_downloaded = response.num_bytes_downloaded
- for chunk in response.iter_raw():
+ for chunk in response.iter_bytes():
fh.write(chunk)
progress.update(response.num_bytes_downloaded -
num_bytes_downloaded)
Expand Down
2 changes: 1 addition & 1 deletion tests/pairedomics/test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_download_metabolomics_zipfile(tmp_path):
try:
sut._download_metabolomics_zipfile("c22f44b14a3d450eb836d607cb9521bb")
expected_path = os.path.join(sut.project_downloads_dir,
- 'c22f44b14a3d450eb836d607cb9521bb.zip')
+ 'METABOLOMICS-SNETS-c22f44b14a3d450eb836d607cb9521bb.zip')

assert os.path.exists(expected_path)
assert (Path(sut.project_results_dir) /
Expand Down
Loading

0 comments on commit 6301b28

Please sign in to comment.