Skip to content

Commit

Permalink
Update strain_mappings_generator.py
Browse files Browse the repository at this point in the history
The GNPS file mappings file can be either a TSV or a CSV file.
- rename `gnps_file_mapping_tsv_file` to `gnps_file_mappings_file`
- rename `tsv_file` to `gnps_file_mappings_file`
- update relevant docstrings
  • Loading branch information
CunliangGeng committed Mar 1, 2024
1 parent 0dd19d5 commit da51aeb
Showing 1 changed file with 14 additions and 12 deletions.
26 changes: 14 additions & 12 deletions src/nplinker/pairedomics/strain_mappings_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def podp_generate_strain_mappings(
podp_project_json_file: str | PathLike,
genome_status_json_file: str | PathLike,
genome_bgc_mappings_file: str | PathLike,
gnps_file_mapping_tsv_file: str | PathLike,
gnps_file_mappings_file: str | PathLike,
output_json_file: str | PathLike,
) -> StrainCollection:
"""Generate strain mappings JSON file for PODP pipeline.
Expand All @@ -44,7 +44,7 @@ def podp_generate_strain_mappings(
- "original_genome_id <-> resolved_genome_id" is extracted from `genome_status_json_file`.
- "resolved_genome_id <-> bgc_id" is extracted from `genome_bgc_mappings_file`.
- "strain_id <-> MS_filename" is extracted from `podp_project_json_file`.
- "MS_filename <-> spectrum_id" is extracted from `gnps_file_mapping_tsv_file`.
- "MS_filename <-> spectrum_id" is extracted from `gnps_file_mappings_file`.
Args:
podp_project_json_file(str | PathLike): The path to the PODP project
Expand All @@ -53,8 +53,8 @@ def podp_generate_strain_mappings(
JSON file.
genome_bgc_mappings_file(str | PathLike): The path to the genome BGC
mappings JSON file.
gnps_file_mapping_tsv_file(str | PathLike): The path to the GNPS file
mapping TSV file.
gnps_file_mappings_file(str | PathLike): The path to the GNPS file
mappings file (csv or tsv).
output_json_file(str | PathLike): The path to the output JSON file.
Returns:
Expand Down Expand Up @@ -84,7 +84,7 @@ def podp_generate_strain_mappings(
# Get mappings strain_id <-> MS_filename <-> spectrum_id
mappings_strain_id_spectrum_id = get_mappings_strain_id_spectrum_id(
extract_mappings_strain_id_ms_filename(podp_project_json_file),
extract_mappings_ms_filename_spectrum_id(gnps_file_mapping_tsv_file),
extract_mappings_ms_filename_spectrum_id(gnps_file_mappings_file),
)

# Get mappings strain_id <-> bgc_id / spectrum_id
Expand Down Expand Up @@ -280,24 +280,26 @@ def extract_mappings_strain_id_ms_filename(
return mappings_dict


def extract_mappings_ms_filename_spectrum_id(tsv_file: str | PathLike) -> dict[str, set[str]]:
def extract_mappings_ms_filename_spectrum_id(
gnps_file_mappings_file: str | PathLike
) -> dict[str, set[str]]:
"""Extract mappings "MS_filename <-> spectrum_id".
Args:
tsv_file(str | PathLike): The path to the GNPS file mapping TSV file.
gnps_file_mappings_file(str | PathLike): The path to the GNPS file mappings file (csv or
tsv).
Returns:
dict[str, set[str]]: Key is MS filename and value is a set of spectrum ids.
Notes:
The `tsv_file` is generated by GNPS molecular networking. It's downloaded
from GNPS website to a file with a default name defined in
`GNPS_FILE_MAPPINGS_FILENAME`.
The `gnps_file_mappings_file` is generated by GNPS molecular networking. It's downloaded
from GNPS website to a file with a default name defined in `GNPS_FILE_MAPPINGS_FILENAME`.
See Also:
`GNPSFileMappingLoader`: A class to load GNPS file mapping TSV file.
`GNPSFileMappingLoader`: A class to load GNPS file mappings file.
"""
loader = GNPSFileMappingLoader(tsv_file)
loader = GNPSFileMappingLoader(gnps_file_mappings_file)
return loader.mapping_reversed


Expand Down

0 comments on commit da51aeb

Please sign in to comment.