Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disabled key conversion in matchms tools #484

Merged
merged 6 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion tools/matchms/matchms_add_key.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_add_key" name="matchms add key" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_add_key" name="matchms add key" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>Set metadata key in MSP to static value</description>

<macros>
Expand All @@ -25,8 +25,13 @@
<configfile name="matchms_python_cli">
@init_logger@

import matchms
from matchms.importing import load_from_msp
from matchms.exporting import save_as_msp


matchms.Metadata.set_key_replacements({})

spectra = list(load_from_msp("${spectral_library}", metadata_harmonization = "False"))
new_spectra = []
for spectrum in spectra:
Expand Down
9 changes: 8 additions & 1 deletion tools/matchms/matchms_metadata_export.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_metadata_export" name="matchms metadata export" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_metadata_export" name="matchms metadata export" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>extract all metadata from mass spectra file to tabular format</description>
<macros>
<import>macros.xml</import>
Expand All @@ -17,9 +17,14 @@

<configfiles>
<configfile name="matchms_python_cli">
import matchms
from matchms.importing import load_from_msp, load_from_mgf
from matchms.exporting.metadata_export import export_metadata_as_csv


if "$harmonize_metadata" == "False":
matchms.Metadata.set_key_replacements({})

spectra_list = list(load_from_${input_file.ext}("${input_file}", $harmonize_metadata))

export_metadata_as_csv(spectra_list, "${output_file}")
Expand All @@ -39,10 +44,12 @@ export_metadata_as_csv(spectra_list, "${output_file}")
<tests>
<test>
<param name="input_file" value="convert/mgf_out.mgf" ftype="mgf"/>
<param name="harmonize_metadata" value="True"/>
<output name="output_file" file="convert/metadata.csv" ftype="csv" compare="sim_size" delta="0"/>
</test>
<test>
<param name="input_file" value="similarity/RECETOX_Exposome_pesticides_HR_MS_20220323.msp" ftype="msp"/>
<param name="harmonize_metadata" value="True"/>
<output name="output_file" file="convert/metadata.csv" ftype="csv" compare="sim_size" delta="0"/>
</test>
</tests>
Expand Down
32 changes: 4 additions & 28 deletions tools/matchms/matchms_split.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
import argparse
import itertools
import os
from typing import List

import matchms
from matchms.exporting import save_as_msp
from matchms.importing import load_from_msp


def get_spectra_names(spectra: list) -> List[str]:
    """Collect the 'compound_name' metadata value of every spectrum.

    Args:
        spectra (list): Individual spectra; each element must provide a
            ``get(key)`` accessor for its metadata.

    Returns:
        List[str]: One 'compound_name' value per spectrum, in input order.
            Whatever ``get`` yields for a spectrum lacking the key is passed
            through unchanged (presumably ``None`` -- confirm against the
            spectrum class in use).
    """
    return [spectrum.get("compound_name") for spectrum in spectra]
matchms.Metadata.set_key_replacements({})


def make_outdir(outdir: str):
Expand All @@ -35,23 +26,8 @@ def write_spectra(spectra, outdir):
spectra (List[Spectrum]): Spectra to write to file
outdir (str): Path to destination directory.
"""
names = get_spectra_names(spectra)
for i in range(len(spectra)):
outpath = assemble_outpath(names[i], outdir)
save_as_msp(spectra[i], outpath)


def assemble_outpath(name, outdir):
    """Build the output file path for a spectrum from its name.

    Every non-alphanumeric character is removed from ``name`` and the
    ".msp" extension is appended to form the file name.

    Args:
        name (str): Name to be filtered. ``None``, an empty string, or a
            name without any alphanumeric character falls back to the
            stem "unnamed" instead of raising or producing a bare ".msp".
        outdir (str): Path to destination directory.

    Returns:
        str: ``outdir`` joined with the filtered file name.
    """
    # `name or ""` keeps a missing name from reaching filter(), which would
    # raise TypeError when asked to iterate over None.
    filename = "".join(filter(str.isalnum, name or ""))
    if not filename:
        # A bare ".msp" would be a hidden file and collide across spectra
        # in a confusing way; use an explicit placeholder stem instead.
        filename = "unnamed"
    return os.path.join(outdir, filename + ".msp")
save_as_msp(spectra[i], os.path.join(outdir, f"{i}.msp"))


def split_round_robin(iterable, num_chunks):
Expand All @@ -76,7 +52,7 @@ def split_round_robin(iterable, num_chunks):


if __name__ == "__main__":
spectra = load_from_msp(filename, metadata_harmonization=True)
spectra = load_from_msp(filename, metadata_harmonization=False)
make_outdir(outdir)

if method == "one-per-file":
Expand Down
22 changes: 11 additions & 11 deletions tools/matchms/matchms_split.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_split" name="matchms split library" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_split" name="matchms split library" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>split a large library into subsets</description>
<macros>
<import>macros.xml</import>
Expand Down Expand Up @@ -53,16 +53,16 @@
<param name="msp_input" value="split/sample_input.msp" />
<param name="split_type" value="one-per-file" />
<output_collection name="sample" type="list">
<element name="1NITROPYRENE" file="split/one-per-file/1NITROPYRENE.msp" ftype="msp" compare="diff"/>
<element name="23DICHLOROPHENOL" file="split/one-per-file/23DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="245TRICHLOROPHENOL" file="split/one-per-file/245TRICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="246TRICHLOROPHENOL" file="split/one-per-file/246TRICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="24DICHLOROPHENOL" file="split/one-per-file/24DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="24DINITROPHENOL" file="split/one-per-file/24DINITROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="25DICHLOROPHENOL" file="split/one-per-file/25DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="26DICHLOROPHENOL" file="split/one-per-file/26DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="34DICHLOROPHENOL" file="split/one-per-file/34DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="35DICHLOROPHENOL" file="split/one-per-file/35DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
<element name="0" file="split/one-per-file/0.msp" ftype="msp" compare="diff"/>
<element name="1" file="split/one-per-file/1.msp" ftype="msp" compare="diff"/>
<element name="2" file="split/one-per-file/2.msp" ftype="msp" compare="diff"/>
<element name="3" file="split/one-per-file/3.msp" ftype="msp" compare="diff"/>
<element name="4" file="split/one-per-file/4.msp" ftype="msp" compare="diff"/>
<element name="5" file="split/one-per-file/5.msp" ftype="msp" compare="diff"/>
<element name="6" file="split/one-per-file/6.msp" ftype="msp" compare="diff"/>
<element name="7" file="split/one-per-file/7.msp" ftype="msp" compare="diff"/>
<element name="8" file="split/one-per-file/8.msp" ftype="msp" compare="diff"/>
<element name="9" file="split/one-per-file/9.msp" ftype="msp" compare="diff"/>
</output_collection>
</test>
<test>
Expand Down
4 changes: 2 additions & 2 deletions tools/matchms/matchms_subsetting.xml
hechth marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ matchms.exporting.save_as_msp(filtered_spectra.tolist(), '${output}')

<tests>
<test>
<param name="spectral_library" value="out_matchms_add_key.msp" ftype="msp"/>
<param name="spectral_library" value="filtering/input.msp" ftype="msp"/>
<param name="mode" value="include"/>
<param name="list_of_identifiers" value="subsetting/identifier.csv" ftype="csv"/>
<output name="output" file="subsetting/subsetting_output.msp" ftype="msp"/>
</test>
<test>
<param name="spectral_library" value="out_matchms_add_key.msp" ftype="msp"/>
<param name="spectral_library" value="filtering/input.msp" ftype="msp"/>
<param name="mode" value="exclude"/>
<param name="list_of_identifiers" value="subsetting/identifier.csv" ftype="csv"/>
<output name="output" file="subsetting/subsetting_output2.msp" ftype="msp"/>
Expand Down
Loading
Loading