Skip to content

Commit

Permalink
Merge pull request #758 from robinschmid/food_masst_tree
Browse files Browse the repository at this point in the history
[foodmasst] argument for local file
  • Loading branch information
mwang87 authored Jul 27, 2021
2 parents e8bb716 + 3e84fdb commit 6ead607
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 10 deletions.
2 changes: 2 additions & 0 deletions search_single_spectrum/search_single_spectrum/tool.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
<pathSet base="$base">
<pathVar name="foodomics_track.script" path="foodomics_track.py" />
<pathVar name="foodomics_metadata.file" path="gfop_ontology_foodmasst.txt" />
<pathVar name="foodomics_metadata_table.file" path="foodomics_metadata_foodmasst.tsv" />
</pathSet>
<tool name="foodomics_track">
<require name="all_dataset_file_matches" type="file"/>
Expand All @@ -161,6 +162,7 @@
<execution env="binary" argConvention="adhoc">
<arg pathRef="foodomics_track.script"/>
<arg pathRef="foodomics_metadata.file"/>
<arg pathRef="foodomics_metadata_table.file"/>
<arg valueRef="all_dataset_file_matches"/>
<arg valueRef="foodomics_tracking_file"/>
<arg valueRef="filtered_food_metadata_file"/>
Expand Down
25 changes: 24 additions & 1 deletion search_single_spectrum/test/test_foodomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ class TestLoaders(unittest.TestCase):
def test(self):
input_filename = "data/foodomics/all_dataset_matchs.tsv"
foodomics_metadata = "../tools/search_single_spectrum/gfop_ontology_foodmasst.txt"
foodomics_metadata_table = "../tools/search_single_spectrum/foodomics_metadata_foodmasst.tsv"

out_foodmasst = "tmp/out_foodmasst.tsv"
out_filtered_food_metadata = "tmp/out_filtered_food_metadata.tsv"

# run
foodomics_track.combine_food_masst(foodomics_metadata, input_filename, out_foodmasst, out_filtered_food_metadata)
foodomics_track.combine_food_masst(foodomics_metadata, foodomics_metadata_table, input_filename, out_foodmasst, \
out_filtered_food_metadata)

# ground truth test file
test_output = pd.read_csv("data/foodomics/foodmasst_test_output.tsv", sep="\t")
Expand All @@ -35,6 +37,27 @@ def test(self):
pd.testing.assert_frame_equal(test_output, enrichment_df, check_like=True)


def test_empty_matches(self):
    """Check that the empty-input fixture yields two existing, zero-byte output files."""
    input_filename = "data/foodomics/all_dataset_matchs_empty.tsv"
    foodomics_metadata = "../tools/search_single_spectrum/gfop_ontology_foodmasst.txt"
    foodomics_metadata_table = "../tools/search_single_spectrum/foodomics_metadata_foodmasst.tsv"

    out_foodmasst = "tmp/out_foodmasst_empty.tsv"
    out_filtered_food_metadata = "tmp/out_filtered_food_metadata_empty.tsv"

    # Remove leftovers from earlier runs so the size checks below only see
    # what this run produced (a stale, non-empty file would otherwise make
    # the test fail for reasons unrelated to the code under test).
    for stale_output in (out_foodmasst, out_filtered_food_metadata):
        if os.path.exists(stale_output):
            os.remove(stale_output)

    # run
    foodomics_track.combine_food_masst(
        foodomics_metadata,
        foodomics_metadata_table,
        input_filename,
        out_foodmasst,
        out_filtered_food_metadata,
    )

    # both output files must exist and contain zero bytes;
    # assertEqual reports the actual size when the check fails
    self.assertTrue(os.path.isfile(out_foodmasst), "No enrichment file exported")
    self.assertEqual(
        os.path.getsize(out_foodmasst),
        0,
        "Enrichment file is not empty - was expected for an empty input",
    )

    self.assertTrue(os.path.isfile(out_filtered_food_metadata), "No metadata table file exported")
    self.assertEqual(
        os.path.getsize(out_filtered_food_metadata),
        0,
        "Metadata table file is not empty - was expected for an empty input",
    )

def test_food_tree(self):
test_output = "data/foodomics/foodmasst_test_output.tsv"
in_html = "../tools/search_single_spectrum/interactive_tree_js/collapsible_tree_v3.html"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
import pandas as pd
import os
import argparse
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def calculate_enrichment(matches_df, metadata_df):
Expand Down Expand Up @@ -48,10 +52,11 @@ def calculate_enrichment(matches_df, metadata_df):
return pd.DataFrame(output_list)


def metadata_file_matches(matches_df):
def metadata_file_matches(matches_df, foodomics_metadata_table):
"""
Create filtered metadata table for MASST matched files
:param matches_df: filtered MASST matches in GFOP files : pandas dataframe
:param foodomics_metadata_table: path to the tab-separated foodomics metadata table file
:return: filtered metadata
"""

Expand All @@ -60,7 +65,8 @@ def metadata_file_matches(matches_df):
# /foodomics_metadata_foodmasst.txt', sep='\t')
# gfop_meta = pd.read_csv(
# 'https://github.com/global-foodomics/GFOPontology/raw/master/data/foodomics_metadata_foodmasst.tsv', sep='\t')
gfop_meta = pd.read_csv('foodomics_metadata_foodmasst.tsv', sep='\t')
# gfop_meta = pd.read_csv('foodomics_metadata_foodmasst.tsv', sep='\t')
gfop_meta = pd.read_csv(foodomics_metadata_table, sep='\t')

# Removing extensions on both
gfop_meta["filename"] = gfop_meta["filename"].apply(lambda x: os.path.splitext(x)[0])
Expand All @@ -80,7 +86,8 @@ def filter_matches(matches_df):
return matches_df[matches_df["dataset_id"] == "MSV000084900"]


def combine_food_masst(foodomics_metadata, matches_results, output_enrichment, output_metadata_matches):
def combine_food_masst(foodomics_metadata, foodomics_metadata_table, matches_results, output_enrichment,
output_metadata_matches):
try:
matches_df = pd.read_csv(matches_results, sep="\t")
# filter to global foodomics
Expand All @@ -93,30 +100,40 @@ def combine_food_masst(foodomics_metadata, matches_results, output_enrichment, o
sorted_df = enrichment_df.sort_values(by=["group_value"])
sorted_df.to_csv(output_enrichment, sep="\t", index=False)

matched_metadata = metadata_file_matches(matches_df)
# a table of matched files and their metadata (filtered columns)
matched_metadata = metadata_file_matches(matches_df, foodomics_metadata_table)
if output_metadata_matches is not None:
matched_metadata.to_csv(output_metadata_matches, sep="\t", index=False)

return enrichment_df, matched_metadata
except:
except Exception as e:
# log the error; the output files are written empty below
logger.exception(e)
logger.exception("Error during foodMASST creation of output")

# on error write all files as empty
with open(output_enrichment, "w") as o:
with open(output_enrichment, "a") as o:
pass
# o.write("")
with open(output_metadata_matches, "w") as o:
with open(output_metadata_matches, "a") as o:
pass
# o.write("")


def main():
parser = argparse.ArgumentParser(description='Create foodomics enrichment')
parser.add_argument('foodomics_metadata', help='the foodomics metadata table')
# foodomics_metadata maps the raw data files into all matching ontology classes
# foodomics_metadata_table is a filtered version with extensive metadata for each file
parser.add_argument('foodomics_metadata', help='the foodomics ontology table (data file and all ontology levels)')
parser.add_argument('foodomics_metadata_table', help='the foodomics metadata table (data file and filtered '
'metadata)', default='foodomics_metadata_foodmasst.tsv')
parser.add_argument('matches_results', help='MASST match results')
parser.add_argument('output_enrichment', help='Output tsv file for enrichment results')
parser.add_argument('metadata_matches', help='Output filtered metadata tsv file')
args = parser.parse_args()

combine_food_masst(args.foodomics_metadata, args.matches_results, args.output_enrichment, args.metadata_matches)
combine_food_masst(args.foodomics_metadata, args.foodomics_metadata_table, args.matches_results,
args.output_enrichment, args.metadata_matches)


if __name__ == "__main__":
Expand Down

0 comments on commit 6ead607

Please sign in to comment.