Skip to content

Commit

Permalink
fix: pass data instance when extracting metadata
Browse files: browse the repository at this point in the history
  • Loading branch information
cmdoret committed Oct 15, 2024
1 parent 6aa4bbb commit 0183d45
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 12 deletions.
6 changes: 4 additions & 2 deletions modos/genomics/cram.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import modos_schema.datamodel as model


def extract_cram_metadata(cram_path: Path) -> list:
def extract_cram_metadata(
instance: model.AlignmentSet, base_path: Path
) -> list:
"""Extract metadata from the CRAM file header and
convert specific attributes according to the modo schema."""
cram = pysam.AlignmentFile(str(cram_path), mode="rc")
cram = pysam.AlignmentFile(str(base_path / instance.data_path), mode="rc")
cram_head = cram.header
ref_list: list = []
for refseq in cram_head.get("SQ"):
Expand Down
7 changes: 3 additions & 4 deletions modos/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,12 @@ def extract_metadata(instance, base_path: Path) -> List:
if not isinstance(instance, model.DataEntity):
raise ValueError(f"{instance} is not a DataEntity, cannot extract")

file_path = base_path / instance.data_path
match str(instance.data_format):
case "mzTab":
return extract_mztab_metadata(file_path)
return extract_mztab_metadata(instance, base_path)
case "CRAM":
return extract_cram_metadata(file_path)
return extract_cram_metadata(instance, base_path)
case _:
raise NotImplementedError(
f"Metadata extraction not impolemented for this format: {instance.data_format}"
f"Metadata extraction not implemented for this format: {instance.data_format}"
)
14 changes: 8 additions & 6 deletions modos/metabolomics/mztab.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ def get_samples(mz: MzTab) -> list[model.Sample]:
return samples


def get_assay(mz: MzTab) -> model.Assay:
def get_assay(mz: MzTab, data_id: str) -> model.Assay:
meta = mz.metadata
assay = model.Assay(
id=f"assay/{meta['mzTab-ID']}",
id=meta["mzTab-ID"],
name=meta.get("title", None),
description=meta.get("description", None),
has_data=f"data/{meta['mzTab-ID']}",
has_data=data_id,
has_sample=[sample["name"] for sample in mz.samples.values()],
omics_type="METABOLOMICS"
if re.match(r".*-M", mz.version)
Expand All @@ -41,9 +41,11 @@ def get_assay(mz: MzTab) -> model.Assay:
return assay


def extract_mztab_metadata(path: Path) -> list[model.Assay | model.Sample]:
mz = load_mztab(path)
def extract_mztab_metadata(
instance: model.MassSpectrometryResults, base_path: Path
) -> list[model.Assay | model.Sample]:
mz = load_mztab(base_path / instance.data_path)
elems = []
elems.extend(get_samples(mz))
elems.append(get_assay(mz))
elems.append(get_assay(mz, instance.id))
return elems

0 comments on commit 0183d45

Please sign in to comment.