Skip to content

Commit

Permalink
change to html parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
Karl-Svard committed Aug 21, 2024
1 parent 68bf7b6 commit 515dce5
Showing 1 changed file with 17 additions and 32 deletions.
49 changes: 17 additions & 32 deletions cg_lims/EPPs/files/ont_json_to_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import json
import logging
import os
import re
import sys
from pathlib import Path
from typing import Dict, List
Expand Down Expand Up @@ -30,14 +31,14 @@ def get_experiment_id(artifact: Artifact) -> str:
return artifact.udf.get("ONT Experiment Name")


def get_report_json_path(artifact: Artifact, root_path: str) -> str:
"""Return the expected file path of the JSON report for a given flow cell artifact."""
def get_report_html_path(artifact: Artifact, root_path: str) -> str:
"""Return the expected file path of the HTML report for a given flow cell artifact."""
flow_cell_id: str = get_flow_cell_id(artifact=artifact)
experiment_id: str = get_experiment_id(artifact=artifact)
sample_id: str = artifact.samples[0].id
file_path: str = max(
glob.glob(
f"{root_path}/{experiment_id}/{sample_id}/*{flow_cell_id}*/report_{flow_cell_id}*.json"
f"{root_path}/{experiment_id}/{sample_id}/*{flow_cell_id}*/report_{flow_cell_id}*.html"
),
key=os.path.getctime,
)
Expand All @@ -48,67 +49,51 @@ def get_report_json_path(artifact: Artifact, root_path: str) -> str:
return file_path


def parse_json(file_path: str) -> Dict:
"""Return JSON file content as a dictionary."""
def parse_html(file_path: str) -> Dict:
"""Return report data JSON from an HTML file."""
with open(file_path) as f:
data = json.load(f)
return data
data = f.read()
match = re.findall("const reportData={.*}$", data, re.M)[0]
json_string = match.split("=")[1]
return json.loads(json_string)


def get_n50(json_dict: Dict) -> int:
"""Return the N50 value found in the given report data."""
try:
return int(
json_dict["acquisitions"][3]["read_length_histogram"][-2]["plot"]["histogram_data"][0][
"n50"
]
)
return int(json_dict["estimated_n50"])
except KeyError:
return 0


def get_estimated_bases(json_dict: Dict) -> int:
"""Return the Estimated Bases value found in the given report data."""
try:
return int(
json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
"estimated_selected_bases"
]
)
return int(json_dict["data_output"]["estimated_bases"])
except KeyError:
return 0


def get_passed_bases(json_dict: Dict) -> int:
"""Return the Passed Bases value found in the given report data."""
try:
return int(
json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
"basecalled_pass_bases"
]
)
return int(json_dict["basecalling"]["bases_called_pass"])
except KeyError:
return 0


def get_failed_bases(json_dict: Dict) -> int:
"""Return the Failed Bases value found in the given report data."""
try:
return int(
json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
"basecalled_fail_bases"
]
)
return int(json_dict["basecalling"]["bases_called_fail"])
except KeyError:
return 0


def get_read_count(json_dict: Dict) -> int:
"""Return the read count found in the given report data."""
try:
return int(
json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"]["read_count"]
)
return int(json_dict["data_output"]["reads_generated"])
except KeyError:
return 0

Expand Down Expand Up @@ -136,8 +121,8 @@ def parse_ont_report(ctx, root_path: str):
artifacts: List[Artifact] = get_artifacts(process=process, input=True)

for artifact in artifacts:
file_path: str = get_report_json_path(artifact=artifact, root_path=root_path)
json_dict: Dict = parse_json(file_path=file_path)
file_path: str = get_report_html_path(artifact=artifact, root_path=root_path)
json_dict: Dict = parse_html(file_path=file_path)
set_sequencing_qc(artifact=artifact, json_dict=json_dict)

click.echo("Sequencing metrics were successfully read!")
Expand Down

0 comments on commit 515dce5

Please sign in to comment.