Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ONT workflow EPPs #467

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b98411c
add new sample udf checker epp
Karl-Svard Jan 7, 2024
3ef97d6
refactor aliquot volume script name
Karl-Svard Jan 8, 2024
298097f
add new wip epp for updating sample volumes
Karl-Svard Jan 9, 2024
5515a7b
add wip ont calc amount script
Karl-Svard Jan 11, 2024
e644060
fixes for new calculate amount script
Karl-Svard Jan 11, 2024
22a6d1f
add wip ont aliquot script
Karl-Svard Jan 11, 2024
2724090
removed volume udf option
Karl-Svard Jan 12, 2024
ca1cfba
test a new option for copying volumes
Karl-Svard Jan 12, 2024
2e16217
improve update volume epp
Karl-Svard Jan 16, 2024
e5ad677
add new features after feedback
Karl-Svard Jan 17, 2024
1a98b2b
merge master into branch
Karl-Svard Jan 24, 2024
945d530
refactor ont aliquotation script and add some better error handling
Karl-Svard Jan 30, 2024
f12cc06
add wip sample sheet epp
Karl-Svard Jan 31, 2024
3a1196f
make the ont aliquot EPP more versatile with customizable udf names
Karl-Svard Feb 5, 2024
8e94cbc
add ont json parser
Karl-Svard Feb 6, 2024
d932b11
add EPP for setting ONT seq settings
Karl-Svard Feb 6, 2024
81f527d
add ONT JSON parser to base
Karl-Svard Feb 6, 2024
5bf8c6c
update date format in experiment name
Karl-Svard Feb 6, 2024
362ece9
update samplesheet
Karl-Svard Feb 6, 2024
9b388f3
add experiment name on artifact level
Karl-Svard Feb 15, 2024
c9d56b1
add new EPP for calculating available material for seq reload
Karl-Svard Mar 6, 2024
b95b6ba
fix for None values
Karl-Svard Mar 6, 2024
cecf758
update udf name for ont run
Karl-Svard Mar 6, 2024
488e5da
missed one spot
Karl-Svard Mar 6, 2024
f806009
fetch run ID from artifact instead
Karl-Svard Mar 6, 2024
9adf1e1
change output types of ont json parser
Karl-Svard Mar 6, 2024
437c792
update ONT sample sheet script
Karl-Svard Mar 9, 2024
cdec3ab
patch
Karl-Svard Mar 12, 2024
7d3020f
Add option to re-run input artifacts
Karl-Svard Mar 13, 2024
39b592e
fix
Karl-Svard Mar 13, 2024
3da2bee
add WIP EPP for copying container values
Karl-Svard Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cg_lims/EPPs/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# commands
from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf
from cg_lims.EPPs.files.hamilton.base import hamilton
from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report
from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map
from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map
from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet
from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet
from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters

Expand All @@ -26,4 +28,6 @@ def files(ctx):
files.add_command(trouble_shoot_kapa)
files.add_command(make_barcode_csv)
files.add_command(create_sample_sheet)
files.add_command(create_ont_sample_sheet)
files.add_command(parse_run_parameters)
files.add_command(parse_ont_report)
146 changes: 146 additions & 0 deletions cg_lims/EPPs/files/ont_json_to_udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List

import click
from cg_lims import options
from cg_lims.exceptions import LimsError, MissingFileError, MissingUDFsError
from cg_lims.get.artifacts import get_artifacts
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def get_flow_cell_id(artifact: Artifact) -> str:
    """Return the flow cell ID of an artifact, read from the connected container's name.

    Raises:
        MissingUDFsError: if the artifact has no container or the container name is empty.
    """
    container = artifact.container
    # Guard against a missing container as well as an empty name: reading `.name`
    # on a None container would otherwise raise an AttributeError.
    container_name = container.name if container else None
    if not container_name:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing a flow cell ID!")
    return container_name


def get_experiment_id(artifact: Artifact) -> str:
    """Return the ONT experiment name stored on the artifact.

    Raises:
        MissingUDFsError: if the 'ONT Experiment Name' UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice.
    experiment_id = artifact.udf.get("ONT Experiment Name")
    if not experiment_id:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing an experiment ID!")
    return experiment_id


def get_report_json_path(artifact: Artifact, root_path: str) -> str:
    """Return the path of the newest report JSON for the artifact's sequencing run.

    The file is searched for under
    ``<root_path>/<experiment ID>/<sample ID>/*<flow cell ID>*/report_<flow cell ID>*.json``
    and, when several matches exist, the most recently created one is returned.

    Raises:
        MissingFileError: if no matching report file can be found.
    """
    flow_cell_id: str = get_flow_cell_id(artifact=artifact)
    experiment_id: str = get_experiment_id(artifact=artifact)
    sample_id: str = artifact.samples[0].id
    file_paths: List[str] = glob.glob(
        f"{root_path}/{experiment_id}/{sample_id}/*{flow_cell_id}*/report_{flow_cell_id}*.json"
    )
    # An empty glob would make max() raise an opaque ValueError, so check first.
    # glob only returns existing paths, making a separate is_file() check redundant.
    if not file_paths:
        message: str = (
            f"No report JSON found for artifact {artifact.name} "
            f"(experiment {experiment_id}, flow cell {flow_cell_id})."
        )
        LOG.error(message)
        raise MissingFileError(message)
    # Pick the most recently created file in case the run was restarted.
    return max(file_paths, key=os.path.getctime)


def parse_json(file_path: str) -> Dict:
    """Read the JSON file at the given path and return its parsed contents."""
    return json.loads(Path(file_path).read_text())


def get_n50(json_dict: Dict) -> int:
    """Return the estimated N50 from a parsed ONT report, or 0 if unavailable.

    The value is read from the histogram data of the fourth acquisition entry.
    """
    try:
        return int(
            json_dict["acquisitions"][3]["read_length_histogram"][-2]["plot"]["histogram_data"][0][
                "n50"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisitions/histogram entries, or None values
        # all mean the metric is unavailable.
        return 0


def get_estimated_bases(json_dict: Dict) -> int:
    """Return the estimated number of selected bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "estimated_selected_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_passed_bases(json_dict: Dict) -> int:
    """Return the number of basecalled QC-passed bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "basecalled_pass_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_failed_bases(json_dict: Dict) -> int:
    """Return the number of basecalled QC-failed bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "basecalled_fail_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_read_count(json_dict: Dict) -> int:
    """Return the number of generated reads from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"]["read_count"]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def set_sequencing_qc(artifact: Artifact, json_dict: Dict) -> None:
    """Write the sequencing metrics of a parsed ONT report to the artifact's UDFs and save."""
    udf_values = {
        "Reads Generated": get_read_count(json_dict=json_dict),
        "Estimated Bases": get_estimated_bases(json_dict=json_dict),
        "QC Passed Bases": get_passed_bases(json_dict=json_dict),
        "QC Failed Bases": get_failed_bases(json_dict=json_dict),
        "Estimated N50": get_n50(json_dict=json_dict),
    }
    for udf_name, value in udf_values.items():
        artifact.udf[udf_name] = value
    # Persist the updated UDFs to the LIMS.
    artifact.put()


@click.command()
@options.root_path()
@click.pass_context
def parse_ont_report(ctx, root_path: str):
    """Script to parse sequencing metrics data from ONT report JSONs and save to artifact UDFs."""

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]
    try:
        # Metrics live on the input artifacts of the step.
        input_artifacts: List[Artifact] = get_artifacts(process=process, input=True)
        for input_artifact in input_artifacts:
            report_path: str = get_report_json_path(artifact=input_artifact, root_path=root_path)
            report_content: Dict = parse_json(file_path=report_path)
            set_sequencing_qc(artifact=input_artifact, json_dict=report_content)
        click.echo("Sequencing metrics were successfully read!")
    except LimsError as e:
        LOG.error(e.message)
        sys.exit(e.message)
106 changes: 106 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/create_ont_sample_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging
import sys
from pathlib import Path
from typing import List

import click
from cg_lims import options
from cg_lims.EPPs.files.sample_sheet.models import NanoporeSampleSheetHeader
from cg_lims.exceptions import LimsError, MissingUDFsError
from cg_lims.files.manage_csv_files import build_csv
from cg_lims.get.artifacts import get_artifacts
from cg_lims.get.samples import get_one_sample_from_artifact
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def get_flow_cell_id(artifact: Artifact) -> str:
    """Return the flow cell ID of an artifact, read from the connected container's name.

    Raises:
        MissingUDFsError: if the artifact has no container or the container name is empty.
    """
    container = artifact.container
    # Guard against a missing container as well as an empty name: reading `.name`
    # on a None container would otherwise raise an AttributeError.
    container_name = container.name if container else None
    if not container_name:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing a flow cell ID!")
    return container_name


def get_flow_cell_type(process: Process) -> str:
    """Return the flow cell type used for the sequencing run.

    Raises:
        MissingUDFsError: if the 'ONT Flow Cell Type' process UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice; the message is a plain
    # string (the original f-prefix had no placeholders).
    flow_cell_type = process.udf.get("ONT Flow Cell Type")
    if not flow_cell_type:
        raise MissingUDFsError("Sample sheet generation requires a flow cell type!")
    return flow_cell_type


def get_sample_id(artifact: Artifact) -> str:
    """Return the ID of the single sample connected to the given artifact."""
    sample = get_one_sample_from_artifact(artifact=artifact)
    return sample.id


def get_experiment_name(process: Process) -> str:
    """Return the experiment name used for the sequencing run.

    Raises:
        MissingUDFsError: if the 'Experiment Name' process UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice; the message is a plain
    # string (the original f-prefix had no placeholders).
    experiment_name = process.udf.get("Experiment Name")
    if not experiment_name:
        raise MissingUDFsError("Sample sheet generation requires an experiment name!")
    return experiment_name


def get_kit(process: Process) -> str:
    """Return the prep kit(s) used, in the format required for sample sheet generation."""
    library_kit = process.udf.get("ONT Prep Kit")
    if not library_kit:
        raise MissingUDFsError("Sample sheet generation requires a library kit name!")
    expansion_kit = process.udf.get("ONT Expansion Kit")
    # When an expansion kit is set it is appended after the library kit.
    return f"{library_kit} {expansion_kit}" if expansion_kit else library_kit


def get_header() -> List[str]:
    """Return the column header row of the ONT sample sheet."""
    columns = (
        NanoporeSampleSheetHeader.FLOW_CELL_ID,
        NanoporeSampleSheetHeader.FLOW_CELL_PROD_CODE,
        NanoporeSampleSheetHeader.SAMPLE_ID,
        NanoporeSampleSheetHeader.EXPERIMENT_ID,
        NanoporeSampleSheetHeader.KIT,
    )
    return list(columns)


def get_row(artifact: Artifact, process: Process) -> List[str]:
    """Return the sample sheet row describing one sample."""
    flow_cell_id = get_flow_cell_id(artifact=artifact)
    flow_cell_type = get_flow_cell_type(process=process)
    sample_id = get_sample_id(artifact=artifact)
    experiment_name = get_experiment_name(process=process)
    kit = get_kit(process=process)
    # Column order must match the header produced by get_header().
    return [flow_cell_id, flow_cell_type, sample_id, experiment_name, kit]


def get_sample_sheet_content(process: Process) -> List[List[str]]:
    """Return the sample sheet content: one row per artifact of the process."""
    artifacts: List[Artifact] = get_artifacts(process=process)
    # A comprehension replaces the manual append loop.
    return [get_row(artifact=artifact, process=process) for artifact in artifacts]


@click.command()
@options.file_placeholder(help="File placeholder name.")
@click.pass_context
def create_ont_sample_sheet(ctx, file: str):
    """Create an Oxford Nanopore sample sheet .csv file from an 'ONT Start Sequencing' step."""
    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]
    try:
        experiment_name: str = get_experiment_name(process=process)
        sheet_rows: List[List[str]] = get_sample_sheet_content(process=process)
        # The experiment name is embedded in the generated file's name.
        csv_path: Path = Path(f"{file}_sample_sheet_{experiment_name}.csv")
        build_csv(rows=sheet_rows, headers=get_header(), file=csv_path)
        message: str = "The sample sheet was successfully generated."
        LOG.info(message)
        click.echo(message)
    except LimsError as e:
        LOG.error(e.message)
        sys.exit(e.message)
8 changes: 8 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,11 @@ def get_bclconversion_data_row(self) -> str:
if self.run_settings.barcode_mismatches:
line = line + f",{self.barcode_mismatch_index_1},{self.barcode_mismatch_index_2}"
return line + "\n"


class NanoporeSampleSheetHeader(StrEnum):
    """Column names of an Oxford Nanopore sample sheet.

    Members compare equal to their string values (StrEnum), so they can be
    written directly into the CSV header row.
    """

    FLOW_CELL_ID: str = "flow_cell_id"
    FLOW_CELL_PROD_CODE: str = "flow_cell_product_code"
    SAMPLE_ID: str = "sample_id"
    EXPERIMENT_ID: str = "experiment_id"
    KIT: str = "kit"
10 changes: 8 additions & 2 deletions cg_lims/EPPs/move/rerun_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,18 @@ def check_same_sample_in_many_rerun_pools(rerun_arts: List[Artifact]) -> None:
help="The name(s) of the process type(s) before the requeue step. Fetching artifact to requeue from here."
)
@options.udf(help="UDF that will tell wich artifacts to move.")
@options.input(
help="Use this flag if you want to queue the input artifacts of the current process. Default is to queue the "
"output artifacts (analytes) of the process. "
)
@click.pass_context
def rerun_samples(ctx, workflow_id, stage_id, udf, process_types):
def rerun_samples(
ctx, workflow_id: str, stage_id: str, udf: str, process_types: List[str], input: bool
):
"""Script to requeue samples for sequencing."""
process = ctx.obj["process"]
lims = ctx.obj["lims"]
artifacts = get_artifacts(process, False)
artifacts = get_artifacts(process=process, input=input)
rerun_arts = filter_artifacts(artifacts, udf, True)
if rerun_arts:
try:
Expand Down
10 changes: 8 additions & 2 deletions cg_lims/EPPs/udf/calculate/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python

import click
from cg_lims.EPPs.udf.calculate.calculate_aliquot_volume import calculate_aliquot_volume
from cg_lims.EPPs.udf.calculate.calculate_amount_ng import calculate_amount_ng
from cg_lims.EPPs.udf.calculate.calculate_amount_ng_fmol import calculate_amount_ng_fmol
from cg_lims.EPPs.udf.calculate.calculate_average_size_and_set_qc import (
calculate_average_size_and_set_qc,
)
Expand All @@ -19,10 +21,11 @@
from cg_lims.EPPs.udf.calculate.molar_concentration import molar_concentration
from cg_lims.EPPs.udf.calculate.novaseq_x_denaturation import novaseq_x_denaturation
from cg_lims.EPPs.udf.calculate.novaseq_x_volumes import novaseq_x_volumes
from cg_lims.EPPs.udf.calculate.ont_aliquot_volume import ont_aliquot_volume
from cg_lims.EPPs.udf.calculate.ont_sequencing_reload import ont_available_sequencing_reload
from cg_lims.EPPs.udf.calculate.pool_normalization import pool_normalization
from cg_lims.EPPs.udf.calculate.sum_missing_reads_in_pool import missing_reads_in_pool
from cg_lims.EPPs.udf.calculate.twist_aliquot_amount import twist_aliquot_amount
from cg_lims.EPPs.udf.calculate.twist_aliquot_volume import twist_aliquot_volume
from cg_lims.EPPs.udf.calculate.twist_get_volumes_from_buffer import get_volumes_from_buffer

# commands
Expand All @@ -39,11 +42,12 @@ def calculate(ctx):

calculate.add_command(twist_pool)
calculate.add_command(twist_aliquot_amount)
calculate.add_command(twist_aliquot_volume)
calculate.add_command(calculate_aliquot_volume)
calculate.add_command(twist_qc_amount)
calculate.add_command(get_volumes_from_buffer)
calculate.add_command(get_missing_reads)
calculate.add_command(calculate_amount_ng)
calculate.add_command(calculate_amount_ng_fmol)
calculate.add_command(volume_water)
calculate.add_command(molar_concentration)
calculate.add_command(calculate_beads)
Expand All @@ -56,3 +60,5 @@ def calculate(ctx):
calculate.add_command(novaseq_x_volumes)
calculate.add_command(pool_normalization)
calculate.add_command(novaseq_x_denaturation)
calculate.add_command(ont_aliquot_volume)
calculate.add_command(ont_available_sequencing_reload)
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def calculate_volumes(artifacts: List[Artifact], process: Process):

@click.command()
@click.pass_context
def twist_aliquot_volume(ctx):
"""Calculates amount needed for samples."""
def calculate_aliquot_volume(ctx):
"""Calculates aliquot volumes needed for samples."""

LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")

Expand Down
Loading
Loading