Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ONT workflow EPPs #467

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b98411c
add new sample udf checker epp
Karl-Svard Jan 7, 2024
3ef97d6
refactor aliquot volume script name
Karl-Svard Jan 8, 2024
298097f
add new wip epp for updating sample volumes
Karl-Svard Jan 9, 2024
5515a7b
add wip ont calc amount script
Karl-Svard Jan 11, 2024
e644060
fixes for new calculate amount script
Karl-Svard Jan 11, 2024
22a6d1f
add wip ont aliquot script
Karl-Svard Jan 11, 2024
2724090
removed volume udf option
Karl-Svard Jan 12, 2024
ca1cfba
test a new option for copying volumes
Karl-Svard Jan 12, 2024
2e16217
improve update volume epp
Karl-Svard Jan 16, 2024
e5ad677
add new features after feedback
Karl-Svard Jan 17, 2024
1a98b2b
merge master into branch
Karl-Svard Jan 24, 2024
945d530
refactor ont aliquotation script and add some better error handling
Karl-Svard Jan 30, 2024
f12cc06
add wip sample sheet epp
Karl-Svard Jan 31, 2024
3a1196f
make the ont aliquot EPP more versatile with customizable udf names
Karl-Svard Feb 5, 2024
8e94cbc
add ont json parser
Karl-Svard Feb 6, 2024
d932b11
add EPP for setting ONT seq settings
Karl-Svard Feb 6, 2024
81f527d
add ONT JSON parser to base
Karl-Svard Feb 6, 2024
5bf8c6c
update date format in experiment name
Karl-Svard Feb 6, 2024
362ece9
update samplesheet
Karl-Svard Feb 6, 2024
9b388f3
add experiment name on artifact level
Karl-Svard Feb 15, 2024
c9d56b1
add new EPP for calculating available material for seq reload
Karl-Svard Mar 6, 2024
b95b6ba
fix for None values
Karl-Svard Mar 6, 2024
cecf758
update udf name for ont run
Karl-Svard Mar 6, 2024
488e5da
missed one spot
Karl-Svard Mar 6, 2024
f806009
fetch run ID from artifact instead
Karl-Svard Mar 6, 2024
9adf1e1
change output types of ont json parser
Karl-Svard Mar 6, 2024
437c792
update ONT sample sheet script
Karl-Svard Mar 9, 2024
cdec3ab
patch
Karl-Svard Mar 12, 2024
7d3020f
Add option to re-run input artifacts
Karl-Svard Mar 13, 2024
39b592e
fix
Karl-Svard Mar 13, 2024
3da2bee
add WIP EPP for copying container values
Karl-Svard Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cg_lims/EPPs/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# commands
from cg_lims.EPPs.files.file_to_udf import csv_well_to_udf
from cg_lims.EPPs.files.hamilton.base import hamilton
from cg_lims.EPPs.files.ont_json_to_udf import parse_ont_report
from cg_lims.EPPs.files.placement_map.make_96well_placement_map import placement_map
from cg_lims.EPPs.files.pooling_map.make_pooling_map import pool_map
from cg_lims.EPPs.files.sample_sheet.create_ont_sample_sheet import create_ont_sample_sheet
from cg_lims.EPPs.files.sample_sheet.create_sample_sheet import create_sample_sheet
from cg_lims.EPPs.files.xml_to_udf import parse_run_parameters

Expand All @@ -26,4 +28,6 @@ def files(ctx):
files.add_command(trouble_shoot_kapa)
files.add_command(make_barcode_csv)
files.add_command(create_sample_sheet)
files.add_command(create_ont_sample_sheet)
files.add_command(parse_run_parameters)
files.add_command(parse_ont_report)
146 changes: 146 additions & 0 deletions cg_lims/EPPs/files/ont_json_to_udf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import glob
import json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List

import click
from cg_lims import options
from cg_lims.exceptions import LimsError, MissingFileError, MissingUDFsError
from cg_lims.get.artifacts import get_artifacts
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def get_flow_cell_id(artifact: Artifact) -> str:
    """Return the flow cell ID of an artifact, read from the connected container's name.

    Raises:
        MissingUDFsError: if the artifact has no container or the container name is empty.
    """
    container = artifact.container
    # Guard against a missing container as well as an empty name: reading `.name`
    # on a None container would otherwise raise an AttributeError.
    container_name = container.name if container else None
    if not container_name:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing a flow cell ID!")
    return container_name


def get_experiment_id(artifact: Artifact) -> str:
    """Return the ONT experiment name stored on the artifact.

    Raises:
        MissingUDFsError: if the 'ONT Experiment Name' UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice.
    experiment_id = artifact.udf.get("ONT Experiment Name")
    if not experiment_id:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing an experiment ID!")
    return experiment_id


def get_report_json_path(artifact: Artifact, root_path: str) -> str:
    """Return the path of the newest report JSON for the artifact's sequencing run.

    The file is searched for under
    ``<root_path>/<experiment ID>/<sample ID>/*<flow cell ID>*/report_<flow cell ID>*.json``
    and, when several matches exist, the most recently created one is returned.

    Raises:
        MissingFileError: if no matching report file can be found.
    """
    flow_cell_id: str = get_flow_cell_id(artifact=artifact)
    experiment_id: str = get_experiment_id(artifact=artifact)
    sample_id: str = artifact.samples[0].id
    file_paths: List[str] = glob.glob(
        f"{root_path}/{experiment_id}/{sample_id}/*{flow_cell_id}*/report_{flow_cell_id}*.json"
    )
    # An empty glob would make max() raise an opaque ValueError, so check first.
    # glob only returns existing paths, making a separate is_file() check redundant.
    if not file_paths:
        message: str = (
            f"No report JSON found for artifact {artifact.name} "
            f"(experiment {experiment_id}, flow cell {flow_cell_id})."
        )
        LOG.error(message)
        raise MissingFileError(message)
    # Pick the most recently created file in case the run was restarted.
    return max(file_paths, key=os.path.getctime)


def parse_json(file_path: str) -> Dict:
    """Read the JSON file at the given path and return its parsed contents."""
    return json.loads(Path(file_path).read_text())


def get_n50(json_dict: Dict) -> int:
    """Return the estimated N50 from a parsed ONT report, or 0 if unavailable.

    The value is read from the histogram data of the fourth acquisition entry.
    """
    try:
        return int(
            json_dict["acquisitions"][3]["read_length_histogram"][-2]["plot"]["histogram_data"][0][
                "n50"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisitions/histogram entries, or None values
        # all mean the metric is unavailable.
        return 0


def get_estimated_bases(json_dict: Dict) -> int:
    """Return the estimated number of selected bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "estimated_selected_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_passed_bases(json_dict: Dict) -> int:
    """Return the number of basecalled QC-passed bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "basecalled_pass_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_failed_bases(json_dict: Dict) -> int:
    """Return the number of basecalled QC-failed bases from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"][
                "basecalled_fail_bases"
            ]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def get_read_count(json_dict: Dict) -> int:
    """Return the number of generated reads from a parsed ONT report, or 0 if unavailable."""
    try:
        return int(
            json_dict["acquisitions"][3]["acquisition_run_info"]["yield_summary"]["read_count"]
        )
    except (KeyError, IndexError, TypeError):
        # Missing keys, too few acquisition entries, or None values all mean
        # the metric is unavailable.
        return 0


def set_sequencing_qc(artifact: Artifact, json_dict: Dict) -> None:
    """Write the sequencing metrics of a parsed ONT report to the artifact's UDFs and save."""
    udf_values = {
        "Reads Generated": get_read_count(json_dict=json_dict),
        "Estimated Bases": get_estimated_bases(json_dict=json_dict),
        "QC Passed Bases": get_passed_bases(json_dict=json_dict),
        "QC Failed Bases": get_failed_bases(json_dict=json_dict),
        "Estimated N50": get_n50(json_dict=json_dict),
    }
    for udf_name, value in udf_values.items():
        artifact.udf[udf_name] = value
    # Persist the updated UDFs to the LIMS.
    artifact.put()


@click.command()
@options.root_path()
@click.pass_context
def parse_ont_report(ctx, root_path: str):
    """Script to parse sequencing metrics data from ONT report JSONs and save to artifact UDFs."""

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]
    try:
        # Metrics live on the input artifacts of the step.
        input_artifacts: List[Artifact] = get_artifacts(process=process, input=True)
        for input_artifact in input_artifacts:
            report_path: str = get_report_json_path(artifact=input_artifact, root_path=root_path)
            report_content: Dict = parse_json(file_path=report_path)
            set_sequencing_qc(artifact=input_artifact, json_dict=report_content)
        click.echo("Sequencing metrics were successfully read!")
    except LimsError as e:
        LOG.error(e.message)
        sys.exit(e.message)
106 changes: 106 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/create_ont_sample_sheet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import logging
import sys
from pathlib import Path
from typing import List

import click
from cg_lims import options
from cg_lims.EPPs.files.sample_sheet.models import NanoporeSampleSheetHeader
from cg_lims.exceptions import LimsError, MissingUDFsError
from cg_lims.files.manage_csv_files import build_csv
from cg_lims.get.artifacts import get_artifacts
from cg_lims.get.samples import get_one_sample_from_artifact
from genologics.entities import Artifact, Process

LOG = logging.getLogger(__name__)


def get_flow_cell_id(artifact: Artifact) -> str:
    """Return the flow cell ID of an artifact, read from the connected container's name.

    Raises:
        MissingUDFsError: if the artifact has no container or the container name is empty.
    """
    container = artifact.container
    # Guard against a missing container as well as an empty name: reading `.name`
    # on a None container would otherwise raise an AttributeError.
    container_name = container.name if container else None
    if not container_name:
        raise MissingUDFsError(f"Artifact {artifact.name} is missing a flow cell ID!")
    return container_name


def get_flow_cell_type(process: Process) -> str:
    """Return the flow cell type used for the sequencing run.

    Raises:
        MissingUDFsError: if the 'ONT Flow Cell Type' process UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice; the message is a plain
    # string (the original f-prefix had no placeholders).
    flow_cell_type = process.udf.get("ONT Flow Cell Type")
    if not flow_cell_type:
        raise MissingUDFsError("Sample sheet generation requires a flow cell type!")
    return flow_cell_type


def get_sample_id(artifact: Artifact) -> str:
    """Return the ID of the single sample connected to the given artifact."""
    sample = get_one_sample_from_artifact(artifact=artifact)
    return sample.id


def get_experiment_name(process: Process) -> str:
    """Return the experiment name used for the sequencing run.

    Raises:
        MissingUDFsError: if the 'Experiment Name' process UDF is unset or empty.
    """
    # Fetch once instead of looking the UDF up twice; the message is a plain
    # string (the original f-prefix had no placeholders).
    experiment_name = process.udf.get("Experiment Name")
    if not experiment_name:
        raise MissingUDFsError("Sample sheet generation requires an experiment name!")
    return experiment_name


def get_kit(process: Process) -> str:
    """Return the prep kit(s) used, in the format required for sample sheet generation."""
    library_kit = process.udf.get("ONT Prep Kit")
    if not library_kit:
        raise MissingUDFsError("Sample sheet generation requires a library kit name!")
    expansion_kit = process.udf.get("ONT Expansion Kit")
    # When an expansion kit is set it is appended after the library kit.
    return f"{library_kit} {expansion_kit}" if expansion_kit else library_kit


def get_header() -> List[str]:
    """Return the column header row of the ONT sample sheet."""
    columns = (
        NanoporeSampleSheetHeader.FLOW_CELL_ID,
        NanoporeSampleSheetHeader.FLOW_CELL_PROD_CODE,
        NanoporeSampleSheetHeader.SAMPLE_ID,
        NanoporeSampleSheetHeader.EXPERIMENT_ID,
        NanoporeSampleSheetHeader.KIT,
    )
    return list(columns)


def get_row(artifact: Artifact, process: Process) -> List[str]:
    """Return the sample sheet row describing one sample."""
    flow_cell_id = get_flow_cell_id(artifact=artifact)
    flow_cell_type = get_flow_cell_type(process=process)
    sample_id = get_sample_id(artifact=artifact)
    experiment_name = get_experiment_name(process=process)
    kit = get_kit(process=process)
    # Column order must match the header produced by get_header().
    return [flow_cell_id, flow_cell_type, sample_id, experiment_name, kit]


def get_sample_sheet_content(process: Process) -> List[List[str]]:
    """Return the sample sheet content: one row per artifact of the process."""
    artifacts: List[Artifact] = get_artifacts(process=process)
    # A comprehension replaces the manual append loop.
    return [get_row(artifact=artifact, process=process) for artifact in artifacts]


@click.command()
@options.file_placeholder(help="File placeholder name.")
@click.pass_context
def create_ont_sample_sheet(ctx, file: str):
    """Create an Oxford Nanopore sample sheet .csv file from an 'ONT Start Sequencing' step."""
    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    process: Process = ctx.obj["process"]
    try:
        experiment_name: str = get_experiment_name(process=process)
        sheet_rows: List[List[str]] = get_sample_sheet_content(process=process)
        # The experiment name is embedded in the generated file's name.
        csv_path: Path = Path(f"{file}_sample_sheet_{experiment_name}.csv")
        build_csv(rows=sheet_rows, headers=get_header(), file=csv_path)
        message: str = "The sample sheet was successfully generated."
        LOG.info(message)
        click.echo(message)
    except LimsError as e:
        LOG.error(e.message)
        sys.exit(e.message)
8 changes: 8 additions & 0 deletions cg_lims/EPPs/files/sample_sheet/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,11 @@ def get_bclconversion_data_row(self) -> str:
if self.run_settings.barcode_mismatches:
line = line + f",{self.barcode_mismatch_index_1},{self.barcode_mismatch_index_2}"
return line + "\n"


class NanoporeSampleSheetHeader(StrEnum):
    """Column names of an Oxford Nanopore sample sheet.

    Members compare equal to their string values (StrEnum), so they can be
    written directly into the CSV header row.
    """

    FLOW_CELL_ID: str = "flow_cell_id"
    FLOW_CELL_PROD_CODE: str = "flow_cell_product_code"
    SAMPLE_ID: str = "sample_id"
    EXPERIMENT_ID: str = "experiment_id"
    KIT: str = "kit"
10 changes: 8 additions & 2 deletions cg_lims/EPPs/move/rerun_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,18 @@ def check_same_sample_in_many_rerun_pools(rerun_arts: List[Artifact]) -> None:
help="The name(s) of the process type(s) before the requeue step. Fetching artifact to requeue from here."
)
@options.udf(help="UDF that will tell wich artifacts to move.")
@options.input(
help="Use this flag if you want to queue the input artifacts of the current process. Default is to queue the "
"output artifacts (analytes) of the process. "
)
@click.pass_context
def rerun_samples(ctx, workflow_id, stage_id, udf, process_types):
def rerun_samples(
ctx, workflow_id: str, stage_id: str, udf: str, process_types: List[str], input: bool
):
"""Script to requeue samples for sequencing."""
process = ctx.obj["process"]
lims = ctx.obj["lims"]
artifacts = get_artifacts(process, False)
artifacts = get_artifacts(process=process, input=input)
rerun_arts = filter_artifacts(artifacts, udf, True)
if rerun_arts:
try:
Expand Down
10 changes: 8 additions & 2 deletions cg_lims/EPPs/udf/calculate/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#!/usr/bin/env python

import click
from cg_lims.EPPs.udf.calculate.calculate_aliquot_volume import calculate_aliquot_volume
from cg_lims.EPPs.udf.calculate.calculate_amount_ng import calculate_amount_ng
from cg_lims.EPPs.udf.calculate.calculate_amount_ng_fmol import calculate_amount_ng_fmol
from cg_lims.EPPs.udf.calculate.calculate_average_size_and_set_qc import (
calculate_average_size_and_set_qc,
)
Expand All @@ -19,10 +21,11 @@
from cg_lims.EPPs.udf.calculate.molar_concentration import molar_concentration
from cg_lims.EPPs.udf.calculate.novaseq_x_denaturation import novaseq_x_denaturation
from cg_lims.EPPs.udf.calculate.novaseq_x_volumes import novaseq_x_volumes
from cg_lims.EPPs.udf.calculate.ont_aliquot_volume import ont_aliquot_volume
from cg_lims.EPPs.udf.calculate.ont_sequencing_reload import ont_available_sequencing_reload
from cg_lims.EPPs.udf.calculate.pool_normalization import pool_normalization
from cg_lims.EPPs.udf.calculate.sum_missing_reads_in_pool import missing_reads_in_pool
from cg_lims.EPPs.udf.calculate.twist_aliquot_amount import twist_aliquot_amount
from cg_lims.EPPs.udf.calculate.twist_aliquot_volume import twist_aliquot_volume
from cg_lims.EPPs.udf.calculate.twist_get_volumes_from_buffer import get_volumes_from_buffer

# commands
Expand All @@ -39,11 +42,12 @@ def calculate(ctx):

calculate.add_command(twist_pool)
calculate.add_command(twist_aliquot_amount)
calculate.add_command(twist_aliquot_volume)
calculate.add_command(calculate_aliquot_volume)
calculate.add_command(twist_qc_amount)
calculate.add_command(get_volumes_from_buffer)
calculate.add_command(get_missing_reads)
calculate.add_command(calculate_amount_ng)
calculate.add_command(calculate_amount_ng_fmol)
calculate.add_command(volume_water)
calculate.add_command(molar_concentration)
calculate.add_command(calculate_beads)
Expand All @@ -56,3 +60,5 @@ def calculate(ctx):
calculate.add_command(novaseq_x_volumes)
calculate.add_command(pool_normalization)
calculate.add_command(novaseq_x_denaturation)
calculate.add_command(ont_aliquot_volume)
calculate.add_command(ont_available_sequencing_reload)
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ def calculate_volumes(artifacts: List[Artifact], process: Process):

@click.command()
@click.pass_context
def twist_aliquot_volume(ctx):
"""Calculates amount needed for samples."""
def calculate_aliquot_volume(ctx):
"""Calculates aliquot volumes needed for samples."""

LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")

Expand Down
Loading
Loading