Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New script: Saphyr average concentration and CV calculation #479

Merged
merged 6 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cg_lims/EPPs/udf/calculate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from cg_lims.EPPs.udf.calculate.calculate_resuspension_buffer_volumes import (
calculate_resuspension_buffer_volume,
)
from cg_lims.EPPs.udf.calculate.calculate_saphyr_concentration import calculate_saphyr_concentration
from cg_lims.EPPs.udf.calculate.calculate_water import volume_water
from cg_lims.EPPs.udf.calculate.calculate_water_volume_rna import calculate_water_volume_rna
from cg_lims.EPPs.udf.calculate.get_missing_reads import get_missing_reads
Expand Down Expand Up @@ -60,5 +61,6 @@ def calculate(ctx):
calculate.add_command(novaseq_x_volumes)
calculate.add_command(pool_normalization)
calculate.add_command(novaseq_x_denaturation)
calculate.add_command(calculate_saphyr_concentration)
calculate.add_command(ont_aliquot_volume)
calculate.add_command(ont_available_sequencing_reload)
92 changes: 92 additions & 0 deletions cg_lims/EPPs/udf/calculate/calculate_saphyr_concentration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import logging
import sys
from typing import List

import click
import numpy as np
from cg_lims.exceptions import LimsError, MissingUDFsError
from cg_lims.get.artifacts import get_artifacts
from genologics.entities import Artifact

LOG = logging.getLogger(__name__)


def get_concentrations(artifact: Artifact) -> List[float]:
    """Return the three replicate concentrations stored on an artifact.

    The values are read, in order, from the UDFs "Concentration 1 (ng/ul)",
    "Concentration 2 (ng/ul)" and "Concentration 3 (ng/ul)".

    A UDF that is not set is passed through as whatever ``artifact.udf.get``
    returns for a missing key (presumably None), so the values should be
    validated before any arithmetic is done on them.
    """
    udf_names = (
        "Concentration 1 (ng/ul)",
        "Concentration 2 (ng/ul)",
        "Concentration 3 (ng/ul)",
    )
    return [artifact.udf.get(name) for name in udf_names]


def calculate_average_concentration(concentrations: List[float]) -> float:
    """Compute the arithmetic mean of the given concentrations as a built-in float."""
    mean_value = np.mean(concentrations)
    # np.mean yields a NumPy scalar; hand back a plain Python float instead.
    return float(mean_value)


def calculate_cv(concentrations: List[float]) -> float:
    """Return the coefficient of variation of the concentrations.

    The coefficient of variation is the standard deviation divided by the
    mean. Both are computed here directly from the given values; the standard
    deviation is the one returned by ``np.std`` with its default arguments.

    The result is converted to a built-in float so that it has the same type
    as the value returned by calculate_average_concentration.

    No guard is applied against a zero mean; callers are expected to pass
    values whose average is non-zero.
    """
    average_concentration = np.mean(concentrations)
    std_deviation = np.std(concentrations)
    return float(std_deviation / average_concentration)


def set_average_and_cv(artifact: Artifact) -> None:
    """Compute and store the average concentration and CV for one artifact.

    Reads the replicate concentrations with get_concentrations, derives the
    average (calculate_average_concentration) and the coefficient of variation
    (calculate_cv), writes them to the UDFs "Average concentration (ng/ul)"
    and "Coefficient of variation (CV)", and then calls ``artifact.put()``.
    """
    replicates = get_concentrations(artifact=artifact)

    # Both values are computed before any UDF is touched, so a failure in
    # either calculation leaves the artifact unmodified.
    new_values = {
        "Average concentration (ng/ul)": calculate_average_concentration(
            concentrations=replicates
        ),
        "Coefficient of variation (CV)": calculate_cv(concentrations=replicates),
    }
    for udf_name, value in new_values.items():
        artifact.udf[udf_name] = value
    artifact.put()


def validate_udf_values(artifact: Artifact) -> bool:
    """Check that all three replicate concentration UDFs hold usable values.

    A value is rejected when it is falsy (for example unset or zero) or
    negative. Every rejected UDF is written to the log together with the ID of
    the artifact's first sample. Returns True only if no UDF was rejected.
    """
    all_valid = True
    for name in (
        "Concentration 1 (ng/ul)",
        "Concentration 2 (ng/ul)",
        "Concentration 3 (ng/ul)",
    ):
        value = artifact.udf.get(name)
        if value and not value < 0:
            continue
        # Keep checking the remaining UDFs so each invalid one gets logged.
        all_valid = False
        LOG.info(
            f"Sample {artifact.samples[0].id} has an invalid concentration value for {name}. Skipping."
        )
    return all_valid


@click.command()
@click.pass_context
def calculate_saphyr_concentration(ctx) -> None:
    """Calculates and sets the average concentration and coefficient of variance based on three given concentrations.
    Returns a message if this worked well, and if there were negative/no/zero concentration values, there's an error message for this
    """
    # The docstring above is left untouched because click uses it as help text.

    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
    current_process = ctx.obj["process"]

    try:
        artifacts: List[Artifact] = get_artifacts(process=current_process, measurement=True)
        failed_samples = 0
        for artifact in artifacts:
            # Artifacts with unusable concentrations are counted and left
            # unchanged; all others get their average and CV written.
            if validate_udf_values(artifact=artifact):
                set_average_and_cv(artifact=artifact)
            else:
                failed_samples += 1

        # Report failures only after every artifact has been processed.
        if failed_samples:
            raise MissingUDFsError(
                f"{failed_samples} samples have invalid concentration values (<= 0). See log for more information."
            )

        message = "The average concentration and coefficient of variance have been calculated for all samples."
        LOG.info(message)
        click.echo(message)
    except LimsError as error:
        LOG.error(error.message)
        sys.exit(error.message)
Loading