diff --git a/cg_lims/EPPs/udf/calculate/base.py b/cg_lims/EPPs/udf/calculate/base.py
index ce2e5a69..d935d162 100644
--- a/cg_lims/EPPs/udf/calculate/base.py
+++ b/cg_lims/EPPs/udf/calculate/base.py
@@ -14,6 +14,7 @@
 from cg_lims.EPPs.udf.calculate.calculate_resuspension_buffer_volumes import (
     calculate_resuspension_buffer_volume,
 )
+from cg_lims.EPPs.udf.calculate.calculate_saphyr_concentration import calculate_saphyr_concentration
 from cg_lims.EPPs.udf.calculate.calculate_water import volume_water
 from cg_lims.EPPs.udf.calculate.calculate_water_volume_rna import calculate_water_volume_rna
 from cg_lims.EPPs.udf.calculate.get_missing_reads import get_missing_reads
@@ -60,5 +61,6 @@ def calculate(ctx):
 calculate.add_command(novaseq_x_volumes)
 calculate.add_command(pool_normalization)
 calculate.add_command(novaseq_x_denaturation)
+calculate.add_command(calculate_saphyr_concentration)
 calculate.add_command(ont_aliquot_volume)
 calculate.add_command(ont_available_sequencing_reload)
diff --git a/cg_lims/EPPs/udf/calculate/calculate_saphyr_concentration.py b/cg_lims/EPPs/udf/calculate/calculate_saphyr_concentration.py
new file mode 100644
index 00000000..ae5ae106
--- /dev/null
+++ b/cg_lims/EPPs/udf/calculate/calculate_saphyr_concentration.py
@@ -0,0 +1,107 @@
+import logging
+import sys
+from typing import List
+
+import click
+import numpy as np
+from cg_lims.exceptions import LimsError, MissingUDFsError
+from cg_lims.get.artifacts import get_artifacts
+from genologics.entities import Artifact
+
+LOG = logging.getLogger(__name__)
+
+# Names of the replicate concentration UDFs read from each measurement artifact.
+CONCENTRATION_UDFS = [
+    "Concentration 1 (ng/ul)",
+    "Concentration 2 (ng/ul)",
+    "Concentration 3 (ng/ul)",
+]
+
+
+def get_concentrations(artifact: Artifact) -> List[float]:
+    """Return the values of the three replicate concentration UDFs of an artifact.
+
+    A UDF that is not set on the artifact is returned as None.
+    """
+    return [artifact.udf.get(name) for name in CONCENTRATION_UDFS]
+
+
+def calculate_average_concentration(concentrations: List[float]) -> float:
+    """Return the arithmetic mean of the given concentrations."""
+    return float(np.mean(concentrations))
+
+
+def calculate_cv(concentrations: List[float]) -> float:
+    """Return the coefficient of variation of the given concentrations.
+
+    The value is the standard deviation divided by the average concentration.
+    np.std is called with its defaults, i.e. the population standard deviation.
+    """
+    average_concentration = calculate_average_concentration(concentrations=concentrations)
+    std_deviation = np.std(concentrations)
+    return float(std_deviation / average_concentration)
+
+
+def set_average_and_cv(artifact: Artifact) -> None:
+    """Calculate the average concentration and CV of an artifact and store them.
+
+    Updates the UDFs 'Average concentration (ng/ul)' and 'Coefficient of variation (CV)'
+    and saves the artifact.
+    """
+    concentrations = get_concentrations(artifact=artifact)
+    average_concentration = calculate_average_concentration(concentrations=concentrations)
+    coefficient_variation = calculate_cv(concentrations=concentrations)
+
+    artifact.udf["Average concentration (ng/ul)"] = average_concentration
+    artifact.udf["Coefficient of variation (CV)"] = coefficient_variation
+    artifact.put()
+
+
+def validate_udf_values(artifact: Artifact) -> bool:
+    """Check that all replicate concentrations of an artifact are set and positive.
+
+    Every missing, zero or negative concentration is logged together with the sample ID.
+    Returns False if at least one concentration is invalid, otherwise True.
+    """
+    output = True
+    for name in CONCENTRATION_UDFS:
+        value = artifact.udf.get(name)
+        # 'not value' covers both a missing UDF (None) and a concentration of zero.
+        if not value or value < 0:
+            output = False
+            LOG.warning(
+                f"Sample {artifact.samples[0].id} has an invalid concentration value for {name}. Skipping."
+            )
+    return output
+
+
+@click.command()
+@click.pass_context
+def calculate_saphyr_concentration(ctx: click.Context) -> None:
+    """Calculate and set the average concentration and coefficient of variation per artifact.
+
+    Artifacts with a missing, zero or negative concentration are skipped. The remaining
+    artifacts are still updated, after which the EPP exits with an error message.
+    """
+
+    LOG.info(f"Running {ctx.command_path} with params: {ctx.params}")
+    process = ctx.obj["process"]
+
+    try:
+        artifacts: List[Artifact] = get_artifacts(process=process, measurement=True)
+        failed_samples = 0
+        for artifact in artifacts:
+            if not validate_udf_values(artifact=artifact):
+                failed_samples += 1
+                continue
+            set_average_and_cv(artifact=artifact)
+        if failed_samples:
+            raise MissingUDFsError(
+                f"{failed_samples} samples have invalid concentration values (<= 0). See log for more information."
+            )
+        message = "The average concentration and coefficient of variance have been calculated for all samples."
+        LOG.info(message)
+        click.echo(message)
+    except LimsError as e:
+        LOG.error(e.message)
+        sys.exit(e.message)