From 04b00fb6037084a73b766d7aff3fee7da2be98d7 Mon Sep 17 00:00:00 2001 From: RachelDuffin Date: Thu, 11 Jul 2024 17:30:57 +0100 Subject: [PATCH 1/3] Add dry run = False --- wscleaner/__main__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/wscleaner/__main__.py b/wscleaner/__main__.py index 29a99425..ec8529a8 100644 --- a/wscleaner/__main__.py +++ b/wscleaner/__main__.py @@ -61,6 +61,9 @@ def get_arguments(): # no directories are deleted by the runfolder manager if parsed_args.dry_run or BRANCH != "main": dry_run = True # Protects against deleting the test folders (!!) +else: + dry_run = False + RFM = RunFolderManager( dry_run=dry_run, From a86e1d86729b3e980e4eeecf841bd3c0d22a5649 Mon Sep 17 00:00:00 2001 From: George Doyle Date: Tue, 28 Jan 2025 12:12:01 +0000 Subject: [PATCH 2/3] Gzip changes --- config/log_msgs_config.py | 4 ++-- demultiplex/demultiplex.py | 3 ++- toolbox/toolbox.py | 47 ++++++++++++++++++++++++++++++-------- wscleaner/__main__.py | 2 +- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/config/log_msgs_config.py b/config/log_msgs_config.py index 9a8749a2..816b704a 100755 --- a/config/log_msgs_config.py +++ b/config/log_msgs_config.py @@ -43,8 +43,8 @@ "pipelines for the same run. Supported pipelines: %s" ), "ss_missing": "SampleSheet is missing and is required for sample name parsing", - "fastq_valid": "Gzip --test determined that the fastq is valid: %s", - "fastq_invalid": "Gzip --test determined that the fastq is not valid: %s. Stdout: %s. Stderr: %s", + "fastq_valid": "Gzip testing determined that the fastq is valid: %s", + "fastq_invalid": "Gzip testing determined that the fastq is not valid: %s. Error: %s", "demux_success": "Demultiplexing was successful for the run with all fastqs valid", "wes_batch_nos_identified": "WES batch numbers %s identified", "wes_batch_nos_missing": "WES batch numbers missing. Check for errors in the sample names. Script exited", diff --git a/demultiplex/demultiplex.py b/demultiplex/demultiplex.py index 7bea2824..cd221427 100755 --- a/demultiplex/demultiplex.py +++ b/demultiplex/demultiplex.py @@ -766,7 +766,8 @@ def run_demultiplexing(self) -> Optional[bool]: # Runs bcl2fastq2 and checks if completed successfully # Bcl2fastq2 returncode 0 upon success. Outputs info logs to stderr out, err, returncode = execute_subprocess_command( - self.bcl2fastq2_cmd, + #self.bcl2fastq2_cmd, + self.cluster_density_cmd, self.demux_rf_logger, ) if returncode == 0: diff --git a/toolbox/toolbox.py b/toolbox/toolbox.py index 6ad0ede5..2c3c96fb 100755 --- a/toolbox/toolbox.py +++ b/toolbox/toolbox.py @@ -24,6 +24,8 @@ from typing import Union, Optional from config.ad_config import ToolboxConfig from ad_logger.ad_logger import RunfolderLoggers +import gzip +import zlib def get_credential(file: str) -> None: @@ -304,6 +306,34 @@ def get_samplename_dict( logger.error(logger.log_msgs["ss_missing"]) +def validate_fastq_gzip(file_path): + """Fast gzip validation by checking header, footer, and partial decompression""" + try: + # Check compressed file header (magic number check) + with open(file_path, 'rb') as f: + magic = f.read(2) + if magic != b'\x1f\x8b': + return False, f"Invalid gzip magic bytes: {magic.hex()}" + + # Check footer (last 4 bytes for ISIZE) + f.seek(-4, 2) + isize = int.from_bytes(f.read(4), 'little') + if isize == 0: + return False, "Invalid zero uncompressed size" + + # Quick decompression check of first block + with gzip.open(file_path, 'rb') as f: + # Only read first 1KB of decompressed data + f.read(1024) + + return True, None + + except (OSError, EOFError, zlib.error) as e: + return False, f"Validation error: {str(e)}" + except Exception as e: + return False, f"Unexpected error: {str(e)}" + + def validate_fastqs(fastq_dir_path: str, logger: logging.Logger) -> Optional[bool]: """ Validate the created fastqs in the BaseCalls directory and log success @@ -317,25 +347,24 @@ def validate_fastqs(fastq_dir_path: str, logger: logging.Logger) -> Optional[boo returncodes = [] for fastq in fastqs: - out, err, returncode = execute_subprocess_command( - f"gzip --test {os.path.join(fastq_dir_path, fastq)}", - logger, - ) - returncodes.append(returncode) - if returncode == 0: + full_path = os.path.join(fastq_dir_path, fastq) + is_valid, error_msg = validate_fastq_gzip(full_path) + + if is_valid: logger.info( logger.log_msgs["fastq_valid"], fastq, ) + returncodes.append(True) else: logger.error( logger.log_msgs["fastq_invalid"], fastq, - out, - err, + error_msg, ) + returncodes.append(False) - if all(code == 0 for code in returncodes): + if all(returncodes): logger.info(logger.log_msgs["demux_success"]) return True diff --git a/wscleaner/__main__.py b/wscleaner/__main__.py index ec8529a8..c1c7a67f 100644 --- a/wscleaner/__main__.py +++ b/wscleaner/__main__.py @@ -40,7 +40,7 @@ def get_arguments(): "--min-age", help="The age (days) a runfolder must be to be deleted", type=int, - default=14, + default=-1, ) parser.add_argument( "-l", From 4d0ecb7f0054becf3ce5aa8c721858affb434971 Mon Sep 17 00:00:00 2001 From: George Doyle Date: Tue, 28 Jan 2025 12:13:58 +0000 Subject: [PATCH 3/3] Rolling back test changes --- demultiplex/demultiplex.py | 3 +-- wscleaner/__main__.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/demultiplex/demultiplex.py b/demultiplex/demultiplex.py index cd221427..7bea2824 100755 --- a/demultiplex/demultiplex.py +++ b/demultiplex/demultiplex.py @@ -766,8 +766,7 @@ def run_demultiplexing(self) -> Optional[bool]: # Runs bcl2fastq2 and checks if completed successfully # Bcl2fastq2 returncode 0 upon success. Outputs info logs to stderr out, err, returncode = execute_subprocess_command( - #self.bcl2fastq2_cmd, - self.cluster_density_cmd, + self.bcl2fastq2_cmd, self.demux_rf_logger, ) if returncode == 0: diff --git a/wscleaner/__main__.py b/wscleaner/__main__.py index c1c7a67f..ec8529a8 100644 --- a/wscleaner/__main__.py +++ b/wscleaner/__main__.py @@ -40,7 +40,7 @@ def get_arguments(): "--min-age", help="The age (days) a runfolder must be to be deleted", type=int, - default=-1, + default=14, ) parser.add_argument( "-l",