Merge pull request #3 from Clinical-Genomics-Lund/documentation-and-disclaimer-fixes

Documentation and disclaimer fixes
SMD-Bioinformatics-Lund · Jun 11, 2024 · ba0d57a · ba0d57a
2 parents dfe3eef + 3c81e99
commit ba0d57a
Show file tree

Hide file tree

Showing 3 changed files with 20 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,4 +1,13 @@
 # Jasentool: A mongodb validation tool for comparing pipeline outputs
+## Disclaimer: Jasentool was developed for the integration of [JASEN](https://github.com/Clinical-Genomics-Lund/jasentool) into [Clinical Genomics Lund](https://github.com/Clinical-Genomics-Lund). Jasentool's submethods are used as follows:
+* Mongodb commands: `find`, `insert`, `remove`.
+* Validate: Search old CGL bacterial pipeline database and compare results to JASEN results. 
+* Missing: Search old CGL bacterial pipeline database and find sample results missing from JASEN output dir.
+* Convert: Convert cgmlst.org target loci files to bed files.
+* Converge: Converge tuberculosis mutation catalogues in order to add FoHM and tbdb catalogues to the WHO catalogue.
+* Fix: Fix internal software's (bjorn's) generation of nextflow input csvs.
+* QC: Create post-alignment qc output.
+
 ## Dependencies (latest)
 * python=3.11
 * pymongo

diff --git a/jasentool/fix.py b/jasentool/fix.py
@@ -13,6 +13,8 @@ def fix_csv(input_file, output_fpath):
         out_fpaths = []
         with open(input_file, 'r', encoding="utf-8") as csvfile:
             samples = pd.read_csv(csvfile)
+            samples.insert(2, 'sample_name', samples['id'])
+            samples['id'] = samples['id'].str.lower() + "_" + samples['sequencing_run'].str.lower()
             samples['assay'] = samples['species']
             for assay, df_assay in samples.groupby('assay'):
                 out_fpath = f'{os.path.splitext(output_fpath)[0]}_{assay}.csv'

diff --git a/jasentool_cronjob.sh b/jasentool_cronjob.sh
@@ -1,4 +1,11 @@
 #!/bin/bash
-seqrunid=$(head -2 /data/tmp/multi_microbiology.csv | tail -1 | cut -d',' -f7 | cut -d'/' -f5)
 
-conda-exec -n jasen jasentool fix --csv_file /data/tmp/multi_microbiology.csv --sh_file /data/tmp/multi_microbiology.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start
+if [ "$1" == "missing" ]; then
+  current_date=$(date +"%y%m%d")
+  conda run -n jasentool jasentool missing --db_name cgviz --db_collection sample --analysis_dir /fs1/results_dev/jasen/saureus/analysis_result/ --restore_dir /fs1/ryan/pipelines/jasen/reruns/seqdata/ --restore_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.sh -o /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.csv
+  seqrunid=$(head -2 /data/tmp/multi_microbiology.csv | tail -1 | cut -d',' -f7 | cut -d'/' -f5)
+  conda run -n jasentool jasentool fix --csv_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.csv --sh_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start
+else
+  seqrunid=$(head -2 /data/tmp/multi_microbiology.csv | tail -1 | cut -d',' -f7 | cut -d'/' -f5)
+  conda run -n jasentool jasentool fix --csv_file /data/tmp/multi_microbiology.csv --sh_file /data/tmp/multi_microbiology.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start
+fi