Fix `fix` subcommand arguments (make --sh_file optional, register --alter_sample_id)

SMD-Bioinformatics-Lund · Jul 23, 2024 · f429e6d
1 parent 8b2ef63
commit f429e6d
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 7 deletions.
diff --git a/jasentool/cli.py b/jasentool/cli.py
@@ -43,7 +43,7 @@ def __csv_file(group, required, help):
 
 def __sh_file(group, required, help):
     """Add sh_file argument to group"""
-    group.add_argument('--sh_file', required=required, help=help)
+    group.add_argument('--sh_file', required=required, default=None, help=help)
 
 def __bam_file(group, required):
     """Add bam_file argument to group"""
@@ -268,13 +268,14 @@ def get_main_parser():
     with subparser(sub_parsers, 'fix', 'Fix bjorn microbiology csv file') as parser:
         with arg_group(parser, 'required named arguments') as group:
             __csv_file(group, required=True, help='path to bjorn csv file')
-            __sh_file(group, required=True, help='path to bjorn sh file')
             __output_file(group, required=True, help='path to fixed output csv file')
         with arg_group(parser, 'optional arguments') as group:
+            __sh_file(group, required=False, help='path to bjorn sh file')
             __remote_dir(group, required=False)
             __remote_hostname(group, required=False)
             __remote(group, required=False)
             __auto_start(group, required=False)
+            __alter_sample_id(group, required=False)
             __help(group)
 
     with subparser(sub_parsers, 'converge', 'Converge TB mutation catalogues') as parser:

diff --git a/jasentool/fix.py b/jasentool/fix.py
@@ -14,8 +14,9 @@ def fix_csv(input_file, output_fpath, alter_sample_id):
         with open(input_file, 'r', encoding="utf-8") as csvfile:
             samples = pd.read_csv(csvfile)
             samples.insert(2, 'sample_name', samples['id'])
-            samples['id'] = str(samples['clarity_sample_id'].str.lower() + "_" + samples['sequencing_run'].str.lower()) if alter_sample_id else samples['id']
-            samples['assay'] = samples['species']
+            samples['id'] = samples['clarity_sample_id'].str.lower() + "_" + samples['sequencing_run'].str.lower() if alter_sample_id else samples['id']
+            if "species" in samples.columns:
+                samples['assay'] = samples['species']
             for assay, df_assay in samples.groupby('assay'):
                 out_fpath = f'{os.path.splitext(output_fpath)[0]}_{assay}.csv'
                 df_assay.to_csv(out_fpath, encoding='utf-8', index=False)

diff --git a/jasentool/main.py b/jasentool/main.py
@@ -127,7 +127,7 @@ def fix(self, options):
         utils = Utils()
         fix = Fix()
         csv_files, assays = fix.fix_csv(options.csv_file, options.output_file, options.alter_sample_id)
-        batch_files = fix.fix_sh(options.sh_file, options.output_file, assays)
+        batch_files = fix.fix_sh(options.sh_file, options.output_file, assays) if options.sh_file else options.sh_file
         if (options.remote or options.auto_start) and batch_files:
             utils.copy_batch_and_csv_files(batch_files, csv_files, options.remote_dir, options.remote_hostname, options.auto_start or options.remote)
             if options.auto_start:

diff --git a/jasentool_cronjob.sh b/jasentool_cronjob.sh
@@ -4,8 +4,8 @@ if [ "$1" == "missing" ]; then
   current_date=$(date +"%y%m%d")
   conda run -n jasentool jasentool missing --db_name cgviz --db_collection sample --analysis_dir /fs1/results_dev/jasen/saureus/analysis_result/ --restore_dir /fs1/ryan/pipelines/jasen/reruns/seqdata/ --restore_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.sh -o /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.csv
   seqrunid=$(head -2 /data/tmp/multi_microbiology.csv | tail -1 | cut -d',' -f7 | cut -d'/' -f5)
-  conda run -n jasentool jasentool fix --csv_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.csv --sh_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start
+  conda run -n jasentool jasentool fix --csv_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.csv --sh_file /data/bnf/dev/ryan/pipelines/jasen/reruns/saureus_${current_date}.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start --alter_sample_id
 else
   seqrunid=$(head -2 /data/tmp/multi_microbiology.csv | tail -1 | cut -d',' -f7 | cut -d'/' -f5)
-  conda run -n jasentool jasentool fix --csv_file /data/tmp/multi_microbiology.csv --sh_file /data/tmp/multi_microbiology.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start
+  conda run -n jasentool jasentool fix --csv_file /data/tmp/multi_microbiology.csv --sh_file /data/tmp/multi_microbiology.sh -o ${seqrunid}_jasen.csv --remote_dir /fs1/ryan/pipelines/jasen/bjorn/ --remote --auto-start --alter_sample_id
 fi