From 6a70c3175ee0ce544ce27f143554add1416cdd50 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Tue, 27 Jun 2023 08:34:52 +0200 Subject: [PATCH 01/12] fix val output --- modules/cellranger2multiqc/count/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cellranger2multiqc/count/main.nf b/modules/cellranger2multiqc/count/main.nf index cd7c239..04fcded 100644 --- a/modules/cellranger2multiqc/count/main.nf +++ b/modules/cellranger2multiqc/count/main.nf @@ -4,7 +4,7 @@ process CELLRANGER_COUNT_TO_MULTIQC{ val project_id val pipeline output: - tuple val(project_id), emit: project_id + val(project_id), emit: project_id script: number_of_samples = sample_id.size() -1 From 8292954db10d5731d90ded48c62301318c7b4590 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Tue, 27 Jun 2023 08:36:24 +0200 Subject: [PATCH 02/12] fix module output --- modules/cellranger2multiqc/multi/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cellranger2multiqc/multi/main.nf b/modules/cellranger2multiqc/multi/main.nf index fc06504..fb04a0c 100644 --- a/modules/cellranger2multiqc/multi/main.nf +++ b/modules/cellranger2multiqc/multi/main.nf @@ -4,7 +4,7 @@ process CELLRANGER_MULTI_TO_MULTIQC{ val project_id output: val project_id, emit: project_id - tuple path("*_mqc.yaml") + path("*_mqc.yaml") script: """ # Create the multiqc folder From b55b62beee4979d9f5d2c63081d2097882541657 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Tue, 27 Jun 2023 08:37:45 +0200 Subject: [PATCH 03/12] fixed path to py script --- modules/cellranger2multiqc/multi/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cellranger2multiqc/multi/main.nf b/modules/cellranger2multiqc/multi/main.nf index fb04a0c..3bef68b 100644 --- a/modules/cellranger2multiqc/multi/main.nf +++ b/modules/cellranger2multiqc/multi/main.nf @@ -28,7 +28,7 @@ process CELLRANGER_MULTI_TO_MULTIQC{ # Convert the cellranger multi csv output to json for file in $params.outdir/$project_id/2_multi/*/outs/per_sample_outs/* do - python multimetric2mqc.py \$file/metrics_summary.csv \$(basename \$file) + python $projectDir/bin/multimetric2mqc.py \$file/metrics_summary.csv \$(basename \$file) done # Populate the multiqc config files From bd93bba7b7382837b9ce863954d33bfbc50661d0 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Tue, 27 Jun 2023 15:32:27 +0200 Subject: [PATCH 04/12] split command substitution and expansion --- modules/cellranger2multiqc/multi/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/cellranger2multiqc/multi/main.nf b/modules/cellranger2multiqc/multi/main.nf index 3bef68b..c5f0bb3 100644 --- a/modules/cellranger2multiqc/multi/main.nf +++ b/modules/cellranger2multiqc/multi/main.nf @@ -36,7 +36,8 @@ process CELLRANGER_MULTI_TO_MULTIQC{ do for json in *\${file_name}.json do - json_name = \"\${\$(basename \$json)%_\${file_name}.json}\" + json_name=\"\$(basename \"\$json\" .json)\" + json_name=\"\${json_name%_\${file_name}}\" echo \"\${json_name}: \$(cat \${json})\" >> \${file_name}_mqc.yaml done done From a70ec1b2bbad7f13c8bd2ff80f095c4d2b4f656f Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 28 Jun 2023 09:41:36 +0200 Subject: [PATCH 05/12] Only uses Library and Cells for multi --- bin/multimetric2mqc.py | 2 -- modules/cellranger2multiqc/multi/main.nf | 20 ++++++-------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/bin/multimetric2mqc.py b/bin/multimetric2mqc.py index 7c4b44e..a25c8a4 100644 --- a/bin/multimetric2mqc.py +++ b/bin/multimetric2mqc.py @@ -41,5 +41,3 @@ json.dump(cells, f) with open('{}_library.json'.format(sample_name), 'w') as f: json.dump(library, f) -with open('{}_other.json'.format(sample_name), 'w') as f: - json.dump(other, f) diff --git a/modules/cellranger2multiqc/multi/main.nf b/modules/cellranger2multiqc/multi/main.nf index c5f0bb3..7c9d506 100644 --- a/modules/cellranger2multiqc/multi/main.nf +++ b/modules/cellranger2multiqc/multi/main.nf @@ -11,7 +11,7 @@ process CELLRANGER_MULTI_TO_MULTIQC{ mkdir -p $params.outdir/$project_id/1_qc/multiqc # Create the multiqc config files - for file_name in cells library other + for file_name in cells library do echo \"\"\" id: \"single_cell_workflows_table\" @@ -19,10 +19,9 @@ process CELLRANGER_MULTI_TO_MULTIQC{ description: \"This table consists of the data gathered from cellranger output \" plot_type: \"table\" pconfig: - id: \"single_cell_workflows_table \$file_name \" - title: \"Single Cell Workflows \$file_name Stats\" - data: - \"\"\" > \${file_name}_mqc.yaml + id: \"single_cell_workflows_table \$file_name \" + title: \"Single Cell Workflows \$file_name Stats\" + data:\"\"\" > \${file_name}_mqc.yaml done # Convert the cellranger multi csv output to json @@ -32,22 +31,15 @@ process CELLRANGER_MULTI_TO_MULTIQC{ done # Populate the multiqc config files - for file_name in cells library other + for file_name in cells library do for json in *\${file_name}.json do json_name=\"\$(basename \"\$json\" .json)\" json_name=\"\${json_name%_\${file_name}}\" - echo \"\${json_name}: \$(cat \${json})\" >> \${file_name}_mqc.yaml + echo \" \${json_name}: \$(cat \${json})\" >> \${file_name}_mqc.yaml done done """ - - stub: - """ - touch cells_mqc.yaml - touch library_mqc.yaml - touch other_mqc.yaml - """ } \ No newline at end of file From b6e112c031e6146e347d0081585d7d6b5bcbfb0e Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 28 Jun 2023 09:41:44 +0200 Subject: [PATCH 06/12] added example data --- examples/CTG_SampleSheet.csv | 3 ++ modules/cellranger/multi/main.nf | 85 ++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 4 deletions(-) diff --git a/examples/CTG_SampleSheet.csv b/examples/CTG_SampleSheet.csv index 2f21196..b1e9edd 100644 --- a/examples/CTG_SampleSheet.csv +++ b/examples/CTG_SampleSheet.csv @@ -28,6 +28,9 @@ sample-15,human,project8,n,n,n,atac,scatac-10x,n,n,n,n,n sample-16,human,project9,n,n,4,atac,scarc-10x,n,n,n,n,n sample-17,human,project9,n,n,4,gex,scarc-10x,n,n,n,n,n sample-18,human,project10,n,n,n,gex,scvisium-10x,cytaimage,darkimage,image,slide,slide_area +sample-19,human,project7,n,n,5,gex,scmulti-10x,n,n,n,n,n +sample-20,human,project7,n,n,5,tcr,scmulti-10x,n,n,n,n,n +sample-21,human,project7,n,n,5,bcr,scmulti-10x,n,n,n,n,n [FlexConfig_Data],,,,,,,, sample_id,probe_barcode,Sample_Source,,,,,, sample1,BC001|BC002,sample_7,,,,,, diff --git a/modules/cellranger/multi/main.nf b/modules/cellranger/multi/main.nf index c2a78dc..5549308 100644 --- a/modules/cellranger/multi/main.nf +++ b/modules/cellranger/multi/main.nf @@ -21,9 +21,86 @@ process MULTI { """ stub: """ - mkdir -p $sample_id/outs - touch $sample_id/outs/metrics_summary.csv - touch $sample_id/outs/web_summary.html - touch $sample_id/outs/cloupe.cloupe + sample_dir=$sample_id/outs/per_sample_outs/$sample_id + mkdir -p \$sample_dir + touch \$sample_dir/web_summary.html + touch \$sample_dir/cloupe.cloupe + echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value +Cells,Gene Expression,,,Cells,"5,727" +Cells,Gene Expression,,,Confidently mapped reads in cells,84.20% +Cells,Gene Expression,,,Median UMI counts per cell,"1,585" +Cells,Gene Expression,,,Median genes per cell,744 +Cells,Gene Expression,,,Median reads per cell,"16,412" +Cells,Gene Expression,,,Total genes detected,"14,371" +Cells,VDJ B,,,Cells with productive IGH contig,78.52% +Cells,VDJ B,,,Cells with productive IGK contig,70.10% +Cells,VDJ B,,,Cells with productive IGL contig,6.19% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50% +Cells,VDJ B,,,Cells with productive V-J spanning pair,54.47% +Cells,VDJ B,,,Estimated number of cells,582 +Cells,VDJ B,,,Median IGH UMIs per Cell,9 +Cells,VDJ B,,,Median IGK UMIs per Cell,10 +Cells,VDJ B,,,Median IGL UMIs per Cell,0 +Cells,VDJ B,,,Number of cells with productive V-J spanning pair,317 +Cells,VDJ B,,,Paired clonotype diversity,72.28 +Cells,VDJ T,,,Cells with productive TRA contig,79.03% +Cells,VDJ T,,,Cells with productive TRB contig,97.58% +Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61% +Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% +Cells,VDJ T,,,Estimated number of cells,124 +Cells,VDJ T,,,Median TRA UMIs per Cell,3 +Cells,VDJ T,,,Median TRB UMIs per Cell,7 +Cells,VDJ T,,,Number of cells with productive V-J spanning pair,95 +Cells,VDJ T,,,Paired clonotype diversity,35.11 +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 barcodes,95.8% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped antisense,2.56% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped reads in cells,84.20% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to exonic regions,21.93% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to genome,26.08% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intergenic regions,2.50% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intronic regions,1.65% +Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to transcriptome,20.79% +Library,Gene Expression,Physical library ID,GEX_1,Estimated number of cells,"5,727" +Library,Gene Expression,Physical library ID,GEX_1,Mapped to genome,36.49% +Library,Gene Expression,Physical library ID,GEX_1,Mean reads per cell,"30,115" +Library,Gene Expression,Physical library ID,GEX_1,Number of reads,"172,468,563" +Library,Gene Expression,Physical library ID,GEX_1,Number of reads in the library,"172,468,563" +Library,Gene Expression,Physical library ID,GEX_1,Sequencing saturation,50.84% +Library,Gene Expression,Physical library ID,GEX_1,Valid UMIs,99.85% +Library,Gene Expression,Physical library ID,GEX_1,Valid barcodes,92.38% +Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of reads,"30,607,267" +Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of short reads skipped,0 +Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 RNA read,92.7% +Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 UMI,94.5% +Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 barcodes,95.6% +Library,VDJ B,Physical library ID,VDJB_1,Estimated number of cells,582 +Library,VDJ B,Physical library ID,VDJB_1,Fraction reads in cells,83.31% +Library,VDJ B,Physical library ID,VDJB_1,Mean reads per cell,"52,590" +Library,VDJ B,Physical library ID,VDJB_1,Mean used reads per cell,"9,638" +Library,VDJ B,Physical library ID,VDJB_1,Number of reads,"30,607,267" +Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGH,22.57% +Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGK,50.50% +Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGL,14.06% +Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to any V(D)J gene,87.14% +Library,VDJ B,Physical library ID,VDJB_1,Valid barcodes,95.44% +Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of reads,"39,674,884" +Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of short reads skipped,0 +Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 RNA read,91.5% +Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 UMI,94.7% +Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 barcodes,95.4% +Library,VDJ T,Physical library ID,VDJT_1,Estimated number of cells,124 +Library,VDJ T,Physical library ID,VDJT_1,Fraction reads in cells,15.60% +Library,VDJ T,Physical library ID,VDJT_1,Mean reads per cell,"319,959" +Library,VDJ T,Physical library ID,VDJT_1,Mean used reads per cell,"35,979" +Library,VDJ T,Physical library ID,VDJT_1,Number of reads,"39,674,884" +Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRA,7.91% +Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRB,24.14% +Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to any V(D)J gene,32.31% +Library,VDJ T,Physical library ID,VDJT_1,Valid barcodes,79.69%\'\'\' > \$sample_dir/metrics_summary.csv """ } \ No newline at end of file From 72c34bb8bbdcc98cfbd9ec625c21d930cfee0b29 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 28 Jun 2023 10:30:51 +0200 Subject: [PATCH 07/12] fixed typo --- modules/multi_config/gen_multi_config/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/multi_config/gen_multi_config/main.nf b/modules/multi_config/gen_multi_config/main.nf index 59de1e2..458bbd3 100644 --- a/modules/multi_config/gen_multi_config/main.nf +++ b/modules/multi_config/gen_multi_config/main.nf @@ -17,7 +17,7 @@ process GENERATE_MULTI_CONFIG{ gex_ref=params.human } else if (sample_species == 'mouse') { vdj_ref=params.mouse_vdj - gex_ref=params.human + gex_ref=params.mouse } if (libtype.contains('bcr') || libtype.contains('tcr')){ vdj+='[vdj]\n' From 96157278bb164a3ff3ed167556af0e1dc19956b0 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 28 Jun 2023 12:05:45 +0200 Subject: [PATCH 08/12] changed log path --- nextflow.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 29ff03d..2fb1dc9 100755 --- a/nextflow.config +++ b/nextflow.config @@ -43,8 +43,7 @@ params { // runOptions intron_mode="true" - // nextflow_log = "/projects/fs1/shared/Logs/nextflow.log" - nextflow_log = "/Users/jacobkarlstrom/Projects/singleCellWorkflows/nextflow.log" + nextflow_log = "/projects/fs1/shared/Logs/nextflow.log" } From 1fe1a7427d821ea0a4ed9fe135e4267f00d679e5 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 28 Jun 2023 12:09:04 +0200 Subject: [PATCH 09/12] changed error message --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index ceae941..b74518a 100755 --- a/main.nf +++ b/main.nf @@ -53,8 +53,8 @@ workflow { workflow.onComplete { if (workflow.success) { - writetofile("${new Date()} [Information] singleCellWorkflow completed successfully #") + writetofile("${new Date()} [Information] singleCellWorkflow $params.samplesheet completed successfully #") } else { - writetofile("${new Date()} [Critical] singleCellWorkflow failed. Error message: ${workflow.errorMessage} #") + writetofile("${new Date()} [Critical] singleCellWorkflow failed. $params.samplesheet #") } } \ No newline at end of file From 58b8836bfbb1cf71d38c5fa152077a3ecc64b980 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Thu, 29 Jun 2023 10:05:34 +0200 Subject: [PATCH 10/12] Now makes two tables --- modules/cellranger2multiqc/multi/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cellranger2multiqc/multi/main.nf b/modules/cellranger2multiqc/multi/main.nf index 7c9d506..1f96f5c 100644 --- a/modules/cellranger2multiqc/multi/main.nf +++ b/modules/cellranger2multiqc/multi/main.nf @@ -14,12 +14,12 @@ process CELLRANGER_MULTI_TO_MULTIQC{ for file_name in cells library do echo \"\"\" - id: \"single_cell_workflows_table\" + id: \"single_cell_workflows_table_\${file_name}\" section_name : \"Single Cell Workflows \$file_name Stats\" description: \"This table consists of the data gathered from cellranger output \" plot_type: \"table\" pconfig: - id: \"single_cell_workflows_table \$file_name \" + id: \"single_cell_workflows_table_\${file_name} \" title: \"Single Cell Workflows \$file_name Stats\" data:\"\"\" > \${file_name}_mqc.yaml done From 22da17de34a2e03779430758b26285f0944d775b Mon Sep 17 00:00:00 2001 From: Fattigman Date: Thu, 29 Jun 2023 11:02:34 +0200 Subject: [PATCH 11/12] generalized script --- modules/cellranger2multiqc/count/main.nf | 33 ++++++++++-------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/modules/cellranger2multiqc/count/main.nf b/modules/cellranger2multiqc/count/main.nf index 04fcded..886eab7 100644 --- a/modules/cellranger2multiqc/count/main.nf +++ b/modules/cellranger2multiqc/count/main.nf @@ -15,8 +15,6 @@ process CELLRANGER_COUNT_TO_MULTIQC{ summary="metrics_summary.csv" } """ - # Create the multiqc folder - mkdir -p $params.outdir/$project_id/1_qc/multiqc # Convert groovy variables into bash arrays sample_string=\$(echo $sample_id | tr -d '[]') @@ -24,24 +22,19 @@ process CELLRANGER_COUNT_TO_MULTIQC{ IFS=', ' read -ra sample_array <<< \"\$sample_string\" IFS=', ' read -ra project_array <<< \"\$project_string\" - data_section=\"\" - for i in {0..${number_of_samples}} - do - echo \"\"\" - id: \"single_cell_workflows_table\" - section_name : \"Single Cell Workflows Stats\" - description: \"This table consists of the data gathered from cellranger output \" - plot_type: \"table\" - pconfig: - id: \"single_cell_workflows_table\" - title: \"Single Cell Workflows Stats\" - data: - \"\"\" > $params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml - data_section+=\" \${sample_array[\$i]}: \$(cat $params.outdir/$project_id/2_count/\${sample_array[\$i]}/out/$summary | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\n \" - done - echo \"\"\" - \$data_section - \"\"\" >> $params.outdir/$project_id/1_qc/multiqc/multiqc_mqc.yaml + for i in {0..5}; do + if ! [ -f \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" ]; then + echo \"\"\"id: \"single_cell_workflows_table\" +section_name : \"Single Cell Workflows Stats\" +description: \"This table consists of the data gathered from cellranger output \" +plot_type: \"table\" +pconfig: + id: \"single_cell_workflows_table\" + title: \"Single Cell Workflows Stats\" +data:\"\"\" > \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" + fi + echo \" \${sample_array[\$i]}: $(cat $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/metrics_summary.csv | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\" >> "$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" +done """ stub: From fbe8025b8e2f583767a54ace3b1dcb518d0de14f Mon Sep 17 00:00:00 2001 From: Fattigman Date: Thu, 29 Jun 2023 11:23:53 +0200 Subject: [PATCH 12/12] generalized web packing --- modules/pack_websummaries/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/pack_websummaries/main.nf b/modules/pack_websummaries/main.nf index 19131c2..768fb1f 100644 --- a/modules/pack_websummaries/main.nf +++ b/modules/pack_websummaries/main.nf @@ -26,7 +26,9 @@ process PACK_WEBSUMMARIES{ # append the file path and the sample name to the array file_paths+=("$file") dir=$(dirname "$file") - dir=$(dirname "$dir") + if ! [ $(dirname "$dir") == "per_sample_output" ]; then + dir=$(dirname "$dir") + fi result=$(basename "$dir") sample_names+=("$result") echo $file $result