diff --git a/bin/countmetric2mqc.py b/bin/countmetric2mqc.py
new file mode 100644
index 0000000..0a664c3
--- /dev/null
+++ b/bin/countmetric2mqc.py
@@ -0,0 +1,23 @@
+import csv
+import sys
+
+# Three positional arguments are required, so argv must hold four entries
+# (script name + 3); sys.argv[3] is read below.
+if len(sys.argv) < 4:
+    print('Usage: python countmetric2mqc.py file.csv sample_name output_mqc.yaml')
+    sys.exit(1)
+
+# Positional arguments: metrics CSV, sample name, mqc.yaml file to extend
+input_file = sys.argv[1]
+sample_name = sys.argv[2]
+mqc_yaml = sys.argv[3]
+# Read the header row and the first value row. csv.reader is used instead of
+# str.split(',') so quoted fields that contain commas (e.g. "5,727") stay
+# intact and every key remains aligned with its value.
+with open(input_file, 'r', newline='') as file:
+    reader = csv.reader(file)
+    keys, values = next(reader, []), next(reader, [])
+    data = {k.strip(): v.strip() for (k, v) in zip(keys, values)}
+# Appends to an already existing mqc.yaml file
+with open(mqc_yaml, 'a') as file:
+    file.write(f' {sample_name}: {data}\n')
\ No newline at end of file
diff --git a/examples/CTG_SampleSheet.csv b/examples/CTG_SampleSheet.csv
index b1e9edd..2a45cdf 100644
--- a/examples/CTG_SampleSheet.csv
+++ b/examples/CTG_SampleSheet.csv
@@ -1,13 +1,5 @@
 [Header],,,,,,,,
 IEMFileVersion,4,,,,,,,
-[Data],,,,,,,,
-Sample_ID,index,index2,Sample_Project,,,,,
-sample-1,SI-TT-A1,SI-TT-A1,project1,,,,,
-sample-2,SI-TT-A2,SI-TT-A2,project1,,,,,
-sample-3,SI-TT-A3,SI-TT-A3,project2,,,,,
-sample-4,SI-TT-A4,SI-TT-A4,project3,,,,,
-sample-5,SI-TT-A5,SI-TT-A5,project3,,,,,
-sample-6,SI-TT-A6,SI-TT-A6,project3,,,,,
 [10X_Data],,,,,,,,
 Sample_ID,Sample_Species,Sample_Project,force,agg,sample_pair,libtype,pipeline,cytaimage,darkimage,image,slide,slide_area
 sample-1,human,project1,n,n,n,gex,scrna-10x,n,n,n,n,n
@@ -31,6 +23,8 @@ sample-18,human,project10,n,n,n,gex,scvisium-10x,cytaimage,darkimage,image,slide
 sample-19,human,project7,n,n,5,gex,scmulti-10x,n,n,n,n,n
 sample-20,human,project7,n,n,5,tcr,scmulti-10x,n,n,n,n,n
 sample-21,human,project7,n,n,5,bcr,scmulti-10x,n,n,n,n,n
+sample-22,human,project9,n,n,6,atac,scarc-10x,n,n,n,n,n
+sample-23,human,project9,n,n,6,gex,scarc-10x,n,n,n,n,n
 [FlexConfig_Data],,,,,,,,
sample_id,probe_barcode,Sample_Source,,,,,, sample1,BC001|BC002,sample_7,,,,,, diff --git a/modules/cellranger/multi/main.nf b/modules/cellranger/multi/main.nf index 5549308..e3a4ff5 100644 --- a/modules/cellranger/multi/main.nf +++ b/modules/cellranger/multi/main.nf @@ -21,10 +21,14 @@ process MULTI { """ stub: """ - sample_dir=$sample_id/outs/per_sample_outs/$sample_id - mkdir -p \$sample_dir - touch \$sample_dir/web_summary.html - touch \$sample_dir/cloupe.cloupe + sample_dir_1=$sample_id/outs/per_sample_outs/${sample_id}_1 + sample_dir_2=$sample_id/outs/per_sample_outs/${sample_id}_2 + mkdir -p \$sample_dir_1 + mkdir -p \$sample_dir_2 + touch \$sample_dir_1/web_summary.html + touch \$sample_dir_2/web_summary.html + touch \$sample_dir_1/cloupe.cloupe + touch \$sample_dir_2/cloupe.cloupe echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value Cells,Gene Expression,,,Cells,"5,727" Cells,Gene Expression,,,Confidently mapped reads in cells,84.20% @@ -37,70 +41,37 @@ Cells,VDJ B,,,Cells with productive IGK contig,70.10% Cells,VDJ B,,,Cells with productive IGL contig,6.19% Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31% Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50% -Cells,VDJ B,,,Cells with productive V-J spanning pair,54.47% -Cells,VDJ B,,,Estimated number of cells,582 -Cells,VDJ B,,,Median IGH UMIs per Cell,9 -Cells,VDJ B,,,Median IGK UMIs per Cell,10 -Cells,VDJ B,,,Median IGL UMIs per Cell,0 -Cells,VDJ B,,,Number of cells with productive V-J spanning pair,317 Cells,VDJ B,,,Paired clonotype diversity,72.28 Cells,VDJ T,,,Cells with productive TRA contig,79.03% Cells,VDJ T,,,Cells with productive TRB contig,97.58% Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61% Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% -Cells,VDJ T,,,Estimated number of cells,124 -Cells,VDJ T,,,Median TRA UMIs per Cell,3 -Cells,VDJ T,,,Median TRB UMIs per Cell,7 -Cells,VDJ 
T,,,Number of cells with productive V-J spanning pair,95 -Cells,VDJ T,,,Paired clonotype diversity,35.11 Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% -Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% -Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 barcodes,95.8% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped antisense,2.56% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped reads in cells,84.20% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to exonic regions,21.93% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to genome,26.08% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intergenic regions,2.50% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intronic regions,1.65% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to transcriptome,20.79% -Library,Gene Expression,Physical library ID,GEX_1,Estimated number of cells,"5,727" -Library,Gene Expression,Physical library ID,GEX_1,Mapped to genome,36.49% -Library,Gene Expression,Physical library ID,GEX_1,Mean reads per cell,"30,115" -Library,Gene Expression,Physical library ID,GEX_1,Number of reads,"172,468,563" -Library,Gene Expression,Physical library ID,GEX_1,Number of reads in the library,"172,468,563" -Library,Gene Expression,Physical library ID,GEX_1,Sequencing saturation,50.84% -Library,Gene Expression,Physical library ID,GEX_1,Valid UMIs,99.85% -Library,Gene Expression,Physical library ID,GEX_1,Valid barcodes,92.38% -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of reads,"30,607,267" -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of short reads skipped,0 -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 RNA read,92.7% -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 UMI,94.5% -Library,VDJ 
B,Fastq ID,1a_522_3wbm_BCR,Q30 barcodes,95.6% -Library,VDJ B,Physical library ID,VDJB_1,Estimated number of cells,582 -Library,VDJ B,Physical library ID,VDJB_1,Fraction reads in cells,83.31% -Library,VDJ B,Physical library ID,VDJB_1,Mean reads per cell,"52,590" -Library,VDJ B,Physical library ID,VDJB_1,Mean used reads per cell,"9,638" -Library,VDJ B,Physical library ID,VDJB_1,Number of reads,"30,607,267" -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGH,22.57% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGK,50.50% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGL,14.06% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to any V(D)J gene,87.14% -Library,VDJ B,Physical library ID,VDJB_1,Valid barcodes,95.44% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of reads,"39,674,884" -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of short reads skipped,0 -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 RNA read,91.5% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 UMI,94.7% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 barcodes,95.4% -Library,VDJ T,Physical library ID,VDJT_1,Estimated number of cells,124 -Library,VDJ T,Physical library ID,VDJT_1,Fraction reads in cells,15.60% -Library,VDJ T,Physical library ID,VDJT_1,Mean reads per cell,"319,959" -Library,VDJ T,Physical library ID,VDJT_1,Mean used reads per cell,"35,979" -Library,VDJ T,Physical library ID,VDJT_1,Number of reads,"39,674,884" -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRA,7.91% -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRB,24.14% -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to any V(D)J gene,32.31% -Library,VDJ T,Physical library ID,VDJT_1,Valid barcodes,79.69%\'\'\' > \$sample_dir/metrics_summary.csv +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8%\'\'\' > \$sample_dir_1/metrics_summary.csv + +echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value +Cells,Gene Expression,,,Cells,"5,727" +Cells,Gene 
Expression,,,Confidently mapped reads in cells,84.20% +Cells,Gene Expression,,,Median UMI counts per cell,"1,585" +Cells,Gene Expression,,,Median genes per cell,744 +Cells,Gene Expression,,,Median reads per cell,"16,412" +Cells,Gene Expression,,,Total genes detected,"14,371" +Cells,VDJ B,,,Cells with productive IGH contig,78.52% +Cells,VDJ B,,,Cells with productive IGK contig,70.10% +Cells,VDJ B,,,Cells with productive IGL contig,6.19% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50% +Cells,VDJ B,,,Paired clonotype diversity,72.28 +Cells,VDJ T,,,Cells with productive TRA contig,79.03% +Cells,VDJ T,,,Cells with productive TRB contig,97.58% +Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61% +Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8%\'\'\' > \$sample_dir_2/metrics_summary.csv + """ } \ No newline at end of file diff --git a/modules/cellranger2multiqc/count/main.nf b/modules/cellranger2multiqc/count/main.nf index 9a3a5ff..f731df2 100644 --- a/modules/cellranger2multiqc/count/main.nf +++ b/modules/cellranger2multiqc/count/main.nf @@ -22,10 +22,11 @@ process CELLRANGER_COUNT_TO_MULTIQC{ IFS=', ' read -ra project_array <<< \"\$project_string\" # Checks if mqc_yaml exists, if not create it for i in {0..$number_of_samples}; do + echo \$i if ! 
[ -f \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" ]; then mkdir -p \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/\" echo \"\"\"id: \"single_cell_workflows_table\" -section_name : \"Single Cell Workflows Stats\" +section_name : \"Single Cell Workflows Count Stats\" description: \"This table consists of the data gathered from cellranger output \" plot_type: \"table\" pconfig: @@ -34,7 +35,7 @@ pconfig: data:\"\"\" > \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" fi # Extends mqc_yaml file with sample information - echo \" \${sample_array[\$i]}: \$(cat $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/$summary | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\" | tr -d '[]' >> \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" + python $projectDir/bin/countmetric2mqc.py $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/$summary \${sample_array[\$i]} $params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml done """ } \ No newline at end of file diff --git a/modules/split_sheet/main.nf b/modules/split_sheet/main.nf index c185d91..1424139 100644 --- a/modules/split_sheet/main.nf +++ b/modules/split_sheet/main.nf @@ -5,7 +5,7 @@ process SPLITSHEET { output: path "10X_Data.csv", emit: data path "Data.csv", emit: pipe_data, optional: true - path "10X_Flex_Data.csv", emit: flex, optional: true + path "FlexConfig_Data.csv", emit: flex, optional: true path "FeatureReference_Data.csv", emit: feature_reference, optional: true shell: ''' diff --git a/subworkflows/finish.nf b/subworkflows/finish.nf index 83b21c1..5cf1a4d 100644 --- a/subworkflows/finish.nf +++ b/subworkflows/finish.nf @@ -24,5 +24,4 @@ workflow FINISH_PROJECTS { md5sum_ch = MD5SUM(publish_ch) deliver_auto_ch = DELIVER_PROJ(md5sum_ch.project_id) } - print params.ctg_mode } \ No newline at end of file diff --git 
a/templates/manifest.html b/templates/manifest.html index b2fff9c..18465c0 100644 --- a/templates/manifest.html +++ b/templates/manifest.html @@ -83,14 +83,14 @@

Nextflow manifest

-

Pipeline release: 1.4.1

+

Pipeline release: 1.4.2

Subworkflow: xxSubWorkflowxx

Maintainer: jacob.karlstrom@med.lu.se

Description: Your data has been processed by the nextflow pipeline singleCellWorkflows. If you want more information on how your data was processed, follow the link below and navigate to your release!

- +