Skip to content

Commit

Permalink
Merge pull request #27 from ctg-lund/dev
Browse files Browse the repository at this point in the history
Webpack and multiqc stable for both count and multi workflows
  • Loading branch information
Fattigman authored Jun 29, 2023
2 parents badb484 + fbe8025 commit d732555
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 51 deletions.
2 changes: 0 additions & 2 deletions bin/multimetric2mqc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,3 @@
json.dump(cells, f)
with open('{}_library.json'.format(sample_name), 'w') as f:
json.dump(library, f)
with open('{}_other.json'.format(sample_name), 'w') as f:
json.dump(other, f)
3 changes: 3 additions & 0 deletions examples/CTG_SampleSheet.csv
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ sample-15,human,project8,n,n,n,atac,scatac-10x,n,n,n,n,n
sample-16,human,project9,n,n,4,atac,scarc-10x,n,n,n,n,n
sample-17,human,project9,n,n,4,gex,scarc-10x,n,n,n,n,n
sample-18,human,project10,n,n,n,gex,scvisium-10x,cytaimage,darkimage,image,slide,slide_area
sample-19,human,project7,n,n,5,gex,scmulti-10x,n,n,n,n,n
sample-20,human,project7,n,n,5,tcr,scmulti-10x,n,n,n,n,n
sample-21,human,project7,n,n,5,bcr,scmulti-10x,n,n,n,n,n
[FlexConfig_Data],,,,,,,,
sample_id,probe_barcode,Sample_Source,,,,,,
sample1,BC001|BC002,sample_7,,,,,,
Expand Down
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ workflow {

workflow.onComplete {
if (workflow.success) {
writetofile("${new Date()} [Information] singleCellWorkflow completed successfully #")
writetofile("${new Date()} [Information] singleCellWorkflow $params.samplesheet completed successfully #")
} else {
writetofile("${new Date()} [Critical] singleCellWorkflow failed. Error message: ${workflow.errorMessage} #")
writetofile("${new Date()} [Critical] singleCellWorkflow failed. $params.samplesheet #")
}
}
85 changes: 81 additions & 4 deletions modules/cellranger/multi/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,86 @@ process MULTI {
"""
stub:
"""
mkdir -p $sample_id/outs
touch $sample_id/outs/metrics_summary.csv
touch $sample_id/outs/web_summary.html
touch $sample_id/outs/cloupe.cloupe
sample_dir=$sample_id/outs/per_sample_outs/$sample_id
mkdir -p \$sample_dir
touch \$sample_dir/web_summary.html
touch \$sample_dir/cloupe.cloupe
echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value
Cells,Gene Expression,,,Cells,"5,727"
Cells,Gene Expression,,,Confidently mapped reads in cells,84.20%
Cells,Gene Expression,,,Median UMI counts per cell,"1,585"
Cells,Gene Expression,,,Median genes per cell,744
Cells,Gene Expression,,,Median reads per cell,"16,412"
Cells,Gene Expression,,,Total genes detected,"14,371"
Cells,VDJ B,,,Cells with productive IGH contig,78.52%
Cells,VDJ B,,,Cells with productive IGK contig,70.10%
Cells,VDJ B,,,Cells with productive IGL contig,6.19%
Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31%
Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50%
Cells,VDJ B,,,Cells with productive V-J spanning pair,54.47%
Cells,VDJ B,,,Estimated number of cells,582
Cells,VDJ B,,,Median IGH UMIs per Cell,9
Cells,VDJ B,,,Median IGK UMIs per Cell,10
Cells,VDJ B,,,Median IGL UMIs per Cell,0
Cells,VDJ B,,,Number of cells with productive V-J spanning pair,317
Cells,VDJ B,,,Paired clonotype diversity,72.28
Cells,VDJ T,,,Cells with productive TRA contig,79.03%
Cells,VDJ T,,,Cells with productive TRB contig,97.58%
Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61%
Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61%
Cells,VDJ T,,,Estimated number of cells,124
Cells,VDJ T,,,Median TRA UMIs per Cell,3
Cells,VDJ T,,,Median TRB UMIs per Cell,7
Cells,VDJ T,,,Number of cells with productive V-J spanning pair,95
Cells,VDJ T,,,Paired clonotype diversity,35.11
Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563"
Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0
Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1%
Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8%
Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 barcodes,95.8%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped antisense,2.56%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped reads in cells,84.20%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to exonic regions,21.93%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to genome,26.08%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intergenic regions,2.50%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intronic regions,1.65%
Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to transcriptome,20.79%
Library,Gene Expression,Physical library ID,GEX_1,Estimated number of cells,"5,727"
Library,Gene Expression,Physical library ID,GEX_1,Mapped to genome,36.49%
Library,Gene Expression,Physical library ID,GEX_1,Mean reads per cell,"30,115"
Library,Gene Expression,Physical library ID,GEX_1,Number of reads,"172,468,563"
Library,Gene Expression,Physical library ID,GEX_1,Number of reads in the library,"172,468,563"
Library,Gene Expression,Physical library ID,GEX_1,Sequencing saturation,50.84%
Library,Gene Expression,Physical library ID,GEX_1,Valid UMIs,99.85%
Library,Gene Expression,Physical library ID,GEX_1,Valid barcodes,92.38%
Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of reads,"30,607,267"
Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of short reads skipped,0
Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 RNA read,92.7%
Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 UMI,94.5%
Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 barcodes,95.6%
Library,VDJ B,Physical library ID,VDJB_1,Estimated number of cells,582
Library,VDJ B,Physical library ID,VDJB_1,Fraction reads in cells,83.31%
Library,VDJ B,Physical library ID,VDJB_1,Mean reads per cell,"52,590"
Library,VDJ B,Physical library ID,VDJB_1,Mean used reads per cell,"9,638"
Library,VDJ B,Physical library ID,VDJB_1,Number of reads,"30,607,267"
Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGH,22.57%
Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGK,50.50%
Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGL,14.06%
Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to any V(D)J gene,87.14%
Library,VDJ B,Physical library ID,VDJB_1,Valid barcodes,95.44%
Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of reads,"39,674,884"
Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of short reads skipped,0
Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 RNA read,91.5%
Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 UMI,94.7%
Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 barcodes,95.4%
Library,VDJ T,Physical library ID,VDJT_1,Estimated number of cells,124
Library,VDJ T,Physical library ID,VDJT_1,Fraction reads in cells,15.60%
Library,VDJ T,Physical library ID,VDJT_1,Mean reads per cell,"319,959"
Library,VDJ T,Physical library ID,VDJT_1,Mean used reads per cell,"35,979"
Library,VDJ T,Physical library ID,VDJT_1,Number of reads,"39,674,884"
Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRA,7.91%
Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRB,24.14%
Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to any V(D)J gene,32.31%
Library,VDJ T,Physical library ID,VDJT_1,Valid barcodes,79.69%\'\'\' > \$sample_dir/metrics_summary.csv
"""
}
35 changes: 14 additions & 21 deletions modules/cellranger2multiqc/count/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ process CELLRANGER_COUNT_TO_MULTIQC{
val project_id
val pipeline
output:
tuple val(project_id), emit: project_id
val(project_id), emit: project_id
script:

number_of_samples = sample_id.size() -1
Expand All @@ -15,33 +15,26 @@ process CELLRANGER_COUNT_TO_MULTIQC{
summary="metrics_summary.csv"
}
"""
# Create the multiqc folder
mkdir -p $params.outdir/$project_id/1_qc/multiqc
# Convert groovy variables into bash arrays
sample_string=\$(echo $sample_id | tr -d '[]')
project_string=\$(echo $project_id | tr -d '[]')
IFS=', ' read -ra sample_array <<< \"\$sample_string\"
IFS=', ' read -ra project_array <<< \"\$project_string\"
data_section=\"\"
for i in {0..${number_of_samples}}
do
echo \"\"\"
id: \"single_cell_workflows_table\"
section_name : \"Single Cell Workflows Stats\"
description: \"This table consists of the data gathered from cellranger output \"
plot_type: \"table\"
pconfig:
id: \"single_cell_workflows_table\"
title: \"Single Cell Workflows Stats\"
data:
\"\"\" > $params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml
data_section+=\" \${sample_array[\$i]}: \$(cat $params.outdir/$project_id/2_count/\${sample_array[\$i]}/out/$summary | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\n \"
done
echo \"\"\"
\$data_section
\"\"\" >> $params.outdir/$project_id/1_qc/multiqc/multiqc_mqc.yaml
# Append one table row per sample to the MultiQC custom-content file of its project.
# The upper bound is the Groovy variable number_of_samples (sample_id.size() - 1),
# not a fixed 5, so runs with any number of samples are handled.
for i in {0..${number_of_samples}}; do
# Make sure the destination folder exists before anything is redirected into it
mkdir -p \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc\"
# Write the table header only the first time a project is encountered
if ! [ -f \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" ]; then
echo \"\"\"id: \"single_cell_workflows_table\"
section_name : \"Single Cell Workflows Stats\"
description: \"This table consists of the data gathered from cellranger output \"
plot_type: \"table\"
pconfig:
id: \"single_cell_workflows_table\"
title: \"Single Cell Workflows Stats\"
data:\"\"\" > \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\"
fi
# The command substitution is escaped so that Groovy leaves it for bash to evaluate.
# NOTE(review): the summary file name is fixed here although a Groovy variable named
# summary is set above depending on the pipeline - confirm whether it should be used.
echo \" \${sample_array[\$i]}: \$(cat $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/metrics_summary.csv | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\" >> \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\"
done
"""

stub:
Expand Down
29 changes: 11 additions & 18 deletions modules/cellranger2multiqc/multi/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,49 +4,42 @@ process CELLRANGER_MULTI_TO_MULTIQC{
val project_id
output:
val project_id, emit: project_id
tuple path("*_mqc.yaml")
path("*_mqc.yaml")
script:
"""
# Create the multiqc folder
mkdir -p $params.outdir/$project_id/1_qc/multiqc
# Create the multiqc config files
for file_name in cells library other
for file_name in cells library
do
echo \"\"\"
id: \"single_cell_workflows_table\"
id: \"single_cell_workflows_table_\${file_name}\"
section_name : \"Single Cell Workflows \$file_name Stats\"
description: \"This table consists of the data gathered from cellranger output \"
plot_type: \"table\"
pconfig:
id: \"single_cell_workflows_table \$file_name \"
title: \"Single Cell Workflows \$file_name Stats\"
data:
\"\"\" > \${file_name}_mqc.yaml
id: \"single_cell_workflows_table_\${file_name} \"
title: \"Single Cell Workflows \$file_name Stats\"
data:\"\"\" > \${file_name}_mqc.yaml
done
# Convert the cellranger multi csv output to json
for file in $params.outdir/$project_id/2_multi/*/outs/per_sample_outs/*
do
python multimetric2mqc.py \$file/metrics_summary.csv \$(basename \$file)
python $projectDir/bin/multimetric2mqc.py \$file/metrics_summary.csv \$(basename \$file)
done
# Populate the multiqc config files
for file_name in cells library other
for file_name in cells library
do
for json in *\${file_name}.json
do
json_name = \"\${\$(basename \$json)%_\${file_name}.json}\"
echo \"\${json_name}: \$(cat \${json})\" >> \${file_name}_mqc.yaml
json_name=\"\$(basename \"\$json\" .json)\"
json_name=\"\${json_name%_\${file_name}}\"
echo \" \${json_name}: \$(cat \${json})\" >> \${file_name}_mqc.yaml
done
done
"""

stub:
"""
touch cells_mqc.yaml
touch library_mqc.yaml
touch other_mqc.yaml
"""
}
2 changes: 1 addition & 1 deletion modules/multi_config/gen_multi_config/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process GENERATE_MULTI_CONFIG{
gex_ref=params.human
} else if (sample_species == 'mouse') {
vdj_ref=params.mouse_vdj
gex_ref=params.human
gex_ref=params.mouse
}
if (libtype.contains('bcr') || libtype.contains('tcr')){
vdj+='[vdj]\n'
Expand Down
4 changes: 3 additions & 1 deletion modules/pack_websummaries/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ process PACK_WEBSUMMARIES{
# append the file path and the sample name to the array
file_paths+=("$file")
dir=$(dirname "$file")
dir=$(dirname "$dir")
# Count runs store the summary in <sample>/outs/, multi runs in
# <sample>/outs/per_sample_outs/<sample>/. Climb one more level only when the
# parent folder is NOT per_sample_outs, so that dir ends up on the sample folder
# in both layouts. The parent path is reduced with basename before comparing,
# since dirname alone yields a path that never equals a bare folder name.
if [ "$(basename "$(dirname "$dir")")" != "per_sample_outs" ]; then
dir=$(dirname "$dir")
fi
result=$(basename "$dir")
sample_names+=("$result")
echo $file $result
Expand Down
3 changes: 1 addition & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ params {
// runOptions
intron_mode="true"

// nextflow_log = "/projects/fs1/shared/Logs/nextflow.log"
nextflow_log = "/Users/jacobkarlstrom/Projects/singleCellWorkflows/nextflow.log"
nextflow_log = "/projects/fs1/shared/Logs/nextflow.log"

}

Expand Down

0 comments on commit d732555

Please sign in to comment.