From 373b889f6ba2f05497182a3295b04e49718a5cd3 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 16 Aug 2023 18:14:48 +0200 Subject: [PATCH 01/11] shortened stub data --- modules/cellranger/multi/main.nf | 57 +------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/modules/cellranger/multi/main.nf b/modules/cellranger/multi/main.nf index 5549308..ff6050e 100644 --- a/modules/cellranger/multi/main.nf +++ b/modules/cellranger/multi/main.nf @@ -37,70 +37,15 @@ Cells,VDJ B,,,Cells with productive IGK contig,70.10% Cells,VDJ B,,,Cells with productive IGL contig,6.19% Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31% Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50% -Cells,VDJ B,,,Cells with productive V-J spanning pair,54.47% -Cells,VDJ B,,,Estimated number of cells,582 -Cells,VDJ B,,,Median IGH UMIs per Cell,9 -Cells,VDJ B,,,Median IGK UMIs per Cell,10 -Cells,VDJ B,,,Median IGL UMIs per Cell,0 -Cells,VDJ B,,,Number of cells with productive V-J spanning pair,317 Cells,VDJ B,,,Paired clonotype diversity,72.28 Cells,VDJ T,,,Cells with productive TRA contig,79.03% Cells,VDJ T,,,Cells with productive TRB contig,97.58% Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61% Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% -Cells,VDJ T,,,Estimated number of cells,124 -Cells,VDJ T,,,Median TRA UMIs per Cell,3 -Cells,VDJ T,,,Median TRB UMIs per Cell,7 -Cells,VDJ T,,,Number of cells with productive V-J spanning pair,95 -Cells,VDJ T,,,Paired clonotype diversity,35.11 Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% -Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 barcodes,95.8% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped antisense,2.56% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped reads in cells,84.20% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to exonic regions,21.93% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to genome,26.08% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intergenic regions,2.50% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to intronic regions,1.65% -Library,Gene Expression,Physical library ID,GEX_1,Confidently mapped to transcriptome,20.79% -Library,Gene Expression,Physical library ID,GEX_1,Estimated number of cells,"5,727" -Library,Gene Expression,Physical library ID,GEX_1,Mapped to genome,36.49% -Library,Gene Expression,Physical library ID,GEX_1,Mean reads per cell,"30,115" -Library,Gene Expression,Physical library ID,GEX_1,Number of reads,"172,468,563" -Library,Gene Expression,Physical library ID,GEX_1,Number of reads in the library,"172,468,563" -Library,Gene Expression,Physical library ID,GEX_1,Sequencing saturation,50.84% -Library,Gene Expression,Physical library ID,GEX_1,Valid UMIs,99.85% -Library,Gene Expression,Physical library ID,GEX_1,Valid barcodes,92.38% -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of reads,"30,607,267" -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Number of short reads skipped,0 -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 RNA read,92.7% -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 UMI,94.5% -Library,VDJ B,Fastq ID,1a_522_3wbm_BCR,Q30 barcodes,95.6% -Library,VDJ B,Physical library ID,VDJB_1,Estimated number of cells,582 -Library,VDJ B,Physical library ID,VDJB_1,Fraction reads in cells,83.31% -Library,VDJ B,Physical library ID,VDJB_1,Mean reads per cell,"52,590" -Library,VDJ B,Physical library ID,VDJB_1,Mean used reads per cell,"9,638" -Library,VDJ B,Physical library ID,VDJB_1,Number of reads,"30,607,267" -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGH,22.57% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGK,50.50% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to IGL,14.06% -Library,VDJ B,Physical library ID,VDJB_1,Reads mapped to any V(D)J gene,87.14% -Library,VDJ B,Physical library ID,VDJB_1,Valid barcodes,95.44% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of reads,"39,674,884" -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Number of short reads skipped,0 -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 RNA read,91.5% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 UMI,94.7% -Library,VDJ T,Fastq ID,1a_522_3wbm_TCR,Q30 barcodes,95.4% -Library,VDJ T,Physical library ID,VDJT_1,Estimated number of cells,124 -Library,VDJ T,Physical library ID,VDJT_1,Fraction reads in cells,15.60% -Library,VDJ T,Physical library ID,VDJT_1,Mean reads per cell,"319,959" -Library,VDJ T,Physical library ID,VDJT_1,Mean used reads per cell,"35,979" -Library,VDJ T,Physical library ID,VDJT_1,Number of reads,"39,674,884" -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRA,7.91% -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to TRB,24.14% -Library,VDJ T,Physical library ID,VDJT_1,Reads mapped to any V(D)J gene,32.31% -Library,VDJ T,Physical library ID,VDJT_1,Valid barcodes,79.69%\'\'\' > \$sample_dir/metrics_summary.csv +\'\'\' > \$sample_dir/metrics_summary.csv """ } \ No newline at end of file From cdb0d03d18c746e12c715d8ab4175cf880770a49 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 16 Aug 2023 18:17:17 +0200 Subject: [PATCH 02/11] two plex stub data --- modules/cellranger/multi/main.nf | 38 +++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/modules/cellranger/multi/main.nf b/modules/cellranger/multi/main.nf index ff6050e..67de7f5 100644 --- a/modules/cellranger/multi/main.nf +++ b/modules/cellranger/multi/main.nf @@ -21,10 +21,14 @@ process MULTI { """ stub: """ - sample_dir=$sample_id/outs/per_sample_outs/$sample_id - mkdir -p \$sample_dir - touch \$sample_dir/web_summary.html - touch \$sample_dir/cloupe.cloupe + sample_dir_1=$sample_id/outs/per_sample_outs/${sample_id}_1 + sample_dir_2=$sample_id/outs/per_sample_outs/${sample_id}_2 + mkdir -p \$sample_dir_1 + mkdir -p \$sample_dir_2 + touch \$sample_dir_1/web_summary.html + touch \$sample_dir_2/web_summary.html + touch \$sample_dir_1/cloupe.cloupe + touch \$sample_dir_2/cloupe.cloupe echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value Cells,Gene Expression,,,Cells,"5,727" Cells,Gene Expression,,,Confidently mapped reads in cells,84.20% @@ -46,6 +50,30 @@ Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% -\'\'\' > \$sample_dir/metrics_summary.csv +\'\'\' > \$sample_dir_1/metrics_summary.csv + +echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value +Cells,Gene Expression,,,Cells,"5,727" +Cells,Gene Expression,,,Confidently mapped reads in cells,84.20% +Cells,Gene Expression,,,Median UMI counts per cell,"1,585" +Cells,Gene Expression,,,Median genes per cell,744 +Cells,Gene Expression,,,Median reads per cell,"16,412" +Cells,Gene Expression,,,Total genes detected,"14,371" +Cells,VDJ B,,,Cells with productive IGH contig,78.52% +Cells,VDJ B,,,Cells with productive IGK contig,70.10% +Cells,VDJ B,,,Cells with productive IGL contig,6.19% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGK, IGH) pair",49.31% +Cells,VDJ B,,,"Cells with productive V-J spanning (IGL, IGH) pair",5.50% +Cells,VDJ B,,,Paired clonotype diversity,72.28 +Cells,VDJ T,,,Cells with productive TRA contig,79.03% +Cells,VDJ T,,,Cells with productive TRB contig,97.58% +Cells,VDJ T,,,"Cells with productive V-J spanning (TRA, TRB) pair",76.61% +Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" +Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% +\'\'\' > \$sample_dir_2/metrics_summary.csv + """ } \ No newline at end of file From b943341e0dc2c5f4e1421c718db28ca433c409e6 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 23 Aug 2023 10:02:27 +0200 Subject: [PATCH 03/11] updated vdj reference path --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index aaded70..f3c46ad 100755 --- a/nextflow.config +++ b/nextflow.config @@ -18,7 +18,7 @@ params { mixed_genome="$refdir/cellranger/hg38_mm10/refdata-gex-GRCh38-and-mm10-2020-A" // VDJ references - human_vdj="$refdir/cellranger/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.0.0" + human_vdj="$refdir/cellranger/vdj/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.1.0" mouse_vdj="$refdir/cellranger/vdj/refdata-cellranger-vdj-GRCm38-alts-ensembl-7.0.0" // Probe sets From 27a664995fc43b17b619616d5a1bb0bfc8c799ff Mon Sep 17 00:00:00 2001 From: Fattigman Date: Wed, 23 Aug 2023 10:02:48 +0200 Subject: [PATCH 04/11] update manifest --- templates/manifest.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/manifest.html b/templates/manifest.html index c229c98..b2fff9c 100644 --- a/templates/manifest.html +++ b/templates/manifest.html @@ -83,7 +83,7 @@

Nextflow manifest

-

Pipeline release: 1.4.0

+

Pipeline release: 1.4.1

Subworkflow: xxSubWorkflowxx

Maintainer: jacob.karlstrom@med.lu.se

Description: Your data has been processed by the nextflow pipeline singleCellWorkflows. If you want more information on how your data was processed, follow the link below and navigate to your release!

From 23d04137ffa6394584041290ba887fe5942061ea Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 12:22:42 +0200 Subject: [PATCH 05/11] removed debugging print --- subworkflows/finish.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/finish.nf b/subworkflows/finish.nf index 83b21c1..5cf1a4d 100644 --- a/subworkflows/finish.nf +++ b/subworkflows/finish.nf @@ -24,5 +24,4 @@ workflow FINISH_PROJECTS { md5sum_ch = MD5SUM(publish_ch) deliver_auto_ch = DELIVER_PROJ(md5sum_ch.project_id) } - print params.ctg_mode } \ No newline at end of file From 998bc0323ef7a2673e181369c478f136dd18a8bb Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 12:39:44 +0200 Subject: [PATCH 06/11] fixed output formatting error --- modules/cellranger/multi/main.nf | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/modules/cellranger/multi/main.nf b/modules/cellranger/multi/main.nf index 67de7f5..e3a4ff5 100644 --- a/modules/cellranger/multi/main.nf +++ b/modules/cellranger/multi/main.nf @@ -49,8 +49,7 @@ Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% -Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% -\'\'\' > \$sample_dir_1/metrics_summary.csv +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8%\'\'\' > \$sample_dir_1/metrics_summary.csv echo \'\'\'Category,Library Type,Grouped By,Group Name,Metric Name,Metric Value Cells,Gene Expression,,,Cells,"5,727" @@ -72,8 +71,7 @@ Cells,VDJ T,,,Cells with productive V-J spanning pair,76.61% Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of reads,"172,468,563" Library,Gene Expression,Fastq ID,1a_522_3wbm,Number of short reads skipped,0 Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 RNA read,91.1% -Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8% -\'\'\' > \$sample_dir_2/metrics_summary.csv +Library,Gene Expression,Fastq ID,1a_522_3wbm,Q30 UMI,94.8%\'\'\' > \$sample_dir_2/metrics_summary.csv """ } \ No newline at end of file From e3df841dcdbda5d8ffe1967e05e08cf2f0a25861 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 13:17:28 +0200 Subject: [PATCH 07/11] fixed new config header --- modules/split_sheet/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/split_sheet/main.nf b/modules/split_sheet/main.nf index c185d91..1424139 100644 --- a/modules/split_sheet/main.nf +++ b/modules/split_sheet/main.nf @@ -5,7 +5,7 @@ process SPLITSHEET { output: path "10X_Data.csv", emit: data path "Data.csv", emit: pipe_data, optional: true - path "10X_Flex_Data.csv", emit: flex, optional: true + path "FlexConfig_Data.csv", emit: flex, optional: true path "FeatureReference_Data.csv", emit: feature_reference, optional: true shell: ''' From 914c47128e1f21f91beee29c433c1fb5887e8af0 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 14:05:59 +0200 Subject: [PATCH 08/11] rewrote script to python for readability --- bin/countmetric2mqc.py | 18 ++++++++++++++++++ modules/cellranger2multiqc/count/main.nf | 5 +++-- 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 bin/countmetric2mqc.py diff --git a/bin/countmetric2mqc.py b/bin/countmetric2mqc.py new file mode 100644 index 0000000..3757a5b --- /dev/null +++ b/bin/countmetric2mqc.py @@ -0,0 +1,18 @@ +import sys + +# Check if input file name was provided +if len(sys.argv) < 3: + print('Usage: python countmetric2mqc.py file.csv sample_name output_mqc.yaml') + sys.exit(1) + +# Get input file name from command-line argument +input_file = sys.argv[1] +sample_name = sys.argv[2] +mqc_yaml = sys.argv[3] +# Initialize dictionaries for each category +with open(input_file, 'r') as file: + keys, values = file.readline().split(','), file.readline().split(',') + data = {k.strip():v.strip() for (k,v) in zip(keys,values)} +# Appends to an already existing mqc.yaml file +with open(mqc_yaml, 'a') as file: + file.write(f' {sample_name}: {data}') \ No newline at end of file diff --git a/modules/cellranger2multiqc/count/main.nf b/modules/cellranger2multiqc/count/main.nf index 9a3a5ff..f731df2 100644 --- a/modules/cellranger2multiqc/count/main.nf +++ b/modules/cellranger2multiqc/count/main.nf @@ -22,10 +22,11 @@ process CELLRANGER_COUNT_TO_MULTIQC{ IFS=', ' read -ra project_array <<< \"\$project_string\" # Checks if mqc_yaml exists, if not create it for i in {0..$number_of_samples}; do + echo \$i if ! [ -f \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" ]; then mkdir -p \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/\" echo \"\"\"id: \"single_cell_workflows_table\" -section_name : \"Single Cell Workflows Stats\" +section_name : \"Single Cell Workflows Count Stats\" description: \"This table consists of the data gathered from cellranger output \" plot_type: \"table\" pconfig: @@ -34,7 +35,7 @@ pconfig: data:\"\"\" > \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" fi # Extends mqc_yaml file with sample information - echo \" \${sample_array[\$i]}: \$(cat $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/$summary | python -c 'import csv, json, sys; print(json.dumps([dict(r) for r in csv.DictReader(sys.stdin)]))')\" | tr -d '[]' >> \"$params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml\" + python $projectDir/bin/countmetric2mqc.py $params.outdir/\${project_array[\$i]}/2_count/\${sample_array[\$i]}/outs/$summary \${sample_array[\$i]} $params.outdir/\${project_array[\$i]}/1_qc/multiqc/multiqc_mqc.yaml done """ } \ No newline at end of file From 33cb9937d82033f63cfeb2e89ea4c6426c9c3158 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 14:07:13 +0200 Subject: [PATCH 09/11] removed unecessary part --- examples/CTG_SampleSheet.csv | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/examples/CTG_SampleSheet.csv b/examples/CTG_SampleSheet.csv index b1e9edd..2a45cdf 100644 --- a/examples/CTG_SampleSheet.csv +++ b/examples/CTG_SampleSheet.csv @@ -1,13 +1,5 @@ [Header],,,,,,,, IEMFileVersion,4,,,,,,, -[Data],,,,,,,, -Sample_ID,index,index2,Sample_Project,,,,, -sample-1,SI-TT-A1,SI-TT-A1,project1,,,,, -sample-2,SI-TT-A2,SI-TT-A2,project1,,,,, -sample-3,SI-TT-A3,SI-TT-A3,project2,,,,, -sample-4,SI-TT-A4,SI-TT-A4,project3,,,,, -sample-5,SI-TT-A5,SI-TT-A5,project3,,,,, -sample-6,SI-TT-A6,SI-TT-A6,project3,,,,, [10X_Data],,,,,,,, Sample_ID,Sample_Species,Sample_Project,force,agg,sample_pair,libtype,pipeline,cytaimage,darkimage,image,slide,slide_area sample-1,human,project1,n,n,n,gex,scrna-10x,n,n,n,n,n @@ -31,6 +23,8 @@ sample-18,human,project10,n,n,n,gex,scvisium-10x,cytaimage,darkimage,image,slide sample-19,human,project7,n,n,5,gex,scmulti-10x,n,n,n,n,n sample-20,human,project7,n,n,5,tcr,scmulti-10x,n,n,n,n,n sample-21,human,project7,n,n,5,bcr,scmulti-10x,n,n,n,n,n +sample-22,human,project9,n,n,6,atac,scarc-10x,n,n,n,n,n +sample-23,human,project9,n,n,6,gex,scarc-10x,n,n,n,n,n [FlexConfig_Data],,,,,,,, sample_id,probe_barcode,Sample_Source,,,,,, sample1,BC001|BC002,sample_7,,,,,, From 50f90d6f05f2f058df2384bd1f2ccc3ef268ea9d Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 14:07:13 +0200 Subject: [PATCH 10/11] removed unecessary part --- bin/countmetric2mqc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/countmetric2mqc.py b/bin/countmetric2mqc.py index 3757a5b..0a664c3 100644 --- a/bin/countmetric2mqc.py +++ b/bin/countmetric2mqc.py @@ -15,4 +15,4 @@ data = {k.strip():v.strip() for (k,v) in zip(keys,values)} # Appends to an already existing mqc.yaml file with open(mqc_yaml, 'a') as file: - file.write(f' {sample_name}: {data}') \ No newline at end of file + file.write(f' {sample_name}: {data}\n') \ No newline at end of file From a7ae3905ec8c104062d53c47b6530d9ba4e0eac9 Mon Sep 17 00:00:00 2001 From: Fattigman Date: Mon, 28 Aug 2023 15:30:28 +0200 Subject: [PATCH 11/11] update manifest --- templates/manifest.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/manifest.html b/templates/manifest.html index b2fff9c..18465c0 100644 --- a/templates/manifest.html +++ b/templates/manifest.html @@ -83,14 +83,14 @@

Nextflow manifest

-

Pipeline release: 1.4.1

+

Pipeline release: 1.4.2

Subworkflow: xxSubWorkflowxx

Maintainer: jacob.karlstrom@med.lu.se

Description: Your data has been processed by the nextflow pipeline singleCellWorkflows. If you want more information on how your data was processed, follow the link below and navigate to your release!

- +