Skip to content

Commit

Permalink
Np pd 2326 add mapping to snm3c (#1075)
Browse files Browse the repository at this point in the history
* testing

* testing

* no samtools

* comment

* comment

* comment

* add mapping task

* remove plate id from mapping

* remove snakefile

* remove some unwanted outputs

* remove some unwanted outputs

* parenthesis
  • Loading branch information
nikellepetrillo authored Sep 7, 2023
1 parent 657bc6c commit 5d06cd6
Showing 1 changed file with 104 additions and 34 deletions.
138 changes: 104 additions & 34 deletions beta-pipelines/skylab/m3c/CondensedSnm3C.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ workflow WDLized_snm3C {
Array[File] fastq_input_read2
File random_primer_indexes
String plate_id
# mapping inputs
File tarred_index_files
File genome_fa
File chromosome_sizes
}

call Demultiplexing {
Expand All @@ -22,15 +26,18 @@ workflow WDLized_snm3C {
tarred_demultiplexed_fastqs = Demultiplexing.tarred_demultiplexed_fastqs
}

# call hisat_3n_pair_end_mapping_dna_mode {
# input:
# r1_trimmed = Sort_and_trim_r1_and_r2.r1_trimmed_fq,
# r2_trimmed = Sort_and_trim_r1_and_r2.r2_trimmed_fq
# }
#
call Hisat_3n_pair_end_mapping_dna_mode {
input:
r1_trimmed_tar = Sort_and_trim_r1_and_r2.r1_trimmed_fq_tar,
r2_trimmed_tar = Sort_and_trim_r1_and_r2.r2_trimmed_fq_tar,
tarred_index_files = tarred_index_files,
genome_fa = genome_fa,
chromosome_sizes = chromosome_sizes
}

# call separate_unmapped_reads {
# input:
# hisat3n_bam = hisat_3n_pair_end_mapping_dna_mode.hisat3n_bam
# hisat3n_bam = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_bam
# }
#
# call split_unmapped_reads {
Expand Down Expand Up @@ -80,7 +87,7 @@ workflow WDLized_snm3C {
# call summary {
# input:
# trimmed_stats = Sort_and_trim_r1_and_r2.trim_stats,
# hisat3n_stats = hisat_3n_pair_end_mapping_dna_mode.hisat3n_stats,
# hisat3n_stats = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_stats,
# r1_hisat3n_stats = hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.r1_hisat3n_stats,
# r2_hisat3n_stats = hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.r2_hisat3n_stats,
# dedup_stats = dedup_unique_bam_and_index_unique_bam.dedup_stats,
Expand All @@ -97,9 +104,11 @@ workflow WDLized_snm3C {
#File UniqueAlign_cell_parser_picard_dedup = dedup_unique_bam_and_index_unique_bam.dedup_stats
#File SplitReads_cell_parser_hisat_summary = "?"
#File hicFiles = call_chromatin_contacts.chromatin_contact_stats
File trimmed_stats = Sort_and_trim_r1_and_r2.trim_stats
File r1_trimmed_fq = Sort_and_trim_r1_and_r2.r1_trimmed_fq
File r2_trimmed_fq = Sort_and_trim_r1_and_r2.r2_trimmed_fq
File trimmed_stats = Sort_and_trim_r1_and_r2.trim_stats_tar
File r1_trimmed_fq = Sort_and_trim_r1_and_r2.r1_trimmed_fq_tar
File r2_trimmed_fq = Sort_and_trim_r1_and_r2.r2_trimmed_fq_tar
File hisat3n_stats_tar = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_paired_end_stats_tar
File hisat3n_bam_tar = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_paired_end_bam_tar
}
}

Expand Down Expand Up @@ -204,8 +213,8 @@ task Sort_and_trim_r1_and_r2 {
for file in "${R1_files[@]}"; do
sample_id=$(basename "$file" "-R1.fq.gz")
r2_file="${sample_id}-R2.fq.gz"
gunzip -c "$file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
gunzip -c "$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
zcat "$file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R1_sorted.fq"
zcat "$r2_file" | paste - - - - | sort -k1,1 -t " " | tr "\t" "\n" > "${sample_id}-R2_sorted.fq"
done
Expand Down Expand Up @@ -245,30 +254,91 @@ task Sort_and_trim_r1_and_r2 {
memory: "${mem_size} GiB"
}
output {
File r1_trimmed_fq = "R1_trimmed_files.tar.gz"
File r2_trimmed_fq = "R2_trimmed_files.tar.gz"
File trim_stats = "trimmed_stats_files.tar.gz"
File r1_trimmed_fq_tar = "R1_trimmed_files.tar.gz"
File r2_trimmed_fq_tar = "R2_trimmed_files.tar.gz"
File trim_stats_tar = "trimmed_stats_files.tar.gz"
}
}
#task hisat_3n_pair_end_mapping_dna_mode{
# input {
# File r1_trimmed
# File r2_trimmed
# }
# command <<<
# >>>
# runtime {
# docker: "fill_in"
# disks: "local-disk ${disk_size} HDD"
# cpu: 1
# memory: "${mem_size} GiB"
# }
# output {
# File hisat3n_bam = ""
# File hisat3n_stats = ""
# }
#}
task Hisat_3n_pair_end_mapping_dna_mode{
input {
File r1_trimmed_tar
File r2_trimmed_tar
File tarred_index_files
File genome_fa
File chromosome_sizes
String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:1.0.0-2.2.1"
Int disk_size = 100
Int mem_size = 100
}
command <<<
set -euo pipefail
mkdir reference/
mkdir fastq/
cp ~{tarred_index_files} reference/
cp ~{genome_fa} reference/
cp ~{chromosome_sizes} reference/
cp ~{r1_trimmed_tar} fastq/
cp ~{r2_trimmed_tar} fastq/
# untar the index files
cd reference/
echo "Untarring the index files"
tar -zxvf ~{tarred_index_files}
rm ~{tarred_index_files}
samtools faidx hg38.fa
# untar the demultiplexed fastq files
cd ../fastq/
echo "Untarring the fastq files"
tar -zxvf ~{r1_trimmed_tar}
tar -zxvf ~{r2_trimmed_tar}
rm ~{r1_trimmed_tar}
rm ~{r2_trimmed_tar}
# define lists of r1 and r2 fq files
R1_files=($(ls | grep "\-R1_trimmed.fq.gz"))
R2_files=($(ls | grep "\-R2_trimmed.fq.gz"))
for file in "${R1_files[@]}"; do
sample_id=$(basename "$file" "-R1_trimmed.fq.gz")
hisat-3n /cromwell_root/reference/hg38 \
-q \
-1 ${sample_id}-R1_trimmed.fq.gz \
-2 ${sample_id}-R2_trimmed.fq.gz \
--directional-mapping-reverse \
--base-change C,T \
--no-repeat-index \
--no-spliced-alignment \
--no-temp-splicesite \
-t \
--new-summary \
--summary-file ${sample_id}.hisat3n_dna_summary.txt \
--threads 11 | samtools view -b -q 0 -o "${sample_id}.hisat3n_dna.unsort.bam"
done
# tar up the bam files and stats files
tar -zcvf hisat3n_paired_end_bam_files.tar.gz *.bam
tar -zcvf hisat3n_paired_end_stats_files.tar.gz *.hisat3n_dna_summary.txt
mv hisat3n_paired_end_bam_files.tar.gz ../
mv hisat3n_paired_end_stats_files.tar.gz ../
>>>
runtime {
docker: docker
disks: "local-disk ${disk_size} HDD"
cpu: 1
memory: "${mem_size} GiB"
}
output {
File hisat3n_paired_end_bam_tar = "hisat3n_paired_end_bam_files.tar.gz"
File hisat3n_paired_end_stats_tar = "hisat3n_paired_end_stats_files.tar.gz"
}
}
#task separate_unmapped_reads {
# input {
Expand Down

0 comments on commit 5d06cd6

Please sign in to comment.