-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from N-Hoffmann/stringtie
Add gffcompare class codes to final gtf; reformat code
- Loading branch information
Showing 18 changed files with 162 additions and 96 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,20 @@ | ||
// ADD_CLASS_CODE: runs the class_code.R helper on a class-code GTF plus an
// annotation GTF, then strips leading header lines from the result with sed.
//
// NOTE(review): this listing appears to interleave pre- and post-change lines of
// the diff without +/- markers (two script bodies, two output declarations, and
// both `extended_annotation` and `gtf` inputs). Confirm which lines are current
// against the actual commit before editing the code below.
process ADD_CLASS_CODE { | ||
// Use the repository's environment.yml only when params.enable_conda is set.
conda (params.enable_conda ? "$baseDir/environment.yml" : null) | ||
// Container tag follows workflow.revision, falling back to "main" when it is unset.
container "ghcr.io/igdrion/annexa:${workflow.revision? workflow.revision: "main"}" | ||
// Copy results into <outdir>/final; the saveAs closure ignores the produced
// filename and always publishes under the name of the input `gtf`.
publishDir "$params.outdir/final", mode: 'copy', saveAs: {filename -> "${gtf}"}, overwrite: true | ||
tag "$gtf" | ||
|
||
// Inputs: the class-code GTF and the annotation GTF(s) passed to class_code.R.
input: | ||
file class_code_gtf | ||
file extended_annotation | ||
file gtf | ||
|
||
// Outputs: one declaration uses a fixed name with a named emit, the other the
// "class_code."-prefixed name that the second script body writes and trims.
output: | ||
path 'extended_annotation_class_code.gtf', emit: extended_annotation_class_code | ||
path "class_code.${gtf}" | ||
|
||
// The second script body writes class_code.<gtf> and then deletes its first
// three lines (see the in-script comment about the gtfsort header).
script: | ||
""" | ||
class_code.R ${class_code_gtf} ${extended_annotation} | ||
""" | ||
class_code.R ${class_code_gtf} ${gtf} "class_code.${gtf}" | ||
## Remove header created by gtfsort | ||
sed -i 1,3d "class_code.${gtf}" | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,4 +30,7 @@ workflow QC { | |
origin | ||
) | ||
} | ||
|
||
emit: | ||
gtf = MERGE_ANNOTATIONS.out.gtf | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// FORMAT_GFFCOMPARE: rewrites the gene_id column of a merged GTF using a
// tracking file, cleans the result twice through gtf2gtf_cleanall.sh, and
// emits it as extended_annotations.gtf.
//
// Pipeline of the shell block, in order:
//   1. awk loads the tracking file; for every row whose 4th field is neither
//      "u" nor "r" it splits field 3 and field 5 on "|" and maps the quoted
//      second part of field 5 to the quoted first part of field 3.
//      (Presumably field 4 is the gffcompare class code, field 3 the reference
//      gene|transcript and field 5 the query gene|transcript; verify against
//      the gffcompare tracking format.)
//   2. For each line of the merged GTF, if field 12 is a key of that map,
//      field 10 is replaced by the mapped value; every line is printed.
//   3. A second awk pass sets field 10 to the value following "ref_gene_id"
//      (field 14 on "transcript" rows, field 16 on "exon" rows) when they differ.
//   4. sed deletes the first two lines of the final file.
//
// NOTE(review): assigning to $10 makes awk rebuild the record with its output
// field separator; presumably gtf2gtf_cleanall.sh restores GTF tab layout.
// The helper is not visible here, so confirm.
process FORMAT_GFFCOMPARE { | ||
// Only extended_annotations.gtf is copied to <outdir>/stringtie2; the
// intermediate "preaclean" file stays in the work directory.
publishDir "$params.outdir/stringtie2", pattern: 'extended_annotations.gtf', mode: 'copy' | ||
|
||
// Inputs: the merged GTF to rewrite and the tracking file that drives the remapping.
input: | ||
path merged_gtf | ||
path tracking_file | ||
|
||
// Output: the cleaned GTF, exposed to callers as `stringtie_gtf`.
output: | ||
path("extended_annotations.gtf"), emit: stringtie_gtf | ||
|
||
// A `shell:` block with a single-quoted string is used so that awk's $N fields
// are left alone and only !{...} placeholders are filled in by Nextflow.
// The doubled backslashes are Groovy string escapes; presumably awk ends up
// seeing \" (a literal double quote). Confirm if the quoting is changed.
shell: | ||
''' | ||
# Add information for stringtie2 process | ||
# Reformat the output of gffcompare to correctly match novel isoforms to known genes | ||
# Takes the transcript_id identified by Stringtie and assigns it to reference gene_id | ||
awk 'BEGIN{ | ||
while(getline<"!{tracking_file}">0){ | ||
if ($4 !="u" && $4 !="r"){ | ||
split($3,gn,"|"); | ||
split($5,tx,"|"); | ||
final["\\""tx[2]"\\";"]="\\""gn[1]"\\";" | ||
} | ||
} | ||
} { | ||
if ($12 in final){ | ||
$10=final[$12]; print $0} else {print $0} | ||
}' !{merged_gtf} | gtf2gtf_cleanall.sh > extended_annotations_preaclean.gtf | ||
# Match correct ref_gene_id to gene_id to some overlapping genes in the reference annotation | ||
awk '{if ($3 == "transcript" && $13=="ref_gene_id" && $10!=$14) { | ||
$10 = $14; | ||
print $0 | ||
} else if ($3 == "exon" && $15=="ref_gene_id" && $10!=$16) { | ||
$10 = $16; | ||
print $0 | ||
} else {print $0} | ||
}' extended_annotations_preaclean.gtf | gtf2gtf_cleanall.sh > extended_annotations.gtf | ||
# Remove header lines (command and version) | ||
sed -i 1,2d extended_annotations.gtf | ||
''' | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.