Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make changes to gtf-to-bed process #1409

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ Added
- Add ``--bam-output`` input argument to ``vc-gatk4-hc``
- Add ``--max-mnp-distance`` input argument to ``vc-gatk4-hc``

Changed
-------
- Name the output data object of the ``gtf-to-bed`` process after the input gene set,
make the gene set a required input
and disable the canonical transcripts input when the gene feature type is selected


===================
61.0.0 - 2024-11-21
Expand Down
28 changes: 14 additions & 14 deletions resolwe_bio/processes/support_processors/gtf_to_bed.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@


class GTFtoBED(Process):
"""GTF to BED conversion.
"""GTF to BED conversion for predefined genes and feature types.

Note that this process only works with ENSEMBL annotations.
"""

slug = "gtf-to-bed"
name = "GTF to BED"
process_type = "data:bed"
version = "1.1.0"
version = "1.2.0"
category = "Other"
data_name = "Converted GTF to BED file"
data_name = "{{ geneset|name|default('?') }}"
marcellevstek marked this conversation as resolved.
Show resolved Hide resolved
scheduling_class = SchedulingClass.BATCH
persistence = Persistence.CACHED

Expand Down Expand Up @@ -100,14 +100,15 @@ class Input:
"geneset",
label="Gene set",
description="Gene set to use for filtering.",
required=False,
required=True,
)

canonical_transcripts = DataField(
"geneset",
label="Canonical transcripts",
description="Canonical transcripts to use for filtering. Only used for transcript and exon feature types.",
required=False,
disabled="feature_type == 'gene'",
)

output_strand = BooleanField(
Expand Down Expand Up @@ -180,17 +181,16 @@ def run(self, inputs, outputs):
gtf = gtf[gtf["source"].isin(inputs.annotation_source)]
gtf = gtf[gtf["feature_type"] == feature_type]

if inputs.geneset:
if inputs.annotation.output.species != inputs.geneset.output.species:
self.error(
"Gene set data object species does not match the annotation species."
)
geneset = pd.read_csv(
inputs.geneset.output.geneset.path,
delimiter="\t",
names=["ID"],
if inputs.annotation.output.species != inputs.geneset.output.species:
self.error(
"Species of the gene set data object does not match the species of the annotation data object."
)
gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]
geneset = pd.read_csv(
inputs.geneset.output.geneset.path,
delimiter="\t",
names=["ID"],
)
gtf = gtf[gtf["gene_id"].isin(geneset["ID"])]

if inputs.canonical_transcripts and not feature_type == "gene":
if (
Expand Down
2 changes: 2 additions & 0 deletions resolwe_bio/tests/processes/test_support_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1789,6 +1789,7 @@ def test_gtf_to_bed(self):
"gtf-to-bed",
{
"annotation": gtf.id,
"geneset": geneset.id,
"annotation_field": "gene_name",
},
)
Expand All @@ -1798,6 +1799,7 @@ def test_gtf_to_bed(self):
"gtf-to-bed",
{
"annotation": gtf.id,
"geneset": geneset.id,
"annotation_field": "gene_id_feature_id",
"feature_type": "exon",
},
Expand Down
Loading