-
Notifications
You must be signed in to change notification settings - Fork 152
/
variant_recoder
executable file
·159 lines (123 loc) · 5.36 KB
/
variant_recoder
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env perl
# Copyright [2016-2024] EMBL-European Bioinformatics Institute
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
use Getopt::Long;
use FindBin qw($RealBin);
use lib $RealBin;
use lib $RealBin.'/modules';
use Bio::EnsEMBL::VEP::VariantRecoder;
use Bio::EnsEMBL::VEP::Utils qw(get_version_string);
# Fail early, at compile time, when the JSON module cannot be loaded:
# the script's only output format is JSON, so nothing useful can be done
# without it.
BEGIN {
    # Block eval (rather than string eval) lets perl syntax-check the
    # require at compile time; the trailing 1 guarantees a true value on
    # success regardless of what the module itself returns.
    my $json_loaded = eval { require JSON; 1 };
    unless ($json_loaded) {
        die("ERROR: variant_recoder requires JSON perl library\n");
    }
}
# Command-line handling.
#
# Parsed options are collected in $config (a hash ref keyed by option name).
# --transcript_filter may be given several times, so its values are pushed
# onto an array ref that is created up front and therefore always present
# in $config, even when the flag is never used.
my $config = { transcript_filter => [] };

# Snapshot of the raw arguments taken before GetOptions consumes @ARGV;
# $arg_count is used afterwards to detect an invocation with no arguments.
my @argv_copy = @ARGV;
my $arg_count = scalar @argv_copy;

GetOptions(
    $config,

    'help',                   # display the help message

    # input options
    'config=s',               # config file name
    'input_file|i=s',         # input file name
    'input_data|id=s',        # input data given directly as a string
    'format=s',               # input file format
    'delimiter=s',            # delimiter between fields in the input
    'pretty',                 # print prettified JSON

    # DB options
    'species|s=s',            # species, e.g. human, homo_sapiens
    'registry=s',             # registry file
    'host=s',                 # database host
    'port=s',                 # database port
    'user|u=s',               # database user name
    'password|pass=s',        # database password
    'db_version=i',           # Ensembl database version to use, e.g. 62
    'assembly|a=s',           # assembly version to use
    'grch37',                 # set for using GRCh37
    'genomes',                # automatically set DB params for e!Genomes
    'refseq',                 # use the otherfeatures RefSeq DB instead of Ensembl
    'merged',                 # use the merged cache
    'all_refseq',             # report on all transcripts in the RefSeq cache (includes CCDS, EST etc)
    'gencode_basic',          # limit to the GENCODE basic transcript set
    'is_multispecies=i',      # '1' for a multispecies database (e.g protists_euglenozoa1_collection_core_29_82_1)

    # runtime options
    'transcript_filter=s' => $config->{transcript_filter},   # filter transcripts (repeatable)
    'exclude_predicted',
    'minimal',                # convert input alleles to their minimal representation
    'pick',                   # use defined criteria to return the most severe line
    'pick_allele',            # choose one consequence per allele
    'per_gene',               # choose one consequence per gene
    'pick_allele_gene',       # choose one consequence per gene and allele
    'pick_order=s',           # order of the categories used by the --*pick* flags
    'buffer_size=i',          # number of variations to read in before analysis
    'failed=i',               # include failed variations when finding existing ones

    # output options
    'warning_file=s',         # file to write warnings to
    'shift_hgvs=i',           # disable/enable 3-prime shifting of HGVS indels to comply with the standard
    'lrg',                    # enable LRG-based features
    'fields=s',               # define your own output fields
    'synonyms=s',             # file of chromosome synonyms
    'vcf_string',             # return the VCF format as a string
    'var_synonyms',           # return the variation synonyms
    'mane_select',            # return MANE Select transcripts in HGVS format (e.g. hgvsg, hgvsc, hgvsp)
    'ga4gh_vrs',              # return the GA4GH VRS allele object

    # flags for use with RefSeq caches
    'bam=s',                  # BAM file used to modify transcripts
    'use_transcript_ref',     # extract the reference allele from the transcript (or genome)
    'use_given_ref',          # override a use_transcript_ref setting that may be set from cache info

    # debug
    'debug',                  # print out debug info
) or die "ERROR: Failed to parse command-line flags\n";
# Show the usage text and stop when the script was invoked without any
# arguments or with --help. The exit is unconditional: it must not depend
# on the return value of usage() (which is the result of its print call),
# otherwise a failed write to STDOUT would fall through and start a run
# with no input.
if (!$arg_count || $config->{help}) {
    usage();
    exit(0);
}

# Normal operation: recode the input described by the parsed options.
main($config);
# Runs the recoder with the parsed options and prints the result as JSON.
#
# Argument: hash ref of options; it is handed unchanged to the
#           Bio::EnsEMBL::VEP::VariantRecoder constructor.
# Dies when --mane_select is combined with an explicitly given species
# that does not match homo_sapiens/human.
sub main {
    my ($opts) = @_;

    # An undefined species is let through; only an explicit non-human
    # species is rejected here.
    my $species = $opts->{species};
    if ($opts->{mane_select} && defined($species) && $species !~ /homo_sapiens|human/) {
        die("ERROR: MANE Select is only available for human\n");
    }

    my $recoder = Bio::EnsEMBL::VEP::VariantRecoder->new($opts);
    my $recoded = $recoder->recode_all;

    # With no results at all, emit a single entry carrying the messages of
    # the warnings collected by the recoder, so the output is never an
    # empty list.
    if (!@$recoded) {
        my @messages = map { $_->{msg} } @{ $recoder->warnings };
        push @$recoded, { warnings => \@messages };
    }

    my $encoder = JSON->new;
    $encoder->pretty if $recoder->param('pretty');

    print $encoder->encode($recoded);
}
# Prints the usage banner to STDOUT.
#
# The version text is obtained from get_version_string(), called with the
# path of the .version file next to this script, and is interpolated under
# the "Versions:" heading. Returns the result of the print call.
sub usage {
    my $versions = get_version_string($RealBin.'/.version');

    print <<"USAGE";
#-----------------#
# VARIANT RECODER #
#-----------------#
Versions:
$versions
Help: dev\@ensembl.org , helpdesk\@ensembl.org
Example usage:
./variant_recoder --input_data "rs699"
Basic options
=============
--help Display this message and quit
--input_data | --id Input as string
--input_file | -i Input file
--species [species] Species to use [default: "human"]
--pretty Print prettified JSON
For full option documentation see:
https://www.ensembl.org/info/docs/tools/vep/recoder/index.html#vr_options
USAGE
}
1;