-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added functionality listed in #45. Need to add tests and fix validati…
…on checks
- Loading branch information
Showing
2 changed files
with
256 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,181 @@ | ||
package Bio::GenomeUpdate::TP; | ||
use strict; | ||
use warnings; | ||
|
||
use Moose; | ||
use MooseX::FollowPBP; | ||
use Moose::Util::TypeConstraints; | ||
use Bio::GenomeUpdate::TP::TPLine; | ||
|
||
=head1 NAME | ||
SP - Trim point information for NCBI GRC pipeline with instructions used to generate a Accessioned Golden Path (AGP) file | ||
=head1 SYNOPSIS | ||
my $variable = SwitchPoint->new(); | ||
=head1 DESCRIPTION | ||
This class stores information for transitions between TPF components including TPF type, taxonomy and assembly and generates a trim point (TP) file. The trim point file specifies the extent of a component to be used in the AGP file. | ||
=head2 Methods | ||
=over | ||
=item C<set_taxid ( $taxonomy_id )> | ||
Sets the taxonomy identifier for the SP file, e.g. 4081 for Solanum lycopersicum. | ||
=item C<get_taxid> | ||
Gets the taxonomy identifier for the SP file. | ||
=cut | ||
|
||
has 'tp_taxid' => ( | ||
isa => 'Str', | ||
is => 'rw', | ||
default => '4081', | ||
required => 1, | ||
clearer => 'clear_tp_taxid' | ||
); | ||
|
||
=item C<set_assembly_group ( $organism_string )> | ||
Sets the assembly_group (required). | ||
=item C<get_assembly_group> | ||
Gets the assembly_group. | ||
=cut | ||
|
||
has 'assembly_group' => | ||
( isa => 'Str', is => 'rw', default => 'TGP', required => 1, clearer => 'clear_assembly_group' ); | ||
|
||
=item C<set_assembly_unit ( $organism_string )> | ||
Sets the assembly_unit (required). | ||
=item C<get_assembly_unit> | ||
Gets the assembly_unit. | ||
=cut | ||
|
||
has 'assembly_unit' => | ||
( isa => 'Str', is => 'rw', default => 'Primary', required => 1, clearer => 'clear_assembly_unit' ); | ||
|
||
=item C<set_tpf_type ( $tpf_type )> | ||
Sets the TPF type. Valid values are chromosome and contig (latter used for unlocalized or unplaced scaffolds, required). | ||
=item C<get_tpf_type > | ||
Gets the TPF type. | ||
=cut | ||
|
||
subtype 'TPTPFType', as 'Str', where { $_ eq "chromosome" || $_ eq "contig" }, | ||
message { "The string, $_, was not a valid TPF type. See http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/overlap/ specification link" }; | ||
|
||
has 'tp_tpf_type' => ( isa => 'TPTPFType', is => 'rw', default => 'chromosome', required => 1, clearer => 'clear_chromosome' ); | ||
|
||
subtype 'TPLine', | ||
as 'Bio::GenomeUpdate::TP::TPLine', | ||
message { "The object was not a TP line" }; | ||
|
||
has 'tp_lines' => ( | ||
isa => 'HashRef[TPLine]', | ||
is => 'rw', | ||
predicate => 'has_tp_lines', | ||
clearer => 'clear_tp_lines' | ||
); | ||
|
||
=item C<add_line_to_end > | ||
Add a TPLine object. | ||
=cut | ||
|
||
sub add_line_to_end { | ||
my $self = shift; | ||
my $line_to_add = shift; | ||
my %lines; | ||
if ( $self->has_tp_lines() ) { | ||
%lines = %{ $self->get_tp_lines() }; | ||
} | ||
my $last_line = $self->get_number_of_lines(); | ||
$lines{ $last_line + 1 } = $line_to_add;#key is just the index or line number | ||
$self->set_tp_lines( {%lines} ); | ||
} | ||
|
||
=item C<get_number_of_lines > | ||
Return number of lines in the TPLine object. | ||
=cut | ||
|
||
sub get_number_of_lines { | ||
my $self = shift; | ||
my %lines; | ||
if ( $self->has_tp_lines() ) { | ||
%lines = %{ $self->get_tp_lines() }; | ||
my @sorted_line_numbers = sort { $a <=> $b } keys %lines; | ||
return $sorted_line_numbers[-1]; | ||
} | ||
else { | ||
return 0; | ||
} | ||
} | ||
|
||
=item C<get_formatted_tp > | ||
Return string with all lines in the TPLine object. | ||
=cut | ||
|
||
sub get_formatted_tp { | ||
my $self = shift; | ||
my %lines; | ||
my $out_str; | ||
|
||
if ( $self->has_tp_lines() ) { | ||
%lines = %{ $self->get_tp_lines() }; | ||
my @sorted_line_numbers = sort { $a <=> $b } keys %lines; | ||
foreach my $line_key (@sorted_line_numbers) { | ||
$out_str .= $self->get_tp_taxid() . "\t"; | ||
$out_str .= $self->get_assembly_group() . "\t"; | ||
$out_str .= $self->get_assembly_unit() . "\t"; | ||
$out_str .= $lines{$line_key}->get_chromosome() . "\t"; | ||
$out_str .= $self->get_tp_tpf_type() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_prefix() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_suffix() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_prefix_orientation() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_suffix_orientation() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_prefix_last_base() . "\t"; | ||
$out_str .= $lines{$line_key}->get_accession_suffix_first_base() . "\t"; | ||
$out_str .= $lines{$line_key}->get_comment() . "\n"; | ||
} | ||
} | ||
return $out_str; | ||
} | ||
|
||
### | ||
1; #do not remove | ||
### | ||
|
||
=pod | ||
=back | ||
=head1 LICENSE | ||
Same as Perl. | ||
=head1 AUTHORS | ||
Surya Saha <[email protected]> | ||
=cut |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package Bio::GenomeUpdate::TP::TPLine; | ||
use strict; | ||
use warnings; | ||
|
||
use Moose; | ||
use MooseX::FollowPBP; | ||
use Moose::Util::TypeConstraints; | ||
|
||
use Data::Dumper;#for debugging | ||
|
||
=head1 NAME | ||
TP - Trim point lines for NCBI GRC pipeline with instructions used to generate a Accessioned Golden Path (AGP) file | ||
=head1 SYNOPSIS | ||
my $variable = TPLine->new(); | ||
=head1 DESCRIPTION | ||
This class stores information for transitions between TPF components including chromosome, accessions and coordinates for generating a trim point (TP) file. The trim point file specifies the extent of a component in the AGP file. | ||
=head2 Methods | ||
=over | ||
=cut | ||
|
||
=item C<set_chromosome ( $chromosome )> | ||
Sets the chromosome. Valid values for Solanum lycopersicum are 1-12 and Un (required). | ||
=item C<get_chromosome > | ||
Gets the chromosome. | ||
=cut | ||
|
||
subtype 'TPChromosome', | ||
as 'Str', | ||
where { ( $_ >= 1 && $_ <=12 ) || ( $_ eq "Un" )},#does NOT work. need to do -> if int check 1-12, if str check Un | ||
message { "The string, $_, was not a valid chromosome number. Valid values for Solanum lycopersicum are 1-12 and Un." }; | ||
has 'chromosome' => ( isa => 'TPChromosome', is => 'rw', required => 1, clearer => 'clear_chromosome' ); | ||
|
||
has 'accession' => ( isa => 'Str', is => 'rw', required => 1, clearer => 'clear_accession' ); | ||
|
||
subtype 'PositiveInt', | ||
as 'Int', | ||
where { $_ > 0 }, | ||
message { "The string, $_, was not a positive coordinate" }; | ||
has 'accession_prefix_first_or_last_base' => ( isa => 'PositiveInt', is => 'rw', required => 1, clearer => 'clear_accession_prefix_first_or_last_base' ); | ||
|
||
subtype 'TPComment', | ||
as 'Str', | ||
where { (scalar $_) >= 25 },#does not work!! | ||
message { "The string, $_, was shorter than the minimum length of 25 characters." }; | ||
has 'comment' => ( isa => 'TPComment', is => 'rw', required => 1, clearer => 'clear_comment' ); | ||
|
||
### | ||
1; #do not remove | ||
### | ||
|
||
=pod | ||
=back | ||
=head1 LICENSE | ||
Same as Perl. | ||
=head1 AUTHORS | ||
Surya Saha <[email protected]> | ||
=cut |