Skip to content

Commit

Permalink
added functionality listed in #45. Need to add tests and fix validati…
Browse files Browse the repository at this point in the history
…on checks
  • Loading branch information
suryasaha committed May 11, 2015
1 parent dcbe38a commit 0db9c00
Show file tree
Hide file tree
Showing 2 changed files with 256 additions and 0 deletions.
181 changes: 181 additions & 0 deletions lib/Bio/GenomeUpdate/TP.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package Bio::GenomeUpdate::TP;
use strict;
use warnings;

use Moose;
use MooseX::FollowPBP;
use Moose::Util::TypeConstraints;
use Bio::GenomeUpdate::TP::TPLine;

=head1 NAME
SP - Trim point information for NCBI GRC pipeline with instructions used to generate a Accessioned Golden Path (AGP) file
=head1 SYNOPSIS
my $variable = SwitchPoint->new();
=head1 DESCRIPTION
This class stores information for transitions between TPF components including TPF type, taxonomy and assembly and generates a trim point (TP) file. The trim point file specifies the extent of a component to be used in the AGP file.
=head2 Methods
=over
=item C<set_taxid ( $taxonomy_id )>
Sets the taxonomy identifier for the SP file, e.g. 4081 for Solanum lycopersicum.
=item C<get_taxid>
Gets the taxonomy identifier for the SP file.
=cut

has 'tp_taxid' => (
isa => 'Str',
is => 'rw',
default => '4081',
required => 1,
clearer => 'clear_tp_taxid'
);

=item C<set_assembly_group ( $organism_string )>
Sets the assembly_group (required).
=item C<get_assembly_group>
Gets the assembly_group.
=cut

has 'assembly_group' =>
( isa => 'Str', is => 'rw', default => 'TGP', required => 1, clearer => 'clear_assembly_group' );

=item C<set_assembly_unit ( $organism_string )>
Sets the assembly_unit (required).
=item C<get_assembly_unit>
Gets the assembly_unit.
=cut

has 'assembly_unit' =>
( isa => 'Str', is => 'rw', default => 'Primary', required => 1, clearer => 'clear_assembly_unit' );

=item C<set_tpf_type ( $tpf_type )>
Sets the TPF type. Valid values are chromosome and contig (latter used for unlocalized or unplaced scaffolds, required).
=item C<get_tpf_type >
Gets the TPF type.
=cut

subtype 'TPTPFType', as 'Str', where { $_ eq "chromosome" || $_ eq "contig" },
message { "The string, $_, was not a valid TPF type. See http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/overlap/ specification link" };

has 'tp_tpf_type' => ( isa => 'TPTPFType', is => 'rw', default => 'chromosome', required => 1, clearer => 'clear_chromosome' );

subtype 'TPLine',
as 'Bio::GenomeUpdate::TP::TPLine',
message { "The object was not a TP line" };

has 'tp_lines' => (
isa => 'HashRef[TPLine]',
is => 'rw',
predicate => 'has_tp_lines',
clearer => 'clear_tp_lines'
);

=item C<add_line_to_end >
Add a TPLine object.
=cut

sub add_line_to_end {
my $self = shift;
my $line_to_add = shift;
my %lines;
if ( $self->has_tp_lines() ) {
%lines = %{ $self->get_tp_lines() };
}
my $last_line = $self->get_number_of_lines();
$lines{ $last_line + 1 } = $line_to_add;#key is just the index or line number
$self->set_tp_lines( {%lines} );
}

=item C<get_number_of_lines >
Return number of lines in the TPLine object.
=cut

sub get_number_of_lines {
my $self = shift;
my %lines;
if ( $self->has_tp_lines() ) {
%lines = %{ $self->get_tp_lines() };
my @sorted_line_numbers = sort { $a <=> $b } keys %lines;
return $sorted_line_numbers[-1];
}
else {
return 0;
}
}

=item C<get_formatted_tp >
Return string with all lines in the TPLine object.
=cut

sub get_formatted_tp {
my $self = shift;
my %lines;
my $out_str;

if ( $self->has_tp_lines() ) {
%lines = %{ $self->get_tp_lines() };
my @sorted_line_numbers = sort { $a <=> $b } keys %lines;
foreach my $line_key (@sorted_line_numbers) {
$out_str .= $self->get_tp_taxid() . "\t";
$out_str .= $self->get_assembly_group() . "\t";
$out_str .= $self->get_assembly_unit() . "\t";
$out_str .= $lines{$line_key}->get_chromosome() . "\t";
$out_str .= $self->get_tp_tpf_type() . "\t";
$out_str .= $lines{$line_key}->get_accession_prefix() . "\t";
$out_str .= $lines{$line_key}->get_accession_suffix() . "\t";
$out_str .= $lines{$line_key}->get_accession_prefix_orientation() . "\t";
$out_str .= $lines{$line_key}->get_accession_suffix_orientation() . "\t";
$out_str .= $lines{$line_key}->get_accession_prefix_last_base() . "\t";
$out_str .= $lines{$line_key}->get_accession_suffix_first_base() . "\t";
$out_str .= $lines{$line_key}->get_comment() . "\n";
}
}
return $out_str;
}

###
1; #do not remove
###

=pod
=back
=head1 LICENSE
Same as Perl.
=head1 AUTHORS
Surya Saha <[email protected]>
=cut
75 changes: 75 additions & 0 deletions lib/Bio/GenomeUpdate/TP/TPLine.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package Bio::GenomeUpdate::TP::TPLine;
use strict;
use warnings;

use Moose;
use MooseX::FollowPBP;
use Moose::Util::TypeConstraints;

use Data::Dumper;#for debugging

=head1 NAME
TP - Trim point lines for NCBI GRC pipeline with instructions used to generate a Accessioned Golden Path (AGP) file
=head1 SYNOPSIS
my $variable = TPLine->new();
=head1 DESCRIPTION
This class stores information for transitions between TPF components including chromosome, accessions and coordinates for generating a trim point (TP) file. The trim point file specifies the extent of a component in the AGP file.
=head2 Methods
=over
=cut

=item C<set_chromosome ( $chromosome )>
Sets the chromosome. Valid values for Solanum lycopersicum are 1-12 and Un (required).
=item C<get_chromosome >
Gets the chromosome.
=cut

subtype 'TPChromosome',
as 'Str',
where { ( $_ >= 1 && $_ <=12 ) || ( $_ eq "Un" )},#does NOT work. need to do -> if int check 1-12, if str check Un
message { "The string, $_, was not a valid chromosome number. Valid values for Solanum lycopersicum are 1-12 and Un." };
has 'chromosome' => ( isa => 'TPChromosome', is => 'rw', required => 1, clearer => 'clear_chromosome' );

has 'accession' => ( isa => 'Str', is => 'rw', required => 1, clearer => 'clear_accession' );

subtype 'PositiveInt',
as 'Int',
where { $_ > 0 },
message { "The string, $_, was not a positive coordinate" };
has 'accession_prefix_first_or_last_base' => ( isa => 'PositiveInt', is => 'rw', required => 1, clearer => 'clear_accession_prefix_first_or_last_base' );

subtype 'TPComment',
as 'Str',
where { (scalar $_) >= 25 },#does not work!!
message { "The string, $_, was shorter than the minimum length of 25 characters." };
has 'comment' => ( isa => 'TPComment', is => 'rw', required => 1, clearer => 'clear_comment' );

###
1; #do not remove
###

=pod
=back
=head1 LICENSE
Same as Perl.
=head1 AUTHORS
Surya Saha <[email protected]>
=cut

0 comments on commit 0db9c00

Please sign in to comment.