diff --git a/lib/Bio/GenomeUpdate/TP.pm b/lib/Bio/GenomeUpdate/TP.pm
--- a/lib/Bio/GenomeUpdate/TP.pm
+++ b/lib/Bio/GenomeUpdate/TP.pm
@@ -0,0 +1,218 @@
+package Bio::GenomeUpdate::TP;
+use strict;
+use warnings;
+
+use Moose;
+use MooseX::FollowPBP;
+use Moose::Util::TypeConstraints;
+use Bio::GenomeUpdate::TP::TPLine;
+
+=head1 NAME
+
+    TP - Trim point information for NCBI GRC pipeline with instructions used to generate an Accessioned Golden Path (AGP) file
+
+=head1 SYNOPSIS
+
+    my $tp = Bio::GenomeUpdate::TP->new();
+
+=head1 DESCRIPTION
+
+This class stores information for transitions between TPF components including TPF type, taxonomy and assembly and generates a trim point (TP) file. The trim point file specifies the extent of a component to be used in the AGP file.
+
+=head2 Methods
+
+=over
+
+=item C<set_tp_taxid ( $taxid )>
+
+Sets the taxonomy identifier for the TP file, e.g. 4081 for Solanum lycopersicum.
+
+=item C<get_tp_taxid>
+
+Gets the taxonomy identifier for the TP file.
+
+=cut
+
+has 'tp_taxid' => (
+    isa      => 'Str',
+    is       => 'rw',
+    default  => '4081',
+    required => 1,
+    clearer  => 'clear_tp_taxid'
+);
+
+=item C<set_assembly_group ( $assembly_group )>
+
+Sets the assembly_group (required, defaults to TGP).
+
+=item C<get_assembly_group>
+
+Gets the assembly_group.
+
+=cut
+
+has 'assembly_group' => (
+    isa      => 'Str',
+    is       => 'rw',
+    default  => 'TGP',
+    required => 1,
+    clearer  => 'clear_assembly_group'
+);
+
+=item C<set_assembly_unit ( $assembly_unit )>
+
+Sets the assembly_unit (required, defaults to Primary).
+
+=item C<get_assembly_unit>
+
+Gets the assembly_unit.
+
+=cut
+
+has 'assembly_unit' => (
+    isa      => 'Str',
+    is       => 'rw',
+    default  => 'Primary',
+    required => 1,
+    clearer  => 'clear_assembly_unit'
+);
+
+=item C<set_tp_tpf_type ( $tpf_type )>
+
+Sets the TPF type. Valid values are chromosome and contig (latter used for unlocalized or unplaced scaffolds, required).
+
+=item C<get_tp_tpf_type>
+
+Gets the TPF type.
+
+=item C<clear_tp_tpf_type>
+
+Clears the TPF type. C<clear_chromosome> is kept as an alias for this clearer.
+
+=cut
+
+subtype 'TPTPFType',
+    as 'Str',
+    where { $_ eq "chromosome" || $_ eq "contig" },
+    message { "The string, $_, was not a valid TPF type. See http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/overlap/ specification link" };
+
+has 'tp_tpf_type' => (
+    isa      => 'TPTPFType',
+    is       => 'rw',
+    default  => 'chromosome',
+    required => 1,
+    clearer  => 'clear_tp_tpf_type'
+);
+
+# Alias for clear_tp_tpf_type, kept so callers using the clear_chromosome name keep working.
+sub clear_chromosome {
+    my $self = shift;
+    return $self->clear_tp_tpf_type();
+}
+
+subtype 'TPLine',
+    as 'Bio::GenomeUpdate::TP::TPLine',
+    message { "The object was not a TP line" };
+
+=item C<set_tp_lines ( $hashref )>
+
+Sets the TP lines as a hash reference of TPLine objects keyed by line number.
+
+=item C<get_tp_lines>
+
+Gets the hash reference of TPLine objects keyed by line number.
+
+=cut
+
+has 'tp_lines' => (
+    isa       => 'HashRef[TPLine]',
+    is        => 'rw',
+    predicate => 'has_tp_lines',
+    clearer   => 'clear_tp_lines'
+);
+
+=item C<add_line_to_end ( $tp_line )>
+
+Adds a TPLine object after the current last line. The new line is stored under the key one greater than the value returned by get_number_of_lines.
+
+=cut
+
+sub add_line_to_end {
+    my $self        = shift;
+    my $line_to_add = shift;
+
+    my %lines = $self->has_tp_lines() ? %{ $self->get_tp_lines() } : ();
+
+    # Key is just the index or line number.
+    $lines{ $self->get_number_of_lines() + 1 } = $line_to_add;
+
+    return $self->set_tp_lines( \%lines );
+}
+
+=item C<get_number_of_lines>
+
+Returns the highest line number stored in the TP object, or 0 if it holds no lines.
+
+=cut
+
+sub get_number_of_lines {
+    my $self = shift;
+
+    return 0 unless $self->has_tp_lines();
+
+    my @sorted_line_numbers = sort { $a <=> $b } keys %{ $self->get_tp_lines() };
+
+    # tp_lines may be set but empty; report 0 instead of undef in that case.
+    return @sorted_line_numbers ? $sorted_line_numbers[-1] : 0;
+}
+
+=item C<get_formatted_tp>
+
+Returns a string with one tab-delimited line per TPLine object, ordered by line number. Returns an empty string if there are no lines.
+
+=cut
+
+sub get_formatted_tp {
+    my $self    = shift;
+    my $out_str = '';
+
+    return $out_str unless $self->has_tp_lines();
+
+    my %lines = %{ $self->get_tp_lines() };
+    foreach my $line_key ( sort { $a <=> $b } keys %lines ) {
+        my $line = $lines{$line_key};
+
+        # Only accessors that TPLine actually defines are used here.
+        # NOTE(review): confirm the column layout against the GRC TP specification.
+        $out_str .= join(
+            "\t",
+            $self->get_tp_taxid(),
+            $self->get_assembly_group(),
+            $self->get_assembly_unit(),
+            $line->get_chromosome(),
+            $self->get_tp_tpf_type(),
+            $line->get_accession(),
+            $line->get_accession_prefix_first_or_last_base(),
+            $line->get_comment()
+        ) . "\n";
+    }
+    return $out_str;
+}
+
+###
+1;    #do not remove
+###
+
+=pod
+
+=back
+
+=head1 LICENSE
+
+Same as Perl.
+
+=head1 AUTHORS
+
+    Surya Saha
+
+=cut
diff --git a/lib/Bio/GenomeUpdate/TP/TPLine.pm b/lib/Bio/GenomeUpdate/TP/TPLine.pm
--- a/lib/Bio/GenomeUpdate/TP/TPLine.pm
+++ b/lib/Bio/GenomeUpdate/TP/TPLine.pm
@@ -0,0 +1,128 @@
+package Bio::GenomeUpdate::TP::TPLine;
+use strict;
+use warnings;
+
+use Moose;
+use MooseX::FollowPBP;
+use Moose::Util::TypeConstraints;
+
+use Data::Dumper;    #for debugging
+
+=head1 NAME
+
+    TPLine - Trim point lines for NCBI GRC pipeline with instructions used to generate an Accessioned Golden Path (AGP) file
+
+=head1 SYNOPSIS
+
+    my $tp_line = Bio::GenomeUpdate::TP::TPLine->new(%attributes);
+
+=head1 DESCRIPTION
+
+This class stores information for transitions between TPF components including chromosome, accessions and coordinates for generating a trim point (TP) file. The trim point file specifies the extent of a component in the AGP file.
+
+=head2 Methods
+
+=over
+
+=item C<set_chromosome ( $chromosome )>
+
+Sets the chromosome. Valid values for Solanum lycopersicum are 1-12 and Un (required).
+
+=item C<get_chromosome>
+
+Gets the chromosome.
+
+=cut
+
+subtype 'TPChromosome',
+    as 'Str',
+    # Only compare numerically once the value is known to be all digits, so that
+    # "Un" and other non-numeric strings never reach the numeric operators.
+    where { $_ eq "Un" || ( /\A\d+\z/ && $_ >= 1 && $_ <= 12 ) },
+    message { "The string, $_, was not a valid chromosome number. Valid values for Solanum lycopersicum are 1-12 and Un." };
+
+has 'chromosome' => (
+    isa      => 'TPChromosome',
+    is       => 'rw',
+    required => 1,
+    clearer  => 'clear_chromosome'
+);
+
+=item C<set_accession ( $accession )>
+
+Sets the accession (required).
+
+=item C<get_accession>
+
+Gets the accession.
+
+=cut
+
+has 'accession' => (
+    isa      => 'Str',
+    is       => 'rw',
+    required => 1,
+    clearer  => 'clear_accession'
+);
+
+=item C<set_accession_prefix_first_or_last_base ( $coordinate )>
+
+Sets the accession prefix first or last base, a positive integer coordinate (required).
+
+=item C<get_accession_prefix_first_or_last_base>
+
+Gets the accession prefix first or last base.
+
+=cut
+
+subtype 'PositiveInt',
+    as 'Int',
+    where { $_ > 0 },
+    message { "The string, $_, was not a positive coordinate" };
+
+has 'accession_prefix_first_or_last_base' => (
+    isa      => 'PositiveInt',
+    is       => 'rw',
+    required => 1,
+    clearer  => 'clear_accession_prefix_first_or_last_base'
+);
+
+=item C<set_comment ( $comment )>
+
+Sets the comment, which must be at least 25 characters long (required).
+
+=item C<get_comment>
+
+Gets the comment.
+
+=cut
+
+subtype 'TPComment',
+    as 'Str',
+    where { length($_) >= 25 },    # length() counts characters; scalar() would return the string itself
+    message { "The string, $_, was shorter than the minimum length of 25 characters." };
+
+has 'comment' => (
+    isa      => 'TPComment',
+    is       => 'rw',
+    required => 1,
+    clearer  => 'clear_comment'
+);
+
+###
+1;    #do not remove
+###
+
+=pod
+
+=back
+
+=head1 LICENSE
+
+Same as Perl.
+
+=head1 AUTHORS
+
+    Surya Saha
+
+=cut
\ No newline at end of file