Skip to content

Commit

Permalink
Merge pull request #4764 from solgenomics/topic/update_load_genotypin…
Browse files Browse the repository at this point in the history
…g_plates_script

update bin/load_genotyping_plates.pl script
  • Loading branch information
isaak authored Dec 22, 2023
2 parents b1613d0 + be96355 commit 48b081e
Showing 1 changed file with 54 additions and 70 deletions.
124 changes: 54 additions & 70 deletions bin/load_genotyping_plates.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
=head1 SYNOPSIS
load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -b [breeding program name] -u [username] -l location [-t]
load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -b [breeding program name] -u [username] -g genotyping_project -l location [-t]
=head1 COMMAND-LINE OPTIONS
Expand Down Expand Up @@ -43,43 +43,17 @@ =head1 COMMAND-LINE OPTIONS
year
=item -g
genotyping project name (the genotyping project with which this plate is associated)
=back
=head2 DESCRIPTION
Load genotyping plate layouts for many plates
Minimal metadata requirements are
=over 3
=item
trial_name
=item
trial_description (can also be built from the trial name, type, year, location)
=item
trial_type (read from an input file)
=item
trial_location geo_description (must be in the database - nd_geolocation.description - can be read from the metadata file)
=item
year (can be read from the metadata file)
=item
breeding_program (provide with option -b )
=back
The infile is an Excel file (.xls format) with the following columns:
The infile is a tab-delimited file with the following columns:
=over 3
Expand Down Expand Up @@ -137,12 +111,13 @@ =head2 AUTHORS

use CXGN::Trial::TrialCreate;

my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $breeding_program_name, $metadata_file, $location, $year, $format );
my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $breeding_program_name, $genotyping_project, $location, $year, $format );
GetOptions(
'i=s' => \$infile,
'b=s' => \$breeding_program_name,
'l=s' => \$location,
'y=s' => \$year,
'g=s' => \$genotyping_project,
't' => \$test,
'f=s' => \$format,
'user|u=s' => \$username,
Expand Down Expand Up @@ -188,8 +163,20 @@ =head2 AUTHORS
if (!$breeding_program) { die "Breeding program $breeding_program_name does not exist in the database. Check your input \n"; }
print "Found breeding program $breeding_program_name " . $breeding_program->project_id . "\n";

# Check that the genotyping project given with -g exists.
#
# The project row is looked up by name only; the project-type condition
# inside the search hash is commented out, so it is not applied.
# NOTE(review): presumably a same-named project of another type would also
# be accepted here - confirm whether the type filter should be restored.
my $genotyping_project_row = $schema->resultset("Project::Project")->find(
{
'name' => $genotyping_project,
# 'type.name' => 'genotyping_project_name',
} );

# Abort before any plates are processed if no project row was found.
if (! $genotyping_project_row) { die "Please enter a valid genotyping project. You may have to create it before running this script."; }

# Keep the project id; it is passed as genotyping_project_id when the
# plates are created with CXGN::Trial::TrialCreate further down.
my $genotyping_project_id = $genotyping_project_row->project_id();

if (!$format) {
die "Please specify format (-f) as CIP. No other format is supported right now\n";
$format = "standard";
}

my $sp_person_id= CXGN::People::Person->get_person_by_username($dbh, $username);
Expand Down Expand Up @@ -220,22 +207,17 @@ =head2 AUTHORS
my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @trial_columns ;
delete $phen_params{''};

my @traits = (keys %phen_params) ;
print "Found traits " . Dumper(\%phen_params) . "\n" ;
#foreach my $trait_string ( keys %phen_params ) {
# my ($trait_name, $trait_accession) = split "|", $col_header ;
# my ($db_name, $dbxref_accession) = split ":" , $trait_accession ;
#}


my %trial_design_hash; #multi-level hash of hashes of hashrefs
my %phen_data_by_trial; #

#plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
# CIP format:
# Item Plate ID Intertek plate/well ID CIP Number Breeder ID

# standard format:
# Item Plate ID Intertek plate/well ID accession name Breeder ID



# CIP format:
## Item Plate ID Intertek plate/well ID CIP Number Breeder ID

my $operator;

Expand Down Expand Up @@ -280,37 +262,36 @@ =head2 AUTHORS
$trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
$trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;
}
else {
$accession = $spreadsheet->value_at($plot_name, "accession_name");
$plot_number = $spreadsheet->value_at($plot_name, "plot_number");
$block_number = $spreadsheet->value_at($plot_name, "block_number");
$trial_name = $spreadsheet->value_at($plot_name, "trial_name");
$is_a_control = $spreadsheet->value_at($plot_name, "is_a_control");
$rep_number = $spreadsheet->value_at($plot_name, "rep_number");
$range_number = $spreadsheet->value_at($plot_name, "range_number");
$row_number = $spreadsheet->value_at($plot_name, "row_number");
$col_number = $spreadsheet->value_at($plot_name, "col_number");

if (!$plot_number) {
$plot_number = 1;
use List::Util qw(max);
my @keys = (keys %{ $trial_design_hash{$trial_name} } );
my $max = max( @keys );
if ( $max ) {
$max++;
$plot_number = $max ;
}
else {

$accession = $spreadsheet->value_at($plot_name, "accession name");
$plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
$trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
$operator = $spreadsheet->value_at($plot_name, "Breeder ID");

if (! $accession) {
print STDERR "Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
next;
} # some plates have empty wells - ignore

if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
$row_number = $1;
$col_number = $2;
}

$is_a_control = 0;
if ($accession eq "") {
$is_a_control = 1;
}

if (! $row_number ) { die "Weird well number: $plot_number\n"; }

$trial_design_hash{$trial_name}{$plot_number}->{plot_number} = $plot_number;
$trial_design_hash{$trial_name}{$plot_number}->{stock_name} = $accession;
$trial_design_hash{$trial_name}{$plot_number}->{plot_name} = $plot_name;
$trial_design_hash{$trial_name}{$plot_number}->{block_number} = $block_number;
$trial_design_hash{$trial_name}{$plot_number}->{rep_number} = $rep_number;
$trial_design_hash{$trial_name}{$plot_number}->{is_a_control} = $is_a_control;
$trial_design_hash{$trial_name}{$plot_number}->{range_number} = $range_number;
$trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
$trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;

}

# Add the plot name into the multi trial data hashref of hashes
Expand All @@ -337,6 +318,9 @@ =head2 AUTHORS
#######

my $coderef= sub {


print STDERR "Working with genotyping project name $genotyping_project\n";
foreach my $trial_name (keys %multi_trial_data ) {

my $trial_create = CXGN::Trial::TrialCreate->new({
Expand All @@ -355,7 +339,7 @@ =head2 AUTHORS
genotyping_user_id => $sp_person_id,
genotyping_plate_format => 96,
genotyping_plate_sample_type => 'accession',

genotyping_project_id => $genotyping_project_id

});
try {
Expand Down

0 comments on commit 48b081e

Please sign in to comment.