Skip to content

Commit

Permalink
create blast tab file
Browse files Browse the repository at this point in the history
  • Loading branch information
MFlores2021 committed Mar 12, 2023
1 parent f78b31f commit 51326ee
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 2 deletions.
100 changes: 100 additions & 0 deletions VD/bin/Util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,106 @@ $align
return $out_table;
}

=head2
plot_result_tab -- plot the result in tab format
=cut
sub plot_result_tab
{
my ($known_identified, $known_contig_blast_table, $output, $type, $mode, $sample) = @_;

# parse blast result to get contig_length and match length
# push contig and match info to hash
# key 1: contig # reference virus ID
# key 2: length
# match
# iden
my %contig_match;
chomp($known_contig_blast_table);
my @bs = split(/\n/, $known_contig_blast_table);
foreach my $bs (@bs) {
my @line = split(/\t/, $bs);
if ($line[13] =~ m/(\d+)\/(\d+)\(\d+%\)/) {
$contig_match{$line[0]."#".$line[2]}{'match'} = $1;
$contig_match{$line[0]."#".$line[2]}{'length'} = $2;
$contig_match{$line[0]."#".$line[2]}{'iden'} = $1/$2;
}
else {
die "[ERR]CONTIG $bs\n";
}
}

my $OUTPUT_DIR = "$output/$type"."_references";
mkdir $OUTPUT_DIR unless -e $OUTPUT_DIR;
my $output_file = "$output/$type.tab";

my %all_hits;
my %out;
my %index;

my $out_table = '';
my $line_number=0;

# save contig and virus to hash
# key: contig ID
# value: virus accession ID
my %contig_virus;
chomp($known_identified);
my @a = split(/\n/, $known_identified);
foreach my $line (@a)
{
next if $line =~ m/^#/;
$line_number++;

my @ta = split(/\t/, $line);
# 0 [col1] - virus seq ID
# 1 [col2] - virus seq length
# 2 [col3] - virus seq cover length
# 3 [col4] - coverage (cover_length/seq_length: col3/col2)
# 4 [col5] - contigs
# 5 [col6] - contig number
# 6 [col7] - raw depth (read depth, from pileup file)
# 7 [col8] - normalized depth
# 8 [col9] - genus
# 9 [col10]- desc

die "[ERR]Num of cols: $line\n" unless scalar @ta == 10;
my ($ref, $len, $cov_base, $coverage, $contig, $contig_num, $raw_depth, $norm_depth, $genus, $desc) = @ta;

my ($match, $len_c, $max_iden, $min_iden) = (0, 0, 0, 100);
my @c = split(/,/, $ta[4]);
foreach my $cid (@c) {
my $key = $cid."#".$ref;
$match += $contig_match{$key}{'match'};
$len_c += $contig_match{$key}{'length'};
my $iden_c = $contig_match{$key}{'iden'};
$max_iden = $iden_c if $iden_c > $max_iden;
$min_iden = $iden_c if $iden_c < $min_iden;
}
my $iden = sprintf("%.2f", $match/$len_c*100);
$max_iden = sprintf("%.2f", $max_iden*100);
$min_iden = sprintf("%.2f", $min_iden*100);
$iden = 100 if $iden == 100;
$max_iden = 100 if $max_iden == 100;
$min_iden = 100 if $min_iden == 100;

$all_hits{$ref} = 1;
$index{$ref} = $line_number;

my $link = $type."_references/".$ref.".html";
$coverage = sprintf("%.3f", $coverage) * 100;
$raw_depth = 'NA' if $mode == 1;
$norm_depth = 'NA' if $mode == 1;
$raw_depth = sprintf("%.1f", $raw_depth) if $mode == 2;
$norm_depth = sprintf("%.1f", $norm_depth) if $mode == 2;

$out_table .= $sample . "\t". $type . "\t" . $ref ."\t" . $len ."\t" . $cov_base . "\t" . $coverage ."\t" . $contig_num ."\t" . $raw_depth ."\t" . $norm_depth ."\t" . $iden ."\t" . $max_iden ."\t" . $min_iden ."\t" . $genus ."\t" . $desc ."\n";
}

$out_table = "Sample\tBlast\tReference\t Length \t Coverage \t Coverage(%) \t contig \t Depth \t Depth (Norm) \t Identity \t Iden Max \t Iden Min \t Genus \t Description \n" . $out_table;

# output table to file
save_file($out_table, $output_file);
}

=head2
srna_range -- only get selected size of sRNA reads for next analysis
Expand Down
6 changes: 4 additions & 2 deletions VD/bin/virus_identify.pl
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,8 @@
my $known_num = 0; ($known_num, $known_identified) = arrange_col2($known_identified);

if ( length($known_identified) > 1 ) {
Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode);
Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode);
Util::plot_result_tab($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode, $sample);
} else {
unlink "$sample_dir/$sample_base.blastn.xls" if -e "$sample_dir/$sample_base.blastn.xls";
}
Expand Down Expand Up @@ -401,7 +402,8 @@
my $novel_num = 0; ($novel_num, $novel_identified) = arrange_col2($novel_identified);

if ( length($novel_identified) > 1 ) {
Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
Util::plot_result_tab($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
} else {
unlink "$sample_dir/$sample_base.blastx.xls" if -e "$sample_dir/$sample_base.blastx.xls";
}
Expand Down
35 changes: 35 additions & 0 deletions perl/analysis.pl
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,11 @@
if($spike ne 'NA'){
merge_spike_files($dir);
}
# Get blast
my $fileBlast;
if($fileBlast ne 'NA'){
# merge_blast($dir);
}

# Print
print_summary($dir,"report_sRNA_trim.txt","control.tsv","spikeSummary.txt", "sRNA_length.txt",$spike,$controlfile, @array_files);
Expand Down Expand Up @@ -477,6 +482,32 @@ sub print_summary {
}
}

#Get blast results
my %dataFileBlast;
my $fileBlast;

if (-e -s $fileBlast){

open FILE5, "$fileBlast" or warn;

while (my $line1=<FILE5>) {
# chomp;
my @field = split /\t/, $line1;
my $sample = trim($field[0]);
$dataFileBlast{$sample}{'reference'} = trim($field[3]);
$dataFileBlast{$sample}{'description'} = trim($field[13]);
$dataFileBlast{$sample}{'genus'} = trim($field[12]);
$dataFileBlast{$sample}{'type'} = trim($field[2]);
$dataFileBlast{$sample}{'lenCoverage'} = trim($field[5]);
$dataFileBlast{$sample}{'perCoverage'} = trim($field[6]);
$dataFileBlast{$sample}{'depth'} = trim($field[8]);
$dataFileBlast{$sample}{'depthNor'} = trim($field[9]);
$dataFileBlast{$sample}{'iden'} = trim($field[10]);
$dataFileBlast{$sample}{'idenMax'} = trim($field[11]);
$dataFileBlast{$sample}{'idenMin'} = trim($field[12]);
}
}

### Run summary
## Header
open my $fh1, '>', catfile($dir,"Summary.tsv") or warn "couldn't open: $!";
Expand Down Expand Up @@ -613,3 +644,7 @@ sub get_control_cutoff{
my $cutoff = $av + ($std * $const);
return ($cutoff,$av,$std);
}

sub merge_blast {

}

0 comments on commit 51326ee

Please sign in to comment.