Skip to content

Commit

Permalink
Merge pull request #35 from MFlores2021/vdw0.96
Browse files Browse the repository at this point in the history
merge git with vdw0.96
  • Loading branch information
MFlores2021 authored Jun 17, 2023
2 parents 5d477c4 + 51326ee commit 846933e
Show file tree
Hide file tree
Showing 5 changed files with 158 additions and 35 deletions.
100 changes: 100 additions & 0 deletions VD/bin/Util.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,106 @@ $align
return $out_table;
}

=head2
plot_result_tab -- plot the result in tab format
=cut
sub plot_result_tab
{
my ($known_identified, $known_contig_blast_table, $output, $type, $mode, $sample) = @_;

# parse blast result to get contig_length and match length
# push contig and match info to hash
# key 1: contig # reference virus ID
# key 2: length
# match
# iden
my %contig_match;
chomp($known_contig_blast_table);
my @bs = split(/\n/, $known_contig_blast_table);
foreach my $bs (@bs) {
my @line = split(/\t/, $bs);
if ($line[13] =~ m/(\d+)\/(\d+)\(\d+%\)/) {
$contig_match{$line[0]."#".$line[2]}{'match'} = $1;
$contig_match{$line[0]."#".$line[2]}{'length'} = $2;
$contig_match{$line[0]."#".$line[2]}{'iden'} = $1/$2;
}
else {
die "[ERR]CONTIG $bs\n";
}
}

my $OUTPUT_DIR = "$output/$type"."_references";
mkdir $OUTPUT_DIR unless -e $OUTPUT_DIR;
my $output_file = "$output/$type.tab";

my %all_hits;
my %out;
my %index;

my $out_table = '';
my $line_number=0;

# save contig and virus to hash
# key: contig ID
# value: virus accession ID
my %contig_virus;
chomp($known_identified);
my @a = split(/\n/, $known_identified);
foreach my $line (@a)
{
next if $line =~ m/^#/;
$line_number++;

my @ta = split(/\t/, $line);
# 0 [col1] - virus seq ID
# 1 [col2] - virus seq length
# 2 [col3] - virus seq cover length
# 3 [col4] - coverage (cover_length/seq_length: col3/col2)
# 4 [col5] - contigs
# 5 [col6] - contig number
# 6 [col7] - raw depth (read depth, from pileup file)
# 7 [col8] - normalized depth
# 8 [col9] - genus
# 9 [col10]- desc

die "[ERR]Num of cols: $line\n" unless scalar @ta == 10;
my ($ref, $len, $cov_base, $coverage, $contig, $contig_num, $raw_depth, $norm_depth, $genus, $desc) = @ta;

my ($match, $len_c, $max_iden, $min_iden) = (0, 0, 0, 100);
my @c = split(/,/, $ta[4]);
foreach my $cid (@c) {
my $key = $cid."#".$ref;
$match += $contig_match{$key}{'match'};
$len_c += $contig_match{$key}{'length'};
my $iden_c = $contig_match{$key}{'iden'};
$max_iden = $iden_c if $iden_c > $max_iden;
$min_iden = $iden_c if $iden_c < $min_iden;
}
my $iden = sprintf("%.2f", $match/$len_c*100);
$max_iden = sprintf("%.2f", $max_iden*100);
$min_iden = sprintf("%.2f", $min_iden*100);
$iden = 100 if $iden == 100;
$max_iden = 100 if $max_iden == 100;
$min_iden = 100 if $min_iden == 100;

$all_hits{$ref} = 1;
$index{$ref} = $line_number;

my $link = $type."_references/".$ref.".html";
$coverage = sprintf("%.3f", $coverage) * 100;
$raw_depth = 'NA' if $mode == 1;
$norm_depth = 'NA' if $mode == 1;
$raw_depth = sprintf("%.1f", $raw_depth) if $mode == 2;
$norm_depth = sprintf("%.1f", $norm_depth) if $mode == 2;

$out_table .= $sample . "\t". $type . "\t" . $ref ."\t" . $len ."\t" . $cov_base . "\t" . $coverage ."\t" . $contig_num ."\t" . $raw_depth ."\t" . $norm_depth ."\t" . $iden ."\t" . $max_iden ."\t" . $min_iden ."\t" . $genus ."\t" . $desc ."\n";
}

$out_table = "Sample\tBlast\tReference\t Length \t Coverage \t Coverage(%) \t contig \t Depth \t Depth (Norm) \t Identity \t Iden Max \t Iden Min \t Genus \t Description \n" . $out_table;

# output table to file
save_file($out_table, $output_file);
}

=head2
srna_range -- only get selected size of sRNA reads for next analysis
Expand Down
1 change: 1 addition & 0 deletions VD/bin/get_version.pl
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
my $folder = max(@versions);

print "v" . $folder;

8 changes: 5 additions & 3 deletions VD/bin/virus_identify.pl
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,8 @@
my $known_num = 0; ($known_num, $known_identified) = arrange_col2($known_identified);

if ( length($known_identified) > 1 ) {
Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode);
Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode);
Util::plot_result_tab($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode, $sample);
} else {
unlink "$sample_dir/$sample_base.blastn.xls" if -e "$sample_dir/$sample_base.blastn.xls";
}
Expand Down Expand Up @@ -401,7 +402,8 @@
my $novel_num = 0; ($novel_num, $novel_identified) = arrange_col2($novel_identified);

if ( length($novel_identified) > 1 ) {
Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
Util::plot_result_tab($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode);
} else {
unlink "$sample_dir/$sample_base.blastx.xls" if -e "$sample_dir/$sample_base.blastx.xls";
}
Expand Down Expand Up @@ -510,7 +512,7 @@
}
}

if ($select_ctg_num > 0 && $mode == 2 && -e $sample.pileup) {
if ($select_ctg_num > 0 && $mode == 2 ) {
# plot the select ctg
Util::plot_select(\%select_ctg_for_sRNA_len_check, \%select_label, \%ctg_norm_depth, \%contig_best_blast, \%best_virus_info, $map_sRNA_len_stat, $sample_dir, 'undetermined');
# report to screen
Expand Down
28 changes: 1 addition & 27 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,27 +1 @@
{
"name": "VDW",
"description": "virus detect GUI",
"main": "index.html",
"window": {
"height": 500,
"width": 800,
"icon": "icon.png"
},
"css": "css/style.css",
"js": "js/menu.js",
"bin": "./bin/npm-cli.js",
"chromium-args": "-ignore-certificate-errors",
"webview": {
"partitions": [
{
"name": "trusted",
"accessible_resources": [
"<all_urls>"
]
}
]
},
"devDependencies": {
"nwjs-builder-phoenix": "^1.15.0"
}
}
{"name":"VDW","description":"virus detect GUI","main":"index.html","window":{"height":500,"width":800,"icon":"icon.png"},"css":"css/style.css","js":"js/menu.js","bin":"./bin/npm-cli.js","chromium-args":"-ignore-certificate-errors","webview":{"partitions":[{"name":"trusted","accessible_resources":["<all_urls>"]}]}}
56 changes: 51 additions & 5 deletions perl/analysis.pl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
### Save options in a file.
open my $writef, '>>', catfile($dir,"running_options.txt") or warn "couldn't open: $!";
my $datestring = localtime();
my $logoptions = "Options for running VDW (v0.93):\n" .
my $logoptions = "Options for running VDW (v0.95):\n" .
"========================\n" .
"Results are in folder: " . $dir . "\n" .
"Files: ". join(",",@files) . "\n" .
Expand Down Expand Up @@ -309,14 +309,21 @@
push @array_files , $file;
}

# Write file
## Following the pipeline after finishing the samples loop.

# Write log file
my $datestringend = localtime();
print $writef "End: $datestringend \n" ;

# Get spiking
if($spike ne 'NA'){
merge_spike_files($dir);
}
# Get blast
my $fileBlast;
if($fileBlast ne 'NA'){
# merge_blast($dir);
}

# Print
print_summary($dir,"report_sRNA_trim.txt","control.tsv","spikeSummary.txt", "sRNA_length.txt",$spike,$controlfile, @array_files);
Expand All @@ -325,6 +332,7 @@
graph_spiking_sum($dir,$spike);
}

#Get control_cutoff
my $control_cutoff;
my $av;
my $sd;
Expand All @@ -349,6 +357,7 @@

closedir(DIR);

### End of analysis

sub format_spike {
my %count;
Expand Down Expand Up @@ -473,10 +482,36 @@ sub print_summary {
}
}

#Get blast results
my %dataFileBlast;
my $fileBlast;

if (-e -s $fileBlast){

open FILE5, "$fileBlast" or warn;

while (my $line1=<FILE5>) {
# chomp;
my @field = split /\t/, $line1;
my $sample = trim($field[0]);
$dataFileBlast{$sample}{'reference'} = trim($field[3]);
$dataFileBlast{$sample}{'description'} = trim($field[13]);
$dataFileBlast{$sample}{'genus'} = trim($field[12]);
$dataFileBlast{$sample}{'type'} = trim($field[2]);
$dataFileBlast{$sample}{'lenCoverage'} = trim($field[5]);
$dataFileBlast{$sample}{'perCoverage'} = trim($field[6]);
$dataFileBlast{$sample}{'depth'} = trim($field[8]);
$dataFileBlast{$sample}{'depthNor'} = trim($field[9]);
$dataFileBlast{$sample}{'iden'} = trim($field[10]);
$dataFileBlast{$sample}{'idenMax'} = trim($field[11]);
$dataFileBlast{$sample}{'idenMin'} = trim($field[12]);
}
}

### Run summary
## Header
open my $fh1, '>', catfile($dir,"Summary.tsv") or warn "couldn't open: $!";
my $out = "File\t#Raw reads\t#clean reads\t21\t22\t23\t24\t";
my $out = "File\t#Raw reads\t#clean reads\t21\t22\t23\t24\tSum(21-24)\tSum(21-24)/clean\t";
foreach my $spk (@spikes) {
$out = $out . "Norm. Spike: ". $spk. "\t"."# Spikes: ". $spk. "\t";
}
Expand Down Expand Up @@ -505,19 +540,26 @@ sub print_summary {
$out = $out . "NA\tNA\t";
}

#21-24
#21-24 and sum + sum/clean
my $sumReads = 0;
foreach (21..24){
if($dataFile3{$file}{$_}){
$out = $out . $dataFile3{$file}{$_}."\t";
$sumReads += $dataFile3{$file}{$_};
} else{
$out = $out . "NA\t";
}
}
$out = $out . $sumReads . "\t";
if ($clean && $clean > 0) { $out = $out . $sumReads/$clean . "\t"; }
else { $out = $out . "NA\t"; }

#spikes
foreach my $spk (@spikes) {
if($dataFile2{$file}{$spk}){
if($dataFile2{$file}{$spk} && $clean && $clean != 0){
$out = $out . ($dataFile2{$file}{$spk}/$clean*1000000) . "\t" . ($dataFile2{$file}{$spk}) ."\t";
} elsif ($dataFile2{$file}{$spk}){
$out = $out . "NA\t" . ($dataFile2{$file}{$spk}) ."\t";
} else {
$out = $out . "NA\tNA\t";
}
Expand Down Expand Up @@ -602,3 +644,7 @@ sub get_control_cutoff{
my $cutoff = $av + ($std * $const);
return ($cutoff,$av,$std);
}

sub merge_blast {

}

0 comments on commit 846933e

Please sign in to comment.