diff --git a/VD/bin/Util.pm b/VD/bin/Util.pm index 76f3d58..b895eb8 100644 --- a/VD/bin/Util.pm +++ b/VD/bin/Util.pm @@ -1931,6 +1931,106 @@ $align return $out_table; } +=head2 + plot_result_tab -- plot the result in tab format +=cut +sub plot_result_tab +{ + my ($known_identified, $known_contig_blast_table, $output, $type, $mode, $sample) = @_; + + # parse blast result to get contig_length and match length + # push contig and match info to hash + # key 1: contig # reference virus ID + # key 2: length + # match + # iden + my %contig_match; + chomp($known_contig_blast_table); + my @bs = split(/\n/, $known_contig_blast_table); + foreach my $bs (@bs) { + my @line = split(/\t/, $bs); + if ($line[13] =~ m/(\d+)\/(\d+)\(\d+%\)/) { + $contig_match{$line[0]."#".$line[2]}{'match'} = $1; + $contig_match{$line[0]."#".$line[2]}{'length'} = $2; + $contig_match{$line[0]."#".$line[2]}{'iden'} = $1/$2; + } + else { + die "[ERR]CONTIG $bs\n"; + } + } + + my $OUTPUT_DIR = "$output/$type"."_references"; + mkdir $OUTPUT_DIR unless -e $OUTPUT_DIR; + my $output_file = "$output/$type.tab"; + + my %all_hits; + my %out; + my %index; + + my $out_table = ''; + my $line_number=0; + + # save contig and virus to hash + # key: contig ID + # value: virus accession ID + my %contig_virus; + chomp($known_identified); + my @a = split(/\n/, $known_identified); + foreach my $line (@a) + { + next if $line =~ m/^#/; + $line_number++; + + my @ta = split(/\t/, $line); + # 0 [col1] - virus seq ID + # 1 [col2] - virus seq length + # 2 [col3] - virus seq cover length + # 3 [col4] - coverage (cover_length/seq_length: col3/col2) + # 4 [col5] - contigs + # 5 [col6] - contig number + # 6 [col7] - raw depth (read depth, from pileup file) + # 7 [col8] - normalized depth + # 8 [col9] - genus + # 9 [col10]- desc + + die "[ERR]Num of cols: $line\n" unless scalar @ta == 10; + my ($ref, $len, $cov_base, $coverage, $contig, $contig_num, $raw_depth, $norm_depth, $genus, $desc) = @ta; + + my ($match, $len_c, $max_iden, $min_iden) = (0, 0, 0, 100); + my @c = split(/,/, $ta[4]); + foreach my $cid (@c) { + my $key = $cid."#".$ref; + $match += $contig_match{$key}{'match'}; + $len_c += $contig_match{$key}{'length'}; + my $iden_c = $contig_match{$key}{'iden'}; + $max_iden = $iden_c if $iden_c > $max_iden; + $min_iden = $iden_c if $iden_c < $min_iden; + } + my $iden = sprintf("%.2f", $match/$len_c*100); + $max_iden = sprintf("%.2f", $max_iden*100); + $min_iden = sprintf("%.2f", $min_iden*100); + $iden = 100 if $iden == 100; + $max_iden = 100 if $max_iden == 100; + $min_iden = 100 if $min_iden == 100; + + $all_hits{$ref} = 1; + $index{$ref} = $line_number; + + my $link = $type."_references/".$ref.".html"; + $coverage = sprintf("%.3f", $coverage) * 100; + $raw_depth = 'NA' if $mode == 1; + $norm_depth = 'NA' if $mode == 1; + $raw_depth = sprintf("%.1f", $raw_depth) if $mode == 2; + $norm_depth = sprintf("%.1f", $norm_depth) if $mode == 2; + + $out_table .= $sample . "\t". $type . "\t" . $ref ."\t" . $len ."\t" . $cov_base . "\t" . $coverage ."\t" . $contig_num ."\t" . $raw_depth ."\t" . $norm_depth ."\t" . $iden ."\t" . $max_iden ."\t" . $min_iden ."\t" . $genus ."\t" . $desc ."\n"; + } + + $out_table = "Sample\tBlast\tReference\t Length \t Coverage \t Coverage(%) \t contig \t Depth \t Depth (Norm) \t Identity \t Iden Max \t Iden Min \t Genus \t Description \n" . $out_table; + + # output table to file + save_file($out_table, $output_file); +} =head2 srna_range -- only get selected size of sRNA reads for next analysis diff --git a/VD/bin/get_version.pl b/VD/bin/get_version.pl index e9a88cc..78e8b61 100644 --- a/VD/bin/get_version.pl +++ b/VD/bin/get_version.pl @@ -22,3 +22,4 @@ my $folder = max(@versions); print "v" . $folder; + diff --git a/VD/bin/virus_identify.pl b/VD/bin/virus_identify.pl index 7756df7..7b2a7de 100644 --- a/VD/bin/virus_identify.pl +++ b/VD/bin/virus_identify.pl @@ -283,7 +283,8 @@ my $known_num = 0; ($known_num, $known_identified) = arrange_col2($known_identified); if ( length($known_identified) > 1 ) { - Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode); + Util::plot_result($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode); + Util::plot_result_tab($known_identified, $known_contig_blast_table, $sample_dir, 'blastn', $mode, $sample); } else { unlink "$sample_dir/$sample_base.blastn.xls" if -e "$sample_dir/$sample_base.blastn.xls"; } @@ -401,7 +402,8 @@ my $novel_num = 0; ($novel_num, $novel_identified) = arrange_col2($novel_identified); if ( length($novel_identified) > 1 ) { - Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode); + Util::plot_result($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode); + Util::plot_result_tab($novel_identified, $novel_contig_blast_table, $sample_dir, 'blastx', $mode); } else { unlink "$sample_dir/$sample_base.blastx.xls" if -e "$sample_dir/$sample_base.blastx.xls"; } @@ -510,7 +512,7 @@ } } - if ($select_ctg_num > 0 && $mode == 2 && -e $sample.pileup) { + if ($select_ctg_num > 0 && $mode == 2 ) { # plot the select ctg Util::plot_select(\%select_ctg_for_sRNA_len_check, \%select_label, \%ctg_norm_depth, \%contig_best_blast, \%best_virus_info, $map_sRNA_len_stat, $sample_dir, 'undetermined'); # report to screen diff --git a/package.json b/package.json index f10ef8b..cfa2679 100644 --- a/package.json +++ b/package.json @@ -1,27 +1 @@ -{ - "name": "VDW", - "description": "virus detect GUI", - "main": "index.html", - "window": { - "height": 500, - "width": 800, - "icon": "icon.png" - }, - "css": "css/style.css", - "js": "js/menu.js", - "bin": "./bin/npm-cli.js", - "chromium-args": "-ignore-certificate-errors", - "webview": { - "partitions": [ - { - "name": "trusted", - "accessible_resources": [ - "" - ] - } - ] - }, - "devDependencies": { - "nwjs-builder-phoenix": "^1.15.0" - } -} +{"name":"VDW","description":"virus detect GUI","main":"index.html","window":{"height":500,"width":800,"icon":"icon.png"},"css":"css/style.css","js":"js/menu.js","bin":"./bin/npm-cli.js","chromium-args":"-ignore-certificate-errors","webview":{"partitions":[{"name":"trusted","accessible_resources":[""]}]}} \ No newline at end of file diff --git a/perl/analysis.pl b/perl/analysis.pl index 344c83f..bfdf265 100755 --- a/perl/analysis.pl +++ b/perl/analysis.pl @@ -67,7 +67,7 @@ ### Save options in a file. open my $writef, '>>', catfile($dir,"running_options.txt") or warn "couldn't open: $!"; my $datestring = localtime(); -my $logoptions = "Options for running VDW (v0.93):\n" . +my $logoptions = "Options for running VDW (v0.95):\n" . "========================\n" . "Results are in folder: " . $dir . "\n" . "Files: ". join(",",@files) . "\n" . @@ -309,7 +309,9 @@ push @array_files , $file; } -# Write file +## Following the pipeline after finishing the samples loop. + +# Write log file my $datestringend = localtime(); print $writef "End: $datestringend \n" ; @@ -317,6 +319,11 @@ if($spike ne 'NA'){ merge_spike_files($dir); } +# Get blast +my $fileBlast; +if($fileBlast ne 'NA'){ + # merge_blast($dir); +} # Print print_summary($dir,"report_sRNA_trim.txt","control.tsv","spikeSummary.txt", "sRNA_length.txt",$spike,$controlfile, @array_files); @@ -325,6 +332,7 @@ graph_spiking_sum($dir,$spike); } +#Get control_cutoff my $control_cutoff; my $av; my $sd; @@ -349,6 +357,7 @@ closedir(DIR); +### End of analysis sub format_spike { my %count; @@ -473,10 +482,36 @@ sub print_summary { } } + #Get blast results + my %dataFileBlast; + my $fileBlast; + + if (-e -s $fileBlast){ + + open FILE5, "$fileBlast" or warn; + + while (my $line1=) { + # chomp; + my @field = split /\t/, $line1; + my $sample = trim($field[0]); + $dataFileBlast{$sample}{'reference'} = trim($field[3]); + $dataFileBlast{$sample}{'description'} = trim($field[13]); + $dataFileBlast{$sample}{'genus'} = trim($field[12]); + $dataFileBlast{$sample}{'type'} = trim($field[2]); + $dataFileBlast{$sample}{'lenCoverage'} = trim($field[5]); + $dataFileBlast{$sample}{'perCoverage'} = trim($field[6]); + $dataFileBlast{$sample}{'depth'} = trim($field[8]); + $dataFileBlast{$sample}{'depthNor'} = trim($field[9]); + $dataFileBlast{$sample}{'iden'} = trim($field[10]); + $dataFileBlast{$sample}{'idenMax'} = trim($field[11]); + $dataFileBlast{$sample}{'idenMin'} = trim($field[12]); + } + } + ### Run summary ## Header open my $fh1, '>', catfile($dir,"Summary.tsv") or warn "couldn't open: $!"; - my $out = "File\t#Raw reads\t#clean reads\t21\t22\t23\t24\t"; + my $out = "File\t#Raw reads\t#clean reads\t21\t22\t23\t24\tSum(21-24)\tSum(21-24)/clean\t"; foreach my $spk (@spikes) { $out = $out . "Norm. Spike: ". $spk. "\t"."# Spikes: ". $spk. "\t"; } @@ -505,19 +540,26 @@ sub print_summary { $out = $out . "NA\tNA\t"; } - #21-24 + #21-24 and sum + sum/clean + my $sumReads = 0; foreach (21..24){ if($dataFile3{$file}{$_}){ $out = $out . $dataFile3{$file}{$_}."\t"; + $sumReads += $dataFile3{$file}{$_}; } else{ $out = $out . "NA\t"; } } + $out = $out . $sumReads . "\t"; + if ($clean && $clean > 0) { $out = $out . $sumReads/$clean . "\t"; } + else { $out = $out . "NA\t"; } #spikes foreach my $spk (@spikes) { - if($dataFile2{$file}{$spk}){ + if($dataFile2{$file}{$spk} && $clean && $clean != 0){ $out = $out . ($dataFile2{$file}{$spk}/$clean*1000000) . "\t" . ($dataFile2{$file}{$spk}) ."\t"; + } elsif ($dataFile2{$file}{$spk}){ + $out = $out . "NA\t" . ($dataFile2{$file}{$spk}) ."\t"; } else { $out = $out . "NA\tNA\t"; } @@ -602,3 +644,7 @@ sub get_control_cutoff{ my $cutoff = $av + ($std * $const); return ($cutoff,$av,$std); } + +sub merge_blast { + +} \ No newline at end of file