-
Notifications
You must be signed in to change notification settings - Fork 0
/
GVCF_snp_extract_to_longtbl.pl
99 lines (81 loc) · 2.27 KB
/
GVCF_snp_extract_to_longtbl.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
use strict;
#use warnings;
# open a text file with the snp calls
my $input = $ARGV[0];
open (VCF, "<$input") or die$!;
#say POS "Position,Quality";
print "Pos,Ref,Alt,ID,Chain,Mouse_No,DP,Allele_Ratio,SNP_type\n";
my @vcf=<VCF>;
close VCF or die$!;
my @names;
#Parse the list file and take each line as a variable
foreach my $line (@vcf) {
chomp $line;
#ignore information lines at top
#next if /^##/;
#Use header line to store sample names and print them
if ($line =~ m/CHROM/){
my @headers=split("\t",$line);
my $length = @headers;
#print "My vcf has $length header fields\n";
#Create a loop through the line to place each sample name in a array (9 info fields)
my @samples;
foreach my $i (10..$length) {
my $j = $i - 1;
my $k = $i - 9;
my $name = $headers[$j];
#print "Sample $k is $name\n";
#Push the sample ids into the array @names
push(@names,$name);
}
}
#Parse remaining lines of code assign information fields to variables
if ($line =~ m/#/){
}else{
my @fields=split("\t",$line);
my $length2 = @fields;
my $qual = $fields[5];
my $ref = $fields[3];
my $alt = $fields[4];
my $pos = $fields[1];
#print "My data are $pos\t$ref\t$alt\n";
#For each samples, take each field and assign DP, AD and push allele call to array in a hash
#print POS "$pos,$qual\n";
foreach my $k (10..$length2){
my $l = $k - 1;
my $n = $k - 10;
my @stats = split(":",$fields[$l]);
my $AR;
my $type;
my $ID = $names[$n];
my @chain = split("P",$ID);
#Assign a value to the DP and save it in an array called @DPs
my $DP = $stats[2];
my @AD = split(",",$stats[1]);
#Based on Allelic ratio, designate SNPs as Reference (R) Het (H) or Homozygous (Z)
if ($DP < 20 ){
$AR = 0.000;
$type = "-";
}
elsif ($DP >= 20 ){
$AR = ($AD[1]) / $DP;
$AR = sprintf "%.3f", $AR;
if ($AR < 0.1){
$type = "R";
} elsif ($AR > 0.9){
$type = "Z";
}else{
$type = "H";
}
}
print "$pos,$ref,$alt,$ID,$chain[0],$chain[1],$DP,$AR,$type\n";
}
#print "$pos\t", join("\t", @DPs),"\n";
#print "$pos\t", join("\t", @major),"\n";
#print "$pos\t", join("\t", @AR),"\n";
#say MAJ "$pos\t", join("\t", @major);
#say ARAT "$pos,", join(",", @AR);
}
}
close VCF;