-
Notifications
You must be signed in to change notification settings - Fork 4
/
Prj2CmtChk1.perl
executable file
·94 lines (82 loc) · 1.93 KB
/
Prj2CmtChk1.perl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
use strict;
use warnings;
use Error qw(:try);
use TokyoCabinet;
use Compress::LZF;
# Convert a byte string to its lowercase hexadecimal representation
# (two hex digits per input byte, high nybble first).
sub toHex {
    my ($bytes) = @_;
    return unpack('H*', $bytes);
}
# Convert a hexadecimal string back into the byte string it encodes
# (each pair of hex digits becomes one byte, high nybble first).
sub fromHex {
    my ($hex) = @_;
    return pack('H*', $hex);
}
# Progress counter; incremented once per record by procBin and reset before
# the comparison phase further down.
my $lines = 0;
# Project name => { 20-byte entry => number of times it was seen },
# populated by procBin from the binary input file.
my %p2c0;
# Fail early with a usage message instead of running the whole pipeline on an
# undefined filename (which would only produce warnings and an empty result).
@ARGV or die "usage: $0 <input-file>\n";
procBin($ARGV[0]);
# Read exactly $len bytes from $fh and return them.
# Dies, naming the file and the field being read, on an I/O error or when
# fewer than $len bytes are available (i.e. the file is truncated).
sub _readExact {
    my ($fh, $len, $fname, $what) = @_;
    my $buffer = '';
    my $got = read($fh, $buffer, $len);
    die "read error on $fname ($what): $!\n" unless defined $got;
    die "truncated $what in $fname: wanted $len bytes, got $got\n"
        unless $got == $len;
    return $buffer;
}

# Load a binary file of per-project records into the file-level %p2c0 hash.
#
# Each record, as consumed below, is laid out as:
#   - a 16-bit unsigned integer (native byte order, unpack 'S'): name length
#   - that many bytes: the project name
#   - a 32-bit unsigned integer (native byte order, unpack 'L'): entry count
#   - entry count x 20 raw bytes: the entries
#     (presumably binary SHA-1 commit ids -- confirm against the producer)
#
# For every entry, $p2c0{$project}{$entry} is incremented. The file-level
# $lines counter is bumped once per record and a progress line is printed
# every 10,000,000 records.
#
# Parameters: $_[0] - path of the file to read.
# Dies if the file cannot be opened or a record is cut short.
sub procBin {
    my $fname = $_[0];
    print "processing $fname\n";
    # Three-arg open with a lexical handle: the filename can no longer be
    # misread as a mode, and a failed open is reported instead of ignored.
    open my $fh, '<', $fname or die "cannot open $fname: $!\n";
    binmode($fh);
    until (eof($fh)) {
        my $lk  = unpack 'S', _readExact($fh, 2, $fname, 'name length');
        my $prj = _readExact($fh, $lk, $fname, 'project name');
        my $ns  = unpack 'L', _readExact($fh, 4, $fname, 'entry count');
        for (1 .. $ns) {
            my $entry = _readExact($fh, 20, $fname, 'entry');
            $p2c0{$prj}{$entry}++;
        }
        $lines++;
        print "read $lines\n" if !($lines % 10000000);
    }
    close $fh;
    print "read $fname\n";
}
# Open the reference database (project name => concatenated 20-byte entries,
# as decoded by list()) read-only through a tied hash.
# NOTE(review): the arguments after OREADER look like tuning parameters and
# mix in a TDB constant on an HDB tie -- confirm they are meaningful for a
# read-only open.
my %p2c;
tie %p2c, "TokyoCabinet::HDB", "/fast1/All.sha1c/project_commit.tch", TokyoCabinet::HDB::OREADER,
    16777213, -1, -1, TokyoCabinet::TDB::TLARGE, 100000
    or die "cant open project_commit.tch\n";

# Compare every project loaded by procBin against the database.
$lines = 0;
while (my ($p, $v) = each %p2c0) {
    # Try the project name as-is, then with the "github.com_" and "gh_"
    # prefixes, stopping at the first key the database knows. Every access to
    # the tied hash is a database fetch, so fetch each candidate only once and
    # keep the value rather than testing definedness and fetching again.
    my $stored;
    for my $key ($p, "github.com_$p", "gh_$p") {
        $stored = $p2c{$key};
        last if defined $stored;
    }

    if (defined $stored) {
        # Known project: report only the entries the database lacks.
        list($p, $stored, $v);
    }
    else {
        # Unknown project: report it with all of its entries on one line.
        print STDERR "$p";
        print STDERR ";" . toHex($_) for keys %{$v};
        print STDERR "\n";
    }

    $lines++;
    print "done $lines\n" if !($lines % 100000);
}
untie %p2c;
# Report entries of a project that are absent from the stored value.
#
# Parameters:
#   $p  - project name, used as the prefix of each report line
#   $v1 - stored value: consecutive 20-byte chunks packed into one string
#   $v  - hash reference whose keys are the entries to look for
#
# For every key of %$v that is not one of the 20-byte chunks of $v1, a line
# "<project>;<hex of key>" is written to STDERR.
sub list {
    my ($p, $v1, $v) = @_;

    # Index of the last chunk; same arithmetic as a count of length/20.
    my $last = length($v1) / 20 - 1;

    # Build the set of chunks present in the stored value.
    my %present;
    @present{ map { substr($v1, 20 * $_, 20) } 0 .. $last } = ();

    for my $c (keys %{$v}) {
        next if exists $present{$c};
        print STDERR "$p\;" . toHex($c) . "\n";
    }
}