-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathAllUpdateCmts.perl
executable file
·114 lines (97 loc) · 2.82 KB
/
AllUpdateCmts.perl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/perl
use lib ("$ENV{HOME}/lookup", "$ENV{HOME}/lib64/perl5", "/home/audris/lib64/perl5","$ENV{HOME}/lib/perl5", "$ENV{HOME}/lib/x86_64-linux-gnu/perl", "$ENV{HOME}/share/perl5");
use strict;
use warnings;
use Error qw(:try);
use TokyoCabinet;
# Render a byte string as hexadecimal text, high nybble first
# (two lowercase hex digits per input byte).
sub toHex {
  my ($bytes) = @_;
  return unpack('H*', $bytes);
}
# Inverse of toHex: turn hexadecimal text (high nybble first) back into
# the byte string it encodes.
sub fromHex {
  my ($hex) = @_;
  return pack('H*', $hex);
}
# --- Configuration ----------------------------------------------------------
my $debug = 0;                           # not referenced elsewhere in this script
my $sections = 128;                      # number of output shards, numbered 0 .. $sections-1
my $parts = 2;                           # modulus used below when choosing the .tch directory prefix
my $fbase = "All.sha1/sha1.commit_";     # path stem (below the prefix) of the per-shard .tch files
my $fbasei = "/data/All.blobs/commit_";  # path stem of the per-shard .idx/.bin/.vs files

# --- Per-shard state, keyed by shard number ---------------------------------
#   %size  byte offset at which the next new object is appended to the shard's .bin
#   %cnt   sequence number the next new object receives
#   %fhob, %fhoi, %fhov  append handles for the shard's .bin, .idx and .vs files
#   %fhos  tied TokyoCabinet hash: binary sha1 => sequence number packed with "w"
my (%size, %cnt, %fhob, %fhoi, %fhov, %fhos);

# Open every shard for appending and work out where the previous run stopped.
for my $sec (0 .. ($sections-1)){
  my $idxFile = "$fbasei$sec.idx";
  my $off = 0;
  my $n = 0;
  if (-f $idxFile){
    # Resume after the last record ("n;offset;length;...") of the existing
    # index.  The list-form pipe open runs tail directly, without a shell.
    open(my $tail, '-|', 'tail', '-n', '1', $idxFile)
      or die "cant run tail on $idxFile: $!\n";
    my $str = <$tail>;
    close $tail;
    die "empty $idxFile\n" unless defined $str;
    # chomp rather than chop: when the final line has no trailing newline,
    # chop would eat a data character and could corrupt the length field.
    chomp $str;
    my ($nn, $of, $len) = split(/\;/, $str, -1);
    die "bad format in $idxFile\n" unless defined $len;
    $off = $of + $len;
    $n = $nn + 1;
  }
  $size{$sec} = $off;
  $cnt{$sec} = $n;

  # NOTE(review): both prefixes name the same directory ("/fast//..." vs
  # "/fast/..."); presumably they were once distinct volumes - confirm.
  my $pre = "/fast/";
  $pre = "/fast" if $sec % $parts;
  # NOTE(review): the option flag comes from the TDB namespace although the
  # handle is an HDB; presumably the two constants share a value - confirm.
  tie(%{$fhos{$sec}}, "TokyoCabinet::HDB", "$pre/${fbase}$sec.tch",
      TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT,
      16777213, -1, -1, TokyoCabinet::TDB::TLARGE, 100000)
    or die "cant open $pre/$fbase$sec.tch\n";

  open($fhoi{$sec}, '>>', $idxFile)
    or die "cant open $idxFile for append: $!\n";
  open($fhob{$sec}, '>>', "$fbasei$sec.bin")
    or die "cant open $fbasei$sec.bin for append: $!\n";
  binmode($fhob{$sec});   # object content is raw bytes
  open($fhov{$sec}, '>>', "$fbasei$sec.vs")
    or die "cant open $fbasei$sec.vs for append: $!\n";
}
# Main pass.  STDIN lists input files, one per line, with or without a
# trailing ".bin".  Each input has an index "<name>.idx" whose lines are
#   offset;size;section;hex-sha1;path...
# and a content file "<name>.bin".  Every record is logged to the section's
# .vs file; content whose sha1 is not yet in the section's database is also
# appended to the section's .bin/.idx and registered in the database.
while (my $inName = <STDIN>){
  # chomp rather than chop: a final line without a newline must not lose
  # its last character.
  chomp $inName;
  next if $inName eq '';
  $inName =~ s/\.bin$//;
  my $readFileBase = $inName;

  # Label written to the .vs file: file name without directory and without
  # its last two dot-separated suffixes.
  my $base = $readFileBase;
  $base =~ s|^.*/||;
  #$base =~ s|\..*$||;
  $base =~ s|\.[^\.]*$||;
  $base =~ s|\.[^\.]*$||;   #for, e.g., sources.git.github.com.2.18.blob.bin

  # Three-argument opens: the name comes from STDIN and must never be
  # interpreted as an open mode or a pipe.
  open(my $idxr, '<', "$readFileBase.idx")
    or die "cant open $readFileBase.idx: $!\n";
  open(my $fh, '<', "$readFileBase.bin")
    or die "cant open $readFileBase.bin: $!\n";
  binmode($fh);

  while (my $rec = <$idxr>){
    chomp $rec;
    my ($offset, $siz, $sec, $hsha1Full, @p) = split(/\;/, $rec, -1);
    next if (!defined $offset);
    if (!defined $hsha1Full){
      print STDERR "bad format: $rec\;$readFileBase\n";
      next;
    }
    my $path = join ';', @p;
    if ($siz == 0){
      print STDERR "zero length for: $offset\;$siz\;@p\;$readFileBase\n";
      next;
    }
    # Only sections opened during start-up have output handles.
    die "unknown section '$sec' in $readFileBase.idx\n"
      unless exists $fhob{$sec};

    seek($fh, $offset, 0)
      or die "cant seek to $offset in $readFileBase.bin: $!\n";
    my $codeC = "";
    my $rl = read($fh, $codeC, $siz);
    die "cant read $readFileBase.bin: $!\n" unless defined $rl;
    if ($rl != $siz){
      # Storing truncated content while advancing the offset by $siz would
      # misalign every later record of the section, so skip the record.
      print STDERR "short read ($rl of $siz) for: $offset\;$siz\;@p\;$readFileBase\n";
      next;
    }

    my $sha1Full = fromHex ($hsha1Full);
    my $fv = $fhov{$sec};
    # Fetch from the tied database once; the value is reused below.
    my $known = $fhos{$sec}{$sha1Full};
    if (defined $known){
      # Already stored: log this occurrence under the existing number.
      my $nn = unpack "w", $known;
      print $fv "$nn:$sec;$siz;$sec;$hsha1Full;$base;$path\n";
    }else{
      # New object: register it, log it, then append index entry and content.
      my $n = $cnt{$sec};
      my $id = $size{$sec};
      my $fi = $fhoi{$sec};
      my $fb = $fhob{$sec};
      $fhos{$sec}{$sha1Full} = pack "w", $n;
      print $fv "$n:$sec;$siz;$sec;$hsha1Full;$base;$path\n";
      print $fi "$n;$id;$siz;$hsha1Full\n";
      print $fb $codeC;
      $size{$sec} += $siz;
      $cnt{$sec} ++;
    }
  }
  close $idxr;
  close $fh;
}
# Shut down every shard: close the append handles so that buffered write
# errors (e.g. a full disk) are noticed, then release the tied database.
my $closeFailed = 0;
for my $sec (0 .. ($sections-1)){
  for my $out ([$fhob{$sec}, 'bin'], [$fhoi{$sec}, 'idx'], [$fhov{$sec}, 'vs']){
    my ($handle, $ext) = @$out;
    next unless defined $handle;
    close($handle) or do {
      print STDERR "cant close $fbasei$sec.$ext: $!\n";
      $closeFailed = 1;
    };
  }
  untie %{$fhos{$sec}};
}
# Report failure only after every shard has been shut down.
die "errors while closing output files\n" if $closeFailed;