-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathlaunch_parsers
executable file
·64 lines (47 loc) · 1.52 KB
/
launch_parsers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/bin/bash
# Import data from raw .results files into certs* tables, where * is the /8.
#
# Usage:
#   ./launch_parsers [ --resume ]
#
#   --resume  prevents any certs* tables that already exist from being
#             updated/reimported
set -x

# Root directory of the raw scan results to import.
export RESULTS_ROOT=/reimport/scan/
if ! [ -d "$RESULTS_ROOT" ] ; then
  echo "RESULTS_ROOT $RESULTS_ROOT is not a directory"
  exit 1
fi

# Maximum number of concurrently running parser processes.
# This was sort-of optimised for 4 modern hyperthreaded CPU cores, but
# hackparse keeps getting more computationally expensive. YMMV.
# XXX replace these crude timings with some load monitoring to launch new
# parsing tasks when others are done. Should save a few hours each time!
NUM_THRDS=8

# RESUME=1 when --resume is the first argument, otherwise 0.
# The count test must be ">= 1": "--resume" on its own is exactly one
# argument, so a "> 1" test would silently ignore the documented usage.
export RESUME=0
if [ $# -ge 1 ] && [ "$1" = --resume ] ; then
  export RESUME=1
fi
# Build TARGETS: the space-separated names under RESULTS_ROOT that match
# *.x.x.x. The glob is expanded from inside RESULTS_ROOT so the names are
# relative to it.
cd "$RESULTS_ROOT" || { echo "cannot cd to $RESULTS_ROOT" ; exit 1 ; }
TARGETS=$(echo *.x.x.x)
echo "$TARGETS"
# When nothing matches, the shell leaves the pattern as-is, so a literal
# '*' in TARGETS means there is nothing to import.
if echo "$TARGETS" | grep -q '\*' ; then
  echo no targets found
  exit 1
fi
# Return to the directory we started in (~- expands to $OLDPWD).
cd ~- || exit 1
# Launch one background hackparse.py import per target, keeping fewer than
# NUM_THRDS parser processes running at a time. TABLES accumulates the name
# of every target's table, whether it was imported now or skipped.
TABLES=""
for n in $TARGETS ; do
  # Table name is "certs" followed by the target name with its literal
  # ".x.x.x" suffix removed (a suffix strip, not a regex, so the dots
  # cannot match arbitrary characters).
  TNAME="certs${n%.x.x.x}"
  echo "$TNAME"

  # Per-target, per-launcher-PID file receiving the parser's gzipped output.
  tmp_out="/tmp/scanner$n.$$.txt.gz"
  if [ -f "$tmp_out" ] && ! rm "$tmp_out" ; then
    # A stale file we could not remove; warn but carry on.
    echo TRIPPED OVER SOMEONE ELSE\'S TEMP FILES
  fi

  # With --resume, skip tables whose name already appears at the end of a
  # line of the database's "show tables" output.
  if [ "$RESUME" = 1 ] && echo show tables | obsdb | grep -q "$TNAME\$" ; then
    echo ALREADY EXISTS: $TNAME
  else
    echo IMPORTING $TNAME
    python ./hackparse.py --table "$TNAME" --create "$RESULTS_ROOT/$n" \
      | gzip -c > "$tmp_out" &
  fi
  TABLES="$TABLES $TNAME"

  # Throttle: poll until the number of processes whose command line
  # contains "hackpa" drops below NUM_THRDS before launching the next one.
  while [ "$(pgrep -f hackpa | wc -l)" -ge "$NUM_THRDS" ] ; do
    sleep 10s
  done
done
# The launch loop only throttles; it does not wait for the final batch of
# background parsers. Block until all of this shell's background jobs have
# exited so stitching does not start while imports are still running.
wait
# Combine the per-/8 tables into valid_certs. $TABLES is deliberately left
# unquoted so each table name is passed as a separate argument.
# NOTE(review): "python" appears twice; presumably the first is an argument
# consumed by ./once and the second is the command to run -- confirm.
./once python python ./stitch_tables.py --into valid_certs $TABLES