-
Notifications
You must be signed in to change notification settings - Fork 2
/
go2
executable file
·156 lines (137 loc) · 6.63 KB
/
go2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#! /bin/bash
#
# Read files named good_blobs.txt and bad_blobs.txt.
# The file bad_blobs.txt contains a list of blobs that contain a vulnerability.
# The file good_blobs.txt contains a list of blobs that are fixed.
# Find all projects that contain at least one of the good blobs and one of
# the bad blobs. Find and sort the commit dates so that we can see when
# a vulnerability was introduced and when it was fixed.
#
#------------------------------------------------------------------------
# Check the command line args: exactly two are required, the per-project
# output directory (created by go1) and the CVE publication time given as a
# Unix timestamp.
if [[ $# -ne 2 ]]; then
  echo "usage: go2 <output directory> <time>"
  echo "example: ./go2 out/CVE-2007-12345 1242055800"
  echo ""
  echo "Output directory is per-project directory created by go1."
  echo "This script is called by go once for each project."
  exit 1
fi

dir="$1"
v_unixtime="$2"
error_log="$dir/error_log"
outdir="$dir/stage2"

# The error log lives inside $dir, so it cannot be written when $dir itself
# is missing; only print the message in that case.
if [[ ! -d "$dir" ]]; then
  echo "Error: directory $dir does not exist"
  exit 1
fi

# mkdir prints its own diagnostic when stage2 already exists or cannot be
# created; stop in either case.
mkdir "$outdir" || exit 1
# Make sure the stage1 directory exists and holds both good_blobs.txt and
# bad_blobs.txt. Every failure message is printed and also written to the
# error log.
stage1_dir="$dir/stage1"
if [[ ! -d "$stage1_dir" ]]; then
  echo "$stage1_dir not found" | tee "$error_log"
  exit 1
fi
if [[ ! -f "$stage1_dir/good_blobs.txt" || ! -f "$stage1_dir/bad_blobs.txt" ]]; then
  echo "$stage1_dir (good_blobs.txt or bad_blobs.txt not found)" | tee "$error_log"
  exit 1
fi

# Get the original project where the vulnerability was fixed: the value of
# the "project=" line in stage1/info (second '='-separated field).
orig_project=$(grep -- '^project=' "$stage1_dir/info" | cut -d= -f2)
if [[ -z "$orig_project" ]]; then
  echo "could not find project name in $stage1_dir/info" | tee "$error_log"
  exit 1
fi
echo "Original Project = $orig_project"
# map_blobs_to_projects BLOBS_FILE CB_FILE PROJ_FILE
# Feed the blob list through getValues b2c, swap the first two fields of each
# result line, feed that through getValues c2P and store the sorted, unique
# output in CB_FILE (per the original notes these lines also carry the commit
# and blob hashes). The third ';'-separated field of CB_FILE is then written,
# sorted and de-duplicated, to PROJ_FILE as the project list.
# NOTE(review): b2c / c2P are presumably blob-to-commit and commit-to-project
# lookups; the tools live outside this file, confirm their output format.
map_blobs_to_projects() {
  ~/lookup/getValues -f b2c < "$1" \
    | awk -F';' '{ printf ("%s;%s\n", $2, $1) }' \
    | ~/lookup/getValues -f c2P \
    | sort -u > "$2"
  cut -d';' -f3 "$2" | sort -u > "$3"
}

# Get all projects with good (fixed) blobs.
map_blobs_to_projects "$stage1_dir/good_blobs.txt" \
  "$outdir/good_projs_cb.txt" "$outdir/good_projs.txt"

# Get all projects with bad (vulnerable) blobs.
map_blobs_to_projects "$stage1_dir/bad_blobs.txt" \
  "$outdir/bad_projs_cb.txt" "$outdir/bad_projs.txt"

# comm.txt is all the projects that contain both vulnerable and fixed
# versions of a file, minus the original project. -F makes grep treat the
# project name as a literal string, so characters such as '.' in the name are
# not interpreted as regex wildcards.
comm -12 "$outdir/good_projs.txt" "$outdir/bad_projs.txt" \
  | grep -vF -- "$orig_project" > "$outdir/comm.txt"

# only_bad.txt is all the projects which contain vulnerable versions of the
# file but no fixed version.
comm -13 "$outdir/good_projs.txt" "$outdir/bad_projs.txt" > "$outdir/only_bad.txt"
# first_line_for_project PROJECT FILE
# Print the first line of FILE that has a ';'-separated field exactly equal
# to PROJECT (compared as strings); print nothing when no line matches.
# A whole-field comparison keeps project "a_b" from matching lines that
# belong to "a_bc".
first_line_for_project() {
  awk -F';' -v p="$1" '
    { for (i = 1; i <= NF; i++) if (($i "") == (p "")) { print; exit } }
  ' "$2"
}

# Find out if the most recent version of each file is still vulnerable or
# unknown (we know it is not fixed, because we are looking at only_bad.txt).
# Projects whose latest blob is listed in bad_blobs.txt go to
# results.notfixed.txt, all others to results.unknown.txt.
touch "$dir/results.notfixed.txt"
touch "$dir/results.unknown.txt"

# The project list is read on fd 3 so that commands run inside the loop
# cannot consume it through their stdin.
while IFS= read -r line <&3 || [[ -n "$line" ]]; do
  [[ -n "$line" ]] || continue

  # Commit and blob come from the first bad_projs_cb.txt line of the project.
  entry=$(first_line_for_project "$line" "$outdir/bad_projs_cb.txt")
  IFS=';' read -r commit blob _ <<< "$entry"

  # Path of the blob: third field of the first showCmtTree.perl output line
  # that contains ";<blob>".
  pathname=$(printf '%s\n' "$commit" | ~/lookup/showCmtTree.perl \
    | grep -F -- ";$blob" | head -n 1 | cut -d';' -f3)

  url=$(./toUrl "$line")
  # Host platform (hp): the URL without its https:// prefix and with
  # everything from the first ".com" or ".org" onwards removed
  # (e.g. https://github.com/u/r -> github).
  hp=$(printf '%s\n' "$url" | sed -e "s@https://@@" -e "s@\.com.*@@" -e "s@\.org.*@@")
  # Project name with its first underscore turned into a slash.
  proj=$(printf '%s\n' "$line" | sed -e "s@_@/@")

  # ${var:+"$var"} passes each value as a single argument even when it
  # contains whitespace, and passes nothing at all when the value is empty.
  if ! rev=$(./get_latest_rev ${hp:+"$hp"} ${proj:+"$proj"} ${pathname:+"$pathname"}); then
    if [[ "$rev" == *"Not Found"* ]]; then
      continue
    fi
    echo "Error: get_latest_rev failed:"
    echo "./get_latest_rev \"$hp\" \"$proj\" \"$pathname\""
    echo " $rev"
    continue
  fi

  # The latest blob is everything before the first ';' of the answer.
  latest_blob=${rev%%;*}

  # See if latest_blob is in bad_blobs.txt; if so, the project is still
  # vulnerable. An empty latest_blob must not be searched for: an empty
  # pattern matches every line and would mark the project as not fixed.
  if [[ -n "$latest_blob" ]] \
    && grep -qF -- "$latest_blob" "$outdir/../stage1/bad_blobs.txt"; then
    # project is still vulnerable
    echo "$line" >> "$dir/results.notfixed.txt"
  else
    # file has changed (or its latest blob could not be determined); we
    # don't know if the change fixed it.
    echo "$line" >> "$dir/results.unknown.txt"
  fi
done 3< "$outdir/only_bad.txt"
## If there are no projects that contain both vulnerable and fixed blobs, quit.
#count=`cat $outdir/comm.txt | wc -l`
#if [ "$count" == "0" ]; then
# echo "There are no clone projects that contain both vulnerable and fixed blobs" | tee $error_log
# exit 1
#fi
# Create the header line of the results.fixed.csv file (this truncates any
# existing file). time_diff is the time from CVE publication to the first
# fixed commit. The target path is quoted so that a directory name containing
# whitespace does not turn the redirection into an "ambiguous redirect".
echo "time_diff, project, first good blob commit, first good commit time, first bad blob commit, first bad commit time, cve publication date" > "$dir/results.fixed.csv"
# lines_for_project PROJECT FILE
# Print every line of FILE that has a ';'-separated field exactly equal to
# PROJECT (compared as strings). A whole-field comparison keeps project "a_b"
# from also collecting the lines of "a_bc".
lines_for_project() {
  awk -F';' -v p="$1" '
    { for (i = 1; i <= NF; i++) if (($i "") == (p "")) { print; next } }
  ' "$2"
}

# years_and_days DAYS
# Print a whole number of days as "<Y> years and <D> days", using a year of
# 365.25 days (1461 days per 4 years, integer arithmetic only). Negative
# inputs are formatted from their absolute value with a leading "-".
years_and_days() {
  local total=$1 sign="" years
  if (( total < 0 )); then
    sign="-"
    total=$(( -total ))
  fi
  years=$(( total * 4 / 1461 ))
  printf '%s%d years and %d days\n' "$sign" "$years" "$(( total - years * 1461 / 4 ))"
}

# add_commit_times PROJECT SRC DEST
# For every line of SRC that belongs to PROJECT, take the 7th ';'-separated
# field of the "showCnt commit" output for that line as a Unix time and
# append "<unixtime>;<formatted time>;<original line>" to DEST.
# NOTE(review): strftime() requires an awk that provides it (e.g. gawk).
add_commit_times() {
  local entry stamp
  while IFS= read -r entry; do
    stamp=$(printf '%s\n' "$entry" | ~/lookup/showCnt commit | cut -d';' -f7 \
      | awk -F';' '{ printf ("%s;%s\n", $1, strftime("%F:%T", $1)) }')
    printf '%s;%s\n' "$stamp" "$entry" >> "$3"
  done < <(lines_for_project "$1" "$2")
}

# The CVE publication date is the same for every project; format it once.
cve_date=$(date -d "@$v_unixtime" +%Y-%m-%d_%H:%M:%S)

# For each project that has contained both vulnerable and fixed blobs, collect
# the earliest commit of a fixed blob and of a vulnerable blob and append one
# line to results.fixed.csv.
while IFS= read -r line <&3 || [[ -n "$line" ]]; do
  [[ -n "$line" ]] || continue
  if [[ "$line" == *"gitorious"* ]]; then
    # c2P does not seem to return the right format for gitorious
    continue
  fi

  add_commit_times "$line" "$outdir/good_projs_cb.txt" "$outdir/${line}_good.txt"
  add_commit_times "$line" "$outdir/bad_projs_cb.txt" "$outdir/${line}_bad.txt"

  # Sorting numerically on the leading Unix time puts the earliest commit
  # first; its fields are <unixtime>;<formatted time>;<commit>;...
  sort -n "$outdir/${line}_good.txt" > "$outdir/${line}_good_sorted.txt"
  IFS=';' read -r first_good_blob_commit_time good_time_formatted \
    first_good_blob_commit _ < "$outdir/${line}_good_sorted.txt"

  sort -n "$outdir/${line}_bad.txt" > "$outdir/${line}_bad_sorted.txt"
  IFS=';' read -r _ bad_time_formatted \
    first_bad_blob_commit _ < "$outdir/${line}_bad_sorted.txt"

  # Number of whole days between CVE publication and the first fixed commit.
  diff=$(( (first_good_blob_commit_time - v_unixtime) / 86400 ))

  # Convert to years and days. The day count is the real remainder in days,
  # not the fractional part of the year count.
  years=$(years_and_days "$diff")

  echo "$years, $line, $first_good_blob_commit, $good_time_formatted, $first_bad_blob_commit, $bad_time_formatted, $cve_date" >> "$dir/results.fixed.csv"
done 3< "$outdir/comm.txt"
# Remove the scratch file that may have been left in the stage2 directory
# (quoted so a path with whitespace is not split into several rm arguments),
# then report success.
rm -f -- "$outdir/tmpfile"
echo "Success (go2)"