-
Notifications
You must be signed in to change notification settings - Fork 1
/
normalize.py
65 lines (46 loc) · 1.66 KB
/
normalize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python3
"""C/C++ source code normalization."""
import re
import hashlib
from subprocess import Popen, PIPE
def remove_comments(text):
    """Remove comments from C/C++ code.

    String and character literals are matched first and copied through
    unchanged, so comment markers that appear inside a literal are kept.
    Backslash escapes inside literals are honored, which means an escaped
    quote does not terminate the literal early.

    Args:
        text: C/C++ source code as a string.

    Returns:
        The text with every ``/* ... */`` and ``// ...`` comment removed.
        Line terminators following ``//`` comments are preserved.
    """
    # A literal is a quote, then any run of escape pairs or ordinary
    # characters, then the closing quote. The two alternatives are mutually
    # exclusive, so the pattern cannot backtrack catastrophically.
    string_literal = r'"(?:\\.|[^"\\])*"'
    char_literal = r"'(?:\\.|[^'\\])*'"
    block_comment = r"/\*.*?\*/"
    # No "$" anchor here: the greedy class already stops at the end of the
    # line, and "$" would refuse to match in front of the "\r" of a CRLF
    # line ending, leaving the comment in place.
    line_comment = r"//[^\r\n]*"
    regex = re.compile(
        f"({string_literal}|{char_literal})|({block_comment}|{line_comment})",
        re.DOTALL,  # let block comments and literals span several lines
    )

    def replacer(match):
        # Group 2 is a comment (dropped); group 1 is a literal (kept as is).
        if match.group(2) is not None:
            return ""
        return match.group(1)

    return regex.sub(replacer, text)
def remove_empty_lines(text):
    """Collapse runs of empty or whitespace-only lines.

    Every newline that is followed by optional whitespace and another
    newline is replaced by a single newline, so blank lines between two
    lines of code disappear.

    Args:
        text: Source text as a string.

    Returns:
        The text with all such runs collapsed, however many there are.
    """
    # The fourth positional parameter of re.sub is ``count``, not ``flags``;
    # passing re.MULTILINE there capped the number of replacements at 8.
    # The pattern has no ^/$ anchors, so no flag is needed at all.
    return re.sub(r"\n\s*\n", "\n", text)
def clang_format(text, filename):
    """Run ``clang-format`` with the LLVM style over *text*.

    The text is sent UTF-8 encoded on the tool's standard input, and
    *filename* is passed via ``--assume-filename``. Whatever the tool
    writes to standard output is decoded as UTF-8 and returned.
    """
    command = [
        "clang-format",
        "--style=llvm",
        f"--assume-filename={filename}",
    ]
    process = Popen(command, stdin=PIPE, stdout=PIPE)
    payload = text.encode("UTF-8")
    formatted, _ = process.communicate(input=payload)
    return formatted.decode('UTF-8')
def normalized_text(text, filename):
    """Return *text* in normalized form.

    Comments are stripped first, then empty lines are removed, and the
    result is finally run through ``clang_format`` using *filename*.
    """
    without_comments = remove_comments(text)
    compacted = remove_empty_lines(without_comments)
    return clang_format(compacted, filename)
def normalized_sha256(text, filename):
    """Return the hex SHA-256 digest of the normalized form of *text*.

    The text is normalized with ``normalized_text`` and the result is
    hashed as UTF-8 bytes.
    """
    encoded = normalized_text(text, filename).encode('UTF-8')
    return hashlib.sha256(encoded).hexdigest()
def sha256(blob):
    """Return the hex SHA-256 digest of the bytes in *blob*."""
    return hashlib.sha256(blob).hexdigest()
if __name__ == "__main__":
    # Command-line entry point: print the SHA-256 of the raw file and of
    # its normalized text for the single file named on the command line.
    import sys

    if len(sys.argv) != 2:
        print("Usage: normalize.py <filename>")
        sys.exit(1)
    filename = sys.argv[1]
    # Read the raw bytes inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage
    # collector.
    with open(filename, "rb") as source_file:
        blob = source_file.read()
    text = blob.decode('UTF-8')
    print("sha256: ", sha256(blob))
    print("normalized_sha256:", normalized_sha256(text, filename))
# vim:set expandtab tabstop=4 shiftwidth=4 softtabstop=4 nowrap: