tmp.py
import re
import sys
import tokenize
import typing
from pathlib import Path

REX_WORD = re.compile(r'[A-Za-z]+')
# camelCase-splitting regexes, from https://stackoverflow.com/a/1176023/8704691
REX1 = re.compile(r'(.)([A-Z][a-z]+)')
REX2 = re.compile(r'([a-z0-9])([A-Z])')


def get_words(text: str) -> typing.Set[str]:
    """Split an identifier into lowercase words (camelCase and snake_case aware)."""
    text = REX1.sub(r'\1 \2', text)
    text = REX2.sub(r'\1 \2', text).lower()
    text = text.replace('_', ' ')
    return set(text.split())
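
# For example (illustrative input), get_words('processFile_name') returns
# {'process', 'file', 'name'}.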


def get_redundant_comments(
    tokens: typing.Iterable[tokenize.TokenInfo],
) -> typing.Iterator[tokenize.TokenInfo]:
    """Yield COMMENT tokens fully covered by the words of the next code line."""
    comment = None
    code_words: typing.Set[str] = set()
    for token in tokens:
        if token.type == tokenize.NL:
            continue
        if token.type == tokenize.COMMENT:
            # Remember a standalone comment; two comments in a row reset the state.
            if comment is None:
                comment = token
            else:
                comment = None
            continue
        if comment is None:
            continue
        if token.start[0] == comment.start[0] + 1:
            # Collect words from every token on the line right below the comment.
            code_words.update(get_words(token.string))
            continue
        # First token past that line: check whether the code covers the comment.
        # Lowercase the comment's words so capitalization cannot mask a match.
        comment_words = {w.lower() for w in REX_WORD.findall(comment.string)}
        if comment_words and not comment_words - code_words:
            yield comment
        code_words = set()
        comment = None
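
# A comment is flagged only when the code line directly below repeats every
# one of its words. Illustrative snippet (not from this repository):
#
#     # parse config
#     config = parse_config(path)
#
# Here {'parse', 'config'} is a subset of the words taken from the code line,
# so `# parse config` would be reported as redundant.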


def process_file(path: Path) -> None:
    """Print each redundant comment in `path` as `path:line comment`."""
    with path.open('rb') as stream:
        tokens = tokenize.tokenize(stream.readline)
        for comment in get_redundant_comments(tokens):
            print(f'{path}:{comment.start[0]} {comment.string.strip()}')


def get_paths(path: Path) -> typing.Iterator[Path]:
    """Yield every `.py` file under `path`, recursing into directories."""
    if path.is_dir():
        for p in path.iterdir():
            yield from get_paths(p)
    elif path.suffix == '.py':
        yield path


if __name__ == '__main__':
    for path in sys.argv[1:]:
        for p in get_paths(Path(path)):
            # process file
            process_file(p)
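
# Usage sketch (hypothetical paths and output):
#
#     $ python tmp.py my_project/ scripts/tool.py
#     my_project/utils.py:12 # parse config
#
# Run on this file itself, the script flags its own `# process file` comment,
# since both of its words reappear in `process_file(p)` on the next line.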