-
Notifications
You must be signed in to change notification settings - Fork 23
/
find-duplicates.py
31 lines (26 loc) · 1.08 KB
/
find-duplicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import os
import argparse
from collections import defaultdict
# MD5 digest of zero bytes of input. A folder whose checksum equals this value
# had nothing hashed, so it is never reported as a duplicate of anything.
empty_checksum = "d41d8cd98f00b204e9800998ecf8427e"


def find_duplicates(directory):
    """Report sub-folders of *directory* that share the same checksum.

    Every immediate sub-folder is looked up for a ``naive_checksum.txt``
    file. Entries without such a file are ignored, and so are folders whose
    checksum is blank or equal to ``empty_checksum``. Surrounding whitespace
    (for example a trailing newline) is not considered part of the checksum.

    For each checksum shared by more than one folder, the checksum is
    printed followed by one ``- <folder>`` line per folder.

    Args:
        directory: Path of the directory whose sub-folders are scanned.

    Returns:
        A dict mapping each duplicated checksum to the name-sorted list of
        folder names that carry it. Empty when there are no duplicates.

    Raises:
        OSError: If *directory* cannot be listed or a checksum file cannot
            be read.
    """
    checksum_dict = defaultdict(list)

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # report reproducible from run to run.
    for foldername in sorted(os.listdir(directory)):
        checksum_file = os.path.join(directory, foldername, "naive_checksum.txt")
        # isfile() is False both when `foldername` is not a directory and
        # when the checksum file is missing (or is itself a directory).
        if not os.path.isfile(checksum_file):
            continue

        with open(checksum_file, "r", encoding="utf-8") as fh:
            # Strip so that a trailing newline does not defeat the
            # comparisons below or split one checksum into two groups.
            checksum = fh.read().strip()

        if not checksum or checksum == empty_checksum:
            continue
        checksum_dict[checksum].append(foldername)

    duplicates = {
        checksum: names
        for checksum, names in checksum_dict.items()
        if len(names) > 1
    }
    for checksum, names in duplicates.items():
        print(f"{checksum}:")
        for name in names:
            print(f"- {name}")
    return duplicates
if __name__ == "__main__":
    # Command-line entry point: takes one positional argument, the directory
    # to scan, and hands it to find_duplicates().
    cli = argparse.ArgumentParser(description='Find duplicate checksums.')
    cli.add_argument('dir', type=str, help='The directory to process')
    find_duplicates(cli.parse_args().dir)