forked from chinhungtseng/cs50x2021
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dna.py
90 lines (59 loc) · 1.73 KB
/
dna.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import sys
import csv
# Command line arguments. main() requires exactly three entries; args[1] is
# opened as the database CSV and args[2] as the sequence file.
args = sys.argv
# STR sequence names taken from the database CSV header (every column after
# the first). Empty until load_database() fills it in.
STRS_H = []
def load_database(path=None):
    """Load the STR-count database from a CSV file.

    The first CSV column is kept as-is (the code elsewhere reads it through
    the "name" key); every remaining column is treated as an STR count and
    converted to ``int``.

    Side effect: the module-level ``STRS_H`` is replaced with the list of STR
    column names found in the header (the header minus its first entry).

    Args:
        path: CSV file to read. Defaults to the first command line argument
            (``args[1]``) when omitted.

    Returns:
        A list with one dict per CSV row. An empty file yields an empty list
        and leaves ``STRS_H`` empty.

    Raises:
        ValueError: if an STR cell cannot be parsed as an integer.
    """
    global STRS_H
    if path is None:
        path = args[1]

    database = []
    # newline="" hands newline handling to the csv module, as its
    # documentation recommends for files passed to a reader.
    with open(path, "r", newline="") as f:
        reader = csv.DictReader(f)
        # fieldnames is None when the file has no header line at all;
        # fall back to an empty header instead of failing on the slice.
        header = reader.fieldnames or []
        # The first element is the name column, so we drop it.
        STRS_H = header[1:]
        for row in reader:
            for k in STRS_H:
                row[k] = int(row[k])
            database.append(row)
    return database
def load_sequence_data(path=None):
    """Read the DNA sequence from the first line of a text file.

    Args:
        path: File to read. Defaults to the second command line argument
            (``args[2]``) when omitted.

    Returns:
        The first line of the file with surrounding whitespace removed, so
        the line terminator is not treated as part of the sequence. An empty
        file yields an empty string.
    """
    if path is None:
        path = args[2]

    with open(path, "r") as f:
        # Only the first line is read; strip() drops the trailing newline
        # that readline() keeps.
        return f.readline().strip()
def str_sequence_counter(sequence):
    """Map every STR name in ``STRS_H`` to its longest run in ``sequence``.

    Args:
        sequence: The DNA text to scan.

    Returns:
        A dict keyed by the STR names currently stored in ``STRS_H``; each
        value is whatever ``find_maxnum_key`` reports for that STR.
    """
    return {
        str_name: find_maxnum_key(str_name, sequence)
        for str_name in STRS_H
    }
def find_maxnum_key(key, text):
    """Return the longest run of back-to-back copies of ``key`` in ``text``.

    Args:
        key: The substring whose consecutive repeats are counted.
        text: The string to search.

    Returns:
        The largest ``n`` such that ``key`` repeated ``n`` times occurs in
        ``text``; 0 when ``key`` does not occur or is empty.
    """
    # An empty key is contained in every string and never grows when
    # repeated, so the loop below would never terminate for it.
    if not key:
        return 0

    count = 0
    # If key * n occurs in text then so does key * (n - 1), so the first
    # repeat length that is missing is one past the longest run.
    while key * (count + 1) in text:
        count += 1
    return count
def search_database(target, database):
    """Find the first database entry whose STR counts all equal ``target``.

    Args:
        target: Dict mapping STR name to the count measured in the sequence.
        database: List of row dicts; each must contain a "name" key and one
            entry per STR name present in ``target``.

    Returns:
        The "name" value of the first matching row, or the string
        "No match" when no row matches.
    """
    for people in database:
        # Compare over the target's own keys rather than a module global,
        # and use a generator so the comparison stops at the first mismatch.
        if all(people[k] == target[k] for k in target):
            return people["name"]
    return "No match"
def main():
    """Entry point: match a DNA sequence file against a CSV database.

    Prints a usage line and exits with status 1 unless exactly two command
    line arguments follow the script name. Otherwise prints the value
    returned by ``search_database`` for the measured STR counts.
    """
    # Guard clause: the script name plus two file paths are required.
    if len(args) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        sys.exit(1)

    # Load the database first; this also fills in the STR header names that
    # the counting step relies on.
    known_people = load_database()

    # Measure the longest run of each STR in the sequence file.
    str_profile = str_sequence_counter(load_sequence_data())

    # Report the matching name, or "No match".
    print(search_database(str_profile, known_people))
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()