-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmodel.py
137 lines (105 loc) · 3.8 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import csv
import pandas as pd
import numpy as np
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
# Load the raw sheet, then forward-fill: every empty cell takes the value of
# the nearest non-empty cell above it in the same column.
df = pd.read_excel('raw_data.xlsx')
# DataFrame.ffill() is the direct replacement for fillna(method='ffill'),
# whose `method` keyword is deprecated in recent pandas releases.
data = df.ffill()
# Bare expression: it only displays the first rows in an interactive session.
data.head()
# Process Disease and Symptom Names
def process_data(data):
    """Extract every second piece of a raw disease/symptom cell.

    The text is split on both ``^`` and ``_``; the 2nd, 4th, 6th, ... pieces
    are kept and the pieces in between are discarded.

    Args:
        data: Raw cell text, e.g. ``"code_name^code_name"``.

    Returns:
        A list with the kept pieces in their original order. The list is
        empty when ``data`` is not a string (for example a NaN left by an
        empty cell) or when it contains no separator.
    """
    # Non-text cells (NaN/None) have no .replace(); report "no names" rather
    # than raising AttributeError.
    if not isinstance(data, str):
        return []
    # Unify both separators to '_' and keep the pieces at odd indices.
    return data.replace('^', '_').split('_')[1::2]
# Cell values that mean "nothing here": the empty string, a non-breaking
# space, and the two-character text that the UTF-8 bytes of a non-breaking
# space become when they are mis-decoded. The last one is kept because it is
# what the original literal "\xc2\xa0" spells in Python 3; on its own it never
# equals a real non-breaking space ("\xa0").
_BLANK_CELLS = ("", "\xa0", "\xc2\xa0")


def _has_text(cell):
    """Return True when *cell* is a string other than a blank marker."""
    return isinstance(cell, str) and cell not in _BLANK_CELLS


disease_list = []
disease_symptom_dict = defaultdict(list)   # disease name -> list of symptom names
disease_symptom_count = {}                 # disease name -> occurrence count
count = 0

for idx, row in data.iterrows():
    # Get the Disease Names. A row with a blank disease cell keeps using the
    # diseases and count from the last row that had one.
    if _has_text(row['Disease']):
        disease = row['Disease']
        disease_list = process_data(data=disease)
        count = row['Count of Disease Occurrence']

    # Get the Symptoms Corresponding to Diseases
    if _has_text(row['Symptom']):
        symptom = row['Symptom']
        symptom_list = process_data(data=symptom)
        for d in disease_list:
            for s in symptom_list:
                disease_symptom_dict[d].append(s)
            disease_symptom_count[d] = count
# See that the data is processed correctly
# (bare expression: it only displays a value in an interactive session)
disease_symptom_dict

# Count of disease occurrence w.r.t. each disease
disease_symptom_count

# One row per disease; the 'Symptom' cell holds that disease's symptom list.
df1 = pd.DataFrame(list(disease_symptom_dict.items()), columns=['Disease','Symptom'])
df1.head()

for occurrences in disease_symptom_count.values():
    print(occurrences)

# Look the count up per disease in one column assignment. The chained form
# df1[col][i] = value writes through an intermediate Series; pandas warns
# about it, and with Copy-on-Write enabled it does not update df1 at all.
df1["Number_of_Occurences"] = df1["Disease"].map(disease_symptom_count)
#######################################################################################################################
# PREDICTION
# Seed the candidate list with every disease whose symptom list contains all
# of the symptoms known so far.
input_symptoms = ['hematuria']
prediction = []
for candidate, known_symptoms in zip(df1["Disease"], df1["Symptom"]):
    if all(symptom_name in known_symptoms for symptom_name in input_symptoms):
        prediction.append(candidate)
# COMMON ELEMENTS
# Find the pair of diseases that share the largest number of distinct symptoms.
max_n = 0
dis1 = ""
dis2 = ""
# Build each disease's symptom set once, not once per compared pair.
_symptom_sets = [set(symptoms) for symptoms in df1["Symptom"]]
_disease_names = list(df1["Disease"])
for i, first_set in enumerate(_symptom_sets):
    for j in range(i + 1, len(_symptom_sets)):
        shared = len(first_set & _symptom_sets[j])
        # Strict '>' keeps the earliest pair when several pairs tie.
        if shared > max_n:
            dis1 = _disease_names[i]
            dis2 = _disease_names[j]
            max_n = shared
def _next_unasked_symptom(candidates, asked, symptoms_by_disease):
    """Return the first non-empty symptom of any candidate not yet in *asked*.

    Candidates are scanned in order, and each candidate's symptoms in their
    stored order. Returns "" when nothing is left to ask about.
    """
    for disease_name in candidates:
        for candidate_symptom in symptoms_by_disease[disease_name]:
            if candidate_symptom != "" and candidate_symptom not in asked:
                return candidate_symptom
    return ""


def _ask_symptom_present(symptom_name):
    """Prompt until the answer is 0 (not present), 1 (present) or 2 (not sure)."""
    while True:
        print("")
        try:
            answer = int(input("IS "+symptom_name+" PRESENT"))
        except ValueError:
            # Non-numeric text: handled like any other invalid answer.
            answer = None
        if answer in (0, 1, 2):
            return answer
        print("provide valid input")


# Keep asking about symptoms until at most three candidate diseases remain.
while len(prediction) > 3:
    symp = _next_unasked_symptom(prediction, input_symptoms, disease_symptom_dict)
    if symp == "":
        # Every symptom of every remaining candidate has been asked about, so
        # the list cannot be narrowed further; stop instead of looping forever.
        break

    ############################# ASK USER ABOUT THIS SYMPTOM #########################
    ############################ 0 for not present, 1 for present and 2 for not sure #######
    symptom_present = _ask_symptom_present(symp)
    # Record the symptom so it is not asked again, whatever the answer was.
    input_symptoms.append(symp)

    if symptom_present == 0:
        # Symptom absent: drop the diseases that list it.
        prediction = [name for name in prediction
                      if symp not in disease_symptom_dict[name]]
    elif symptom_present == 1:
        # Symptom present: drop the diseases that do not list it.
        prediction = [name for name in prediction
                      if symp in disease_symptom_dict[name]]
    # 2 ("not sure") leaves the candidate list unchanged.

print(prediction)