-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_dataframe.py
50 lines (39 loc) · 1.75 KB
/
create_dataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
import data_grabber
def _build_record(entry):
    """Flatten one grabbed entry into a row dict, or return None to skip it.

    An entry is kept only when its "short_id" is not None and starts with
    "A" or "B". The returned dict always carries "transcription",
    "short_id", "label", "country_code" and "location_name";
    "company_connection" and "company_goods" are added only when the first
    connection provides them.
    """
    short_id = entry["short_id"]
    if short_id is None or not short_id.startswith(("A", "B")):
        return None

    transcription = entry["transcription"]
    record = {
        "transcription": transcription["original"],
        "short_id": short_id,
        "label": entry["label"],
    }
    # A corrected transcription, when present, replaces the original one.
    if "corrected" in transcription:
        record["transcription"] = transcription["corrected"]

    # Only the first location is considered.
    locations = entry["location"]
    if len(locations) == 0:
        # "XX" / "Unknown" mark entries without any location.
        record["country_code"] = "XX"
        record["location_name"] = "Unknown"
    else:
        first_location = locations[0]
        if "country" in first_location:
            record["country_code"] = first_location["country"]["code"]
            record["location_name"] = first_location["inferred"]
        else:
            # "YY" marks a location that has no "country" entry; its raw
            # transcription is used as the name instead.
            record["country_code"] = "YY"
            record["location_name"] = first_location["transcription"]

    # Only the first connection is considered, and each key is optional.
    connections = entry["connection"]
    if len(connections) > 0:
        first_connection = connections[0]
        if "to" in first_connection:
            record["company_connection"] = first_connection["to"][0]["short_id"]
        if "what" in first_connection:
            record["company_goods"] = first_connection["what"][0]["short_id"]

    return record


data = data_grabber.grab_data()

# Collect one flattened row per entry that passes the short_id filter.
new_data = []
for entry in data:
    record = _build_record(entry)
    if record is not None:
        new_data.append(record)
# One row per collected record. "company_connection" and "company_goods" are
# only set on some records, so those columns can be missing from the frame
# (and every column is missing when new_data is empty).
df = pd.DataFrame.from_records(new_data)

# Print how often each value occurs, in this order: country codes, location
# names, company connections, company goods.
for column in ("country_code", "location_name", "company_connection", "company_goods"):
    if column in df.columns:
        print(df[column].value_counts())
    else:
        # No record supplied this field: print an empty count table instead
        # of failing with a KeyError on the missing column.
        print(pd.Series(name=column, dtype="object").value_counts())