-
Notifications
You must be signed in to change notification settings - Fork 3
/
fda_faers_dl_extract.py
71 lines (58 loc) · 2.85 KB
/
fda_faers_dl_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import zipfile
import urllib.request
import glob, os, shutil
## Define Year and Quarter ranges
years = list(range(2013, 2018))
quarters = list(range(1, 5))
# pre 2012 q4 should be "aers" not "faers"
base_url = "https://fis.fda.gov/content/Exports/faers_ascii_"
test_add = base_url + str(years[0]) + "q" + str(quarters[0]) + ".zip"
## Generate list of URLS for download
# Per the note above, only quarters *before* 2012 Q4 use the "aers" prefix, so
# 2012 Q4 itself belongs to this "faers" list even though `years` starts at
# 2013. It is added explicitly so that quarter is not skipped.
download_urls = [base_url + "2012q4.zip"]
# One URL per (year, quarter) pair, in chronological order.
download_urls += [
    base_url + str(year) + "q" + str(quarter) + ".zip"
    for year in years
    for quarter in quarters
]
##
## Download each file, extract to txt files to DATA FOLDER, delete temporary files
# Working locations: each zip is saved directly under base_path, unpacked into
# TEMP, and the .txt files from its "ascii" folder are copied into DATA.
base_path = "E:/SQL/FDA_FAERS/Python_DL/fda_faers_dl_extract/TEST/"
split_val = "https://fis.fda.gov/content/Exports/"
temp_dir = base_path + "TEMP"
data_dir = base_path + "DATA"

# DATA has to exist as a directory: shutil.copy2 treats a missing destination
# as a file name, so every .txt would overwrite one file called "DATA".
# Creating it also creates base_path, which urlretrieve needs for the zip.
os.makedirs(data_dir, exist_ok=True)

for url in download_urls:
    # The local zip is named after the part of the URL following the export root.
    end_path = url.split(split_val, 1)[1]
    filename = base_path + end_path
    urllib.request.urlretrieve(url, filename)

    # A run that died mid-way leaves TEMP behind; clear it so os.makedirs does
    # not raise FileExistsError and no stale files get copied into DATA.
    if os.path.isdir(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)
    try:
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(temp_dir)
        for txt_path in glob.iglob(os.path.join(temp_dir + "/ascii", "*.txt")):
            if os.path.isfile(txt_path):
                shutil.copy2(txt_path, data_dir)
    finally:
        # Remove the extracted tree even when extraction or copying failed.
        # The downloaded zip itself is kept, as before.
        shutil.rmtree(temp_dir)
## Repeat above process for aers data
years = list(range(2004, 2013))
quarters = list(range(1, 5))
# pre 2012 q4 should be "aers" not "faers"
base_url = "https://fis.fda.gov/content/Exports/aers_ascii_"
test_add = base_url + str(years[0]) + "q" + str(quarters[0]) + ".zip"
# The year/quarter grid runs through 2012 Q4, but per the note above only
# quarters *before* 2012 Q4 use the "aers" prefix. The tuple comparison drops
# that final quarter so no "aers" URL is built for it.
download_urls = [
    base_url + str(year) + "q" + str(quarter) + ".zip"
    for year in years
    for quarter in quarters
    if (year, quarter) < (2012, 4)
]
# print(download_urls)
## Download each aers file, extract its txt files to the DATA folder, delete temporary files
# Working locations: each zip is saved directly under base_path, unpacked into
# TEMP, and the .txt files from its "ascii" folder are copied into DATA.
base_path = "E:/SQL/FDA_FAERS/Python_DL/fda_faers_dl_extract/TEST/"
split_val = "https://fis.fda.gov/content/Exports/"
temp_dir = base_path + "TEMP"
data_dir = base_path + "DATA"

# DATA has to exist as a directory: shutil.copy2 treats a missing destination
# as a file name, so every .txt would overwrite one file called "DATA".
# Creating it also creates base_path, which urlretrieve needs for the zip.
os.makedirs(data_dir, exist_ok=True)

for url in download_urls:
    # The local zip is named after the part of the URL following the export root.
    end_path = url.split(split_val, 1)[1]
    filename = base_path + end_path
    urllib.request.urlretrieve(url, filename)

    # A run that died mid-way leaves TEMP behind; clear it so os.makedirs does
    # not raise FileExistsError and no stale files get copied into DATA.
    if os.path.isdir(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)
    try:
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(temp_dir)
        for txt_path in glob.iglob(os.path.join(temp_dir + "/ascii", "*.txt")):
            if os.path.isfile(txt_path):
                shutil.copy2(txt_path, data_dir)
    finally:
        # Remove the extracted tree even when extraction or copying failed.
        # The downloaded zip itself is kept, as before.
        shutil.rmtree(temp_dir)