-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDhsrFacilityListing.py
129 lines (98 loc) · 4.13 KB
/
DhsrFacilityListing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from requests import get
from csv import DictReader
import re, cStringIO
# Root of the NC DHSR website; the data-file locations below hang off it
DHSR_SITE = 'https://www2.ncdhhs.gov/dhsr'
# Where the facility listing's header file lives on the DHSR website
DHSR_HAHEADER_FILE_URL = ''.join((DHSR_SITE, '/data/icfmr-header.txt'))
# Where the facility listing's records live on the DHSR website
DHSR_DATA_FILE_URL = ''.join((DHSR_SITE, '/data/icfmr.txt'))
class DhsrFacilityListing(object):
    """In-memory copy of the facility listing published on the DHSR website.

    Creating an instance downloads the header file and the records file,
    turns every record into a dictionary keyed by the header's field
    names, and pre-computes the sorted lists of distinct counties
    ('FCOUNTY' field) and licensees ('LICENSEE' field).
    """

    # A line made up solely of comma-separated words; the header file's
    # field-name line is recognised by this shape.
    _FIELD_NAMES_PATTERN = re.compile(r"^(\w+,)+\w+$")

    def __init__(self):
        """Download the listing and build the county and licensee indexes."""
        # A list of all the record dictionaries
        self._facilities = list()
        self._counties = list()
        self._licensees = list()
        # Retrieve and store the facility listings from the DHSR
        # website when we create a new listing object
        self._retrieveRecords()
        self._getListOfCountiesWithFacilities()
        self._getListOfLicensees()

    def _retrieve(self, url):
        """Fetch `url` with HTTP GET and return the body text.

        Used for both the header file and the facility listing.

        Raises whatever `requests` raises for a failed request or, via
        `raise_for_status()`, for an HTTP error status, so an error page
        is never mistaken for listing data.
        """
        response = get(url)
        response.raise_for_status()
        # Pass the text through a cStringIO buffer; the original author
        # notes this is needed to shed the Unicode encoding on Python 2.7.
        output = cStringIO.StringIO()
        try:
            output.write(response.text)
            return output.getvalue()
        finally:
            # Always release the buffer, even if the write fails
            output.close()

    def _retrieveFieldNames(self, url):
        """Download the header file at `url` and return its field names.

        The header file is scanned line by line ('\\r\\n' separated) for
        a line consisting only of comma-separated words. If several
        lines qualify, the last one wins.

        Returns a list of field-name strings.
        Raises ValueError when no line of the file looks like a
        field-name line.
        """
        content = self._retrieve(url)
        fieldNames = None
        for line in content.split('\r\n'):
            if self._FIELD_NAMES_PATTERN.match(line):
                fieldNames = line.split(',')
        if fieldNames is None:
            raise ValueError(
                'no field-name line found in header file %s' % url)
        return fieldNames

    def _retrieveRecords(self):
        """Download the listing and append one dictionary per record.

        Field names come from the header file; each record line is
        parsed as CSV and stored in `self._facilities` as a dictionary
        keyed by those field names.
        """
        # Retrieve a list of field names from the DHSR website
        fieldNames = self._retrieveFieldNames(DHSR_HAHEADER_FILE_URL)
        # Retrieve the facility records from the DHSR website
        records = self._retrieve(DHSR_DATA_FILE_URL)
        facilityRecords = records.split('\r\n')
        # The records file carries no header row of its own, so the
        # field names are supplied explicitly as the dictionary keys
        reader = DictReader(facilityRecords, fieldnames=fieldNames)
        self._facilities.extend(reader)

    def _distinctValues(self, key):
        """Return the sorted distinct values of field `key` over all records.

        Raises KeyError if a record lacks `key`.
        """
        return sorted(set(facility[key] for facility in self._facilities))

    def _getListOfCountiesWithFacilities(self):
        """Store the sorted distinct 'FCOUNTY' values in `self._counties`."""
        self._counties = self._distinctValues('FCOUNTY')

    def _getListOfLicensees(self):
        """Store the sorted distinct 'LICENSEE' values in `self._licensees`."""
        self._licensees = self._distinctValues('LICENSEE')

    def _getRecordsByKey(self, key, value):
        """Return the records whose field `key` equals `value`.

        The result is a new list in listing order; it is empty when
        nothing matches. The records are dictionaries, which have no
        portable ordering, so they are not sorted (the earlier
        `return records.sort()` always yielded None).

        Raises KeyError if a record lacks `key`.
        """
        return [facility for facility in self._facilities
                if facility[key] == value]

    def getRecords(self):
        """Return the list of all facility record dictionaries."""
        return self._facilities

    def getCounties(self):
        """Return the sorted list of counties that have facilities."""
        return self._counties

    def getLicensees(self):
        """Return the sorted list of licensees."""
        return self._licensees

    def getRecordsForCounty(self, county):
        """Return the records whose 'FCOUNTY' field equals `county`."""
        return self._getRecordsByKey("FCOUNTY", county)

    def getRecordsForLicensee(self, licensee):
        """Return the records whose 'LICENSEE' field equals `licensee`."""
        return self._getRecordsByKey("LICENSEE", licensee)