Skip to content

Commit

Permalink
added census geocoder example
Browse files Browse the repository at this point in the history
  • Loading branch information
gboeing committed Jan 24, 2017
1 parent 6bce1e7 commit fde20a2
Show file tree
Hide file tree
Showing 2 changed files with 361 additions and 0 deletions.
349 changes: 349 additions & 0 deletions 18-Geocoding-and-Geolocation/census.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,349 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reverse-geocode lat-long to census geography\n",
"\n",
"Using https://geocoding.geo.census.gov/"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import time, pandas as pd, requests"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define helper functions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def census_geocode(lon, lat, benchmark='Public_AR_Current', vintage='Current_Current',\n",
"                   output_format='json', layers='Census Tracts', timeout=60):\n",
"    \"\"\"Reverse-geocode a lon/lat point with the US Census Bureau geocoder.\n",
"\n",
"    Sends one GET request to the geocoder's geographies/coordinates endpoint.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    lon, lat : sent as the `x` and `y` query parameters of the point to look up\n",
"    benchmark, vintage : sent as the `benchmark` and `vintage` query parameters\n",
"    output_format : sent as the `format` query parameter\n",
"    layers : sent as the `layers` query parameter\n",
"    timeout : seconds to wait for the server before requests raises a timeout error\n",
"\n",
"    Returns\n",
"    -------\n",
"    The unparsed requests.Response; the HTTP status is not checked here.\n",
"    \"\"\"\n",
"    url = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'\n",
"\n",
"    # let requests build the query string so values such as 'Census Tracts'\n",
"    # (which contains a space) are encoded rather than pasted into the URL raw\n",
"    params = {'benchmark': benchmark, 'vintage': vintage, 'x': lon, 'y': lat,\n",
"              'format': output_format, 'layers': layers}\n",
"\n",
"    # without a timeout a stalled connection would hang the notebook indefinitely\n",
"    return requests.get(url, params=params, timeout=timeout)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def geocode_row(row):\n",
"    \"\"\"Look up the census tract containing one listing.\n",
"\n",
"    `row` must provide 'longitude' and 'latitude'. Returns a pd.Series holding the\n",
"    attributes of the first tract in the geocoder's reply, or an empty Series when\n",
"    the reply lists no tract, so that one unmatched point does not abort a\n",
"    DataFrame.apply over many rows.\n",
"\n",
"    Raises requests.HTTPError if the geocoder answers with a 4xx/5xx status.\n",
"    \"\"\"\n",
"    response = census_geocode(lon=row['longitude'], lat=row['latitude'])\n",
"    # fail with the HTTP error itself rather than a later, less telling KeyError\n",
"    response.raise_for_status()\n",
"\n",
"    geographies = response.json()['result']['geographies']\n",
"    # the layer may be absent or empty when the point matches no tract\n",
"    matches = geographies.get('Census Tracts') or []\n",
"    if not matches:\n",
"        return pd.Series(dtype=object)\n",
"\n",
"    return pd.Series(matches[0])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load the data and reverse geocode it"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# load the dataset of rental listings\n",
"listings = pd.read_csv('data/listings.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# reverse geocode every listing's lat-long to its census tract;\n",
"# geocode_row issues one geocoder request per row\n",
"tracts = listings.apply(geocode_row, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID</th>\n",
" <th>AREALAND</th>\n",
" <th>AREAWATER</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>06083001304</td>\n",
" <td>5090828</td>\n",
" <td>144119</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>51153900701</td>\n",
" <td>2702493</td>\n",
" <td>43468</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>24031705000</td>\n",
" <td>4552733</td>\n",
" <td>14831</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>06073016301</td>\n",
" <td>1068848</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>22063040806</td>\n",
" <td>69992596</td>\n",
" <td>1310597</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID AREALAND AREAWATER\n",
"0 06083001304 5090828 144119\n",
"1 51153900701 2702493 43468\n",
"2 24031705000 4552733 14831\n",
"3 06073016301 1068848 0\n",
"4 22063040806 69992596 1310597"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview a few of the columns returned for the first tracts\n",
"tracts.loc[:, ['GEOID', 'AREALAND', 'AREAWATER']].head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pid</th>\n",
" <th>date</th>\n",
" <th>region</th>\n",
" <th>neighborhood</th>\n",
" <th>rent</th>\n",
" <th>bedrooms</th>\n",
" <th>sqft</th>\n",
" <th>rent_sqft</th>\n",
" <th>rent_sqft_cat</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>GEOID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4454264047</td>\n",
" <td>2014-05-11</td>\n",
" <td>santabarbara</td>\n",
" <td>NaN</td>\n",
" <td>3500.0</td>\n",
" <td>3.0</td>\n",
" <td>1200.0</td>\n",
" <td>2.916667</td>\n",
" <td>5</td>\n",
" <td>-119.726987</td>\n",
" <td>34.399757</td>\n",
" <td>06083001304</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4468128892</td>\n",
" <td>2014-05-13</td>\n",
" <td>washingtondc</td>\n",
" <td>14300 Jeffries Rd Ste 1207</td>\n",
" <td>1099.0</td>\n",
" <td>2.0</td>\n",
" <td>775.0</td>\n",
" <td>1.418065</td>\n",
" <td>4</td>\n",
" <td>-77.268300</td>\n",
" <td>38.635600</td>\n",
" <td>51153900701</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4474037012</td>\n",
" <td>2014-05-16</td>\n",
" <td>washingtondc</td>\n",
" <td>Bethesda</td>\n",
" <td>2743.0</td>\n",
" <td>1.0</td>\n",
" <td>714.0</td>\n",
" <td>3.841737</td>\n",
" <td>5</td>\n",
" <td>-77.102200</td>\n",
" <td>39.000300</td>\n",
" <td>24031705000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4482003715</td>\n",
" <td>2014-05-21</td>\n",
" <td>sandiego</td>\n",
" <td>escondido</td>\n",
" <td>995.0</td>\n",
" <td>1.0</td>\n",
" <td>614.0</td>\n",
" <td>1.620521</td>\n",
" <td>4</td>\n",
" <td>-116.950989</td>\n",
" <td>32.807693</td>\n",
" <td>06073016301</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4488888498</td>\n",
" <td>2014-05-26</td>\n",
" <td>batonrouge</td>\n",
" <td>Denham Springs</td>\n",
" <td>1000.0</td>\n",
" <td>3.0</td>\n",
" <td>1100.0</td>\n",
" <td>0.909091</td>\n",
" <td>2</td>\n",
" <td>-90.942121</td>\n",
" <td>30.438018</td>\n",
" <td>22063040806</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" pid date region neighborhood rent \\\n",
"0 4454264047 2014-05-11 santabarbara NaN 3500.0 \n",
"1 4468128892 2014-05-13 washingtondc 14300 Jeffries Rd Ste 1207 1099.0 \n",
"2 4474037012 2014-05-16 washingtondc Bethesda 2743.0 \n",
"3 4482003715 2014-05-21 sandiego escondido 995.0 \n",
"4 4488888498 2014-05-26 batonrouge Denham Springs 1000.0 \n",
"\n",
" bedrooms sqft rent_sqft rent_sqft_cat longitude latitude \\\n",
"0 3.0 1200.0 2.916667 5 -119.726987 34.399757 \n",
"1 2.0 775.0 1.418065 4 -77.268300 38.635600 \n",
"2 1.0 714.0 3.841737 5 -77.102200 39.000300 \n",
"3 1.0 614.0 1.620521 4 -116.950989 32.807693 \n",
"4 3.0 1100.0 0.909091 2 -90.942121 30.438018 \n",
"\n",
" GEOID \n",
"0 06083001304 \n",
"1 51153900701 \n",
"2 24031705000 \n",
"3 06073016301 \n",
"4 22063040806 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# attach each listing's tract GEOID as a new column, aligned on the row index\n",
"geoids = tracts['GEOID'].to_frame()\n",
"listings_geoids = pd.concat([listings, geoids], axis=1)\n",
"listings_geoids.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
12 changes: 12 additions & 0 deletions 18-Geocoding-and-Geolocation/data/listings.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
pid,date,region,neighborhood,rent,bedrooms,sqft,rent_sqft,rent_sqft_cat,longitude,latitude
4454264047,2014-05-11,santabarbara,,3500.0,3.0,1200.0,2.91666666667,5,-119.72698700000001,34.399757
4468128892,2014-05-13,washingtondc,14300 Jeffries Rd Ste 1207,1099.0,2.0,775.0,1.41806451613,4,-77.2683,38.6356
4474037012,2014-05-16,washingtondc,Bethesda,2743.0,1.0,714.0,3.8417366946800007,5,-77.1022,39.0003
4482003715,2014-05-21,sandiego,escondido,995.0,1.0,614.0,1.62052117264,4,-116.95098899999999,32.807693
4488888498,2014-05-26,batonrouge,Denham Springs,1000.0,3.0,1100.0,0.909090909091,2,-90.942121,30.438018
4496333550,2014-05-30,phoenix,Tempe,875.0,2.0,900.0,0.972222222222,2,-111.899847,33.407446
4511903470,2014-06-09,kansascity,Westside / Crossroads,980.0,2.0,980.0,1.0,3,-94.589923,39.092269
4524061401,2014-06-16,rochester,106 Edgerton,1125.0,2.0,1100.0,1.0227272727299999,3,-77.581158,43.146495
4535208051,2014-06-23,seattle,bellevue,2001.0,2.0,1010.0,1.9811881188099998,5,-122.1663,47.615
4548031631,2014-07-01,sfbay,daly city,3500.0,3.0,1190.0,2.9411764705900003,5,-122.380149,37.72937
4573697663,2014-07-17,saltlakecity,14901 Steep Mountain Drive,1650.0,4.0,3154.0,0.523145212429,1,-111.875961,40.483255

0 comments on commit fde20a2

Please sign in to comment.