forked from gboeing/ppde642
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
361 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,349 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Reverse-geocode lat-long to census geography\n", | ||
"\n", | ||
"Using https://geocoding.geo.census.gov/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import time, pandas as pd, requests" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Define helper functions" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def census_geocode(lon, lat, benchmark='Public_AR_Current', vintage='Current_Current',\n", | ||
" output_format='json', layers='Census Tracts'):\n", | ||
" \n", | ||
" url = ('https://geocoding.geo.census.gov/geocoder/geographies/coordinates?'\n", | ||
" 'benchmark={benchmark}&vintage={vintage}&x={lon}&y={lat}&format={output_format}&layers={layers}')\n", | ||
" \n", | ||
" prepared_url = url.format(benchmark=benchmark, vintage=vintage, lon=lon, lat=lat, \n", | ||
" output_format=output_format, layers=layers)\n", | ||
"\n", | ||
" response = requests.get(prepared_url)\n", | ||
" return response" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"def geocode_row(row):\n", | ||
"\n", | ||
" response = census_geocode(lon=row['longitude'], lat=row['latitude'])\n", | ||
" result = response.json()['result']\n", | ||
" tract = result['geographies']['Census Tracts'][0]\n", | ||
" \n", | ||
" return pd.Series(tract)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Load the data and reverse geocode it" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# load the dataset of rental listings\n", | ||
"listings = pd.read_csv('data/listings.csv')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# reverse geocode each listing's lat-long to tract\n", | ||
"tracts = listings.apply(lambda row: geocode_row(row), axis=1)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>GEOID</th>\n", | ||
" <th>AREALAND</th>\n", | ||
" <th>AREAWATER</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>06083001304</td>\n", | ||
" <td>5090828</td>\n", | ||
" <td>144119</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>51153900701</td>\n", | ||
" <td>2702493</td>\n", | ||
" <td>43468</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>24031705000</td>\n", | ||
" <td>4552733</td>\n", | ||
" <td>14831</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>06073016301</td>\n", | ||
" <td>1068848</td>\n", | ||
" <td>0</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>22063040806</td>\n", | ||
" <td>69992596</td>\n", | ||
" <td>1310597</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" GEOID AREALAND AREAWATER\n", | ||
"0 06083001304 5090828 144119\n", | ||
"1 51153900701 2702493 43468\n", | ||
"2 24031705000 4552733 14831\n", | ||
"3 06073016301 1068848 0\n", | ||
"4 22063040806 69992596 1310597" | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# look at the tracts we got back\n", | ||
"tracts[['GEOID', 'AREALAND', 'AREAWATER']].head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>pid</th>\n", | ||
" <th>date</th>\n", | ||
" <th>region</th>\n", | ||
" <th>neighborhood</th>\n", | ||
" <th>rent</th>\n", | ||
" <th>bedrooms</th>\n", | ||
" <th>sqft</th>\n", | ||
" <th>rent_sqft</th>\n", | ||
" <th>rent_sqft_cat</th>\n", | ||
" <th>longitude</th>\n", | ||
" <th>latitude</th>\n", | ||
" <th>GEOID</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>4454264047</td>\n", | ||
" <td>2014-05-11</td>\n", | ||
" <td>santabarbara</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>3500.0</td>\n", | ||
" <td>3.0</td>\n", | ||
" <td>1200.0</td>\n", | ||
" <td>2.916667</td>\n", | ||
" <td>5</td>\n", | ||
" <td>-119.726987</td>\n", | ||
" <td>34.399757</td>\n", | ||
" <td>06083001304</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>4468128892</td>\n", | ||
" <td>2014-05-13</td>\n", | ||
" <td>washingtondc</td>\n", | ||
" <td>14300 Jeffries Rd Ste 1207</td>\n", | ||
" <td>1099.0</td>\n", | ||
" <td>2.0</td>\n", | ||
" <td>775.0</td>\n", | ||
" <td>1.418065</td>\n", | ||
" <td>4</td>\n", | ||
" <td>-77.268300</td>\n", | ||
" <td>38.635600</td>\n", | ||
" <td>51153900701</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>2</th>\n", | ||
" <td>4474037012</td>\n", | ||
" <td>2014-05-16</td>\n", | ||
" <td>washingtondc</td>\n", | ||
" <td>Bethesda</td>\n", | ||
" <td>2743.0</td>\n", | ||
" <td>1.0</td>\n", | ||
" <td>714.0</td>\n", | ||
" <td>3.841737</td>\n", | ||
" <td>5</td>\n", | ||
" <td>-77.102200</td>\n", | ||
" <td>39.000300</td>\n", | ||
" <td>24031705000</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>3</th>\n", | ||
" <td>4482003715</td>\n", | ||
" <td>2014-05-21</td>\n", | ||
" <td>sandiego</td>\n", | ||
" <td>escondido</td>\n", | ||
" <td>995.0</td>\n", | ||
" <td>1.0</td>\n", | ||
" <td>614.0</td>\n", | ||
" <td>1.620521</td>\n", | ||
" <td>4</td>\n", | ||
" <td>-116.950989</td>\n", | ||
" <td>32.807693</td>\n", | ||
" <td>06073016301</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>4</th>\n", | ||
" <td>4488888498</td>\n", | ||
" <td>2014-05-26</td>\n", | ||
" <td>batonrouge</td>\n", | ||
" <td>Denham Springs</td>\n", | ||
" <td>1000.0</td>\n", | ||
" <td>3.0</td>\n", | ||
" <td>1100.0</td>\n", | ||
" <td>0.909091</td>\n", | ||
" <td>2</td>\n", | ||
" <td>-90.942121</td>\n", | ||
" <td>30.438018</td>\n", | ||
" <td>22063040806</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" pid date region neighborhood rent \\\n", | ||
"0 4454264047 2014-05-11 santabarbara NaN 3500.0 \n", | ||
"1 4468128892 2014-05-13 washingtondc 14300 Jeffries Rd Ste 1207 1099.0 \n", | ||
"2 4474037012 2014-05-16 washingtondc Bethesda 2743.0 \n", | ||
"3 4482003715 2014-05-21 sandiego escondido 995.0 \n", | ||
"4 4488888498 2014-05-26 batonrouge Denham Springs 1000.0 \n", | ||
"\n", | ||
" bedrooms sqft rent_sqft rent_sqft_cat longitude latitude \\\n", | ||
"0 3.0 1200.0 2.916667 5 -119.726987 34.399757 \n", | ||
"1 2.0 775.0 1.418065 4 -77.268300 38.635600 \n", | ||
"2 1.0 714.0 3.841737 5 -77.102200 39.000300 \n", | ||
"3 1.0 614.0 1.620521 4 -116.950989 32.807693 \n", | ||
"4 3.0 1100.0 0.909091 2 -90.942121 30.438018 \n", | ||
"\n", | ||
" GEOID \n", | ||
"0 06083001304 \n", | ||
"1 51153900701 \n", | ||
"2 24031705000 \n", | ||
"3 06073016301 \n", | ||
"4 22063040806 " | ||
] | ||
}, | ||
"execution_count": 7, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"# merge the listings with the tracts' geoids\n", | ||
"geoids = pd.DataFrame(tracts['GEOID'])\n", | ||
"listings_geoids = pd.concat([listings, geoids], axis=1)\n", | ||
"listings_geoids.head()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python [conda root]", | ||
"language": "python", | ||
"name": "conda-root-py" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
pid,date,region,neighborhood,rent,bedrooms,sqft,rent_sqft,rent_sqft_cat,longitude,latitude | ||
4454264047,2014-05-11,santabarbara,,3500.0,3.0,1200.0,2.91666666667,5,-119.72698700000001,34.399757 | ||
4468128892,2014-05-13,washingtondc,14300 Jeffries Rd Ste 1207,1099.0,2.0,775.0,1.41806451613,4,-77.2683,38.6356 | ||
4474037012,2014-05-16,washingtondc,Bethesda,2743.0,1.0,714.0,3.8417366946800007,5,-77.1022,39.0003 | ||
4482003715,2014-05-21,sandiego,escondido,995.0,1.0,614.0,1.62052117264,4,-116.95098899999999,32.807693 | ||
4488888498,2014-05-26,batonrouge,Denham Springs,1000.0,3.0,1100.0,0.909090909091,2,-90.942121,30.438018 | ||
4496333550,2014-05-30,phoenix,Tempe,875.0,2.0,900.0,0.972222222222,2,-111.899847,33.407446 | ||
4511903470,2014-06-09,kansascity,Westside / Crossroads,980.0,2.0,980.0,1.0,3,-94.589923,39.092269 | ||
4524061401,2014-06-16,rochester,106 Edgerton,1125.0,2.0,1100.0,1.0227272727299999,3,-77.581158,43.146495 | ||
4535208051,2014-06-23,seattle,bellevue,2001.0,2.0,1010.0,1.9811881188099998,5,-122.1663,47.615 | ||
4548031631,2014-07-01,sfbay,daly city,3500.0,3.0,1190.0,2.9411764705900003,5,-122.380149,37.72937 | ||
4573697663,2014-07-17,saltlakecity,14901 Steep Mountain Drive,1650.0,4.0,3154.0,0.523145212429,1,-111.875961,40.483255 |