# Reverse-geocode lat-long to census geography
#
# Using https://geocoding.geo.census.gov/

import time

import pandas as pd

# Endpoint that maps a coordinate pair to the census geographies containing it.
_CENSUS_COORDINATES_URL = 'https://geocoding.geo.census.gov/geocoder/geographies/coordinates'


### Define helper functions

def census_geocode(lon, lat, benchmark='Public_AR_Current', vintage='Current_Current',
                   output_format='json', layers='Census Tracts', timeout=30, session=None):
    """Query the Census geocoder's coordinates endpoint for one point.

    Parameters
    ----------
    lon, lat
        Coordinates of the point; sent as the ``x`` and ``y`` query
        parameters respectively.
    benchmark, vintage, output_format, layers
        Sent as the ``benchmark``, ``vintage``, ``format`` and ``layers``
        query parameters.
    timeout
        Seconds to wait for the server before giving up. Without a timeout a
        single stalled connection would block a whole batch indefinitely.
    session
        Optional object exposing ``get(url, timeout=...)``, for example a
        ``requests.Session``, so that many lookups can share one connection.
        When omitted, ``requests.get`` is used.

    Returns
    -------
    The response object returned by the ``get`` call. The HTTP status is NOT
    checked here; that is left to the caller.
    """
    from urllib.parse import quote, urlencode

    # Percent-encode every value (a space becomes %20) rather than pasting
    # raw text into the URL, so reserved characters such as '&' or '#' inside
    # a value cannot corrupt the query string.
    query = urlencode(
        [('benchmark', benchmark), ('vintage', vintage), ('x', lon), ('y', lat),
         ('format', output_format), ('layers', layers)],
        quote_via=quote,
    )
    prepared_url = '{}?{}'.format(_CENSUS_COORDINATES_URL, query)

    if session is None:
        # Imported lazily so the helpers stay usable with any injected
        # session-like object.
        import requests
        return requests.get(prepared_url, timeout=timeout)
    return session.get(prepared_url, timeout=timeout)


def geocode_row(row, session=None):
    """Reverse-geocode one row to the first census tract returned for it.

    Parameters
    ----------
    row
        Mapping-like object with ``'longitude'`` and ``'latitude'`` entries,
        e.g. a DataFrame row.
    session
        Optional session-like object forwarded to ``census_geocode``.

    Returns
    -------
    pandas.Series
        The fields of the first entry under ``'Census Tracts'`` in the
        response. An empty Series is returned when either coordinate is
        missing or when the response lists no tract, so that one bad row does
        not abort a whole ``DataFrame.apply``.

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises for an HTTP error status.
    """
    lon = row['longitude']
    lat = row['latitude']

    # A missing coordinate cannot match anything; skip the network round trip.
    if pd.isnull(lon) or pd.isnull(lat):
        return pd.Series(dtype='object')

    response = census_geocode(lon=lon, lat=lat, session=session)
    # Fail with the HTTP error itself instead of an unrelated KeyError or
    # JSON decode error further down.
    response.raise_for_status()

    result = response.json()['result']
    matches = result['geographies'].get('Census Tracts') or []
    if not matches:
        return pd.Series(dtype='object')

    return pd.Series(matches[0])
"outputs": [], + "source": [ + "# load the dataset of rental listings\n", + "listings = pd.read_csv('data/listings.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# reverse geocode each listing's lat-long to tract\n", + "tracts = listings.apply(lambda row: geocode_row(row), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOIDAREALANDAREAWATER
0060830013045090828144119
151153900701270249343468
224031705000455273314831
30607301630110688480
422063040806699925961310597
\n", + "
" + ], + "text/plain": [ + " GEOID AREALAND AREAWATER\n", + "0 06083001304 5090828 144119\n", + "1 51153900701 2702493 43468\n", + "2 24031705000 4552733 14831\n", + "3 06073016301 1068848 0\n", + "4 22063040806 69992596 1310597" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# look at the tracts we got back\n", + "tracts[['GEOID', 'AREALAND', 'AREAWATER']].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
piddateregionneighborhoodrentbedroomssqftrent_sqftrent_sqft_catlongitudelatitudeGEOID
044542640472014-05-11santabarbaraNaN3500.03.01200.02.9166675-119.72698734.39975706083001304
144681288922014-05-13washingtondc14300 Jeffries Rd Ste 12071099.02.0775.01.4180654-77.26830038.63560051153900701
244740370122014-05-16washingtondcBethesda2743.01.0714.03.8417375-77.10220039.00030024031705000
344820037152014-05-21sandiegoescondido995.01.0614.01.6205214-116.95098932.80769306073016301
444888884982014-05-26batonrougeDenham Springs1000.03.01100.00.9090912-90.94212130.43801822063040806
\n", + "
" + ], + "text/plain": [ + " pid date region neighborhood rent \\\n", + "0 4454264047 2014-05-11 santabarbara NaN 3500.0 \n", + "1 4468128892 2014-05-13 washingtondc 14300 Jeffries Rd Ste 1207 1099.0 \n", + "2 4474037012 2014-05-16 washingtondc Bethesda 2743.0 \n", + "3 4482003715 2014-05-21 sandiego escondido 995.0 \n", + "4 4488888498 2014-05-26 batonrouge Denham Springs 1000.0 \n", + "\n", + " bedrooms sqft rent_sqft rent_sqft_cat longitude latitude \\\n", + "0 3.0 1200.0 2.916667 5 -119.726987 34.399757 \n", + "1 2.0 775.0 1.418065 4 -77.268300 38.635600 \n", + "2 1.0 714.0 3.841737 5 -77.102200 39.000300 \n", + "3 1.0 614.0 1.620521 4 -116.950989 32.807693 \n", + "4 3.0 1100.0 0.909091 2 -90.942121 30.438018 \n", + "\n", + " GEOID \n", + "0 06083001304 \n", + "1 51153900701 \n", + "2 24031705000 \n", + "3 06073016301 \n", + "4 22063040806 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# merge the listings with the tracts' geoids\n", + "geoids = pd.DataFrame(tracts['GEOID'])\n", + "listings_geoids = pd.concat([listings, geoids], axis=1)\n", + "listings_geoids.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [conda root]", + "language": "python", + "name": "conda-root-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/18-Geocoding-and-Geolocation/data/listings.csv b/18-Geocoding-and-Geolocation/data/listings.csv new file mode 100644 index 0000000..4c1ab62 --- /dev/null +++ b/18-Geocoding-and-Geolocation/data/listings.csv @@ -0,0 +1,12 @@ 
+pid,date,region,neighborhood,rent,bedrooms,sqft,rent_sqft,rent_sqft_cat,longitude,latitude +4454264047,2014-05-11,santabarbara,,3500.0,3.0,1200.0,2.91666666667,5,-119.72698700000001,34.399757 +4468128892,2014-05-13,washingtondc,14300 Jeffries Rd Ste 1207,1099.0,2.0,775.0,1.41806451613,4,-77.2683,38.6356 +4474037012,2014-05-16,washingtondc,Bethesda,2743.0,1.0,714.0,3.8417366946800007,5,-77.1022,39.0003 +4482003715,2014-05-21,sandiego,escondido,995.0,1.0,614.0,1.62052117264,4,-116.95098899999999,32.807693 +4488888498,2014-05-26,batonrouge,Denham Springs,1000.0,3.0,1100.0,0.909090909091,2,-90.942121,30.438018 +4496333550,2014-05-30,phoenix,Tempe,875.0,2.0,900.0,0.972222222222,2,-111.899847,33.407446 +4511903470,2014-06-09,kansascity,Westside / Crossroads,980.0,2.0,980.0,1.0,3,-94.589923,39.092269 +4524061401,2014-06-16,rochester,106 Edgerton,1125.0,2.0,1100.0,1.0227272727299999,3,-77.581158,43.146495 +4535208051,2014-06-23,seattle,bellevue,2001.0,2.0,1010.0,1.9811881188099998,5,-122.1663,47.615 +4548031631,2014-07-01,sfbay,daly city,3500.0,3.0,1190.0,2.9411764705900003,5,-122.380149,37.72937 +4573697663,2014-07-17,saltlakecity,14901 Steep Mountain Drive,1650.0,4.0,3154.0,0.523145212429,1,-111.875961,40.483255