From 9ebe3b02373ee34c0f1eac961e01faa72a74b9e1 Mon Sep 17 00:00:00 2001 From: richik-ray <45549622+richik-ray@users.noreply.github.com> Date: Sun, 3 Nov 2019 10:52:52 -0500 Subject: [PATCH] final dataframe --- final stacking.ipynb | 490 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 490 insertions(+) create mode 100644 final stacking.ipynb diff --git a/final stacking.ipynb b/final stacking.ipynb new file mode 100644 index 0000000..7198bf5 --- /dev/null +++ b/final stacking.ipynb @@ -0,0 +1,490 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_colwidth', -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ValueCounty, State
0184500.0Valdez Cordova, AK
1397000.0Sitka, AK
2276450.0Matanuska Susitna, AK
3289000.0Ketchikan Gateway, AK
4318000.0Anchorage, AK
5337000.0Kodiak Island, AK
6384900.0Juneau, AK
7229900.0Fairbanks North Star, AK
8259000.0Kenai Peninsula, AK
9159000.0Clarke, AL
10188700.0Bibb, AL
11173000.0Coffee, AL
12147000.0Covington, AL
13170000.0De Kalb, AL
14134900.0Escambia, AL
15117684.0Monroe, AL
16310000.0Baldwin, AL
17169500.0Barbour, AL
18152950.0Walker, AL
19149900.0Hale, AL
20299949.5Tallapoosa, AL
21299900.0Winston, AL
22183999.0Montgomery, AL
23170450.0Chilton, AL
24185000.0Blount, AL
25137500.0Crenshaw, AL
26139900.0Russell, AL
2799000.0Dallas, AL
28175000.0Jackson, AL
29219000.0Cherokee, AL
.........
2259139900.0Kanawha, WV
2260133950.0Pendleton, WV
226194500.0Hancock, WV
2262151950.0Hardy, WV
2263160500.0Preston, WV
2264199000.0Hampshire, WV
2265135500.0Nicholas, WV
2266298500.0Jefferson, WV
2267159000.0Upshur, WV
2268199900.0Uinta, WY
2269260000.0Sublette, WY
2270269900.0Crook, WY
2271175900.0Carbon, WY
2272360000.0Lincoln, WY
2273225450.0Fremont, WY
2274182450.0Weston, WY
2275154900.0Washakie, WY
2276161750.0Goshen, WY
2277216900.0Natrona, WY
2278248000.0Johnson, WY
2279235000.0Sweetwater, WY
2280350000.0Park, WY
22811662500.0Teton, WY
2282229925.0Converse, WY
2283265000.0Albany, WY
2284229000.0Campbell, WY
2285190000.0Platte, WY
2286161250.0Hot Springs, WY
2287275000.0Sheridan, WY
2288296450.0Laramie, WY
\n", + "

2289 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Value County, State\n", + "0 184500.0 Valdez Cordova, AK \n", + "1 397000.0 Sitka, AK \n", + "2 276450.0 Matanuska Susitna, AK \n", + "3 289000.0 Ketchikan Gateway, AK \n", + "4 318000.0 Anchorage, AK \n", + "5 337000.0 Kodiak Island, AK \n", + "6 384900.0 Juneau, AK \n", + "7 229900.0 Fairbanks North Star, AK\n", + "8 259000.0 Kenai Peninsula, AK \n", + "9 159000.0 Clarke, AL \n", + "10 188700.0 Bibb, AL \n", + "11 173000.0 Coffee, AL \n", + "12 147000.0 Covington, AL \n", + "13 170000.0 De Kalb, AL \n", + "14 134900.0 Escambia, AL \n", + "15 117684.0 Monroe, AL \n", + "16 310000.0 Baldwin, AL \n", + "17 169500.0 Barbour, AL \n", + "18 152950.0 Walker, AL \n", + "19 149900.0 Hale, AL \n", + "20 299949.5 Tallapoosa, AL \n", + "21 299900.0 Winston, AL \n", + "22 183999.0 Montgomery, AL \n", + "23 170450.0 Chilton, AL \n", + "24 185000.0 Blount, AL \n", + "25 137500.0 Crenshaw, AL \n", + "26 139900.0 Russell, AL \n", + "27 99000.0 Dallas, AL \n", + "28 175000.0 Jackson, AL \n", + "29 219000.0 Cherokee, AL \n", + "... ... ... \n", + "2259 139900.0 Kanawha, WV \n", + "2260 133950.0 Pendleton, WV \n", + "2261 94500.0 Hancock, WV \n", + "2262 151950.0 Hardy, WV \n", + "2263 160500.0 Preston, WV \n", + "2264 199000.0 Hampshire, WV \n", + "2265 135500.0 Nicholas, WV \n", + "2266 298500.0 Jefferson, WV \n", + "2267 159000.0 Upshur, WV \n", + "2268 199900.0 Uinta, WY \n", + "2269 260000.0 Sublette, WY \n", + "2270 269900.0 Crook, WY \n", + "2271 175900.0 Carbon, WY \n", + "2272 360000.0 Lincoln, WY \n", + "2273 225450.0 Fremont, WY \n", + "2274 182450.0 Weston, WY \n", + "2275 154900.0 Washakie, WY \n", + "2276 161750.0 Goshen, WY \n", + "2277 216900.0 Natrona, WY \n", + "2278 248000.0 Johnson, WY \n", + "2279 235000.0 Sweetwater, WY \n", + "2280 350000.0 Park, WY \n", + "2281 1662500.0 Teton, WY \n", + "2282 229925.0 Converse, WY \n", + "2283 265000.0 Albany, WY \n", + "2284 229000.0 Campbell, WY \n", + "2285 190000.0 Platte, WY \n", + "2286 161250.0 Hot Springs, WY \n", + "2287 275000.0 Sheridan, WY \n", + "2288 296450.0 Laramie, WY \n", + "\n", + "[2289 rows x 2 columns]" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.chdir(r\"C:\\Users\\rayri\\Desktop\\Python\\HackDuke\")\n", + "df = pd.read_csv(\"Housing Parsing.csv\")\n", + "#df[\"COUNTY\"] = df[\"COUNTY\"].str.replace(\"County\", \"\")\n", + "#df[\"COUNTY\"] = df[\"COUNTY\"].str.replace(\"Parish\", \"\")\n", + "#df[\"COUNTY\"] = df[\"COUNTY\"].str.replace(\"Borough\", \"\")\n", + "#df = df.sort_values(\"State\")\n", + "#df[\"State\"] = df[\"county_name\"].str[-2:]\n", + "#df[\"1901-2000\"] = df[\"1901-2000\"].str[:-2]\n", + "#df[\"COUNTY\"] = df[\"COUNTY\"].str[:-4]\n", + "#df[\"Value\"] = df[\"Value\"].str[:-2]\n", + "df = df.rename(columns = {\"RegionName\": \"County\"})\n", + "df = df.rename(columns = {\"2019-09\": \"Value\"})\n", + "#df = df.rename(columns = {\"perc\": \"Value\"})\n", + "#df[\"County\"] =df[\"County\"].str.title()\n", + "#df = df.drop(columns = [\"cohort\", \"zsc\"])\n", + "df['County'] = df['County'].str.strip()\n", + "df.drop(df.columns.difference(['County','State', 'Value']), 1, inplace=True)\n", + "df[\"County, State\"] = df[\"County\"] + \", \" + df[\"State\"]\n", + "df = df.drop(columns = [\"County\", \"State\"])\n", + "#df = df.drop(df.index[0])\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"Housing Parsing2.csv\", index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}