diff --git a/src/explore.ipynb b/src/explore.ipynb index a323483c5..60812281a 100644 --- a/src/explore.ipynb +++ b/src/explore.ipynb @@ -13,7 +13,673 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here" + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.cluster import KMeans\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn import tree\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | MedInc | \n", + "HouseAge | \n", + "AveRooms | \n", + "AveBedrms | \n", + "Population | \n", + "AveOccup | \n", + "Latitude | \n", + "Longitude | \n", + "MedHouseVal | \n", + "
---|---|---|---|---|---|---|---|---|---|
0 | \n", + "8.3252 | \n", + "41.0 | \n", + "6.984127 | \n", + "1.023810 | \n", + "322.0 | \n", + "2.555556 | \n", + "37.88 | \n", + "-122.23 | \n", + "4.526 | \n", + "
1 | \n", + "8.3014 | \n", + "21.0 | \n", + "6.238137 | \n", + "0.971880 | \n", + "2401.0 | \n", + "2.109842 | \n", + "37.86 | \n", + "-122.22 | \n", + "3.585 | \n", + "
2 | \n", + "7.2574 | \n", + "52.0 | \n", + "8.288136 | \n", + "1.073446 | \n", + "496.0 | \n", + "2.802260 | \n", + "37.85 | \n", + "-122.24 | \n", + "3.521 | \n", + "
3 | \n", + "5.6431 | \n", + "52.0 | \n", + "5.817352 | \n", + "1.073059 | \n", + "558.0 | \n", + "2.547945 | \n", + "37.85 | \n", + "-122.25 | \n", + "3.413 | \n", + "
4 | \n", + "3.8462 | \n", + "52.0 | \n", + "6.281853 | \n", + "1.081081 | \n", + "565.0 | \n", + "2.181467 | \n", + "37.85 | \n", + "-122.25 | \n", + "3.422 | \n", + "
\n", + " | MedInc | \n", + "Latitude | \n", + "Longitude | \n", + "
---|---|---|---|
0 | \n", + "8.3252 | \n", + "37.88 | \n", + "-122.23 | \n", + "
1 | \n", + "8.3014 | \n", + "37.86 | \n", + "-122.22 | \n", + "
2 | \n", + "7.2574 | \n", + "37.85 | \n", + "-122.24 | \n", + "
3 | \n", + "5.6431 | \n", + "37.85 | \n", + "-122.25 | \n", + "
4 | \n", + "3.8462 | \n", + "37.85 | \n", + "-122.25 | \n", + "
\n", + " | MedInc | \n", + "Latitude | \n", + "Longitude | \n", + "
---|---|---|---|
14196 | \n", + "3.2596 | \n", + "32.71 | \n", + "-117.03 | \n", + "
8267 | \n", + "3.8125 | \n", + "33.77 | \n", + "-118.16 | \n", + "
17445 | \n", + "4.1563 | \n", + "34.66 | \n", + "-120.48 | \n", + "
14265 | \n", + "1.9425 | \n", + "32.69 | \n", + "-117.11 | \n", + "
2271 | \n", + "3.5542 | \n", + "36.78 | \n", + "-119.80 | \n", + "
KMeans(n_clusters=6, n_init='auto', random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(n_clusters=6, n_init='auto', random_state=42)
\n", + " | MedInc | \n", + "Latitude | \n", + "Longitude | \n", + "cluster | \n", + "
---|---|---|---|---|
14196 | \n", + "3.2596 | \n", + "32.71 | \n", + "-117.03 | \n", + "3 | \n", + "
8267 | \n", + "3.8125 | \n", + "33.77 | \n", + "-118.16 | \n", + "1 | \n", + "
17445 | \n", + "4.1563 | \n", + "34.66 | \n", + "-120.48 | \n", + "1 | \n", + "
14265 | \n", + "1.9425 | \n", + "32.69 | \n", + "-117.11 | \n", + "3 | \n", + "
2271 | \n", + "3.5542 | \n", + "36.78 | \n", + "-119.80 | \n", + "5 | \n", + "
\n", + " | MedInc | \n", + "Latitude | \n", + "Longitude | \n", + "cluster | \n", + "
---|---|---|---|---|
20046 | \n", + "1.6812 | \n", + "36.06 | \n", + "-119.01 | \n", + "3 | \n", + "
3024 | \n", + "2.5313 | \n", + "35.14 | \n", + "-119.46 | \n", + "3 | \n", + "
15663 | \n", + "3.4801 | \n", + "37.80 | \n", + "-122.44 | \n", + "5 | \n", + "
20484 | \n", + "5.7376 | \n", + "34.28 | \n", + "-118.72 | \n", + "2 | \n", + "
9814 | \n", + "3.7250 | \n", + "36.62 | \n", + "-121.93 | \n", + "5 | \n", + "