diff --git a/Cereal_classifier.ipynb b/Cereal_classifier.ipynb
new file mode 100644
index 0000000..99ac766
--- /dev/null
+++ b/Cereal_classifier.ipynb
@@ -0,0 +1,1939 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn import preprocessing\n",
+ "from sklearn.cluster import KMeans\n",
+ "from sklearn.cluster import MeanShift\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Read the raw cereal nutrition table; the path is relative to the notebook's working directory.\n",
+ "cereals = pd.read_csv(filepath_or_buffer='cereals.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " mfr | \n",
+ " type | \n",
+ " calories | \n",
+ " protein | \n",
+ " fat | \n",
+ " sodium | \n",
+ " fiber | \n",
+ " carbo | \n",
+ " sugars | \n",
+ " potass | \n",
+ " vitamins | \n",
+ " shelf | \n",
+ " weight | \n",
+ " cups | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 72 | \n",
+ " Triples | \n",
+ " G | \n",
+ " C | \n",
+ " 110 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 250 | \n",
+ " 0 | \n",
+ " 21 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " 25 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 0.75 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " Trix | \n",
+ " G | \n",
+ " C | \n",
+ " 110 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 140 | \n",
+ " 0 | \n",
+ " 13 | \n",
+ " 12 | \n",
+ " 25 | \n",
+ " 25 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " Wheat_Chex | \n",
+ " R | \n",
+ " C | \n",
+ " 100 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 230 | \n",
+ " 3 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 115 | \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.67 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Wheaties | \n",
+ " G | \n",
+ " C | \n",
+ " 100 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 200 | \n",
+ " 3 | \n",
+ " 17 | \n",
+ " 3 | \n",
+ " 110 | \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " Wheaties_Honey_Gold | \n",
+ " G | \n",
+ " C | \n",
+ " 110 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 200 | \n",
+ " 1 | \n",
+ " 16 | \n",
+ " 8 | \n",
+ " 60 | \n",
+ " 25 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name mfr type calories protein fat sodium fiber \\\n",
+ "72 Triples G C 110 2 1 250 0 \n",
+ "73 Trix G C 110 1 1 140 0 \n",
+ "74 Wheat_Chex R C 100 3 1 230 3 \n",
+ "75 Wheaties G C 100 3 1 200 3 \n",
+ "76 Wheaties_Honey_Gold G C 110 2 1 200 1 \n",
+ "\n",
+ " carbo sugars potass vitamins shelf weight cups \n",
+ "72 21 3 60 25 3 1 0.75 \n",
+ "73 13 12 25 25 2 1 1.00 \n",
+ "74 17 3 115 25 1 1 0.67 \n",
+ "75 17 3 110 25 1 1 1.00 \n",
+ "76 16 8 60 25 1 1 0.75 "
+ ]
+ },
+ "execution_count": 120,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the last five rows as a quick check that the file parsed into the expected columns.\n",
+ "cereals.tail(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Two hand-entered sample rows. The ten values per row follow the column order used when\n",
+ "# test_df is built from this array: calories, protein, fat, sodium, fiber, carbo, sugars,\n",
+ "# potass, vitamins, cups.\n",
+ "test_cereals = np.array([\n",
+ "    [170, 11, 1.5, 85, 9, 36, 8, 270, 26, 1],\n",
+ "    [103, 1.5, 0.6, 38, 1.4, 24, 15, 49, 20, 0.75],\n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 169,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Label the sample rows. Note there is no 'shelf' or 'weight' column here, unlike the\n",
+ "# numeric columns selected from the main table.\n",
+ "test_df = pd.DataFrame(\n",
+ "    data=test_cereals,\n",
+ "    columns=['calories', 'protein', 'fat', 'sodium', 'fiber',\n",
+ "             'carbo', 'sugars', 'potass', 'vitamins', 'cups'],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Processing Data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# NOTE(review): assumes -1 is the dataset's sentinel for a missing measurement -- confirm\n",
+ "# against the data source.\n",
+ "# Match the number -1, not the string '-1': read_csv parses these columns as numbers, so a\n",
+ "# string pattern never matches anything.\n",
+ "numeric_cols = cereals.select_dtypes(include=[np.number]).columns\n",
+ "observed = cereals[numeric_cols].replace(-1, np.nan)\n",
+ "# Column means skip NaN, so each gap is filled with the mean of the values actually recorded\n",
+ "# (the sentinel itself no longer drags the mean down).\n",
+ "cereals[numeric_cols] = observed.fillna(observed.mean())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Express every numeric measurement per cup so cereals with different serving sizes can be compared.\n",
+ "# NOTE(review): 'cups' is divided by itself and so becomes a constant 1.0 (which is why it shows\n",
+ "# as 0 after scaling and NaN in the correlation table), and 'shelf' is rescaled as well --\n",
+ "# confirm both are intended.\n",
+ "num_columns = cereals[[\n",
+ "    'calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo',\n",
+ "    'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups',\n",
+ "]]\n",
+ "# Broadcast the per-row factor 1/cups down the rows (axis=0) instead of a Python-level row loop.\n",
+ "norm_df = num_columns.mul(1 / num_columns['cups'], axis=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 170,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Put the samples on the same per-cup basis as the training rows.\n",
+ "norm_test = test_df.div(test_df['cups'], axis=0)\n",
+ "# Standardise with the mean/std of the training data (norm_df), not of the samples themselves:\n",
+ "# scaling a two-row frame on its own collapses every column to +/-1 (or 0) and throws away\n",
+ "# where the samples sit relative to the real cereals.\n",
+ "feature_names = list(test_df.columns)\n",
+ "train_scaler = preprocessing.StandardScaler().fit(norm_df[feature_names])\n",
+ "scaled_test = train_scaler.transform(norm_test)\n",
+ "test = pd.DataFrame(scaled_test, columns=feature_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Standardise each per-cup column, then wrap the result in a DataFrame again so the values\n",
+ "# carry the same column labels as norm_df.\n",
+ "nums_scaled = preprocessing.scale(norm_df)\n",
+ "scaled_df = pd.DataFrame(data=nums_scaled, columns=list(norm_df.columns))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " calories | \n",
+ " protein | \n",
+ " fat | \n",
+ " sodium | \n",
+ " fiber | \n",
+ " carbo | \n",
+ " sugars | \n",
+ " potass | \n",
+ " vitamins | \n",
+ " shelf | \n",
+ " weight | \n",
+ " cups | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.147341 | \n",
+ " 3.423808 | \n",
+ " 0.970720 | \n",
+ " 1.323849 | \n",
+ " 4.729379 | \n",
+ " -0.527990 | \n",
+ " 1.516802 | \n",
+ " 4.197513 | \n",
+ " 1.459918 | \n",
+ " 2.762739 | \n",
+ " 2.675558 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " -0.392323 | \n",
+ " -0.248748 | \n",
+ " 2.175351 | \n",
+ " -1.452691 | \n",
+ " -0.282811 | \n",
+ " -1.423470 | \n",
+ " -0.232761 | \n",
+ " -0.101023 | \n",
+ " -1.394093 | \n",
+ " -0.070810 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1.147341 | \n",
+ " 3.423808 | \n",
+ " 0.970720 | \n",
+ " 4.210295 | \n",
+ " 4.192742 | \n",
+ " 0.230891 | \n",
+ " 0.996099 | \n",
+ " 4.927781 | \n",
+ " 1.459918 | \n",
+ " 2.762739 | \n",
+ " 2.675558 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " -0.726592 | \n",
+ " 1.764447 | \n",
+ " -0.882558 | \n",
+ " 0.488999 | \n",
+ " 4.321535 | \n",
+ " -0.421747 | \n",
+ " -1.607417 | \n",
+ " 3.061948 | \n",
+ " 0.489555 | \n",
+ " 1.324819 | \n",
+ " 0.979816 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.053369 | \n",
+ " -0.382961 | \n",
+ " 0.748327 | \n",
+ " 0.391304 | \n",
+ " -0.400871 | \n",
+ " -0.087839 | \n",
+ " 0.225458 | \n",
+ " -0.142570 | \n",
+ " -0.138328 | \n",
+ " 0.394400 | \n",
+ " -0.117430 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " calories protein fat sodium fiber carbo sugars \\\n",
+ "0 1.147341 3.423808 0.970720 1.323849 4.729379 -0.527990 1.516802 \n",
+ "1 -0.392323 -0.248748 2.175351 -1.452691 -0.282811 -1.423470 -0.232761 \n",
+ "2 1.147341 3.423808 0.970720 4.210295 4.192742 0.230891 0.996099 \n",
+ "3 -0.726592 1.764447 -0.882558 0.488999 4.321535 -0.421747 -1.607417 \n",
+ "4 0.053369 -0.382961 0.748327 0.391304 -0.400871 -0.087839 0.225458 \n",
+ "\n",
+ " potass vitamins shelf weight cups \n",
+ "0 4.197513 1.459918 2.762739 2.675558 0 \n",
+ "1 -0.101023 -1.394093 -0.070810 -0.666052 0 \n",
+ "2 4.927781 1.459918 2.762739 2.675558 0 \n",
+ "3 3.061948 0.489555 1.324819 0.979816 0 \n",
+ "4 -0.142570 -0.138328 0.394400 -0.117430 0 "
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Inspect the first five standardised rows.\n",
+ "scaled_df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Set the descriptive (non-numeric) columns aside so they can be re-attached to the scaled values.\n",
+ "other_cols = cereals.loc[:, ['name', 'mfr', 'type']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Re-attach name/mfr/type to the scaled measurements by matching row index on both sides.\n",
+ "new_df = other_cols.merge(\n",
+ "    scaled_df,\n",
+ "    how='inner',\n",
+ "    left_index=True,\n",
+ "    right_index=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " mfr | \n",
+ " type | \n",
+ " calories | \n",
+ " protein | \n",
+ " fat | \n",
+ " sodium | \n",
+ " fiber | \n",
+ " carbo | \n",
+ " sugars | \n",
+ " potass | \n",
+ " vitamins | \n",
+ " shelf | \n",
+ " weight | \n",
+ " cups | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 57 | \n",
+ " Quaker_Oatmeal | \n",
+ " Q | \n",
+ " H | \n",
+ " 0.096608 | \n",
+ " 1.548104 | \n",
+ " 0.943059 | \n",
+ " -1.562598 | \n",
+ " 0.076656 | \n",
+ " 0.302896 | \n",
+ " 0.167859 | \n",
+ " 0.074773 | \n",
+ " -1.394093 | \n",
+ " -0.772096 | \n",
+ " 0.144599 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " Raisin_Bran | \n",
+ " K | \n",
+ " C | \n",
+ " 0.276215 | \n",
+ " 0.153891 | \n",
+ " -0.067116 | \n",
+ " 0.488999 | \n",
+ " 0.543610 | \n",
+ " -0.087839 | \n",
+ " 1.141896 | \n",
+ " 1.013548 | \n",
+ " -0.138328 | \n",
+ " -0.225880 | \n",
+ " 0.606752 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " Raisin_Nut_Bran | \n",
+ " G | \n",
+ " C | \n",
+ " 0.944754 | \n",
+ " 0.959169 | \n",
+ " 1.563769 | \n",
+ " 0.488999 | \n",
+ " 0.248460 | \n",
+ " 0.204330 | \n",
+ " 1.141896 | \n",
+ " 0.772559 | \n",
+ " 0.489555 | \n",
+ " 1.324819 | \n",
+ " 0.979816 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " Raisin_Squares | \n",
+ " K | \n",
+ " C | \n",
+ " 0.610485 | \n",
+ " 0.153891 | \n",
+ " -0.882558 | \n",
+ " -1.562598 | \n",
+ " 0.071370 | \n",
+ " 1.331269 | \n",
+ " 0.454568 | \n",
+ " 0.411077 | \n",
+ " 0.489555 | \n",
+ " 1.324819 | \n",
+ " 0.979816 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " Rice_Chex | \n",
+ " R | \n",
+ " C | \n",
+ " -0.770964 | \n",
+ " -1.100347 | \n",
+ " -0.882558 | \n",
+ " -0.006392 | \n",
+ " -0.636991 | \n",
+ " 0.123439 | \n",
+ " -1.303290 | \n",
+ " -0.754410 | \n",
+ " -0.560621 | \n",
+ " -1.054748 | \n",
+ " -0.855400 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " Rice_Krispies | \n",
+ " K | \n",
+ " C | \n",
+ " -0.559457 | \n",
+ " -0.651387 | \n",
+ " -0.882558 | \n",
+ " 0.562271 | \n",
+ " -0.636991 | \n",
+ " 0.329546 | \n",
+ " -1.091921 | \n",
+ " -0.703494 | \n",
+ " -0.452269 | \n",
+ " -1.001229 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " Shredded_Wheat | \n",
+ " N | \n",
+ " C | \n",
+ " -1.060861 | \n",
+ " -0.651387 | \n",
+ " -0.882558 | \n",
+ " -1.562598 | \n",
+ " -0.105721 | \n",
+ " -0.421747 | \n",
+ " -1.607417 | \n",
+ " -0.342011 | \n",
+ " -1.394093 | \n",
+ " -1.001229 | \n",
+ " -0.945850 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " Shredded_Wheat_'n'Bran | \n",
+ " N | \n",
+ " C | \n",
+ " -0.152846 | \n",
+ " 0.346196 | \n",
+ " -0.882558 | \n",
+ " -1.562598 | \n",
+ " 0.420264 | \n",
+ " 1.125692 | \n",
+ " -1.607417 | \n",
+ " 0.344536 | \n",
+ " -1.394093 | \n",
+ " -0.772096 | \n",
+ " 0.144599 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " Shredded_Wheat_spoon_size | \n",
+ " N | \n",
+ " C | \n",
+ " -0.152846 | \n",
+ " 0.346196 | \n",
+ " -0.882558 | \n",
+ " -1.562598 | \n",
+ " 0.155950 | \n",
+ " 1.312580 | \n",
+ " -1.607417 | \n",
+ " 0.164694 | \n",
+ " -1.394093 | \n",
+ " -0.772096 | \n",
+ " 0.144599 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " Smacks | \n",
+ " K | \n",
+ " C | \n",
+ " 0.053369 | \n",
+ " -0.382961 | \n",
+ " -0.067116 | \n",
+ " -0.878732 | \n",
+ " -0.400871 | \n",
+ " -0.922609 | \n",
+ " 1.829224 | \n",
+ " -0.593041 | \n",
+ " -0.138328 | \n",
+ " -0.225880 | \n",
+ " -0.117430 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " Special_K | \n",
+ " K | \n",
+ " C | \n",
+ " -0.559457 | \n",
+ " 0.959169 | \n",
+ " -0.882558 | \n",
+ " 0.122643 | \n",
+ " -0.459901 | \n",
+ " -0.421747 | \n",
+ " -1.091921 | \n",
+ " -0.582999 | \n",
+ " -0.452269 | \n",
+ " -1.001229 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " Strawberry_Fruit_Wheats | \n",
+ " N | \n",
+ " C | \n",
+ " -0.893727 | \n",
+ " -0.651387 | \n",
+ " -0.882558 | \n",
+ " -1.452691 | \n",
+ " -0.105721 | \n",
+ " -0.546962 | \n",
+ " -0.748257 | \n",
+ " -0.372135 | \n",
+ " -0.452269 | \n",
+ " -0.536019 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " Total_Corn_Flakes | \n",
+ " G | \n",
+ " C | \n",
+ " -0.559457 | \n",
+ " -0.651387 | \n",
+ " -0.270976 | \n",
+ " -0.097171 | \n",
+ " -0.636991 | \n",
+ " 0.204330 | \n",
+ " -1.091921 | \n",
+ " -0.703494 | \n",
+ " 2.373202 | \n",
+ " -0.070810 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " Total_Raisin_Bran | \n",
+ " G | \n",
+ " C | \n",
+ " -0.058054 | \n",
+ " -0.248748 | \n",
+ " -0.270976 | \n",
+ " -0.170442 | \n",
+ " 0.071370 | \n",
+ " -0.546962 | \n",
+ " 0.798232 | \n",
+ " 0.471324 | \n",
+ " 2.373202 | \n",
+ " -0.070810 | \n",
+ " 0.156882 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " Total_Whole_Grain | \n",
+ " G | \n",
+ " C | \n",
+ " -0.726592 | \n",
+ " -0.248748 | \n",
+ " -0.270976 | \n",
+ " -0.097171 | \n",
+ " -0.105721 | \n",
+ " -0.421747 | \n",
+ " -1.091921 | \n",
+ " -0.251641 | \n",
+ " 2.373202 | \n",
+ " -0.070810 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " Triples | \n",
+ " G | \n",
+ " C | \n",
+ " 0.053369 | \n",
+ " -0.382961 | \n",
+ " -0.067116 | \n",
+ " 0.879780 | \n",
+ " -0.636991 | \n",
+ " 1.080838 | \n",
+ " -0.920089 | \n",
+ " -0.432382 | \n",
+ " -0.138328 | \n",
+ " 0.394400 | \n",
+ " -0.117430 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " Trix | \n",
+ " G | \n",
+ " C | \n",
+ " -0.559457 | \n",
+ " -1.054026 | \n",
+ " -0.270976 | \n",
+ " -0.536799 | \n",
+ " -0.636991 | \n",
+ " -0.797393 | \n",
+ " 0.454568 | \n",
+ " -0.763741 | \n",
+ " -0.452269 | \n",
+ " -0.536019 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " Wheat_Chex | \n",
+ " R | \n",
+ " C | \n",
+ " 0.096608 | \n",
+ " 0.346196 | \n",
+ " 0.030251 | \n",
+ " 0.952687 | \n",
+ " 0.155950 | \n",
+ " 0.751914 | \n",
+ " -0.838020 | \n",
+ " 0.119733 | \n",
+ " 0.011614 | \n",
+ " -0.772096 | \n",
+ " 0.144599 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Wheaties | \n",
+ " G | \n",
+ " C | \n",
+ " -0.726592 | \n",
+ " -0.248748 | \n",
+ " -0.270976 | \n",
+ " -0.097171 | \n",
+ " -0.105721 | \n",
+ " -0.296531 | \n",
+ " -1.091921 | \n",
+ " -0.251641 | \n",
+ " -0.452269 | \n",
+ " -1.001229 | \n",
+ " -0.666052 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " Wheaties_Honey_Gold | \n",
+ " G | \n",
+ " C | \n",
+ " 0.053369 | \n",
+ " -0.382961 | \n",
+ " -0.067116 | \n",
+ " 0.391304 | \n",
+ " -0.400871 | \n",
+ " 0.246069 | \n",
+ " 0.225458 | \n",
+ " -0.432382 | \n",
+ " -0.138328 | \n",
+ " -0.846159 | \n",
+ " -0.117430 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name mfr type calories protein fat \\\n",
+ "57 Quaker_Oatmeal Q H 0.096608 1.548104 0.943059 \n",
+ "58 Raisin_Bran K C 0.276215 0.153891 -0.067116 \n",
+ "59 Raisin_Nut_Bran G C 0.944754 0.959169 1.563769 \n",
+ "60 Raisin_Squares K C 0.610485 0.153891 -0.882558 \n",
+ "61 Rice_Chex R C -0.770964 -1.100347 -0.882558 \n",
+ "62 Rice_Krispies K C -0.559457 -0.651387 -0.882558 \n",
+ "63 Shredded_Wheat N C -1.060861 -0.651387 -0.882558 \n",
+ "64 Shredded_Wheat_'n'Bran N C -0.152846 0.346196 -0.882558 \n",
+ "65 Shredded_Wheat_spoon_size N C -0.152846 0.346196 -0.882558 \n",
+ "66 Smacks K C 0.053369 -0.382961 -0.067116 \n",
+ "67 Special_K K C -0.559457 0.959169 -0.882558 \n",
+ "68 Strawberry_Fruit_Wheats N C -0.893727 -0.651387 -0.882558 \n",
+ "69 Total_Corn_Flakes G C -0.559457 -0.651387 -0.270976 \n",
+ "70 Total_Raisin_Bran G C -0.058054 -0.248748 -0.270976 \n",
+ "71 Total_Whole_Grain G C -0.726592 -0.248748 -0.270976 \n",
+ "72 Triples G C 0.053369 -0.382961 -0.067116 \n",
+ "73 Trix G C -0.559457 -1.054026 -0.270976 \n",
+ "74 Wheat_Chex R C 0.096608 0.346196 0.030251 \n",
+ "75 Wheaties G C -0.726592 -0.248748 -0.270976 \n",
+ "76 Wheaties_Honey_Gold G C 0.053369 -0.382961 -0.067116 \n",
+ "\n",
+ " sodium fiber carbo sugars potass vitamins shelf \\\n",
+ "57 -1.562598 0.076656 0.302896 0.167859 0.074773 -1.394093 -0.772096 \n",
+ "58 0.488999 0.543610 -0.087839 1.141896 1.013548 -0.138328 -0.225880 \n",
+ "59 0.488999 0.248460 0.204330 1.141896 0.772559 0.489555 1.324819 \n",
+ "60 -1.562598 0.071370 1.331269 0.454568 0.411077 0.489555 1.324819 \n",
+ "61 -0.006392 -0.636991 0.123439 -1.303290 -0.754410 -0.560621 -1.054748 \n",
+ "62 0.562271 -0.636991 0.329546 -1.091921 -0.703494 -0.452269 -1.001229 \n",
+ "63 -1.562598 -0.105721 -0.421747 -1.607417 -0.342011 -1.394093 -1.001229 \n",
+ "64 -1.562598 0.420264 1.125692 -1.607417 0.344536 -1.394093 -0.772096 \n",
+ "65 -1.562598 0.155950 1.312580 -1.607417 0.164694 -1.394093 -0.772096 \n",
+ "66 -0.878732 -0.400871 -0.922609 1.829224 -0.593041 -0.138328 -0.225880 \n",
+ "67 0.122643 -0.459901 -0.421747 -1.091921 -0.582999 -0.452269 -1.001229 \n",
+ "68 -1.452691 -0.105721 -0.546962 -0.748257 -0.372135 -0.452269 -0.536019 \n",
+ "69 -0.097171 -0.636991 0.204330 -1.091921 -0.703494 2.373202 -0.070810 \n",
+ "70 -0.170442 0.071370 -0.546962 0.798232 0.471324 2.373202 -0.070810 \n",
+ "71 -0.097171 -0.105721 -0.421747 -1.091921 -0.251641 2.373202 -0.070810 \n",
+ "72 0.879780 -0.636991 1.080838 -0.920089 -0.432382 -0.138328 0.394400 \n",
+ "73 -0.536799 -0.636991 -0.797393 0.454568 -0.763741 -0.452269 -0.536019 \n",
+ "74 0.952687 0.155950 0.751914 -0.838020 0.119733 0.011614 -0.772096 \n",
+ "75 -0.097171 -0.105721 -0.296531 -1.091921 -0.251641 -0.452269 -1.001229 \n",
+ "76 0.391304 -0.400871 0.246069 0.225458 -0.432382 -0.138328 -0.846159 \n",
+ "\n",
+ " weight cups \n",
+ "57 0.144599 0 \n",
+ "58 0.606752 0 \n",
+ "59 0.979816 0 \n",
+ "60 0.979816 0 \n",
+ "61 -0.855400 0 \n",
+ "62 -0.666052 0 \n",
+ "63 -0.945850 0 \n",
+ "64 0.144599 0 \n",
+ "65 0.144599 0 \n",
+ "66 -0.117430 0 \n",
+ "67 -0.666052 0 \n",
+ "68 -0.666052 0 \n",
+ "69 -0.666052 0 \n",
+ "70 0.156882 0 \n",
+ "71 -0.666052 0 \n",
+ "72 -0.117430 0 \n",
+ "73 -0.666052 0 \n",
+ "74 0.144599 0 \n",
+ "75 -0.666052 0 \n",
+ "76 -0.117430 0 "
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the last twenty rows of the merged table.\n",
+ "new_df.tail(n=20)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Strongest Correlation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 90,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " calories | \n",
+ " protein | \n",
+ " fat | \n",
+ " sodium | \n",
+ " fiber | \n",
+ " carbo | \n",
+ " sugars | \n",
+ " potass | \n",
+ " vitamins | \n",
+ " shelf | \n",
+ " weight | \n",
+ " cups | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " calories | \n",
+ " 1.000000 | \n",
+ " 0.695173 | \n",
+ " 0.571156 | \n",
+ " 0.557644 | \n",
+ " 0.397544 | \n",
+ " 0.779935 | \n",
+ " 0.550719 | \n",
+ " 0.489555 | \n",
+ " 0.460674 | \n",
+ " 0.798645 | \n",
+ " 0.917061 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " protein | \n",
+ " 0.695173 | \n",
+ " 1.000000 | \n",
+ " 0.408003 | \n",
+ " 0.499966 | \n",
+ " 0.792964 | \n",
+ " 0.543850 | \n",
+ " 0.216226 | \n",
+ " 0.830069 | \n",
+ " 0.377264 | \n",
+ " 0.783356 | \n",
+ " 0.839599 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " fat | \n",
+ " 0.571156 | \n",
+ " 0.408003 | \n",
+ " 1.000000 | \n",
+ " 0.207203 | \n",
+ " 0.209571 | \n",
+ " 0.140466 | \n",
+ " 0.436592 | \n",
+ " 0.319417 | \n",
+ " 0.180092 | \n",
+ " 0.450872 | \n",
+ " 0.456609 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " sodium | \n",
+ " 0.557644 | \n",
+ " 0.499966 | \n",
+ " 0.207203 | \n",
+ " 1.000000 | \n",
+ " 0.473645 | \n",
+ " 0.446843 | \n",
+ " 0.313413 | \n",
+ " 0.526907 | \n",
+ " 0.491255 | \n",
+ " 0.543823 | \n",
+ " 0.628566 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " fiber | \n",
+ " 0.397544 | \n",
+ " 0.792964 | \n",
+ " 0.209571 | \n",
+ " 0.473645 | \n",
+ " 1.000000 | \n",
+ " 0.217233 | \n",
+ " 0.175416 | \n",
+ " 0.961150 | \n",
+ " 0.317400 | \n",
+ " 0.670541 | \n",
+ " 0.690713 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " carbo | \n",
+ " 0.779935 | \n",
+ " 0.543850 | \n",
+ " 0.140466 | \n",
+ " 0.446843 | \n",
+ " 0.217233 | \n",
+ " 1.000000 | \n",
+ " 0.018465 | \n",
+ " 0.253442 | \n",
+ " 0.370145 | \n",
+ " 0.595333 | \n",
+ " 0.705771 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " sugars | \n",
+ " 0.550719 | \n",
+ " 0.216226 | \n",
+ " 0.436592 | \n",
+ " 0.313413 | \n",
+ " 0.175416 | \n",
+ " 0.018465 | \n",
+ " 1.000000 | \n",
+ " 0.297057 | \n",
+ " 0.249821 | \n",
+ " 0.382400 | \n",
+ " 0.512461 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " potass | \n",
+ " 0.489555 | \n",
+ " 0.830069 | \n",
+ " 0.319417 | \n",
+ " 0.526907 | \n",
+ " 0.961150 | \n",
+ " 0.253442 | \n",
+ " 0.297057 | \n",
+ " 1.000000 | \n",
+ " 0.338606 | \n",
+ " 0.727946 | \n",
+ " 0.759611 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " vitamins | \n",
+ " 0.460674 | \n",
+ " 0.377264 | \n",
+ " 0.180092 | \n",
+ " 0.491255 | \n",
+ " 0.317400 | \n",
+ " 0.370145 | \n",
+ " 0.249821 | \n",
+ " 0.338606 | \n",
+ " 1.000000 | \n",
+ " 0.537410 | \n",
+ " 0.488464 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " shelf | \n",
+ " 0.798645 | \n",
+ " 0.783356 | \n",
+ " 0.450872 | \n",
+ " 0.543823 | \n",
+ " 0.670541 | \n",
+ " 0.595333 | \n",
+ " 0.382400 | \n",
+ " 0.727946 | \n",
+ " 0.537410 | \n",
+ " 1.000000 | \n",
+ " 0.874568 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " weight | \n",
+ " 0.917061 | \n",
+ " 0.839599 | \n",
+ " 0.456609 | \n",
+ " 0.628566 | \n",
+ " 0.690713 | \n",
+ " 0.705771 | \n",
+ " 0.512461 | \n",
+ " 0.759611 | \n",
+ " 0.488464 | \n",
+ " 0.874568 | \n",
+ " 1.000000 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " cups | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " calories protein fat sodium fiber carbo \\\n",
+ "calories 1.000000 0.695173 0.571156 0.557644 0.397544 0.779935 \n",
+ "protein 0.695173 1.000000 0.408003 0.499966 0.792964 0.543850 \n",
+ "fat 0.571156 0.408003 1.000000 0.207203 0.209571 0.140466 \n",
+ "sodium 0.557644 0.499966 0.207203 1.000000 0.473645 0.446843 \n",
+ "fiber 0.397544 0.792964 0.209571 0.473645 1.000000 0.217233 \n",
+ "carbo 0.779935 0.543850 0.140466 0.446843 0.217233 1.000000 \n",
+ "sugars 0.550719 0.216226 0.436592 0.313413 0.175416 0.018465 \n",
+ "potass 0.489555 0.830069 0.319417 0.526907 0.961150 0.253442 \n",
+ "vitamins 0.460674 0.377264 0.180092 0.491255 0.317400 0.370145 \n",
+ "shelf 0.798645 0.783356 0.450872 0.543823 0.670541 0.595333 \n",
+ "weight 0.917061 0.839599 0.456609 0.628566 0.690713 0.705771 \n",
+ "cups NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ " sugars potass vitamins shelf weight cups \n",
+ "calories 0.550719 0.489555 0.460674 0.798645 0.917061 NaN \n",
+ "protein 0.216226 0.830069 0.377264 0.783356 0.839599 NaN \n",
+ "fat 0.436592 0.319417 0.180092 0.450872 0.456609 NaN \n",
+ "sodium 0.313413 0.526907 0.491255 0.543823 0.628566 NaN \n",
+ "fiber 0.175416 0.961150 0.317400 0.670541 0.690713 NaN \n",
+ "carbo 0.018465 0.253442 0.370145 0.595333 0.705771 NaN \n",
+ "sugars 1.000000 0.297057 0.249821 0.382400 0.512461 NaN \n",
+ "potass 0.297057 1.000000 0.338606 0.727946 0.759611 NaN \n",
+ "vitamins 0.249821 0.338606 1.000000 0.537410 0.488464 NaN \n",
+ "shelf 0.382400 0.727946 0.537410 1.000000 0.874568 NaN \n",
+ "weight 0.512461 0.759611 0.488464 0.874568 1.000000 NaN \n",
+ "cups NaN NaN NaN NaN NaN NaN "
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Pairwise Pearson correlation of the scaled measurements.\n",
+ "# new_df also holds the string columns name/mfr/type; select the numeric columns explicitly,\n",
+ "# because DataFrame.corr() raises on non-numeric data in pandas >= 2.0 instead of dropping it.\n",
+ "new_df.select_dtypes(include=[np.number]).corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# potass/fiber is the strongest off-diagonal pair in the correlation table (about 0.96),\n",
+ "# so pull out just those two columns.\n",
+ "pot_and_fib = new_df.loc[:, ['potass', 'fiber']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEPCAYAAACqZsSmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHEFJREFUeJzt3X+cXXV95/HXm/xiAh0gY1tUWAdRVuWBhiAsu/gjtoRg\nK7pZla6P2hLaB9tq6SAJ7FrEh7pV+mhtAk1d14pIYMu2urisoLJJiuBC7QP5FWBJWGXruKBFNEMY\ngTBJyGf/OGecO8nNzLkz9873fu95Px+P+8j9nnvm3vdM8vjkzOd8z/coIjAzs3o5JHUAMzObey7+\nZmY15OJvZlZDLv5mZjXk4m9mVkMu/mZmNZS0+Es6UtKNkrZL2ibp9JR5zMzqYn7iz/8L4BsR8R5J\n84HDEucxM6sFpbrIS9IRwAMR8cokAczMaixl2+c44CeSrpV0v6SrJS1OmMfMrDZSFv/5wDLgsxGx\nDHgO+HDCPGZmtZGy5/8E8ERE3FOOb2S/4i/JCw+Zmc1ARGiq15MV/4h4UtLjkk6IiO8CZwKPNNlv\nym+gm0n6eER8PHWOmco5f87ZwflT64H80x44p57t84fADZIWAv8XOD9xnnYbTB1glgZTB5iFwdQB\nZmkwdYBZGkwdYJYGUwfotKTFPyIeBE5NmcHMrI58hW9nbUwdYJY2pg4wCxtTB5iljakDzNLG1AFm\naWPqAJ2WbJ5/FZIi556/mVkKVWqnj/w7SNLy1BlmI+f8OWcH508t9/xVuPibmdWQ2z5mZj3GbR8z\nM2vKxb+Dcu8b5pw/5+zg/Knlnr8KF38zsxpyz9/MrMe4529mZk25+HdQ7n3DnPPnnB2cP7Xc81fh\n4m9mVkPu+ZuZ9Rj3/M3MrCkX/w7KvW+Yc/6cs4Pzp5Z7/ipc/M3Masg9fzOzHuOev5mZNeXi30G5\n9w1zzp9zdnD+1HLPX0XSe/hKGgZGgReBPRFxWso8ZmZ1kbTnL+n7wCkRMXKQ193zNzNrUS49fxd3\nM7M5lrr4B/B3ku6VdEHiLG2Xe98w5/w5ZwfnTy33/FWkLv5nRMTJwNuBP5D05sR5zKzDJK2UBjYX\nD61Mnaeuumaev6SPAc9GxLqGbQFcBwyXm3YCWyPijvL15QAee+xxNuNTof8TsKEPtgN/OQbPvysi\nNnVJvizH5fPVFIaBj03X809W/CUtBuZFxM8kHQZsBj4REZsb9vEJX7MeIg1shvUr4Lxyy3XAmi0R\nO85KmavXdPsJ318G7pS0Fbgb+Fpj4e8FufcNc86fc3Zw/tRyz19Fsnn+EfF9YGmqzzezFEbWwdCb\ngL5iPLQLRtdN+SXWEV3T82/GbR+z3lOc5F2ythiNrIuITWkT9Z4qtdPF38ysx3R7z7/n5d43zDl/\nztnB+VPLPX8VLv5mZjXkto+ZWY9x28fMzJpy8e+g3PuGOefPOTs4f2q556/Cxd/MrIbc8zcz6zHu\n+ZuZWVMu/h2Ue98w5/w5ZwfnTy33/FW4+JuZ1ZB7/mZmPcY9fzMza8rFv4Ny7xvmnD/n7OD8qeWe\nvwoXfzPrSfK9gqfknr+Z9Zyi2PffVNwrGMqbxqyqy70DqtTOZHfyMjPrnCVrYX3fxL2C6YM1a4Fa\nFP8q3PbpoNz7hjnnzzk7OH9queevIvmRv6R5wL3AExFxTuo8ZtYLfK/g6STv+UtaA5wC/EJEvHO/\n19zzN7MZqfO9grv+Hr6SjgE2Ap8C1ux/5O/ib2bWuhwu8roSuBTYlzhHR+TeN8w5f87ZwflTyz1/\nFcl6/pLeATwVEQ9M9YOWtBEYLoc7ga0RcUf52nKAbh0DSyV1TZ665ffY47qMy+erKQxTQbK2j6Qr\ngN8C9gKHAv3AVyLitxv2cdvHzKxFXd/z/3kI6a3AJe75m5nNXg49/0bp/xdqs9z7hjnnzzk7OH9q\nueevIvk8f4CI+BbwrdQ5zMzqoivaPgfjto
+ZWetya/uYmdkccfHvoNz7hjnnzzk7OH9queevwsXf\nzKyG3PM3M+sx7vmbmVlTLv4dlHvfMOf8OWcH508t9/xVuPibmdWQe/5mZj3GPX8zM2vKxb+Dcu8b\n5pw/5+zg/Knlnr8KF38zsxpyz9/MrMe4529mZk25+HdQ7n3DnPPnnB2cP7Xc81fh4m9mVkPu+ZuZ\n9Rj3/M3MrCkX/w7KvW+Yc/6cs4Pzp5Z7/iqSFX9Jh0q6W9JWSdsk/UmqLGaWnqSV0sDm4qGVqfP0\nuqQ9f0mLI+J5SfOBu4BLIuKuhtfd8zergaLY998EG/qKLUO7YHRVRGxKmyxPVWrn/LkK00xEPF8+\nXQjMA0YSxjGzZJashfV9cN74hj5YsxZw8e+QpD1/SYdI2gr8GLg9IralzNNuufcNc86fc3Zw/tRy\nz19F6iP/fcBSSUcAmyQtj4g7GveRtBEYLoc7ga3j+4z/BXXruPzeuiZP3fJ7nNN4ZB188C2wfRG8\nlrLts6WxJnRX3u4al89XUximgq6Z5y/po8CuiPjzhm3u+ZvVRNH3X7K2GI2sc79/5qrUzmTFX9JL\ngL0RsVNSH0Vv7xMRcVvDPi7+ZmYt6vaLvF4KfLPs+d8N3NJY+HtB7n3DnPPnnB2cP7Xc81eRrOcf\nEQ8Dy1J9vplZnXVNz78Zt33MzFrX7W0fMzNLxMW/g3LvG+acP+fs4Pyp5Z6/Chd/M7Macs/fzKzH\nuOdvZmZNufh3UO59w5zz55wdnD+13PNX4eJvZlZDLfX8JS0BjomIhzoXadLnuedvZtaitvT8JX1L\nUn9Z+O8DviDpynaFNDOzuVel7XNERIwC/wa4PiJOA87sbKzekHvfMOf8OWcH508t9/xVVCn+8yS9\nFDgX+Hq5rXvnh5qZ2bSm7flLei/wUeDvI+IDko4H/iwi3t3xcO75m5m1rKvX86/Cxd/MrHXtOuH7\nZ+UJ3wWSbpP0U0m/1b6YvSv3vmHO+XPODs6fWu75q6jS819ZnvB9B8W9IY8HLu1kKDMz66wqPf9H\nIuJESdcAN0bErZIejIg3dDyc2z5mZi2rUjur3MnrFkmPAi8AH5D0S+VzMzPL1LRtn4j4MHAGcEpE\n7AaeA97V6WC9IPe+Yc75c84Ozp9a7vmrqHoP35cBvyqpj4k5/tfP5oMlHVu+xy+V7/n5iNgwm/c0\nM7NqqvT8Pw68FTiR4iKvtwN3RcR7ZvXB0tHA0RGxVdLhFEtH/OuI2N6wj3v+ZmYtatd6/u+hWM7h\nnyLifOANwJGzDRcRT0bE1vL5s8B2it8wzMysw6oU/10R8SKwV9IRwFPAse0MIWkQOBm4u53vm1ru\nfcOc8+ecHZw/tdzzV1Gl53+vpKOAq4F7KU74frtdAcqWz43AReVvAPu/vpHi+gKAncDWiLijfG05\nQLeOgaWSuiZP3fJ77HFdxuXz1RSGqaDV9fyPA/oj4sHKXzT1+y0AvgbcGhFXNXndPX8zsxa1pecv\n6bbx5xHx/Yh4sHHbLMIJuAbY1qzwm5lZ5xy0+EvqkzQA/KKkJQ2PQeDlbfjsM4D3A2+T9ED5OLsN\n79s1cu8b5pw/5+zg/Knlnr+KqXr+vwdcRDED576G7T8DPjPbD46Iu/A9hM3Mkqgyz38o1cVX7vmb\nmbWuSu2sUvwXAh8A3kJxJe63gM9FxJ52BZ3is138zcxa1K6LvP4zsAz4T+XzU8o/bRq59w1zzp9z\ndnD+1HLPX0WVef6nRsTrG8a3SXqoU4HMzKzzqrR97gfOjYjHyvHxwH+LiGUdD+e2j5lZy6rUzipH\n/pcC35T0j4CAQeD82cczM7NUqvT8vw18HtgH7AD+ijYu79DLcu8b5pw/5+zg/Knlnr+KKsX/euA4\n4I8p5ve/EvgvnQxlZmadVaXnvy0iXjfdtk5wz3/uSFoJS9YWo5F1EbEpbSIzm6l29fzvl/QvI+If\nyjc9nc
lX/FrmisLffxOs7yu2DL1J0ir/B2DWu6q0fd4I/L2kH0gapuj3v1HSw57yObV8+oZL1sKG\nPjiP4rGhD5aszSf/gXLODs6fWu75q6hy5N9Ti62ZmVmL6/nPNff858ZE22fDeNtnF4y67WOWqbas\n7ZOSi//cmasTvj6xbNZ5Lv6JSVrecEvE7LQ7/1z+huGffVrOn1a7ZvuYtcmStcWMovPGN/TBmrWA\nj/7N5phvptJBOR85QN75c84Ozp9a7vmr8JG/zaGRdTD0JqCx7bMuaSSzmvKRfwflPle43fmL3v7o\nKlizpXh0bkaRf/ZpOX/3S3rkL+mLwK8DT0XESSmz2Nwoi717/GaJJZ3tI+nNwLPA9c2Kf+6zfTqt\nlWmTnmJpVh9ZTPWUNAjc4uLfmlamTfoiLrN6adc9fG2GOts3bL4ez+z3nZBz3zPn7OD8qeWev4qu\nn+0jaSMwXA53AlvHp2GN/wV16xhYKqmD778duANYXn7c7qPKfRYVxX33UfDsl2FJ+fp4rEKT978U\nDj8XFj5dzMzhtZ3M77HHHrdnXD5fTWGYCtz2ydTBWjnF8wO2fxL6L5+q7ePWkFnv8BW+PSwiNkla\nVV4hC4yuK7YNbG5yFe1yGDlg38nv6Ktvzeokac9f0t9Q3B/gBEmPS+qpG8NX7RtKWikNbC4eWln1\n/SNiU8SOs4pHYzF/GHh3+Xh4mn2nsvuoqlm6Te49W+dPK/f8VSQ98o+I96X8/G6gtt9Fa+SHcDWw\noRwPAaM/rPB1Ta6+ffbLM8tgZt0uec9/KnXo+ZdtmhUT7ZbrgDVbInacNcP3+ymsH9jv/XZE7HjJ\n9F/rawHMeoF7/tYSX31rVh+e599B1fqGI+uKFst1FI+hXeU0yxkaWV+0en7+fuW21uXc98w5Ozh/\narnnr8JH/okdbNbOLN7vCknAmjXl+62PiCvaENXMeoh7/mZmPcbLO8yBVqdpznRap5lZO7n4z0LD\nNM0VxaP/psaCvn/fcLr9u03Ofc+cs4Pzp5Z7/irc85+V6a+KnTx98rCBiQXWmu9vZjYXXPw7a9F+\nF3DtG7/iNgcNC9RlJ+fs4Pyp5Z6/Chf/WZnunrQH/GZwCHxoH5x0SPP9zczmhnv+sxDT3pP2hdcd\n+FX7nmz1HrapThLn3PfMOTs4f2q556/CR/6zNPVVsfsG4JKG8SXAviNbWbqh/Wv/mJl5nn9HSUt2\nw+8sgO+XW44DvrgnYmRh9fdo79o/Ztb7PM8/ued/UKyw+c7ycTXws594nr+Zpebi31Fjn4exPXA5\nxWPXXlg80No8/5F1MDTWsFbP2OzW/qku575nztnB+VPLPX8V7vl31j0wdg483zDP/3eXwc3lyxf0\nwbUV5vnvBT7X8NzMbHbc859D0uH3Qd8y+PNyyyXArvsjnj3l4F/jnr+ZtcY9/zkg6TJp4KfFQ5dN\nvfcCiiJ+c/k4r9xmZja3XPxnoSj2/Z8q7py1fgD6P9X4H4Ck5Y1z9GF3f3HkPn4C+DpgzzSf0u71\n/qvLue+Zc3Zw/tRyz19F0p6/pLOBq4B5wBci4k9T5qlqYr2eo5bDlTRcwUu5jv74+vmnQv8nJubo\nX7SvaPk07v+hKT+r3ev9m5lBwuIvaR7wGeBM4IfAPZJujojtqTJVMfmiq8un2XvJisnLO3yuyW9a\nh+yY7jNT3V4x5/VNcs4Ozp9a7vmrSHnkfxrwWEQMA0j6W+BdQFcX/8nr9TxBcZvEcUPA6BS3TDyD\nYnE3vLaPmSWVsvi/HHi8YfwE8C8SZZmhjwC3AhcHsBdGb5h8y8SRLZMXfrt6F4x+EtYsL8bd3cKR\ntDzXI6Ccs4Pzp5Z7/ipSFv9Kc0wlbQSGy+FOYOv4X8r4SZkOji+Fw8+FhU+XJ1nHgC0w9BZgEXwD\neAD4rIAF8MH3SdoWEZ8u8+6B0Y/BmhXFcHQLcE/EjivGP6/xH9kcfD8t
jYGlkromj8cee9x8XD5f\nTWGYCpLN85d0OvDxiDi7HP8RsK/xpO9czvOffNOV8dk0/TcVN1+BskWzqni+6Bb4xQXwApNP4E7M\nwd///br5CN/MekuV2pnyyP9e4NWSBoEfAb8BvC9FkGYrZ8KL24srcPe/GndsAPoWwCeZuOp2+vfz\nSpxm1k2SFf+I2CvpQopZLPOAa9LN9Gl2O8YLX1UcyTdejTs2AIteMXG0fzTw/ob3GT+BO/5+rwCW\nl++X3+0ac+575pwdnD+13PNXkXSef0TcSnHGtAstUPM5+ft+AAwU45XAP6fhhO+Xinn5A2vnOKyZ\nWUu8tg/jbZrFX4XXLyq2PDQGCx+Bq5bt388vzgf0fxU2LIJbgC3AhnKfIWD0I8B9zc4XuO1jZnOh\nSu2sdfGfOCk7NgDzToIN5UI7Q2Mw+h+h//JmBXzi62I5XLlg8n8QF+8B3QEjd8CS5cV2n/A1s7lT\n64XdNM19bxtOyq6Ak5YVhf88iseGRUXhHv1SUcwv3jPe0oHiittiVU2NHvjJhy0o1+q/HEa2ROw4\nK9fCn/P6JjlnB+dPLff8VfTkev7VZts0nuS9ucm77D4D+hcXa/cADK2W9L39LuIahqGBifEQcDzl\nbwJ9cOG5wKcxM+syPdn2KVbQPH/F5HvnXjtpDfzJ6+RvAs4FXle+ug34ZxQzfCb1/HdE7HjJxHss\n2Q2/sgC2lluWAncCP8br7ptZKjVu+4wNHLh08tjA5H0al0reUm77/fKxCDii4medAzxWPs4B9jHX\nSy+bmbWqJ9s+xQ1Spl46OSYvlbwMNgxM3v9aiiP/cc0WbXv6BhhaPXmf538Eax4pF2wbm+13klLO\nc51zzg7On1ru+avo0eLfbJnk6ZdOnmwMeH4PfOgFOGQ3jK6f3O+HiDhfEnDxbxZbRm+IiPPHX6/D\nSSMzy1OP9vx1L/Sfst/8+/si4o0N+zTM7d9JscLEz/ffBy9uhecuy3WmjpnVV23n+UtHBfwuk0/4\nXkPE05rY5/D7YNEyGO/kXAjM3wuHPAMjBxzlm5nlosYnfAWcBHylfJxUbmu04NVF4R+f2/8Z4DXz\ny3vxXt7s2oCWU2Te9sk5f87ZwflTyz1/FT3a8x97ES6ZNzG+pNzWSAsP/LqXMTFHP7+F2MzMqurR\n4q8xeGbxxGydZ4CF+8282TNv8myeS4C/bmuK3GcL5Jw/5+zg/Knlnr+KHi3+e+cV0z1fVY4fKrft\nbzfFmvzPAs8DT1LO0QdG75iDoGZmSfRoz3/+QljMxEVbi8ttjfbOg73l88OBF4GrKJZ6uICJRdlm\nLve+Yc75c84Ozp9a7vmr6NEj/4VN1uK/eL8zvn3Ar1AszfAz4B3AjeVr13U+oplZQj1a/F+ssO25\nEdgyUMztfxi4momiP35HrtnJvW+Yc/6cs4Pzp5Z7/ip6dJ7/omfg0P7JF3m9MBoxdsTEPgOb4W0r\nJhZlOxrYvgO43+vvm1nOajzP//C7YQXwx+VjRbmt0cg62LILPkrxeHgXjPxmO9ffz71vmHP+nLOD\n86eWe/4qkhR/Se+V9IikFyUta/8n7F/YtxywwmZR4EdXFbdmXLPFt1k0szpJ0vaR9BqKtY//Clgb\nEfcfZL8Z38Zx4laL4DaOmdVJldqZ5IRvRDwKUKyI2bHP2ISv0DUza6pHe/7dIfe+Yc75c84Ozp9a\n7vmr6NiRv6QtFFNo9ndZRNzSwvtsBIbL4U5g6/g0rPG/oG4dA0sldU2euuX32OO6jMvnqykMU0HS\nqZ6SbqdDPX8zs7rKZaqni7uZ2RxLNdVzlaTHgdOBr0u6NUWOTsu9b5hz/pyzg/Onlnv+KlLN9rkJ\nuCnFZ5uZWY8u72BmVme59PzNzGyOufh3UO59w5zz55wdnD+13PNX4eJvZlZD7vmbmfUY9/zNzKwp\nF/8Oyr1vmHP+nLOD86eWe/4qXPzN
zGrIPX8zsx7jnr+ZmTXl4t9BufcNc86fc3Zw/tRyz1+Fi7+Z\nWQ25529m1mPc8zczs6Zc/Dso975hzvlzzg7On1ru+atw8TczqyH3/M3Meox7/mZm1lSqe/h+WtJ2\nSQ9K+u+SjkiRo9Ny7xvmnD/n7OD8qeWev4pUR/6bgRMj4g3Ad4E/SpSj05amDjBLOefPOTs4f2q5\n559WkuIfEVsiYl85vBs4JkWOOXBk6gCzlHP+nLOD86eWe/5pdUPP/3eAb6QOYWZWJ/M79caStgBH\nN3npsoi4pdznI8DuiPivncqR2GDqALM0mDrALAymDjBLg6kDzNJg6gCzNJg6QKclm+opaTVwAfCr\nEfHCQfbp3nmoZmZdbLqpnh078p+KpLOBS4G3Hqzww/ThzcxsZpIc+Uv6HrAQGCk3/UNEfHDOg5iZ\n1VRXX+FrZmad0Q2zfaYk6b2SHpH0oqRlqfNUIelsSY9K+p6k/5A6TyskfVHSjyU9nDrLTEg6VtLt\n5b+Z/y1pKHWmVkg6VNLdkrZK2ibpT1JnapWkeZIekHRL6iwzIWlY0kPl9/Cd1HlaIelISTeWF9Fu\nk3T6wfbt+uIPPAysAv5X6iBVSJoHfAY4G3gd8D5Jr02bqiXXUmTP1R7g4og4ETgd+IOcfv7lObC3\nRcRS4PXA2yS9KXGsVl0EbANybSsEsDwiTo6I01KHadFfAN+IiNdS/PvZfrAdu774R8SjEfHd1Dla\ncBrwWEQMR8Qe4G+BdyXOVFlE3Ak8nTrHTEXEkxGxtXz+LMU//pelTdWaiHi+fLoQmMfEubGuJ+kY\n4NeALwA5T9jILnu5TM6bI+KLABGxNyKeOdj+XV/8M/Ry4PGG8RPlNptjkgaBkymuIs+GpEMkbQV+\nDNweEdtSZ2rBlRQz+fZNt2MXC+DvJN0r6YLUYVpwHPATSddKul/S1ZIWH2znrij+krZIerjJ45zU\n2WYg1191e4qkw4EbgYvK3wCyERH7yrbPMcBbcllkTNI7gKci4gEyPHJucEZEnAy8naJt+ObUgSqa\nDywDPhsRy4DngA9PtXNyEbEidYY2+iFwbMP4WIqjf5sjkhYAXwH+OiL+R+o8MxURz0j6OvBG4I7E\ncar4V8A7Jf0acCjQL+n6iPjtxLlaEhH/VP75E0k3UbRy70ybqpIngCci4p5yfCNTFP+uOPJvQQ5H\nE/cCr5Y0KGkh8BvAzYkz1YYkAdcA2yLiqtR5WiXpJZKOLJ/3ASuAB9KmqiYiLouIYyPiOODfAt/M\nrfBLWizpF8rnhwFnUUw66XoR8STwuKQTyk1nAo8cbP+uL/6SVkl6nGLmxtcl3Zo601QiYi9wIbCJ\nYsbDlyLioGfcu42kvwG+DZwg6XFJ56fO1KIzgPdTzJJ5oHzkNHvppcA3y57/3cAtEXFb4kwzlWML\n9JeBOxt+/l+LiM2JM7XiD4EbJD1IMdvnioPt6Iu8zMxqqOuP/M3MrP1c/M3MasjF38yshlz8zcxq\nyMXfzKyGXPzNzGrIxd+sgaShcincEUn/vty2UdK7U2cza6euWN7BrIt8gOK+0j9q2Dbji2EkzS8v\n/DPrKj7yNytJ+hzwSuB/SvqQpL9sePlMSfdI+j+Sfr3cf56kT0v6jqQHJf27cvtySXdK+ipTXF5v\nlpKP/M1KEfH7klYCy4HGFWUFvCIiTpX0KuD28s/zgJ0RcZqkRcBdksaXAjgZODEifjCH34JZZS7+\nZgcSkxcRDODLABHxmKR/BF5DsejXSZLeU+7XD7wK2At8x4XfupmLv9nMjJ8HuDAitjS+UK6//9yc\nJzJrgXv+ZtMT8F4Vjqc4L/AoxcqtH5Q0H0DSCVPdOcmsm/jI32yy2O8xvu3/Ad+haO38XkTslvQF\nYBC4v7yPwFPAqv2+1qwreUlnM7MactvHzKyGXPzNzGrIxd/MrIZc/M3MasjF38yshlz8zcxqyMXf\n
zKyGXPzNzGro/wPu6BKyUfa2cAAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "pot_and_fib.plot(kind='scatter', x='fiber', y='potass')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## Clustering"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+    "# Numeric columns that are fed to the clustering models.\n",
+    "feature_columns = [\n",
+    "    'calories', 'protein', 'fat', 'sodium', 'fiber',\n",
+    "    'carbo', 'sugars', 'potass', 'vitamins', 'shelf',\n",
+    "]\n",
+    "num_data = new_df[feature_columns]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n",
+ " n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n",
+ " verbose=0)"
+ ]
+ },
+ "execution_count": 136,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Pin n_init and random_state so the cluster numbering referred to in the\n",
+    "# notes further down is reproducible when the notebook is re-run.\n",
+    "kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)\n",
+    "kmeans.fit(num_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "metadata": {
+ "collapsed": false,
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 2, 1, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2,\n",
+ " 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2,\n",
+ " 0, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,\n",
+ " 2, 0, 2, 2, 2, 2, 2, 2], dtype=int32)"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "clusters = kmeans.predict(num_data)\n",
+ "clusters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 154,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "cereals = new_df['name']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "cluster = kmeans.labels_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "results = pd.DataFrame(dict(cereals = cereals, cluster = cluster))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " cereals | \n",
+ " cluster | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 100%_Bran | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 100%_Natural_Bran | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " All-Bran | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " All-Bran_with_Extra_Fiber | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Almond_Delight | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Apple_Cinnamon_Cheerios | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Apple_Jacks | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Basic_4 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Bran_Chex | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Bran_Flakes | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Cap'n'Crunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " Cheerios | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " Cinnamon_Toast_Crunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Clusters | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " Cocoa_Puffs | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Corn_Chex | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " Corn_Flakes | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " Corn_Pops | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " Count_Chocula | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " Cracklin'_Oat_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " Cream_of_Wheat_(Quick) | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " Crispix | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " Crispy_Wheat_&_Raisins | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " Double_Chex | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " Froot_Loops | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " Frosted_Flakes | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " Frosted_Mini-Wheats | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " Fruit_&_Fibre_Dates,_Walnuts,_and_Oats | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " Fruitful_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " Fruity_Pebbles | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " Multi-Grain_Cheerios | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " Nut&Honey_Crunch | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " Nutri-Grain_Almond-Raisin | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " Nutri-grain_Wheat | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " Oatmeal_Raisin_Crisp | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " Post_Nat._Raisin_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " Product_19 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " Puffed_Rice | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " Puffed_Wheat | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " Quaker_Oat_Squares | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " Quaker_Oatmeal | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " Raisin_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " Raisin_Nut_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " Raisin_Squares | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " Rice_Chex | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " Rice_Krispies | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " Shredded_Wheat | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " Shredded_Wheat_'n'Bran | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " Shredded_Wheat_spoon_size | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " Smacks | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " Special_K | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " Strawberry_Fruit_Wheats | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " Total_Corn_Flakes | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " Total_Raisin_Bran | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " Total_Whole_Grain | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " Triples | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " Trix | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " Wheat_Chex | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " Wheaties | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " Wheaties_Honey_Gold | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
77 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " cereals cluster\n",
+ "0 100%_Bran 1\n",
+ "1 100%_Natural_Bran 2\n",
+ "2 All-Bran 1\n",
+ "3 All-Bran_with_Extra_Fiber 0\n",
+ "4 Almond_Delight 2\n",
+ "5 Apple_Cinnamon_Cheerios 2\n",
+ "6 Apple_Jacks 2\n",
+ "7 Basic_4 0\n",
+ "8 Bran_Chex 2\n",
+ "9 Bran_Flakes 0\n",
+ "10 Cap'n'Crunch 2\n",
+ "11 Cheerios 2\n",
+ "12 Cinnamon_Toast_Crunch 2\n",
+ "13 Clusters 0\n",
+ "14 Cocoa_Puffs 2\n",
+ "15 Corn_Chex 2\n",
+ "16 Corn_Flakes 2\n",
+ "17 Corn_Pops 2\n",
+ "18 Count_Chocula 2\n",
+ "19 Cracklin'_Oat_Bran 0\n",
+ "20 Cream_of_Wheat_(Quick) 2\n",
+ "21 Crispix 2\n",
+ "22 Crispy_Wheat_&_Raisins 2\n",
+ "23 Double_Chex 2\n",
+ "24 Froot_Loops 2\n",
+ "25 Frosted_Flakes 2\n",
+ "26 Frosted_Mini-Wheats 2\n",
+ "27 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 0\n",
+ "28 Fruitful_Bran 0\n",
+ "29 Fruity_Pebbles 2\n",
+ ".. ... ...\n",
+ "47 Multi-Grain_Cheerios 2\n",
+ "48 Nut&Honey_Crunch 2\n",
+ "49 Nutri-Grain_Almond-Raisin 0\n",
+ "50 Nutri-grain_Wheat 2\n",
+ "51 Oatmeal_Raisin_Crisp 0\n",
+ "52 Post_Nat._Raisin_Bran 0\n",
+ "53 Product_19 2\n",
+ "54 Puffed_Rice 2\n",
+ "55 Puffed_Wheat 2\n",
+ "56 Quaker_Oat_Squares 0\n",
+ "57 Quaker_Oatmeal 2\n",
+ "58 Raisin_Bran 0\n",
+ "59 Raisin_Nut_Bran 0\n",
+ "60 Raisin_Squares 0\n",
+ "61 Rice_Chex 2\n",
+ "62 Rice_Krispies 2\n",
+ "63 Shredded_Wheat 2\n",
+ "64 Shredded_Wheat_'n'Bran 2\n",
+ "65 Shredded_Wheat_spoon_size 2\n",
+ "66 Smacks 2\n",
+ "67 Special_K 2\n",
+ "68 Strawberry_Fruit_Wheats 2\n",
+ "69 Total_Corn_Flakes 2\n",
+ "70 Total_Raisin_Bran 0\n",
+ "71 Total_Whole_Grain 2\n",
+ "72 Triples 2\n",
+ "73 Trix 2\n",
+ "74 Wheat_Chex 2\n",
+ "75 Wheaties 2\n",
+ "76 Wheaties_Honey_Gold 2\n",
+ "\n",
+ "[77 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 163,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Looking at the first few results, these choices make sense because, on the whole, 100% natural bran appears to have less nutritional content than those surrounding it (especially when looking at the vitamin content)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Looking at the last 4, the normalized dataframe has very similar data for the ones which have been classified together. Generally, the ones that are positive are clustered together, as are the negatives."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It seems like cereals with higher carbs and sodium are being clustered together. Cluster 2 seems to be the more sugary cereals. Cluster 0 looks like really dense, fibrous cereals. Cluster 1 seems to contain the few outliers (grape nuts and such)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAF6CAYAAADBKYuwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu4XFWd5vH3PYRLQgAhPCMK4eal28Co2Io2osSgJI0K\nZlpGWx0nOvM47QV8NN7AsZNMO0qr0VbUme5WO15oHRsFxVZCujHeAK9BhAQBGzAgIgQFclESzm/+\n2KtInUrVqapTu2pV7fp+nqeenNq1L2vXOam31tprre2IEAAAGKyJ3AUAAGAcEcAAAGRAAAMAkAEB\nDABABgQwAAAZEMAAAGRAAGOo2H657bUl7Gel7c+WUaZhY/tI2w/Y9jTrTNo+todjrLf932a6fT/Y\nXmh7c+5yAGUhgDFQtm+1fWqr1yPiwohYXMKhOh7gbnuN7b8u4ZgDOU5E/DIiDog0iL/XsGzxZSXU\nxXs4StLf4KLc5QAIYAxayw9223sNuCxVkTUobc/KefwZCEktWw+m46Tk8mBMEcDIxvYy29+z/UHb\n90hamZZ9J71u2x+yfZft+2xfa/u4Fvs6xva3bN9v+3JJhza8/s+277T9u7TegrT8NZJeJultqVn3\nK2n5O2zfnPZ3ve0X1e3rsWkfv7N9t+0v1L32x7bX2d5i+wbbZ013nIYyrrL9kfTz3ra32X5fej7b\n9u9tP8L20amJeS/b/1vSsyR9NO33I3W7fJ7tG23/1vZHW7xvSySdK+klafsNdS8fbfu76T1Ya3te\n2qZ2/Ffbvk3Sv6blr7a90fa9ti+zfWS796VFmQ6x/Y+270j7urjFelOa2etbGGwfavtr6dy32P52\n+nv6rKQjJV2azvctaf1n2L4yrX+N7VPq9rve9rttf0/SNknHpr/TX6T35t9tv6zV+QAtRQQPHgN7\nSLpF0qL08zJJOyW9XsWXwf3Ssu+k1xdL+pGkA9PzP5J0WIv9XiXpA5L2VhFI90v6TN3ryyTtn17/\nkKQNda/9o6T/1bC/F9eOJek/S9oq6ZHp+eclnZt+3kfSSenn/SVtlvRf0/k8WdLdkp7Q6jgNx3yO\npGvTzydJulnS1en5olqZJR0taVLSRHr+TUmvbtjXpKSvSjpQ0nxJv5G0uMVxV9S/V2nZ+nT8x6bf\nyzclvbfh+GskzU6vnynppvQ7mpD0Tknf6+R9aVKef0nv8UGSZkl6Vlq+UNLmhnM8ttnvUdJ7Jf0f\nSXulxzOb/Q2m54dLukfSkvT8uen5vLr34lZJT0jlP0jSfZIel15/pKQFuf9v8Ri9BzVg5PariPhY\nRExGxO8bXtsp6QBJT7A9ERE/j4hfN+4g1bSeKuldEbEzIr4j6VLVNTNGxJqI2BYROyWtkvQk2wfU\n76Z+nxFxUe1YEfFFFeFyYnr5QRW1w8Mj4sGIuDItf4GkWyLi0+l8rpH0ZUm12p4bj9PgakmPs32I\nii8Rn5R0uO39JZ0i6VvTbNtsv+dHxP0RsVlFgD55mm0btw9Jn4qIm9Pv5YtNtl8ZETvS63+pIqB/\nHhGTKgLwyel30+592V0Q+1GSlkj6y4i4LyJ2pd9ntx6U9ChJR0fEQxHxvWnWfYWkr0fEZZIUEf+q\n4ovf8+veizURsSmd2y4V4f8fbc+OiLsiYuMMyogxRwAjt5a9WiPiCkkflfQxSXfZ/ruG0Kx5tKTf\nRsSOumW31X5ITbXnpybl+1TUgKSGZup6tl9pe0NqkvytpOPr1n+bisD6ge3rbL8qLT9K0tNr26Tt\nXqaihiS1uVabyv8jFWH7bBWBe6WkZ9Y9b7l5k2X1X1a2S5o73fHbbL+jyfb1v7ujJH247ry3pOWH\nq/37Um++pHsj4r4uy1pT+yLxfhU1+MtTU/Hbp9nmKElnNZTvmZIOq1vn4XONiG2SXqLiS8evUlP3\nH82wvBhjBDByaxdKF0TEUyUtkPR4SW9tst
qdkg62Padu2VF1+36ZpDMknRoRB0k6Ji2vfVhPKYPt\noyT9vYqm8UMi4mBJ19XWTzWe10TE4ZL+h6SP236MpF9K+lZEHFz3OCAiXt/JuSbfknSqpBMk/TA9\nX6Ki9v3tFtv02glrcobb1R/3l5Je03Du+0fEVWr/vtTbLOkQ2wd1cPztkup/54+qlSkitkbEWyLi\nMSp+92+2/Zwm5a6V/bNNyve+FueqiLg8Ik5TEdI3SPqHDsoLTEEAY2jZfqrtp9veW8WH7e8lPdS4\nXkTcpqLmuCp1XjpZRbNnzVxJf5B0b2rOfU/DLu6SVD9mdn8VH7j3SJpINdzj68p1lu0j0tPfpXUf\nkvQ1SY+3/YpUjr1tP832H7c4TjPfkvRKSden5vL1kv67pH+PiC0ttrlL0mPa7He6pu+7VDSpN67T\nTW/f/yvpPO/u3HZQXUerdu/LwyLiTknfUPGl5hFp3We3OOY1kl6eWjiWqGglUDr+C1x0lrOK/gAP\nafcXjcb363OSXmj7tLSv/VyMOT682Xth+z/YPjP9Le1U0TFrj79LoB0CGDk1G5JUv+xAFTXRe1V0\ngrlHRdNiMy+T9PS07l9J+nTda59R0SR9h4qa7FUNx/2kpAWp+fHL6Xre6rTer1WE73fr1n+qpKtt\nPyDpK5LOiYhbI2KrpNMkvTQd604V10L3aXacFudxlYpOTbXa7iYVzb+Ntd/68n9Y0otTj+G/bbHf\n6cb1/nP6d4vtH7U4RuP2jTXCSyT9jaQvpGb+n6noRKcO3pdG/0VFsN2gIizPaXHcN0p6oaRak3Z9\nb+nHSlon6QEVzfgfi4haE/57Jf3P9Ht4c0TcrqIT2XkqOqv9UtJyTf0CUn/cCUlvSueyRcX1+te2\nOBegJUf01npl+xGSPiHpOBV/pK+OiKtLKBsAAJVVxgD6D6voQfhiFwPy9y9hnwAAVFpPNeDUUWJD\nRMx4zlkAAMZRr9eAj5F0t4tZa35i+x8aeqICAIAmeg3gWZKeIunjEfEUFb0B39FzqQAAqLherwHf\nLun2iPhhen6RGgLYdiXvqAIAwHQiYtqhfD0FcET82vZm24+PiBtVzKF6fbeFqDLbKyNiZe5y5ML5\nj+/5j/O5S5w/59++8llGL+izJV1oex9Jv5D0qjbrAwAw9noO4Ij4qaSnlVAWAADGBjNh9d/63AXI\nbH3uAmS2PncBMlqfuwCZrc9dgMzW5y7AsOt5Jqy2B7BjnK8BAwDGTyfZRw0YAIAMCGAAADIggAEA\nyIAABgAgAwIYAIAMCGAAADIggAEAyIAABgAgAwIYAIAMCGAAADIggAEAyIAABgAgAwIYAIAMCGAA\nADIggAEAyIAABgAgAwIYAIAMCGAAADIggAEAyGBW7gIA6N3mRcsWS1qenq6ef8WatTnLA6A9R0R/\nD2BHRLivBwHGWArfiyXNTot2SFpKCAP5dJJ9NEEDo2+5doev0s/LW6wLYEgQwAAAZEAAA6NvtYpm\n55odaRmAIcY1YKAC6IQFDJdOso8ABgCgZHTCAgBgSBHAAABkQAADAJABAQwAQAYEMAAAGRDAAABk\nQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAA\nGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwA\nQAYEMAAAGRDAAABkUEoA297L9gbbl5axPwAAqq6sGvAbJW2UFCXtDwCASus5gG0fIel0SZ+Q5J5L\nBADAGCijBvwhSW+VNFnCvgAAGAs9BbDtF0j6TURsELVfAAA6NqvH7U+SdIbt0yXtJ+lA25+JiFfW\nr2R7Zd3T9RGxvsfjAgAwNGwvlLSwq20iyuk3ZfsUSW+JiBc2LI+IoHYMABgbnWRf2eOA6QUNAEAH\nSqsBtz
wANWAAwJjJUQMGAAAdIIABAMiAAAYAIAMCGACADAhgAAAyIIABAMiAAAYAIAMCGACADAhg\nAAAyIIABAMiAAAYAIAMCGACADAhgAAAymJW7AFWx8pIjFktanp6uXvmi29fmLA8AYLhxO8ISpPC9\nWNLstGiHpKWEMACMp06yjxpwOZZrd/gq/bxc0sADmJo4AIwGrgFXSF1N/HnpcXFaBgAYMgRwOVar\naHau2ZGWDVqrmjgAYMgQwCVIzbxLJa1LD67/AgCmxTXgkqTAzR26qyWdrKmdwXLUxAEAbdALumLo\nhAUA+XWSfQQwAAAl6yT7uAYMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQ\nAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAA\nZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAA\nABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQQc8BbHu+7W/avt72dbbPKaNgAABUmSOi\ntx3Yh0k6LCKusT1X0o8lvSgiNqXXIyLce1EBABgNnWRfzzXgiPh1RFyTft4qaZOkR/e6XwAAqqzU\na8C2j5Z0gqTvl7lfAACqprQATs3PF0l6Y6oJAwCAFmaVsRPbe0v6kqTPRcQlTV5fWfd0fUSsL+O4\nAAAMA9sLJS3sapsSOmFZ0qclbYmINzV5nU5YAICx0kn2lRHAJ0v6tqRrJdV2dm5EXNZpIQAAqJKB\nBHAZhQAAoEoGMgwJAAB0jwAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDI\ngAAGACCDUu6GBAAYDnMmFiyWtDw9Xb19cuPanOVBa8wFDQAVkcL3Ykmz06IdkpYSwoPHXNAAMF6W\na3f4Kv28vMW6yIwmaADAyBrlJndqwABQHatVNDvX7EjLRtqciQWL50wsuDw9FtcvV9Hk/rz0uLj+\n9WHHNWAAqJBRrhE2M9117TkTCy5XEbz11m2f3HjaIMvYTCfZRxM0AFRICtyRDt0Gra5rj/w50gQN\nABhVI93kTg0YwNjYvGjZlObZ+VesGflaVDsVaJJeLelkTW2CXi0Vtf05EwuWakTPj2vAAMZCCt89\nriVWOYSrMi54FL9EcA0YAHar7LXEaVTinCt4XVsS14ABAMiCAAYwLka6w84MjeM5jwyuAQMYG3TC\nGo3rp1XQSfYRwACAysn9xYMABgC0lDuk+mUYen9zNyQAQFP9mEe51ZzNGYzEXaEIYAAYT6WG1Kjf\nGCEHAhgAUIZhqnWORO9vAhgAxlNjSP1B0rwhaD7uWbrWu1TSuvQYytm/6IQ1RLzKsyQdI2luWrRV\n0i2xInblKxWAqmjsdJX+XS5pnqTjJO2blnXdaWkYOj4NE3pBjwiv8sGSTpK0WNL+DS9vUzEF25Wx\nIn476LIBqIZB3Fe3qr2qZ4Je0CPAq3y8pPMl/ScV/yE2Nzx2pNfOT+sCwEz0/RptCtyHa9aj3pTd\nbwRwRilQl0u6X7vDtlEtlO+XtJwQBtAHpXRaoid0dwjgTFKz89mS7lHRzNzOtrTu2WlbAOhGy5At\nsdPSMPWEHnrcjjCfk1S8/52Eb802SYdIeoakb/SjUACqqdXN67lumw+dsDLwKu+t3d9GmzU7T2d2\neryZ3tEAelF2z2V6Qu9GJ6zhdbSK3s7dhq/SNvurGK4EAL0otcl4VMbfDguaoPOY236VthqHKwFA\ndilwCd0OUAMGgPE1ElM2VhUBnMfWEvbRTectANgDTcZ50QSdxy0qAnS2ZtYJa2vaBwD0pNsm4371\nmh7H3tj0gs7Eq/x8FTNcbe5y0/mSLooVwTAkAC31I9D61cu5ir2n6QU9
3K6UtEvddaaam7a5ui8l\nAlAJZc9INWdiweI0X/SF6s9EG2M5gQcBnEm6scIFkg5VZyE8V8UdSy7gpgwA2igt0BrCfF7vRUMN\nAZxRrIjrVPQ4PFBF0/LsJqvNTq8dIOkDaRsAY6pWGx3gfXsbw7xeWb2mx7I3NteAh0Ca2/lPJS3R\n1NqwJT0g6TJJV1PzBaql2+u0nV4rLfOaaotbFW6R9JNOytzFcSrVCYv7AY8Yr/IsFTNc1UJ4m6Rb\nmHISqJ6ZhGQ39+0tK9Cq2EFqEDrJPoYhDZEUtDflLgeA5kqupbW6TltKsPU6I1XDub5b0sL0c9fn\nXbXabVkIYADoQJOa4MlzJhYMuia4WtLJmlobnfG10lbB2OxcNfMm7GF434YSnbAAoDNlD5XpuuNR\nmTNXtRmqVOa5juUQo05QAwaADFrdn7eT7VROM3Vfm8DRHgEMAJ1p2/zb7bXOIb5zUOO5TkpaX9K+\nxmKIUSfoBQ2gsjYvWjYlEOdfsaaMKRObBuww9hauG1lRuwXqVqWRFe3KO2diwXmS/lq7L1VOSnrX\n9smN7+m2HL10whrVDlwMQwIwMsoOy7S/PQKm1/220s0Qobpt+hIuaW6BkyQt1p4z7W1TUeu+cvaq\nJ5zY6vgtzmdS0umDCsFh/FLTKeaCBjAS6sLy4Q5BaVkvhrrzT9nzNdd4lY+XdL6Km73sUHHDl/rH\njvTa+TtWbLpj++TG09Kjk1Cb0GDfw6H+HfaKAAbQN5sXLVu8edGyy9NjunCpwgdt017N00wdWfo5\np/BdLul+7Q7bRrVQvl/S8rRNM6tV1HjRJwQwgL7oU622GwOdX7jZEKH0Uje13Bnf7CA1O58t6R4V\nzcztbEvrnp22nSKdz7s0NYQH3YGq0nNE9xzAtpfYvsH2TbbfXkahAFRCNzW80j9o07XeKYHYr+u/\nNdsnN65taNKd7j1YLekPDbs4rodm6JNUjGzpJHxrtqVtntHsxdTh6nSVMO54Jsoc9zyMeuqEZXsv\nST+X9FxJd0j6oaS/iIhNdevQCQsYQ5sXLWvaKWn+FWuadkoquxPWMGjXMWvOxIIfS3pKq9c75VXe\nW7u/xDRrdp7O7PR4M/POl2cQc0GfKOnmiLg1HfALks6UtGm6jQCMha7Gf6bAHfnQbdDuPdhS0nGO\nVtHb+d4ZbLtDxX3JjxFz0Q9UrwF8uIqL+TW3S3p6j/sEUAHzr1izdvOiZVNmejrylE//m1Z9+nFq\nMi41SyH7rIPZrsqapGJu+1XaahyuhD7rNYD7O4gYwEir1WrrxqV+UE3GpXqV10q6sor3vJ5utquZ\nTkeJaug1gO+QNL/u+XwVteApbK+se7o+Itb3eFwAIyINczlbxefNPdqzmXS2inGpZ3iVL4gVcd2A\ni5hVSdNRbi2hKN103kID2wu1+5aNnW3TYyesWSo6YZ0q6VeSfiA6YQFI6saldjI0Zn8V1yJXj1sI\n9ypNOflBzbwT1n6Sllf1UkAOfZ8JKyJ2SXqDim9vGyX9v/rwBTC+uh2XesL9hx521p3HPufP7j7y\nG1874wV/3vcCVkgKzrUqvsB061BJlxG+g9fz3ZAi4huSvlFCWQBUS8fjUk+4/9DHPOu3j37pRHjW\nHdqmu/fZceHmRcu2ljEUaSbDm0Z0SNSVks5Q0ZLQaXPyXEm7JF3dr0KhNWbCAlC6NC51sYrab1tP\nfGDeSRPhWZI0b+e++vYhv9p3px96S6/lmMlsXP2cwWuaaSl7ljqwXaCiRttJj+a5KmbeuqDTzm/t\nyt/P86siAhhAPxytIgS6vR6p/SZnafvELt04577Z7dduq6v5llPQXtjNNp3q180X6qVr56slHaii\nU2yz93B2eu0ASR/o9Hp7u/IP4vyqhgAG0A9djUu99oAtV0569zXISceu7x78qwvLL1ZrdTXfGc/H\n3MZAbjiRAvUdkr6k3WFbexyposPV
RZLeESvi+i523a78VbihxkD1fA0YAHq14cB7fiHpC098YN5J\nkvTjA39z00WH/eKqd/e+624mumgMkJpsNwCY6f2CU5Py173Kl6uY4arWJL1NFZ74ZNQQwAD6oetx\nqRsOvOcXKYilorbW87jUZrNxddmhaoukl3fbCasxONO/81TcWajW8jhtsKd9fEXSvmnRs+dMLDiz\nm4k6YkXsmjOx4FhNDfGZTjfZ7stMWbN6jY2exgF3dADGAQMjqZeewKM4LrWuCbo+QLq+g1LdtdDa\nfmp3PNq3brWbJb1hujBtcaOGn2yf3PgnPZRlh9rcUajFl4fa8/XaPdnEHjXymdbYq6iT7COAAeyh\njDDyKj9fxQxXm9ut22C+pItiRQx8eGMZw49aBGejSUmntwnCe7Tn9egt2yc3djzWt93dmJqs3+7L\nQ9sAR2EQd0MCUE2tOtR088E7cuNSe70jUwqwJ3ew6oTav5+3ac8Avm2GRetU4+9934bXZ/J3gBbo\nBQ2gLwYxLnUILdeen6uT6dGt87S7Bqr083ld7qN2j+AarssOEQIYQDOlfHD3c1zqCLlG0rs0NYTb\nvp+pmfdMSevSo6sOWHX7WFq3j3bNx42/953dlhud4xowgKbKnI4xzQv9p5KWaGpt2JIekHSZpKtH\nuOYrafpOT6PSQamunPMkHaepzdBrtk9ufFWWgo0YOmEBGCqpd3Slx6WOStC206ID16Sk09PP75F0\nlIrr0ueN6nn2CwEMAJiRFgEsST/RnjXjP2gGTeRV1vfbEQIA+mMQNzZoc4zVat557Cjt2Tt6XzHt\nZNcIYAAYMoO4sUG7Y6TabLPOY/0eCjU2CGAAGD6DuLFB22Nsn9z4HhXXfB/uRa09h0cpPad3dJeY\niAMA0FKqCTdOOXmm6ITVMwIYAHrQp17Pg7ixwYyP0SyU0T16QQMYG2WObZZmdrODLvfddbB3s11V\nhkwNI4YhAUBS1t2O6nV7s4N+6+cXAnSHYUgAsNsgOjaVoochSCNzjiCAAaAXpd/sYBBDkDAcCGAA\n46L0sJzBzQ460UstlrsfjRCuAQOYouyOSv3a5yiXYzq9XlemY9VwoBMWMEbKCJd+dFTqxz6rjI5U\n1UAnLGBM1IXcw9cN07Ju9aMTDx2D2qjvdJUWld2sjSHERBxANbQKOT64h1yTGu/JKkI3y1AmDA41\nYAD1+tGJh45B06OFYEwRwEA1lBJy6brslObPXq/V9mOfQBXQCQuoiFHo4Ys90emqmugFDQAjgPmb\nq4cABoAKobY8OhiGBADVQoetCmEYEoCsuHaNcUUNGEA2JU4gklUPdy/qVmNv9z9ImjeA46IPuAYM\nYIpB1kg3L1rWdN7j+VesGZlJKAZ9XbauE9Y8ScdJ2ncQx0V3Osm+sWiCXnnJEVM+UFa+6Hb+QIEm\nmszbfPLmRcsYtzu9gc5ClgJ2bZq2ct+6l5j9bMRUvgk6he+UJq60DMCeBt3Jh1myMLYqH8Ci1yAw\ntCoyS1auLxF8eRlxY9EEDaBjq1XcDKD+emZfP9RT4I5a6D5s++TGtXMmFizVgCfHyHVclKfynbDq\nmqCndJDgOjDQHMOCgN4xE1ZCJywAwCARwAAGjho0QAADGLAmw5h2aDQ7VgE9YS5oAIPGqAOgQ/SC\nBoBMuLXgeCOAAZRp4MOYRs00U0mePGdiAVNJjhGuAQMoFZ2wWmsyb3SjddsnN47MPNhojU5YADBE\n0vzNjTefqEcAVwSdsABgdNBcP2YIYAAYnGb38/2J0jzYXP8dLzRBA8AA0fN5PHANGACADLgGDADA\nkCKAAQDIgAAGACADAhgAgAwIYAAAMugpgG2/3/Ym2z+1/WXbB5VVMAAAqqzXGvDlko6LiCdJulHS\nub0XCQCA6uvpbkgRsa7u6fcl/XlvxQHQDW58AIyu0ibisH2ppM9HxD81LGciDqAPUvjW31lnh6Sl\n
hDCQXyfZ17YGbHudpMOavHReRFya1nmnpAcbwxdAXy3X1NvazU7LCGBgBLQN4IiY7tZZsr1M0umS\nTp1mnZV1T9dHxPrOigcAwPCzvVDSwm626ekasO0lkt4q6ZSI+H2r9SJiZS/HAdDUakkna2oTNLez\nAzJIFcv1tee2V7TbpqdrwLZvkrSPpHvToqsi4nUN63ANGOgTOmEBw4m7IQEAkAF3QwIAYEgRwAAA\nZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAA\nABkQwAAAZEAAAwCQwazcBcBoW3nJEVNuCL/yRbdzQ3gA6IAjor8H6OCmxBhNKXwvljQ7LdohaSkh\nDGDcdZJ9NEGjF8u1O3yVfl7eYl0AQB0CGACADAhg9GK1imbnmh1pGQCgDa4Boyd0wgKAPXWSfQQw\nAAAloxMWAABDigAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACAD\nAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDI\ngAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAA\nMiCAAQDIgAAGACADAhgAgAwIYAAAMug5gG0vtz1p+5AyCgQAwDjoKYBtz5f0PEm3lVOc6rG9MHcZ\ncuL8x/f8x/ncJc5/3M+/E73WgD8o6W1lFKTCFuYuQGYLcxcgs4W5C5DRwtwFyGxh7gJktjB3AYbd\njAPY9pmSbo+Ia0ssDwAAY2HWdC/aXifpsCYvvVPSuZJOq1+9xHIBAFBpjojuN7KPl/RvkranRUdI\nukPSiRHxm4Z1uz8AAAAjLiKmrZjOKID32Il9i6Q/iYh7e94ZAABjoKxxwNRyAQDoQik1YAAA0J2+\nzYRl+yzb19t+yPZTGl471/ZNtm+wfVqrfVSF7RNt/8D2Bts/tP203GUaJNtn295k+zrbf5O7PDmM\n64Q1tt+ffvc/tf1l2wflLtMg2F6SPt9usv323OUZJNvzbX8zff5fZ/uc3GUaNNt7pc/7S6dbr59T\nUf5M0lJJ324o2AJJL5G0QNISSR+3XfUpMd8n6V0RcYKkv0rPx4Lt50g6Q9ITI+J4SR/IXKSBG/MJ\nay6XdFxEPEnSjSpGT1Sa7b0kfVTF59sCSX9h+wl5SzVQOyW9KSKOk/QMSa8fs/OXpDdK2qg2l2f7\nFnwRcUMiCuswAAAC4UlEQVRE3NjkpTMlfT4idkbErZJulnRiv8oxJO6UVPvm/wgVPcbHxWslvTci\ndkpSRNyduTw5jO2ENRGxLiIm09PvqxgxUXUnSro5Im5Nf/dfUPG5NxYi4tcRcU36eaukTZIenbdU\ng2P7CEmnS/qE2gzPzVHzfLSk2+ue3y7p8AzlGKR3SFpt+5eS3q8xqAXUeZykZ9u+2vZ620/NXaBB\nYsKaKV4t6eu5CzEAh0vaXPd8HD7jmrJ9tKQTVHz5GhcfkvRWSZPtVpx2Io52ppmo47yImLbtu8HI\n9wRrM2nJOZLOiYiLbZ8l6VMqmiQroc25z5J0cEQ8I137/qKkYwdZvn4b9wlrOvkcsP1OSQ9GxD8N\ntHB5jPznWRlsz5V0kaQ3pppw5dl+gaTfRMSGTubC7imAI2ImIXKHpPl1z2uTeIy06d4L25+LiOem\npxepaJqojDbn/lpJX07r/TB1RJoXEVsGVsA+a3X+acKaYyT91LZU/K3/2PYeE9aMsnafA7aXqWiS\nO3UgBcqv8TNuvqa2+lWe7b0lfUnS5yLiktzlGaCTJJ1h+3RJ+0k60PZnIuKVzVYeVBN0/bf+r0p6\nqe19bB+joonyBwMqRy432z4l/bxIRWeUcXGJinOW7cdL2qdK4T
udiLguIh4ZEcdExDEqPoSfUqXw\nbcf2EhXNcWdGxO9zl2dAfiTpcbaPtr2Pik6nX81cpoFx8W3zk5I2RsTf5i7PIEXEeRExP/1/f6mk\nK1qFr9RjDXg6tpdK+oikQyX9i+0NEfFnEbHR9hdV9BDbJel1Uf3ByK+R9DHb+0rakZ6Pi09J+pTt\nn0l6UFLLP8YxUPW/82YukLSPpHWpFeCqiHhd3iL1V0Tssv0GSWsl7SXpkxGxKXOxBumZkl4h6Vrb\nG9KycyPisoxlymXa//NMxAEAQAZVH38LAMBQIoABAMiAAAYAIAMCGACADAhgAAAyIIABAMiAAAYA\nIAMCGACADP4/3v8IdqjGURcAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "# Project the clustering features onto their first two principal components\n",
+    "# so the k-means assignment can be drawn on a flat scatter plot.\n",
+    "pca = PCA(n_components=2).fit(num_data)\n",
+    "pca_2d = pca.transform(num_data)\n",
+    "\n",
+    "# The fitted centroids go through the same projection as the cereals.\n",
+    "cluster_2d = pca.transform(kmeans.cluster_centers_)\n",
+    "colors = [\"#E2415F\", \"#7FAF1B\", \"#090129\", \"#843023\", \"#037892\"]\n",
+    "\n",
+    "plt.figure(figsize=(8, 6))\n",
+    "\n",
+    "# One vectorised call: every cereal is coloured by its cluster label.\n",
+    "point_colors = [colors[label] for label in clusters]\n",
+    "plt.scatter(pca_2d[:, 0], pca_2d[:, 1], c=point_colors)\n",
+    "\n",
+    "# Centroids as large translucent markers; alpha must be a number, not the string '0.5'.\n",
+    "plt.scatter(cluster_2d[:, 0], cluster_2d[:, 1], color='g', alpha=0.5, s=250, marker='o')\n",
+    "\n",
+    "plt.title(\"Cereal dataset with three clusters\")\n",
+    "plt.xlabel(\"First principal component\")\n",
+    "plt.ylabel(\"Second principal component\")\n",
+    "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Testing 2 cereals, one really fibrous, protein-filled healthy cereal and one super sugary cereal."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 171,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "cluster = kmeans.predict(test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 172,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 2], dtype=int32)"
+ ]
+ },
+ "execution_count": 172,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cluster"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The healthy cereal got 0 for the dense healthy cereals and the sugary one got 2 for the less healthy, less substantial cereals."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "## MeanShift"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 209,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MeanShift(bandwidth=2.1, bin_seeding=False, cluster_all=True, min_bin_freq=1,\n",
+ " seeds=None)"
+ ]
+ },
+ "execution_count": 209,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Pass the bandwidth by keyword: recent scikit-learn releases make MeanShift's\n",
+    "# constructor arguments keyword-only, so the positional form fails there.\n",
+    "ms = MeanShift(bandwidth=2.1)\n",
+    "ms.fit(num_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 210,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 8, 9, 5, 10, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
+ " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 6,\n",
+ " 4, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n",
+ " 1, 1, 2, 0, 0, 1, 9, 1, 1, 7, 0, 0, 0, 0, 0, 0, 0,\n",
+ " 0, 2, 2, 2, 0, 0, 0, 0, 0])"
+ ]
+ },
+ "execution_count": 210,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ms.predict(num_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 0])"
+ ]
+ },
+ "execution_count": 211,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ms.predict(test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "2.1 is the lowest bandwidth at which the MeanShift model assigns the really sugary cereal and the fibrous, hearty cereal to different clusters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}