diff --git a/Cereal_classifier.ipynb b/Cereal_classifier.ipynb new file mode 100644 index 0000000..99ac766 --- /dev/null +++ b/Cereal_classifier.ipynb @@ -0,0 +1,1939 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 173, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn import preprocessing\n", + "from sklearn.cluster import KMeans\n", + "from sklearn.cluster import MeanShift\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cereals = pd.read_csv('cereals.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namemfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
72TriplesGC1102125002136025310.75
73TrixGC11011140013122525211.00
74Wheat_ChexRC10031230317311525110.67
75WheatiesGC10031200317311025111.00
76Wheaties_Honey_GoldGC1102120011686025110.75
\n", + "
" + ], + "text/plain": [ + " name mfr type calories protein fat sodium fiber \\\n", + "72 Triples G C 110 2 1 250 0 \n", + "73 Trix G C 110 1 1 140 0 \n", + "74 Wheat_Chex R C 100 3 1 230 3 \n", + "75 Wheaties G C 100 3 1 200 3 \n", + "76 Wheaties_Honey_Gold G C 110 2 1 200 1 \n", + "\n", + " carbo sugars potass vitamins shelf weight cups \n", + "72 21 3 60 25 3 1 0.75 \n", + "73 13 12 25 25 2 1 1.00 \n", + "74 17 3 115 25 1 1 0.67 \n", + "75 17 3 110 25 1 1 1.00 \n", + "76 16 8 60 25 1 1 0.75 " + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cereals.tail()" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_cereals = np.array([[170, 11, 1.5, 85, 9, 36, 8, 270, 26, 1],\n", + " [103, 1.5, .6, 38, 1.4, 24, 15, 49, 20, .75]])" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "test_df = pd.DataFrame(test_cereals, columns=['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins', 'cups'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Processing Data" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cereals.replace('-1', cereals.mean(), inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "num_columns = cereals[['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups']]\n", + "norm_df = num_columns.apply(lambda row: row * (1/row['cups']), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "norm_test = test_df.apply(lambda row: row * 
1/row['cups'], axis=1)\n", + "scaled_test = preprocessing.scale(norm_test)\n", + "test = pd.DataFrame(scaled_test, columns=['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins', 'cups'])" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "nums_scaled = preprocessing.scale(norm_df)\n", + "scaled_df = pd.DataFrame(nums_scaled, columns=['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins', 'shelf', 'weight', 'cups'])" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
01.1473413.4238080.9707201.3238494.729379-0.5279901.5168024.1975131.4599182.7627392.6755580
1-0.392323-0.2487482.175351-1.452691-0.282811-1.423470-0.232761-0.101023-1.394093-0.070810-0.6660520
21.1473413.4238080.9707204.2102954.1927420.2308910.9960994.9277811.4599182.7627392.6755580
3-0.7265921.764447-0.8825580.4889994.321535-0.421747-1.6074173.0619480.4895551.3248190.9798160
40.053369-0.3829610.7483270.391304-0.400871-0.0878390.225458-0.142570-0.1383280.394400-0.1174300
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo sugars \\\n", + "0 1.147341 3.423808 0.970720 1.323849 4.729379 -0.527990 1.516802 \n", + "1 -0.392323 -0.248748 2.175351 -1.452691 -0.282811 -1.423470 -0.232761 \n", + "2 1.147341 3.423808 0.970720 4.210295 4.192742 0.230891 0.996099 \n", + "3 -0.726592 1.764447 -0.882558 0.488999 4.321535 -0.421747 -1.607417 \n", + "4 0.053369 -0.382961 0.748327 0.391304 -0.400871 -0.087839 0.225458 \n", + "\n", + " potass vitamins shelf weight cups \n", + "0 4.197513 1.459918 2.762739 2.675558 0 \n", + "1 -0.101023 -1.394093 -0.070810 -0.666052 0 \n", + "2 4.927781 1.459918 2.762739 2.675558 0 \n", + "3 3.061948 0.489555 1.324819 0.979816 0 \n", + "4 -0.142570 -0.138328 0.394400 -0.117430 0 " + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "other_cols = cereals[['name', 'mfr', 'type']]" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "new_df = pd.merge(other_cols, scaled_df, left_index=True, right_index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namemfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
57Quaker_OatmealQH0.0966081.5481040.943059-1.5625980.0766560.3028960.1678590.074773-1.394093-0.7720960.1445990
58Raisin_BranKC0.2762150.153891-0.0671160.4889990.543610-0.0878391.1418961.013548-0.138328-0.2258800.6067520
59Raisin_Nut_BranGC0.9447540.9591691.5637690.4889990.2484600.2043301.1418960.7725590.4895551.3248190.9798160
60Raisin_SquaresKC0.6104850.153891-0.882558-1.5625980.0713701.3312690.4545680.4110770.4895551.3248190.9798160
61Rice_ChexRC-0.770964-1.100347-0.882558-0.006392-0.6369910.123439-1.303290-0.754410-0.560621-1.054748-0.8554000
62Rice_KrispiesKC-0.559457-0.651387-0.8825580.562271-0.6369910.329546-1.091921-0.703494-0.452269-1.001229-0.6660520
63Shredded_WheatNC-1.060861-0.651387-0.882558-1.562598-0.105721-0.421747-1.607417-0.342011-1.394093-1.001229-0.9458500
64Shredded_Wheat_'n'BranNC-0.1528460.346196-0.882558-1.5625980.4202641.125692-1.6074170.344536-1.394093-0.7720960.1445990
65Shredded_Wheat_spoon_sizeNC-0.1528460.346196-0.882558-1.5625980.1559501.312580-1.6074170.164694-1.394093-0.7720960.1445990
66SmacksKC0.053369-0.382961-0.067116-0.878732-0.400871-0.9226091.829224-0.593041-0.138328-0.225880-0.1174300
67Special_KKC-0.5594570.959169-0.8825580.122643-0.459901-0.421747-1.091921-0.582999-0.452269-1.001229-0.6660520
68Strawberry_Fruit_WheatsNC-0.893727-0.651387-0.882558-1.452691-0.105721-0.546962-0.748257-0.372135-0.452269-0.536019-0.6660520
69Total_Corn_FlakesGC-0.559457-0.651387-0.270976-0.097171-0.6369910.204330-1.091921-0.7034942.373202-0.070810-0.6660520
70Total_Raisin_BranGC-0.058054-0.248748-0.270976-0.1704420.071370-0.5469620.7982320.4713242.373202-0.0708100.1568820
71Total_Whole_GrainGC-0.726592-0.248748-0.270976-0.097171-0.105721-0.421747-1.091921-0.2516412.373202-0.070810-0.6660520
72TriplesGC0.053369-0.382961-0.0671160.879780-0.6369911.080838-0.920089-0.432382-0.1383280.394400-0.1174300
73TrixGC-0.559457-1.054026-0.270976-0.536799-0.636991-0.7973930.454568-0.763741-0.452269-0.536019-0.6660520
74Wheat_ChexRC0.0966080.3461960.0302510.9526870.1559500.751914-0.8380200.1197330.011614-0.7720960.1445990
75WheatiesGC-0.726592-0.248748-0.270976-0.097171-0.105721-0.296531-1.091921-0.251641-0.452269-1.001229-0.6660520
76Wheaties_Honey_GoldGC0.053369-0.382961-0.0671160.391304-0.4008710.2460690.225458-0.432382-0.138328-0.846159-0.1174300
\n", + "
" + ], + "text/plain": [ + " name mfr type calories protein fat \\\n", + "57 Quaker_Oatmeal Q H 0.096608 1.548104 0.943059 \n", + "58 Raisin_Bran K C 0.276215 0.153891 -0.067116 \n", + "59 Raisin_Nut_Bran G C 0.944754 0.959169 1.563769 \n", + "60 Raisin_Squares K C 0.610485 0.153891 -0.882558 \n", + "61 Rice_Chex R C -0.770964 -1.100347 -0.882558 \n", + "62 Rice_Krispies K C -0.559457 -0.651387 -0.882558 \n", + "63 Shredded_Wheat N C -1.060861 -0.651387 -0.882558 \n", + "64 Shredded_Wheat_'n'Bran N C -0.152846 0.346196 -0.882558 \n", + "65 Shredded_Wheat_spoon_size N C -0.152846 0.346196 -0.882558 \n", + "66 Smacks K C 0.053369 -0.382961 -0.067116 \n", + "67 Special_K K C -0.559457 0.959169 -0.882558 \n", + "68 Strawberry_Fruit_Wheats N C -0.893727 -0.651387 -0.882558 \n", + "69 Total_Corn_Flakes G C -0.559457 -0.651387 -0.270976 \n", + "70 Total_Raisin_Bran G C -0.058054 -0.248748 -0.270976 \n", + "71 Total_Whole_Grain G C -0.726592 -0.248748 -0.270976 \n", + "72 Triples G C 0.053369 -0.382961 -0.067116 \n", + "73 Trix G C -0.559457 -1.054026 -0.270976 \n", + "74 Wheat_Chex R C 0.096608 0.346196 0.030251 \n", + "75 Wheaties G C -0.726592 -0.248748 -0.270976 \n", + "76 Wheaties_Honey_Gold G C 0.053369 -0.382961 -0.067116 \n", + "\n", + " sodium fiber carbo sugars potass vitamins shelf \\\n", + "57 -1.562598 0.076656 0.302896 0.167859 0.074773 -1.394093 -0.772096 \n", + "58 0.488999 0.543610 -0.087839 1.141896 1.013548 -0.138328 -0.225880 \n", + "59 0.488999 0.248460 0.204330 1.141896 0.772559 0.489555 1.324819 \n", + "60 -1.562598 0.071370 1.331269 0.454568 0.411077 0.489555 1.324819 \n", + "61 -0.006392 -0.636991 0.123439 -1.303290 -0.754410 -0.560621 -1.054748 \n", + "62 0.562271 -0.636991 0.329546 -1.091921 -0.703494 -0.452269 -1.001229 \n", + "63 -1.562598 -0.105721 -0.421747 -1.607417 -0.342011 -1.394093 -1.001229 \n", + "64 -1.562598 0.420264 1.125692 -1.607417 0.344536 -1.394093 -0.772096 \n", + "65 -1.562598 0.155950 1.312580 -1.607417 0.164694 -1.394093 
-0.772096 \n", + "66 -0.878732 -0.400871 -0.922609 1.829224 -0.593041 -0.138328 -0.225880 \n", + "67 0.122643 -0.459901 -0.421747 -1.091921 -0.582999 -0.452269 -1.001229 \n", + "68 -1.452691 -0.105721 -0.546962 -0.748257 -0.372135 -0.452269 -0.536019 \n", + "69 -0.097171 -0.636991 0.204330 -1.091921 -0.703494 2.373202 -0.070810 \n", + "70 -0.170442 0.071370 -0.546962 0.798232 0.471324 2.373202 -0.070810 \n", + "71 -0.097171 -0.105721 -0.421747 -1.091921 -0.251641 2.373202 -0.070810 \n", + "72 0.879780 -0.636991 1.080838 -0.920089 -0.432382 -0.138328 0.394400 \n", + "73 -0.536799 -0.636991 -0.797393 0.454568 -0.763741 -0.452269 -0.536019 \n", + "74 0.952687 0.155950 0.751914 -0.838020 0.119733 0.011614 -0.772096 \n", + "75 -0.097171 -0.105721 -0.296531 -1.091921 -0.251641 -0.452269 -1.001229 \n", + "76 0.391304 -0.400871 0.246069 0.225458 -0.432382 -0.138328 -0.846159 \n", + "\n", + " weight cups \n", + "57 0.144599 0 \n", + "58 0.606752 0 \n", + "59 0.979816 0 \n", + "60 0.979816 0 \n", + "61 -0.855400 0 \n", + "62 -0.666052 0 \n", + "63 -0.945850 0 \n", + "64 0.144599 0 \n", + "65 0.144599 0 \n", + "66 -0.117430 0 \n", + "67 -0.666052 0 \n", + "68 -0.666052 0 \n", + "69 -0.666052 0 \n", + "70 0.156882 0 \n", + "71 -0.666052 0 \n", + "72 -0.117430 0 \n", + "73 -0.666052 0 \n", + "74 0.144599 0 \n", + "75 -0.666052 0 \n", + "76 -0.117430 0 " + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df.tail(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "###Strongest Correlation" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
calories1.0000000.6951730.5711560.5576440.3975440.7799350.5507190.4895550.4606740.7986450.917061NaN
protein0.6951731.0000000.4080030.4999660.7929640.5438500.2162260.8300690.3772640.7833560.839599NaN
fat0.5711560.4080031.0000000.2072030.2095710.1404660.4365920.3194170.1800920.4508720.456609NaN
sodium0.5576440.4999660.2072031.0000000.4736450.4468430.3134130.5269070.4912550.5438230.628566NaN
fiber0.3975440.7929640.2095710.4736451.0000000.2172330.1754160.9611500.3174000.6705410.690713NaN
carbo0.7799350.5438500.1404660.4468430.2172331.0000000.0184650.2534420.3701450.5953330.705771NaN
sugars0.5507190.2162260.4365920.3134130.1754160.0184651.0000000.2970570.2498210.3824000.512461NaN
potass0.4895550.8300690.3194170.5269070.9611500.2534420.2970571.0000000.3386060.7279460.759611NaN
vitamins0.4606740.3772640.1800920.4912550.3174000.3701450.2498210.3386061.0000000.5374100.488464NaN
shelf0.7986450.7833560.4508720.5438230.6705410.5953330.3824000.7279460.5374101.0000000.874568NaN
weight0.9170610.8395990.4566090.6285660.6907130.7057710.5124610.7596110.4884640.8745681.000000NaN
cupsNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo \\\n", + "calories 1.000000 0.695173 0.571156 0.557644 0.397544 0.779935 \n", + "protein 0.695173 1.000000 0.408003 0.499966 0.792964 0.543850 \n", + "fat 0.571156 0.408003 1.000000 0.207203 0.209571 0.140466 \n", + "sodium 0.557644 0.499966 0.207203 1.000000 0.473645 0.446843 \n", + "fiber 0.397544 0.792964 0.209571 0.473645 1.000000 0.217233 \n", + "carbo 0.779935 0.543850 0.140466 0.446843 0.217233 1.000000 \n", + "sugars 0.550719 0.216226 0.436592 0.313413 0.175416 0.018465 \n", + "potass 0.489555 0.830069 0.319417 0.526907 0.961150 0.253442 \n", + "vitamins 0.460674 0.377264 0.180092 0.491255 0.317400 0.370145 \n", + "shelf 0.798645 0.783356 0.450872 0.543823 0.670541 0.595333 \n", + "weight 0.917061 0.839599 0.456609 0.628566 0.690713 0.705771 \n", + "cups NaN NaN NaN NaN NaN NaN \n", + "\n", + " sugars potass vitamins shelf weight cups \n", + "calories 0.550719 0.489555 0.460674 0.798645 0.917061 NaN \n", + "protein 0.216226 0.830069 0.377264 0.783356 0.839599 NaN \n", + "fat 0.436592 0.319417 0.180092 0.450872 0.456609 NaN \n", + "sodium 0.313413 0.526907 0.491255 0.543823 0.628566 NaN \n", + "fiber 0.175416 0.961150 0.317400 0.670541 0.690713 NaN \n", + "carbo 0.018465 0.253442 0.370145 0.595333 0.705771 NaN \n", + "sugars 1.000000 0.297057 0.249821 0.382400 0.512461 NaN \n", + "potass 0.297057 1.000000 0.338606 0.727946 0.759611 NaN \n", + "vitamins 0.249821 0.338606 1.000000 0.537410 0.488464 NaN \n", + "shelf 0.382400 0.727946 0.537410 1.000000 0.874568 NaN \n", + "weight 0.512461 0.759611 0.488464 0.874568 1.000000 NaN \n", + "cups NaN NaN NaN NaN NaN NaN " + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_df.corr()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "pot_and_fib = new_df[['potass', 'fiber']]" + ] + }, + { + 
"cell_type": "code", + "execution_count": 92, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEPCAYAAACqZsSmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHEFJREFUeJzt3X+cXXV95/HXm/xiAh0gY1tUWAdRVuWBhiAsu/gjtoRg\nK7pZla6P2hLaB9tq6SAJ7FrEh7pV+mhtAk1d14pIYMu2urisoLJJiuBC7QP5FWBJWGXruKBFNEMY\ngTBJyGf/OGecO8nNzLkz9873fu95Px+P+8j9nnvm3vdM8vjkzOd8z/coIjAzs3o5JHUAMzObey7+\nZmY15OJvZlZDLv5mZjXk4m9mVkMu/mZmNZS0+Es6UtKNkrZL2ibp9JR5zMzqYn7iz/8L4BsR8R5J\n84HDEucxM6sFpbrIS9IRwAMR8cokAczMaixl2+c44CeSrpV0v6SrJS1OmMfMrDZSFv/5wDLgsxGx\nDHgO+HDCPGZmtZGy5/8E8ERE3FOOb2S/4i/JCw+Zmc1ARGiq15MV/4h4UtLjkk6IiO8CZwKPNNlv\nym+gm0n6eER8PHWOmco5f87ZwflT64H80x44p57t84fADZIWAv8XOD9xnnYbTB1glgZTB5iFwdQB\nZmkwdYBZGkwdYJYGUwfotKTFPyIeBE5NmcHMrI58hW9nbUwdYJY2pg4wCxtTB5iljakDzNLG1AFm\naWPqAJ2WbJ5/FZIi556/mVkKVWqnj/w7SNLy1BlmI+f8OWcH508t9/xVuPibmdWQ2z5mZj3GbR8z\nM2vKxb+Dcu8b5pw/5+zg/Knlnr8KF38zsxpyz9/MrMe4529mZk25+HdQ7n3DnPPnnB2cP7Xc81fh\n4m9mVkPu+ZuZ9Rj3/M3MrCkX/w7KvW+Yc/6cs4Pzp5Z7/ipc/M3Masg9fzOzHuOev5mZNeXi30G5\n9w1zzp9zdnD+1HLPX0XSe/hKGgZGgReBPRFxWso8ZmZ1kbTnL+n7wCkRMXKQ193zNzNrUS49fxd3\nM7M5lrr4B/B3ku6VdEHiLG2Xe98w5/w5ZwfnTy33/FWkLv5nRMTJwNuBP5D05sR5zKzDJK2UBjYX\nD61Mnaeuumaev6SPAc9GxLqGbQFcBwyXm3YCWyPijvL15QAee+xxNuNTof8TsKEPtgN/OQbPvysi\nNnVJvizH5fPVFIaBj03X809W/CUtBuZFxM8kHQZsBj4REZsb9vEJX7MeIg1shvUr4Lxyy3XAmi0R\nO85KmavXdPsJ318G7pS0Fbgb+Fpj4e8FufcNc86fc3Zw/tRyz19Fsnn+EfF9YGmqzzezFEbWwdCb\ngL5iPLQLRtdN+SXWEV3T82/GbR+z3lOc5F2ythiNrIuITWkT9Z4qtdPF38ysx3R7z7/n5d43zDl/\nztnB+VPLPX8VLv5mZjXkto+ZWY9x28fMzJpy8e+g3PuGOefPOTs4f2q556/Cxd/MrIbc8zcz6zHu\n+ZuZWVMu/h2Ue98w5/w5ZwfnTy33/FW4+JuZ1ZB7/mZmPcY9fzMza8rFv4Ny7xvmnD/n7OD8qeWe\nvwoXfzPrSfK9gqfknr+Z9Zyi2PffVNwrGMqbxqyqy70DqtTOZHfyMjPrnCVrYX3fxL2C6YM1a4Fa\nFP8q3PbpoNz7hjnnzzk7OH9queevIvmRv6R5wL3AExFxTuo8ZtYLfK/g6STv+UtaA5wC/EJEvHO/\n19zzN7MZqfO9grv+Hr6
SjgE2Ap8C1ux/5O/ib2bWuhwu8roSuBTYlzhHR+TeN8w5f87ZwflTyz1/\nFcl6/pLeATwVEQ9M9YOWtBEYLoc7ga0RcUf52nKAbh0DSyV1TZ665ffY47qMy+erKQxTQbK2j6Qr\ngN8C9gKHAv3AVyLitxv2cdvHzKxFXd/z/3kI6a3AJe75m5nNXg49/0bp/xdqs9z7hjnnzzk7OH9q\nueevIvk8f4CI+BbwrdQ5zMzqoivaPgfjto+ZWetya/uYmdkccfHvoNz7hjnnzzk7OH9queevwsXf\nzKyG3PM3M+sx7vmbmVlTLv4dlHvfMOf8OWcH508t9/xVuPibmdWQe/5mZj3GPX8zM2vKxb+Dcu8b\n5pw/5+zg/Knlnr8KF38zsxpyz9/MrMe4529mZk25+HdQ7n3DnPPnnB2cP7Xc81fh4m9mVkPu+ZuZ\n9Rj3/M3MrCkX/w7KvW+Yc/6cs4Pzp5Z7/iqSFX9Jh0q6W9JWSdsk/UmqLGaWnqSV0sDm4qGVqfP0\nuqQ9f0mLI+J5SfOBu4BLIuKuhtfd8zergaLY998EG/qKLUO7YHRVRGxKmyxPVWrn/LkK00xEPF8+\nXQjMA0YSxjGzZJashfV9cN74hj5YsxZw8e+QpD1/SYdI2gr8GLg9IralzNNuufcNc86fc3Zw/tRy\nz19F6iP/fcBSSUcAmyQtj4g7GveRtBEYLoc7ga3j+4z/BXXruPzeuiZP3fJ7nNN4ZB188C2wfRG8\nlrLts6WxJnRX3u4al89XUximgq6Z5y/po8CuiPjzhm3u+ZvVRNH3X7K2GI2sc79/5qrUzmTFX9JL\ngL0RsVNSH0Vv7xMRcVvDPi7+ZmYt6vaLvF4KfLPs+d8N3NJY+HtB7n3DnPPnnB2cP7Xc81eRrOcf\nEQ8Dy1J9vplZnXVNz78Zt33MzFrX7W0fMzNLxMW/g3LvG+acP+fs4Pyp5Z6/Chd/M7Macs/fzKzH\nuOdvZmZNufh3UO59w5zz55wdnD+13PNX4eJvZlZDLfX8JS0BjomIhzoXadLnuedvZtaitvT8JX1L\nUn9Z+O8DviDpynaFNDOzuVel7XNERIwC/wa4PiJOA87sbKzekHvfMOf8OWcH508t9/xVVCn+8yS9\nFDgX+Hq5rXvnh5qZ2bSm7flLei/wUeDvI+IDko4H/iwi3t3xcO75m5m1rKvX86/Cxd/MrHXtOuH7\nZ+UJ3wWSbpP0U0m/1b6YvSv3vmHO+XPODs6fWu75q6jS819ZnvB9B8W9IY8HLu1kKDMz66wqPf9H\nIuJESdcAN0bErZIejIg3dDyc2z5mZi2rUjur3MnrFkmPAi8AH5D0S+VzMzPL1LRtn4j4MHAGcEpE\n7AaeA97V6WC9IPe+Yc75c84Ozp9a7vmrqHoP35cBvyqpj4k5/tfP5oMlHVu+xy+V7/n5iNgwm/c0\nM7NqqvT8Pw68FTiR4iKvtwN3RcR7ZvXB0tHA0RGxVdLhFEtH/OuI2N6wj3v+ZmYtatd6/u+hWM7h\nnyLifOANwJGzDRcRT0bE1vL5s8B2it8wzMysw6oU/10R8SKwV9IRwFPAse0MIWkQOBm4u53vm1ru\nfcOc8+ecHZw/tdzzV1Gl53+vpKOAq4F7KU74frtdAcqWz43AReVvAPu/vpHi+gKAncDWiLijfG05\nQLeOgaWSuiZP3fJ77HFdxuXz1RSGqaDV9fyPA/oj4sHKXzT1+y0AvgbcGhFXNXndPX8zsxa1pecv\n6bbx5xHx/Yh4sHHbLMIJuAbY1qzwm5lZ5xy0+EvqkzQA/KKkJQ2PQeDlbfjsM4D3A2+T9ED5OLsN\n79s1cu8b5pw/5+zg/Knlnr+KqXr+vwdcRDED576G7T8DPjPbD46Iu/A9hM3Mkqgyz38o1cVX7vmb\nmbWuSu2sUvwXAh8A3kJxJe63gM9FxJ52BZ3is138zcxa1K6LvP4zsAz4T+XzU8o/bRq59
w1zzp9z\ndnD+1HLPX0WVef6nRsTrG8a3SXqoU4HMzKzzqrR97gfOjYjHyvHxwH+LiGUdD+e2j5lZy6rUzipH\n/pcC35T0j4CAQeD82cczM7NUqvT8vw18HtgH7AD+ijYu79DLcu8b5pw/5+zg/Knlnr+KKsX/euA4\n4I8p5ve/EvgvnQxlZmadVaXnvy0iXjfdtk5wz3/uSFoJS9YWo5F1EbEpbSIzm6l29fzvl/QvI+If\nyjc9nclX/FrmisLffxOs7yu2DL1J0ir/B2DWu6q0fd4I/L2kH0gapuj3v1HSw57yObV8+oZL1sKG\nPjiP4rGhD5aszSf/gXLODs6fWu75q6hy5N9Ti62ZmVmL6/nPNff858ZE22fDeNtnF4y67WOWqbas\n7ZOSi//cmasTvj6xbNZ5Lv6JSVrecEvE7LQ7/1z+huGffVrOn1a7ZvuYtcmStcWMovPGN/TBmrWA\nj/7N5phvptJBOR85QN75c84Ozp9a7vmr8JG/zaGRdTD0JqCx7bMuaSSzmvKRfwflPle43fmL3v7o\nKlizpXh0bkaRf/ZpOX/3S3rkL+mLwK8DT0XESSmz2Nwoi717/GaJJZ3tI+nNwLPA9c2Kf+6zfTqt\nlWmTnmJpVh9ZTPWUNAjc4uLfmlamTfoiLrN6adc9fG2GOts3bL4ez+z3nZBz3zPn7OD8qeWev4qu\nn+0jaSMwXA53AlvHp2GN/wV16xhYKqmD778duANYXn7c7qPKfRYVxX33UfDsl2FJ+fp4rEKT978U\nDj8XFj5dzMzhtZ3M77HHHrdnXD5fTWGYCtz2ydTBWjnF8wO2fxL6L5+q7ePWkFnv8BW+PSwiNkla\nVV4hC4yuK7YNbG5yFe1yGDlg38nv6Ktvzeokac9f0t9Q3B/gBEmPS+qpG8NX7RtKWikNbC4eWln1\n/SNiU8SOs4pHYzF/GHh3+Xh4mn2nsvuoqlm6Te49W+dPK/f8VSQ98o+I96X8/G6gtt9Fa+SHcDWw\noRwPAaM/rPB1Ta6+ffbLM8tgZt0uec9/KnXo+ZdtmhUT7ZbrgDVbInacNcP3+ymsH9jv/XZE7HjJ\n9F/rawHMeoF7/tYSX31rVh+e599B1fqGI+uKFst1FI+hXeU0yxkaWV+0en7+fuW21uXc98w5Ozh/\narnnr8JH/okdbNbOLN7vCknAmjXl+62PiCvaENXMeoh7/mZmPcbLO8yBVqdpznRap5lZO7n4z0LD\nNM0VxaP/psaCvn/fcLr9u03Ofc+cs4Pzp5Z7/irc85+V6a+KnTx98rCBiQXWmu9vZjYXXPw7a9F+\nF3DtG7/iNgcNC9RlJ+fs4Pyp5Z6/Chf/WZnunrQH/GZwCHxoH5x0SPP9zczmhnv+sxDT3pP2hdcd\n+FX7nmz1HrapThLn3PfMOTs4f2q556/CR/6zNPVVsfsG4JKG8SXAviNbWbqh/Wv/mJl5nn9HSUt2\nw+8sgO+XW44DvrgnYmRh9fdo79o/Ztb7PM8/ued/UKyw+c7ycTXws594nr+Zpebi31Fjn4exPXA5\nxWPXXlg80No8/5F1MDTWsFbP2OzW/qku575nztnB+VPLPX8V7vl31j0wdg483zDP/3eXwc3lyxf0\nwbUV5vnvBT7X8NzMbHbc859D0uH3Qd8y+PNyyyXArvsjnj3l4F/jnr+ZtcY9/zkg6TJp4KfFQ5dN\nvfcCiiJ+c/k4r9xmZja3XPxnoSj2/Z8q7py1fgD6P9X4H4Ck5Y1z9GF3f3HkPn4C+DpgzzSf0u71\n/qvLue+Zc3Zw/tRyz19F0p6/pLOBq4B5wBci4k9T5qlqYr2eo5bDlTRcwUu5jv74+vmnQv8nJubo\nX7SvaPk07v+hKT+r3ev9m5lBwuIvaR7wGeBM4IfAPZJujojtqTJVMfmiq8un2XvJisnLO3yuyW9a\nh+yY7jNT3V4x5/VNcs4Ozp9a7vmrSHnkfxrwWEQMA
0j6W+BdQFcX/8nr9TxBcZvEcUPA6BS3TDyD\nYnE3vLaPmSWVsvi/HHi8YfwE8C8SZZmhjwC3AhcHsBdGb5h8y8SRLZMXfrt6F4x+EtYsL8bd3cKR\ntDzXI6Ccs4Pzp5Z7/ipSFv9Kc0wlbQSGy+FOYOv4X8r4SZkOji+Fw8+FhU+XJ1nHgC0w9BZgEXwD\neAD4rIAF8MH3SdoWEZ8u8+6B0Y/BmhXFcHQLcE/EjivGP6/xH9kcfD8tjYGlkromj8cee9x8XD5f\nTWGYCpLN85d0OvDxiDi7HP8RsK/xpO9czvOffNOV8dk0/TcVN1+BskWzqni+6Bb4xQXwApNP4E7M\nwd///br5CN/MekuV2pnyyP9e4NWSBoEfAb8BvC9FkGYrZ8KL24srcPe/GndsAPoWwCeZuOp2+vfz\nSpxm1k2SFf+I2CvpQopZLPOAa9LN9Gl2O8YLX1UcyTdejTs2AIteMXG0fzTw/ob3GT+BO/5+rwCW\nl++X3+0ac+575pwdnD+13PNXkXSef0TcSnHGtAstUPM5+ft+AAwU45XAP6fhhO+Xinn5A2vnOKyZ\nWUu8tg/jbZrFX4XXLyq2PDQGCx+Bq5bt388vzgf0fxU2LIJbgC3AhnKfIWD0I8B9zc4XuO1jZnOh\nSu2sdfGfOCk7NgDzToIN5UI7Q2Mw+h+h//JmBXzi62I5XLlg8n8QF+8B3QEjd8CS5cV2n/A1s7lT\n64XdNM19bxtOyq6Ak5YVhf88iseGRUXhHv1SUcwv3jPe0oHiittiVU2NHvjJhy0o1+q/HEa2ROw4\nK9fCn/P6JjlnB+dPLff8VfTkev7VZts0nuS9ucm77D4D+hcXa/cADK2W9L39LuIahqGBifEQcDzl\nbwJ9cOG5wKcxM+syPdn2KVbQPH/F5HvnXjtpDfzJ6+RvAs4FXle+ug34ZxQzfCb1/HdE7HjJxHss\n2Q2/sgC2lluWAncCP8br7ptZKjVu+4wNHLh08tjA5H0al0reUm77/fKxCDii4medAzxWPs4B9jHX\nSy+bmbWqJ9s+xQ1Spl46OSYvlbwMNgxM3v9aiiP/cc0WbXv6BhhaPXmf538Eax4pF2wbm+13klLO\nc51zzg7On1ru+avo0eLfbJnk6ZdOnmwMeH4PfOgFOGQ3jK6f3O+HiDhfEnDxbxZbRm+IiPPHX6/D\nSSMzy1OP9vx1L/Sfst/8+/si4o0N+zTM7d9JscLEz/ffBy9uhecuy3WmjpnVV23n+UtHBfwuk0/4\nXkPE05rY5/D7YNEyGO/kXAjM3wuHPAMjBxzlm5nlosYnfAWcBHylfJxUbmu04NVF4R+f2/8Z4DXz\ny3vxXt7s2oCWU2Te9sk5f87ZwflTyz1/FT3a8x97ES6ZNzG+pNzWSAsP/LqXMTFHP7+F2MzMqurR\n4q8xeGbxxGydZ4CF+8282TNv8myeS4C/bmuK3GcL5Jw/5+zg/Knlnr+KHi3+e+cV0z1fVY4fKrft\nbzfFmvzPAs8DT1LO0QdG75iDoGZmSfRoz3/+QljMxEVbi8ttjfbOg73l88OBF4GrKJZ6uICJRdlm\nLve+Yc75c84Ozp9a7vmr6NEj/4VN1uK/eL8zvn3Ar1AszfAz4B3AjeVr13U+oplZQj1a/F+ssO25\nEdgyUMztfxi4momiP35HrtnJvW+Yc/6cs4Pzp5Z7/ip6dJ7/omfg0P7JF3m9MBoxdsTEPgOb4W0r\nJhZlOxrYvgO43+vvm1nOajzP//C7YQXwx+VjRbmt0cg62LILPkrxeHgXjPxmO9ffz71vmHP+nLOD\n86eWe/4qkhR/Se+V9IikFyUta/8n7F/YtxywwmZR4EdXFbdmXLPFt1k0szpJ0vaR9BqKtY//Clgb\nEfcfZL8Z38Zx4laL4DaOmdVJldqZ5IRvRDwKUKyI2bHP2ISv0DUza6pHe/7dIfe+Yc75c84Ozp9a\n7vmr6NiRv6QtF
FNo9ndZRNzSwvtsBIbL4U5g6/g0rPG/oG4dA0sldU2euuX32OO6jMvnqykMU0HS\nqZ6SbqdDPX8zs7rKZaqni7uZ2RxLNdVzlaTHgdOBr0u6NUWOTsu9b5hz/pyzg/Onlnv+KlLN9rkJ\nuCnFZ5uZWY8u72BmVme59PzNzGyOufh3UO59w5zz55wdnD+13PNX4eJvZlZD7vmbmfUY9/zNzKwp\nF/8Oyr1vmHP+nLOD86eWe/4qXPzNzGrIPX8zsx7jnr+ZmTXl4t9BufcNc86fc3Zw/tRyz1+Fi7+Z\nWQ25529m1mPc8zczs6Zc/Dso975hzvlzzg7On1ru+atw8TczqyH3/M3Meox7/mZm1lSqe/h+WtJ2\nSQ9K+u+SjkiRo9Ny7xvmnD/n7OD8qeWev4pUR/6bgRMj4g3Ad4E/SpSj05amDjBLOefPOTs4f2q5\n559WkuIfEVsiYl85vBs4JkWOOXBk6gCzlHP+nLOD86eWe/5pdUPP/3eAb6QOYWZWJ/M79caStgBH\nN3npsoi4pdznI8DuiPivncqR2GDqALM0mDrALAymDjBLg6kDzNJg6gCzNJg6QKclm+opaTVwAfCr\nEfHCQfbp3nmoZmZdbLqpnh078p+KpLOBS4G3Hqzww/ThzcxsZpIc+Uv6HrAQGCk3/UNEfHDOg5iZ\n1VRXX+FrZmad0Q2zfaYk6b2SHpH0oqRlqfNUIelsSY9K+p6k/5A6TyskfVHSjyU9nDrLTEg6VtLt\n5b+Z/y1pKHWmVkg6VNLdkrZK2ibpT1JnapWkeZIekHRL6iwzIWlY0kPl9/Cd1HlaIelISTeWF9Fu\nk3T6wfbt+uIPPAysAv5X6iBVSJoHfAY4G3gd8D5Jr02bqiXXUmTP1R7g4og4ETgd+IOcfv7lObC3\nRcRS4PXA2yS9KXGsVl0EbANybSsEsDwiTo6I01KHadFfAN+IiNdS/PvZfrAdu774R8SjEfHd1Dla\ncBrwWEQMR8Qe4G+BdyXOVFlE3Ak8nTrHTEXEkxGxtXz+LMU//pelTdWaiHi+fLoQmMfEubGuJ+kY\n4NeALwA5T9jILnu5TM6bI+KLABGxNyKeOdj+XV/8M/Ry4PGG8RPlNptjkgaBkymuIs+GpEMkbQV+\nDNweEdtSZ2rBlRQz+fZNt2MXC+DvJN0r6YLUYVpwHPATSddKul/S1ZIWH2znrij+krZIerjJ45zU\n2WYg1191e4qkw4EbgYvK3wCyERH7yrbPMcBbcllkTNI7gKci4gEyPHJucEZEnAy8naJt+ObUgSqa\nDywDPhsRy4DngA9PtXNyEbEidYY2+iFwbMP4WIqjf5sjkhYAXwH+OiL+R+o8MxURz0j6OvBG4I7E\ncar4V8A7Jf0acCjQL+n6iPjtxLlaEhH/VP75E0k3UbRy70ybqpIngCci4p5yfCNTFP+uOPJvQQ5H\nE/cCr5Y0KGkh8BvAzYkz1YYkAdcA2yLiqtR5WiXpJZKOLJ/3ASuAB9KmqiYiLouIYyPiOODfAt/M\nrfBLWizpF8rnhwFnUUw66XoR8STwuKQTyk1nAo8cbP+uL/6SVkl6nGLmxtcl3Zo601QiYi9wIbCJ\nYsbDlyLioGfcu42kvwG+DZwg6XFJ56fO1KIzgPdTzJJ5oHzkNHvppcA3y57/3cAtEXFb4kwzlWML\n9JeBOxt+/l+LiM2JM7XiD4EbJD1IMdvnioPt6Iu8zMxqqOuP/M3MrP1c/M3MasjF38yshlz8zcxq\nyMXfzKyGXPzNzGrIxd+sgaShcincEUn/vty2UdK7U2cza6euWN7BrIt8gOK+0j9q2Dbji2EkzS8v\n/DPrKj7yNytJ+hzwSuB/SvqQpL9sePlMSfdI+j+Sfr3cf56kT0v6jqQHJf27cvtySXdK+ipTXF5v\nlpKP/M1KEfH7klYCy4HGFWUFvCIiTpX0KuD28s/zgJ0RcZqkRcBdksaXAjgZODE
ifjCH34JZZS7+\nZgcSkxcRDODLABHxmKR/BF5DsejXSZLeU+7XD7wK2At8x4XfupmLv9nMjJ8HuDAitjS+UK6//9yc\nJzJrgXv+ZtMT8F4Vjqc4L/AoxcqtH5Q0H0DSCVPdOcmsm/jI32yy2O8xvu3/Ad+haO38XkTslvQF\nYBC4v7yPwFPAqv2+1qwreUlnM7MactvHzKyGXPzNzGrIxd/MrIZc/M3MasjF38yshlz8zcxqyMXf\nzKyGXPzNzGro/wPu6BKyUfa2cAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pot_and_fib.plot(kind='scatter', x='fiber', y='potass')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##Clustering" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "num_data = new_df[['calories', 'protein', 'fat', 'sodium', 'fiber', 'carbo', 'sugars', 'potass', 'vitamins', 'shelf']]" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n", + " n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n", + " verbose=0)" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kmeans = KMeans(3)\n", + "kmeans.fit(num_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 1, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2,\n", + " 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2,\n", + " 0, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,\n", + " 2, 0, 2, 2, 2, 2, 2, 2], dtype=int32)" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "clusters = kmeans.predict(num_data)\n", + "clusters" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": { + 
"collapsed": false + }, + "outputs": [], + "source": [ + "cereals = new_df['name']" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "cluster = kmeans.labels_" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "results = pd.DataFrame(dict(cereals = cereals, cluster = cluster))" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cerealscluster
0100%_Bran1
1100%_Natural_Bran2
2All-Bran1
3All-Bran_with_Extra_Fiber0
4Almond_Delight2
5Apple_Cinnamon_Cheerios2
6Apple_Jacks2
7Basic_40
8Bran_Chex2
9Bran_Flakes0
10Cap'n'Crunch2
11Cheerios2
12Cinnamon_Toast_Crunch2
13Clusters0
14Cocoa_Puffs2
15Corn_Chex2
16Corn_Flakes2
17Corn_Pops2
18Count_Chocula2
19Cracklin'_Oat_Bran0
20Cream_of_Wheat_(Quick)2
21Crispix2
22Crispy_Wheat_&_Raisins2
23Double_Chex2
24Froot_Loops2
25Frosted_Flakes2
26Frosted_Mini-Wheats2
27Fruit_&_Fibre_Dates,_Walnuts,_and_Oats0
28Fruitful_Bran0
29Fruity_Pebbles2
.........
47Multi-Grain_Cheerios2
48Nut&Honey_Crunch2
49Nutri-Grain_Almond-Raisin0
50Nutri-grain_Wheat2
51Oatmeal_Raisin_Crisp0
52Post_Nat._Raisin_Bran0
53Product_192
54Puffed_Rice2
55Puffed_Wheat2
56Quaker_Oat_Squares0
57Quaker_Oatmeal2
58Raisin_Bran0
59Raisin_Nut_Bran0
60Raisin_Squares0
61Rice_Chex2
62Rice_Krispies2
63Shredded_Wheat2
64Shredded_Wheat_'n'Bran2
65Shredded_Wheat_spoon_size2
66Smacks2
67Special_K2
68Strawberry_Fruit_Wheats2
69Total_Corn_Flakes2
70Total_Raisin_Bran0
71Total_Whole_Grain2
72Triples2
73Trix2
74Wheat_Chex2
75Wheaties2
76Wheaties_Honey_Gold2
\n", + "

77 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " cereals cluster\n", + "0 100%_Bran 1\n", + "1 100%_Natural_Bran 2\n", + "2 All-Bran 1\n", + "3 All-Bran_with_Extra_Fiber 0\n", + "4 Almond_Delight 2\n", + "5 Apple_Cinnamon_Cheerios 2\n", + "6 Apple_Jacks 2\n", + "7 Basic_4 0\n", + "8 Bran_Chex 2\n", + "9 Bran_Flakes 0\n", + "10 Cap'n'Crunch 2\n", + "11 Cheerios 2\n", + "12 Cinnamon_Toast_Crunch 2\n", + "13 Clusters 0\n", + "14 Cocoa_Puffs 2\n", + "15 Corn_Chex 2\n", + "16 Corn_Flakes 2\n", + "17 Corn_Pops 2\n", + "18 Count_Chocula 2\n", + "19 Cracklin'_Oat_Bran 0\n", + "20 Cream_of_Wheat_(Quick) 2\n", + "21 Crispix 2\n", + "22 Crispy_Wheat_&_Raisins 2\n", + "23 Double_Chex 2\n", + "24 Froot_Loops 2\n", + "25 Frosted_Flakes 2\n", + "26 Frosted_Mini-Wheats 2\n", + "27 Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 0\n", + "28 Fruitful_Bran 0\n", + "29 Fruity_Pebbles 2\n", + ".. ... ...\n", + "47 Multi-Grain_Cheerios 2\n", + "48 Nut&Honey_Crunch 2\n", + "49 Nutri-Grain_Almond-Raisin 0\n", + "50 Nutri-grain_Wheat 2\n", + "51 Oatmeal_Raisin_Crisp 0\n", + "52 Post_Nat._Raisin_Bran 0\n", + "53 Product_19 2\n", + "54 Puffed_Rice 2\n", + "55 Puffed_Wheat 2\n", + "56 Quaker_Oat_Squares 0\n", + "57 Quaker_Oatmeal 2\n", + "58 Raisin_Bran 0\n", + "59 Raisin_Nut_Bran 0\n", + "60 Raisin_Squares 0\n", + "61 Rice_Chex 2\n", + "62 Rice_Krispies 2\n", + "63 Shredded_Wheat 2\n", + "64 Shredded_Wheat_'n'Bran 2\n", + "65 Shredded_Wheat_spoon_size 2\n", + "66 Smacks 2\n", + "67 Special_K 2\n", + "68 Strawberry_Fruit_Wheats 2\n", + "69 Total_Corn_Flakes 2\n", + "70 Total_Raisin_Bran 0\n", + "71 Total_Whole_Grain 2\n", + "72 Triples 2\n", + "73 Trix 2\n", + "74 Wheat_Chex 2\n", + "75 Wheaties 2\n", + "76 Wheaties_Honey_Gold 2\n", + "\n", + "[77 rows x 2 columns]" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looking at the first few results, these choices make sense 
because, on the whole, 100% natural bran appears to have less nutritional content than those surrounding it (especially when looking at the vitamin content)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Looking at the last 4, the normalized dataframe has very similar data for the ones which have been classified together. Generally, the ones that are positive are clustered together, as are the negatives." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It seems like cereals with higher carbs and sodium are being clustered together. Cluster 2 seems to be the more sugary cereals. Cluster 0 looks like really dense, fibrous cereals. Cluster 1 seems to contain the few outliers (grape nuts and such)." + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAF6CAYAAADBKYuwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xu4XFWd5vH3PYRLQgAhPCMK4eal28Co2Io2osSgJI0K\nZlpGWx0nOvM47QV8NN7AsZNMO0qr0VbUme5WO15oHRsFxVZCujHeAK9BhAQBGzAgIgQFclESzm/+\n2KtInUrVqapTu2pV7fp+nqeenNq1L2vXOam31tprre2IEAAAGKyJ3AUAAGAcEcAAAGRAAAMAkAEB\nDABABgQwAAAZEMAAAGRAAGOo2H657bUl7Gel7c+WUaZhY/tI2w/Y9jTrTNo+todjrLf932a6fT/Y\nXmh7c+5yAGUhgDFQtm+1fWqr1yPiwohYXMKhOh7gbnuN7b8u4ZgDOU5E/DIiDog0iL/XsGzxZSXU\nxXs4StLf4KLc5QAIYAxayw9223sNuCxVkTUobc/KefwZCEktWw+m46Tk8mBMEcDIxvYy29+z/UHb\n90hamZZ9J71u2x+yfZft+2xfa/u4Fvs6xva3bN9v+3JJhza8/s+277T9u7TegrT8NZJeJultqVn3\nK2n5O2zfnPZ3ve0X1e3rsWkfv7N9t+0v1L32x7bX2d5i+wbbZ013nIYyrrL9kfTz3ra32X5fej7b\n9u9tP8L20amJeS/b/1vSsyR9NO33I3W7fJ7tG23/1vZHW7xvSySdK+klafsNdS8fbfu76T1Ya3te\n2qZ2/Ffbvk3Sv6blr7a90fa9ti+zfWS796VFmQ6x/Y+270j7urjFelOa2etbGGwfavtr6dy32P52\n+nv6rKQjJV2azvctaf1n2L4yrX+N7VPq9rve9rttf0/SNknHpr/TX6T35t9tv6zV+QAtRQQPHgN7\nSLpF0qL08zJJOyW9XsWXwf3Ssu+k1xdL+pGkA9PzP5J0WIv9XiXpA5L2VhFI90v6TN3ryyTtn17/\nkKQNda/9o6T/1bC/F9eOJek/S9oq6ZHp+eclnZt+3kfSSenn/SVtlvRf0/k8WdLdkp7Q6jgNx
3yO\npGvTzydJulnS1en5olqZJR0taVLSRHr+TUmvbtjXpKSvSjpQ0nxJv5G0uMVxV9S/V2nZ+nT8x6bf\nyzclvbfh+GskzU6vnynppvQ7mpD0Tknf6+R9aVKef0nv8UGSZkl6Vlq+UNLmhnM8ttnvUdJ7Jf0f\nSXulxzOb/Q2m54dLukfSkvT8uen5vLr34lZJT0jlP0jSfZIel15/pKQFuf9v8Ri9BzVg5PariPhY\nRExGxO8bXtsp6QBJT7A9ERE/j4hfN+4g1bSeKuldEbEzIr4j6VLVNTNGxJqI2BYROyWtkvQk2wfU\n76Z+nxFxUe1YEfFFFeFyYnr5QRW1w8Mj4sGIuDItf4GkWyLi0+l8rpH0ZUm12p4bj9PgakmPs32I\nii8Rn5R0uO39JZ0i6VvTbNtsv+dHxP0RsVlFgD55mm0btw9Jn4qIm9Pv5YtNtl8ZETvS63+pIqB/\nHhGTKgLwyel30+592V0Q+1GSlkj6y4i4LyJ2pd9ntx6U9ChJR0fEQxHxvWnWfYWkr0fEZZIUEf+q\n4ovf8+veizURsSmd2y4V4f8fbc+OiLsiYuMMyogxRwAjt5a9WiPiCkkflfQxSXfZ/ruG0Kx5tKTf\nRsSOumW31X5ITbXnpybl+1TUgKSGZup6tl9pe0NqkvytpOPr1n+bisD6ge3rbL8qLT9K0tNr26Tt\nXqaihiS1uVabyv8jFWH7bBWBe6WkZ9Y9b7l5k2X1X1a2S5o73fHbbL+jyfb1v7ujJH247ry3pOWH\nq/37Um++pHsj4r4uy1pT+yLxfhU1+MtTU/Hbp9nmKElnNZTvmZIOq1vn4XONiG2SXqLiS8evUlP3\nH82wvBhjBDByaxdKF0TEUyUtkPR4SW9tstqdkg62Padu2VF1+36ZpDMknRoRB0k6Ji2vfVhPKYPt\noyT9vYqm8UMi4mBJ19XWTzWe10TE4ZL+h6SP236MpF9K+lZEHFz3OCAiXt/JuSbfknSqpBMk/TA9\nX6Ki9v3tFtv02glrcobb1R/3l5Je03Du+0fEVWr/vtTbLOkQ2wd1cPztkup/54+qlSkitkbEWyLi\nMSp+92+2/Zwm5a6V/bNNyve+FueqiLg8Ik5TEdI3SPqHDsoLTEEAY2jZfqrtp9veW8WH7e8lPdS4\nXkTcpqLmuCp1XjpZRbNnzVxJf5B0b2rOfU/DLu6SVD9mdn8VH7j3SJpINdzj68p1lu0j0tPfpXUf\nkvQ1SY+3/YpUjr1tP832H7c4TjPfkvRKSden5vL1kv67pH+PiC0ttrlL0mPa7He6pu+7VDSpN67T\nTW/f/yvpPO/u3HZQXUerdu/LwyLiTknfUPGl5hFp3We3OOY1kl6eWjiWqGglUDr+C1x0lrOK/gAP\nafcXjcb363OSXmj7tLSv/VyMOT682Xth+z/YPjP9Le1U0TFrj79LoB0CGDk1G5JUv+xAFTXRe1V0\ngrlHRdNiMy+T9PS07l9J+nTda59R0SR9h4qa7FUNx/2kpAWp+fHL6Xre6rTer1WE73fr1n+qpKtt\nPyDpK5LOiYhbI2KrpNMkvTQd604V10L3aXacFudxlYpOTbXa7iYVzb+Ntd/68n9Y0otTj+G/bbHf\n6cb1/nP6d4vtH7U4RuP2jTXCSyT9jaQvpGb+n6noRKcO3pdG/0VFsN2gIizPaXHcN0p6oaRak3Z9\nb+nHSlon6QEVzfgfi4haE/57Jf3P9Ht4c0TcrqIT2XkqOqv9UtJyTf0CUn/cCUlvSueyRcX1+te2\nOBegJUf01npl+xGSPiHpOBV/pK+OiKtLKBsAAJVVxgD6D6voQfhiFwPy9y9hnwAAVFpPNeDUUWJD\nRMx4zlkAAMZRr9eAj5F0t4tZa35i+x8aeqICAIAmeg3gWZKeIunjEfEUFb0B39FzqQAAqLherwHf\nLun2iPhhen6RGgLYdiXvqAIAwHQiYtqhfD0FcET82vZm2
4+PiBtVzKF6fbeFqDLbKyNiZe5y5ML5\nj+/5j/O5S5w/59++8llGL+izJV1oex9Jv5D0qjbrAwAw9noO4Ij4qaSnlVAWAADGBjNh9d/63AXI\nbH3uAmS2PncBMlqfuwCZrc9dgMzW5y7AsOt5Jqy2B7BjnK8BAwDGTyfZRw0YAIAMCGAAADIggAEA\nyIAABgAgAwIYAIAMCGAAADIggAEAyIAABgAgAwIYAIAMCGAAADIggAEAyIAABgAgAwIYAIAMCGAA\nADIggAEAyIAABgAgAwIYAIAMCGAAADIggAEAyGBW7gIA6N3mRcsWS1qenq6ef8WatTnLA6A9R0R/\nD2BHRLivBwHGWArfiyXNTot2SFpKCAP5dJJ9NEEDo2+5doev0s/LW6wLYEgQwAAAZEAAA6NvtYpm\n55odaRmAIcY1YKAC6IQFDJdOso8ABgCgZHTCAgBgSBHAAABkQAADAJABAQwAQAYEMAAAGRDAAABk\nQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAA\nGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwAQAYEMAAAGRDAAABkQAADAJABAQwA\nQAYEMAAAGRDAAABkUEoA297L9gbbl5axPwAAqq6sGvAbJW2UFCXtDwCASus5gG0fIel0SZ+Q5J5L\nBADAGCijBvwhSW+VNFnCvgAAGAs9BbDtF0j6TURsELVfAAA6NqvH7U+SdIbt0yXtJ+lA25+JiFfW\nr2R7Zd3T9RGxvsfjAgAwNGwvlLSwq20iyuk3ZfsUSW+JiBc2LI+IoHYMABgbnWRf2eOA6QUNAEAH\nSqsBtzwANWAAwJjJUQMGAAAdIIABAMiAAAYAIAMCGACADAhgAAAyIIABAMiAAAYAIAMCGACADAhg\nAAAyIIABAMiAAAYAIAMCGACADAhgAAAymJW7AFWx8pIjFktanp6uXvmi29fmLA8AYLhxO8ISpPC9\nWNLstGiHpKWEMACMp06yjxpwOZZrd/gq/bxc0sADmJo4AIwGrgFXSF1N/HnpcXFaBgAYMgRwOVar\naHau2ZGWDVqrmjgAYMgQwCVIzbxLJa1LD67/AgCmxTXgkqTAzR26qyWdrKmdwXLUxAEAbdALumLo\nhAUA+XWSfQQwAAAl6yT7uAYMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQ\nAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAA\nZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAA\nABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQQc8BbHu+7W/avt72dbbPKaNgAABUmSOi\ntx3Yh0k6LCKusT1X0o8lvSgiNqXXIyLce1EBABgNnWRfzzXgiPh1RFyTft4qaZOkR/e6XwAAqqzU\na8C2j5Z0gqTvl7lfAACqprQATs3PF0l6Y6oJAwCAFmaVsRPbe0v6kqTPRcQlTV5fWfd0fUSsL+O4\nAAAMA9sLJS3sapsSOmFZ0qclbYmINzV5nU5YAICx0kn2lRHAJ0v6tqRrJdV2dm5EXNZpIQAAqJKB\nBHAZhQAAoEoGMgwJAAB0jwAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDI\ngAAGACCDUu6GBAAYDnMmFiyWtDw9Xb19cuPanOVBa8wFDQAVkcL3Ykmz06IdkpYSwoPHXNAAMF6W\na3f4Kv28vMW6yIwmaADAyBrlJndqwABQHatVNDvX7EjLRtqciQWL50wsuDw9FtcvV9Hk/rz0uLj+\n9WHHNWAAqJBRrhE2M
9117TkTCy5XEbz11m2f3HjaIMvYTCfZRxM0AFRICtyRDt0Gra5rj/w50gQN\nABhVI93kTg0YwNjYvGjZlObZ+VesGflaVDsVaJJeLelkTW2CXi0Vtf05EwuWakTPj2vAAMZCCt89\nriVWOYSrMi54FL9EcA0YAHar7LXEaVTinCt4XVsS14ABAMiCAAYwLka6w84MjeM5jwyuAQMYG3TC\nGo3rp1XQSfYRwACAysn9xYMABgC0lDuk+mUYen9zNyQAQFP9mEe51ZzNGYzEXaEIYAAYT6WG1Kjf\nGCEHAhgAUIZhqnWORO9vAhgAxlNjSP1B0rwhaD7uWbrWu1TSuvQYytm/6IQ1RLzKsyQdI2luWrRV\n0i2xInblKxWAqmjsdJX+XS5pnqTjJO2blnXdaWkYOj4NE3pBjwiv8sGSTpK0WNL+DS9vUzEF25Wx\nIn476LIBqIZB3Fe3qr2qZ4Je0CPAq3y8pPMl/ScV/yE2Nzx2pNfOT+sCwEz0/RptCtyHa9aj3pTd\nbwRwRilQl0u6X7vDtlEtlO+XtJwQBtAHpXRaoid0dwjgTFKz89mS7lHRzNzOtrTu2WlbAOhGy5At\nsdPSMPWEHnrcjjCfk1S8/52Eb802SYdIeoakb/SjUACqqdXN67lumw+dsDLwKu+t3d9GmzU7T2d2\neryZ3tEAelF2z2V6Qu9GJ6zhdbSK3s7dhq/SNvurGK4EAL0otcl4VMbfDguaoPOY236VthqHKwFA\ndilwCd0OUAMGgPE1ElM2VhUBnMfWEvbRTectANgDTcZ50QSdxy0qAnS2ZtYJa2vaBwD0pNsm4371\nmh7H3tj0gs7Eq/x8FTNcbe5y0/mSLooVwTAkAC31I9D61cu5ir2n6QU93K6UtEvddaaam7a5ui8l\nAlAJZc9INWdiweI0X/SF6s9EG2M5gQcBnEm6scIFkg5VZyE8V8UdSy7gpgwA2igt0BrCfF7vRUMN\nAZxRrIjrVPQ4PFBF0/LsJqvNTq8dIOkDaRsAY6pWGx3gfXsbw7xeWb2mx7I3NteAh0Ca2/lPJS3R\n1NqwJT0g6TJJV1PzBaql2+u0nV4rLfOaaotbFW6R9JNOytzFcSrVCYv7AY8Yr/IsFTNc1UJ4m6Rb\nmHISqJ6ZhGQ39+0tK9Cq2EFqEDrJPoYhDZEUtDflLgeA5kqupbW6TltKsPU6I1XDub5b0sL0c9fn\nXbXabVkIYADoQJOa4MlzJhYMuia4WtLJmlobnfG10lbB2OxcNfMm7GF434YSnbAAoDNlD5XpuuNR\nmTNXtRmqVOa5juUQo05QAwaADFrdn7eT7VROM3Vfm8DRHgEMAJ1p2/zb7bXOIb5zUOO5TkpaX9K+\nxmKIUSfoBQ2gsjYvWjYlEOdfsaaMKRObBuww9hauG1lRuwXqVqWRFe3KO2diwXmS/lq7L1VOSnrX\n9smN7+m2HL10whrVDlwMQwIwMsoOy7S/PQKm1/220s0Qobpt+hIuaW6BkyQt1p4z7W1TUeu+cvaq\nJ5zY6vgtzmdS0umDCsFh/FLTKeaCBjAS6sLy4Q5BaVkvhrrzT9nzNdd4lY+XdL6Km73sUHHDl/rH\njvTa+TtWbLpj++TG09Kjk1Cb0GDfw6H+HfaKAAbQN5sXLVu8edGyy9NjunCpwgdt017N00wdWfo5\np/BdLul+7Q7bRrVQvl/S8rRNM6tV1HjRJwQwgL7oU622GwOdX7jZEKH0Uje13Bnf7CA1O58t6R4V\nzcztbEvrnp22nSKdz7s0NYQH3YGq0nNE9xzAtpfYvsH2TbbfXkahAFRCNzW80j9o07XeKYHYr+u/\nNdsnN65taNKd7j1YLekPDbs4rodm6JNUjGzpJHxrtqVtntHsxdTh6nSVMO54Jsoc9zyMeuqEZXsv\nST+X9FxJd0j6oaS/iIhNdevQCQsYQ5sXLWvaKWn+FWuadkoquxPWMGjXMWvOxIIfS3p
Kq9c75VXe\nW7u/xDRrdp7O7PR4M/POl2cQc0GfKOnmiLg1HfALks6UtGm6jQCMha7Gf6bAHfnQbdDuPdhS0nGO\nVtHb+d4ZbLtDxX3JjxFz0Q9UrwF8uIqL+TW3S3p6j/sEUAHzr1izdvOiZVNmejrylE//m1Z9+nFq\nMi41SyH7rIPZrsqapGJu+1XaahyuhD7rNYD7O4gYwEir1WrrxqV+UE3GpXqV10q6sor3vJ5utquZ\nTkeJaug1gO+QNL/u+XwVteApbK+se7o+Itb3eFwAIyINczlbxefNPdqzmXS2inGpZ3iVL4gVcd2A\ni5hVSdNRbi2hKN103kID2wu1+5aNnW3TYyesWSo6YZ0q6VeSfiA6YQFI6saldjI0Zn8V1yJXj1sI\n9ypNOflBzbwT1n6Sllf1UkAOfZ8JKyJ2SXqDim9vGyX9v/rwBTC+uh2XesL9hx521p3HPufP7j7y\nG1874wV/3vcCVkgKzrUqvsB061BJlxG+g9fz3ZAi4huSvlFCWQBUS8fjUk+4/9DHPOu3j37pRHjW\nHdqmu/fZceHmRcu2ljEUaSbDm0Z0SNSVks5Q0ZLQaXPyXEm7JF3dr0KhNWbCAlC6NC51sYrab1tP\nfGDeSRPhWZI0b+e++vYhv9p3px96S6/lmMlsXP2cwWuaaSl7ljqwXaCiRttJj+a5KmbeuqDTzm/t\nyt/P86siAhhAPxytIgS6vR6p/SZnafvELt04577Z7dduq6v5llPQXtjNNp3q180X6qVr56slHaii\nU2yz93B2eu0ASR/o9Hp7u/IP4vyqhgAG0A9djUu99oAtV0569zXISceu7x78qwvLL1ZrdTXfGc/H\n3MZAbjiRAvUdkr6k3WFbexyposPVRZLeESvi+i523a78VbihxkD1fA0YAHq14cB7fiHpC098YN5J\nkvTjA39z00WH/eKqd/e+624mumgMkJpsNwCY6f2CU5Py173Kl6uY4arWJL1NFZ74ZNQQwAD6oetx\nqRsOvOcXKYilorbW87jUZrNxddmhaoukl3fbCasxONO/81TcWajW8jhtsKd9fEXSvmnRs+dMLDiz\nm4k6YkXsmjOx4FhNDfGZTjfZ7stMWbN6jY2exgF3dADGAQMjqZeewKM4LrWuCbo+QLq+g1LdtdDa\nfmp3PNq3brWbJb1hujBtcaOGn2yf3PgnPZRlh9rcUajFl4fa8/XaPdnEHjXymdbYq6iT7COAAeyh\njDDyKj9fxQxXm9ut22C+pItiRQx8eGMZw49aBGejSUmntwnCe7Tn9egt2yc3djzWt93dmJqs3+7L\nQ9sAR2EQd0MCUE2tOtR088E7cuNSe70jUwqwJ3ew6oTav5+3ac8Avm2GRetU4+9934bXZ/J3gBbo\nBQ2gLwYxLnUILdeen6uT6dGt87S7Bqr083ld7qN2j+AarssOEQIYQDOlfHD3c1zqCLlG0rs0NYTb\nvp+pmfdMSevSo6sOWHX7WFq3j3bNx42/953dlhud4xowgKbKnI4xzQv9p5KWaGpt2JIekHSZpKtH\nuOYrafpOT6PSQamunPMkHaepzdBrtk9ufFWWgo0YOmEBGCqpd3Slx6WOStC206ID16Sk09PP75F0\nlIrr0ueN6nn2CwEMAJiRFgEsST/RnjXjP2gGTeRV1vfbEQIA+mMQNzZoc4zVat557Cjt2Tt6XzHt\nZNcIYAAYMoO4sUG7Y6TabLPOY/0eCjU2CGAAGD6DuLFB22Nsn9z4HhXXfB/uRa09h0cpPad3dJeY\niAMA0FKqCTdOOXmm6ITVMwIYAHrQp17Pg7ixwYyP0SyU0T16QQMYG2WObZZmdrODLvfddbB3s11V\nhkwNI4YhAUBS1t2O6nV7s4N+6+cXAnSHYUgAsNsgOjaVoochSCNzjiCAAaAXpd/sYBBDkDAcCGAA\n46L0sJzBzQ460UstlrsfjRCuAQOYouyOSv3a5yi
XYzq9XlemY9VwoBMWMEbKCJd+dFTqxz6rjI5U\n1UAnLGBM1IXcw9cN07Ju9aMTDx2D2qjvdJUWld2sjSHERBxANbQKOT64h1yTGu/JKkI3y1AmDA41\nYAD1+tGJh45B06OFYEwRwEA1lBJy6brslObPXq/V9mOfQBXQCQuoiFHo4Ys90emqmugFDQAjgPmb\nq4cABoAKobY8OhiGBADVQoetCmEYEoCsuHaNcUUNGEA2JU4gklUPdy/qVmNv9z9ImjeA46IPuAYM\nYIpB1kg3L1rWdN7j+VesGZlJKAZ9XbauE9Y8ScdJ2ncQx0V3Osm+sWiCXnnJEVM+UFa+6Hb+QIEm\nmszbfPLmRcsYtzu9gc5ClgJ2bZq2ct+6l5j9bMRUvgk6he+UJq60DMCeBt3Jh1myMLYqH8Ci1yAw\ntCoyS1auLxF8eRlxY9EEDaBjq1XcDKD+emZfP9RT4I5a6D5s++TGtXMmFizVgCfHyHVclKfynbDq\nmqCndJDgOjDQHMOCgN4xE1ZCJywAwCARwAAGjho0QAADGLAmw5h2aDQ7VgE9YS5oAIPGqAOgQ/SC\nBoBMuLXgeCOAAZRp4MOYRs00U0mePGdiAVNJjhGuAQMoFZ2wWmsyb3SjddsnN47MPNhojU5YADBE\n0vzNjTefqEcAVwSdsABgdNBcP2YIYAAYnGb38/2J0jzYXP8dLzRBA8AA0fN5PHANGACADLgGDADA\nkCKAAQDIgAAGACADAhgAgAwIYAAAMugpgG2/3/Ym2z+1/WXbB5VVMAAAqqzXGvDlko6LiCdJulHS\nub0XCQCA6uvpbkgRsa7u6fcl/XlvxQHQDW58AIyu0ibisH2ppM9HxD81LGciDqAPUvjW31lnh6Sl\nhDCQXyfZ17YGbHudpMOavHReRFya1nmnpAcbwxdAXy3X1NvazU7LCGBgBLQN4IiY7tZZsr1M0umS\nTp1mnZV1T9dHxPrOigcAwPCzvVDSwm626ekasO0lkt4q6ZSI+H2r9SJiZS/HAdDUakkna2oTNLez\nAzJIFcv1tee2V7TbpqdrwLZvkrSPpHvToqsi4nUN63ANGOgTOmEBw4m7IQEAkAF3QwIAYEgRwAAA\nZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAAABkQwAAAZEAAAwCQAQEMAEAGBDAA\nABkQwAAAZEAAAwCQwazcBcBoW3nJEVNuCL/yRbdzQ3gA6IAjor8H6OCmxBhNKXwvljQ7LdohaSkh\nDGDcdZJ9NEGjF8u1O3yVfl7eYl0AQB0CGACADAhg9GK1imbnmh1pGQCgDa4Boyd0wgKAPXWSfQQw\nAAAloxMWAABDigAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACAD\nAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDI\ngAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAAMiCAAQDIgAAGACADAhgAgAwIYAAA\nMiCAAQDIgAAGACADAhgAgAwIYAAAMug5gG0vtz1p+5AyCgQAwDjoKYBtz5f0PEm3lVOc6rG9MHcZ\ncuL8x/f8x/ncJc5/3M+/E73WgD8o6W1lFKTCFuYuQGYLcxcgs4W5C5DRwtwFyGxh7gJktjB3AYbd\njAPY9pmSbo+Ia0ssDwAAY2HWdC/aXifpsCYvvVPSuZJOq1+9xHIBAFBpjojuN7KPl/RvkranRUdI\nukPSiRHxm4Z1uz8AAAAjLiKmrZjOKID32Il9i6Q/iYh7e94ZAABjoKxxwNRyAQDoQik1YAAA0J2+\nzYRl+yzb19t+yPZTGl471/ZNtm+wfVqrfVSF7RNt/8D2Bts/tP203GUaJNtn295k+zrbf5O7PDmM\n64Q1tt+ffvc
/tf1l2wflLtMg2F6SPt9usv323OUZJNvzbX8zff5fZ/uc3GUaNNt7pc/7S6dbr59T\nUf5M0lJJ324o2AJJL5G0QNISSR+3XfUpMd8n6V0RcYKkv0rPx4Lt50g6Q9ITI+J4SR/IXKSBG/MJ\nay6XdFxEPEnSjSpGT1Sa7b0kfVTF59sCSX9h+wl5SzVQOyW9KSKOk/QMSa8fs/OXpDdK2qg2l2f7\nFnwRcUMiCuswAAAC4UlEQVRE3NjkpTMlfT4idkbErZJulnRiv8oxJO6UVPvm/wgVPcbHxWslvTci\ndkpSRNyduTw5jO2ENRGxLiIm09PvqxgxUXUnSro5Im5Nf/dfUPG5NxYi4tcRcU36eaukTZIenbdU\ng2P7CEmnS/qE2gzPzVHzfLSk2+ue3y7p8AzlGKR3SFpt+5eS3q8xqAXUeZykZ9u+2vZ620/NXaBB\nYsKaKV4t6eu5CzEAh0vaXPd8HD7jmrJ9tKQTVHz5GhcfkvRWSZPtVpx2Io52ppmo47yImLbtu8HI\n9wRrM2nJOZLOiYiLbZ8l6VMqmiQroc25z5J0cEQ8I137/qKkYwdZvn4b9wlrOvkcsP1OSQ9GxD8N\ntHB5jPznWRlsz5V0kaQ3pppw5dl+gaTfRMSGTubC7imAI2ImIXKHpPl1z2uTeIy06d4L25+LiOem\npxepaJqojDbn/lpJX07r/TB1RJoXEVsGVsA+a3X+acKaYyT91LZU/K3/2PYeE9aMsnafA7aXqWiS\nO3UgBcqv8TNuvqa2+lWe7b0lfUnS5yLiktzlGaCTJJ1h+3RJ+0k60PZnIuKVzVYeVBN0/bf+r0p6\nqe19bB+joonyBwMqRy432z4l/bxIRWeUcXGJinOW7cdL2qdK4TudiLguIh4ZEcdExDEqPoSfUqXw\nbcf2EhXNcWdGxO9zl2dAfiTpcbaPtr2Pik6nX81cpoFx8W3zk5I2RsTf5i7PIEXEeRExP/1/f6mk\nK1qFr9RjDXg6tpdK+oikQyX9i+0NEfFnEbHR9hdV9BDbJel1Uf3ByK+R9DHb+0rakZ6Pi09J+pTt\nn0l6UFLLP8YxUPW/82YukLSPpHWpFeCqiHhd3iL1V0Tssv0GSWsl7SXpkxGxKXOxBumZkl4h6Vrb\nG9KycyPisoxlymXa//NMxAEAQAZVH38LAMBQIoABAMiAAAYAIAMCGACADAhgAAAyIIABAMiAAAYA\nIAMCGACADP4/3v8IdqjGURcAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.decomposition import PCA\n", + "pca = PCA(n_components=2).fit(num_data)\n", + "pca_2d = pca.transform(num_data)\n", + "\n", + "cluster_2d = pca.transform(kmeans.cluster_centers_)\n", + "colors = [\"#E2415F\", \"#7FAF1B\", \"#090129\", \"#843023\", \"#037892\"]\n", + "markers = [\"v\", \"D\", \"s\"]\n", + "\n", + "plt.figure(figsize=(8, 6))\n", + "\n", + "for i in range(pca_2d.shape[0]):\n", + " c = clusters[i]\n", + " color = colors[c]\n", + " plt.scatter(pca_2d[i,0], pca_2d[i, 1], color=color)\n", + " \n", + "for i in range(cluster_2d.shape[0]):\n", + " plt.scatter(cluster_2d[i, 0], 
cluster_2d[i, 1], color='g', alpha='0.5', s=250, marker='o')\n", + "\n", + "plt.title(\"Iris dataset with three clusters\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Testing two cereals: one very fibrous, protein-rich healthy cereal and one very sugary cereal." + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "cluster = kmeans.predict(test)" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 2], dtype=int32)" + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cluster" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The healthy cereal was assigned to cluster 0 (the dense, healthy cereals) and the sugary one to cluster 2 (the less healthy, less substantial cereals)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MeanShift" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "MeanShift(bandwidth=2.1, bin_seeding=False, cluster_all=True, min_bin_freq=1,\n", + " seeds=None)" + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ms = MeanShift(2.1)\n", + "ms.fit(num_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 8, 9, 5, 10, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n", + " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 6,\n", + " 4, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,\n", + " 1, 1, 2, 0, 0, 1, 9, 1, 1, 7, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 2, 2, 2, 0, 0, 0, 0, 0])" + ] + }, + "execution_count": 210, + "metadata": {}, + "output_type":
"execute_result" + } + ], + "source": [ + "ms.predict(num_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 211, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0])" + ] + }, + "execution_count": 211, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ms.predict(test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2.1 is the lowest bandwidth size at which the meanshift predictor distinguishes between the really sugary cereal and the fibrous, hearty cereal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}