diff --git a/Honey_nut_clusters.ipynb b/Honey_nut_clusters.ipynb new file mode 100644 index 0000000..e4a017b --- /dev/null +++ b/Honey_nut_clusters.ipynb @@ -0,0 +1,3538 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.cluster import KMeans\n", + "from sklearn import preprocessing\n" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = pd.DataFrame.from_csv('cereals.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_nan = df.replace(to_replace=-1, value=np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_BranNC7041130105628025310.33
100%_Natural_BranQC12035152881350311.00
All-BranKC704126097532025310.33
All-Bran_with_Extra_FiberKC5040140148033025310.50
Almond_DelightRC110222001148NaN25310.75
\n", + "
" + ], + "text/plain": [ + " mfr type calories protein fat sodium fiber \\\n", + "name \n", + "100%_Bran N C 70 4 1 130 10 \n", + "100%_Natural_Bran Q C 120 3 5 15 2 \n", + "All-Bran K C 70 4 1 260 9 \n", + "All-Bran_with_Extra_Fiber K C 50 4 0 140 14 \n", + "Almond_Delight R C 110 2 2 200 1 \n", + "\n", + " carbo sugars potass vitamins shelf weight \\\n", + "name \n", + "100%_Bran 5 6 280 25 3 1 \n", + "100%_Natural_Bran 8 8 135 0 3 1 \n", + "All-Bran 7 5 320 25 3 1 \n", + "All-Bran_with_Extra_Fiber 8 0 330 25 3 1 \n", + "Almond_Delight 14 8 NaN 25 3 1 \n", + "\n", + " cups \n", + "name \n", + "100%_Bran 0.33 \n", + "100%_Natural_Bran 1.00 \n", + "All-Bran 0.33 \n", + "All-Bran_with_Extra_Fiber 0.50 \n", + "Almond_Delight 0.75 " + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_nan.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "calories 106.883117\n", + "protein 2.545455\n", + "fat 1.012987\n", + "sodium 159.675325\n", + "fiber 2.151948\n", + "carbo 14.802632\n", + "sugars 7.026316\n", + "potass 98.666667\n", + "vitamins 28.246753\n", + "shelf 2.207792\n", + "weight 1.029610\n", + "cups 0.821039\n", + "dtype: float64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_nan.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filled_in_df = df_nan.fillna(df_nan.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_BranNC704113010.05.0000006.000000280.0000002531.000.33
100%_Natural_BranQC12035152.08.0000008.000000135.000000031.001.00
All-BranKC70412609.07.0000005.000000320.0000002531.000.33
All-Bran_with_Extra_FiberKC504014014.08.0000000.000000330.0000002531.000.50
Almond_DelightRC110222001.014.0000008.00000098.6666672531.000.75
Apple_Cinnamon_CheeriosGC110221801.510.50000010.00000070.0000002511.000.75
Apple_JacksKC110201251.011.00000014.00000030.0000002521.001.00
Basic_4GC130322102.018.0000008.000000100.0000002531.330.75
Bran_ChexRC90212004.015.0000006.000000125.0000002511.000.67
Bran_FlakesPC90302105.013.0000005.000000190.0000002531.000.67
Cap'n'CrunchQC120122200.012.00000012.00000035.0000002521.000.75
CheeriosGC110622902.017.0000001.000000105.0000002511.001.25
Cinnamon_Toast_CrunchGC120132100.013.0000009.00000045.0000002521.000.75
ClustersGC110321402.013.0000007.000000105.0000002531.000.50
Cocoa_PuffsGC110111800.012.00000013.00000055.0000002521.001.00
Corn_ChexRC110202800.022.0000003.00000025.0000002511.001.00
Corn_FlakesKC100202901.021.0000002.00000035.0000002511.001.00
Corn_PopsKC11010901.013.00000012.00000020.0000002521.001.00
Count_ChoculaGC110111800.012.00000013.00000065.0000002521.001.00
Cracklin'_Oat_BranKC110331404.010.0000007.000000160.0000002531.000.50
Cream_of_Wheat_(Quick)NH10030801.021.0000000.00000098.666667021.001.00
CrispixKC110202201.021.0000003.00000030.0000002531.001.00
Crispy_Wheat_&_RaisinsGC100211402.011.00000010.000000120.0000002531.000.75
Double_ChexRC100201901.018.0000005.00000080.0000002531.000.75
Froot_LoopsKC110211251.011.00000013.00000030.0000002521.001.00
Frosted_FlakesKC110102001.014.00000011.00000025.0000002511.000.75
Frosted_Mini-WheatsKC1003003.014.0000007.000000100.0000002521.000.80
Fruit_&_Fibre_Dates,_Walnuts,_and_OatsPC120321605.012.00000010.000000200.0000002531.250.67
Fruitful_BranKC120302405.014.00000012.000000190.0000002531.330.67
Fruity_PebblesPC110111350.013.00000012.00000025.0000002521.000.75
.............................................
Multi-Grain_CheeriosGC100212202.015.0000006.00000090.0000002511.001.00
Nut&Honey_CrunchKC120211900.015.0000009.00000040.0000002521.000.67
Nutri-Grain_Almond-RaisinKC140322203.021.0000007.000000130.0000002531.330.67
Nutri-grain_WheatKC90301703.018.0000002.00000090.0000002531.001.00
Oatmeal_Raisin_CrispGC130321701.513.50000010.000000120.0000002531.250.50
Post_Nat._Raisin_BranPC120312006.011.00000014.000000260.0000002531.330.67
Product_19KC100303201.020.0000003.00000045.00000010031.001.00
Puffed_RiceQC501000.013.0000000.00000015.000000030.501.00
Puffed_WheatQC502001.010.0000000.00000050.000000030.501.00
Quaker_Oat_SquaresQC100411352.014.0000006.000000110.0000002531.000.50
Quaker_OatmealQH1005202.714.8026327.026316110.000000011.000.67
Raisin_BranKC120312105.014.00000012.000000240.0000002521.330.75
Raisin_Nut_BranGC100321402.510.5000008.000000140.0000002531.000.50
Raisin_SquaresKC902002.015.0000006.000000110.0000002531.000.50
Rice_ChexRC110102400.023.0000002.00000030.0000002511.001.13
Rice_KrispiesKC110202900.022.0000003.00000035.0000002511.001.00
Shredded_WheatNC802003.016.0000000.00000095.000000010.831.00
Shredded_Wheat_'n'BranNC903004.019.0000000.000000140.000000011.000.67
Shredded_Wheat_spoon_sizeNC903003.020.0000000.000000120.000000011.000.67
SmacksKC11021701.09.00000015.00000040.0000002521.000.75
Special_KKC110602301.016.0000003.00000055.0000002511.001.00
Strawberry_Fruit_WheatsNC9020153.015.0000005.00000090.0000002521.001.00
Total_Corn_FlakesGC110212000.021.0000003.00000035.00000010031.001.00
Total_Raisin_BranGC140311904.015.00000014.000000230.00000010031.501.00
Total_Whole_GrainGC100312003.016.0000003.000000110.00000010031.001.00
TriplesGC110212500.021.0000003.00000060.0000002531.000.75
TrixGC110111400.013.00000012.00000025.0000002521.001.00
Wheat_ChexRC100312303.017.0000003.000000115.0000002511.000.67
WheatiesGC100312003.017.0000003.000000110.0000002511.001.00
Wheaties_Honey_GoldGC110212001.016.0000008.00000060.0000002511.000.75
\n", + "

77 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " mfr type calories protein fat \\\n", + "name \n", + "100%_Bran N C 70 4 1 \n", + "100%_Natural_Bran Q C 120 3 5 \n", + "All-Bran K C 70 4 1 \n", + "All-Bran_with_Extra_Fiber K C 50 4 0 \n", + "Almond_Delight R C 110 2 2 \n", + "Apple_Cinnamon_Cheerios G C 110 2 2 \n", + "Apple_Jacks K C 110 2 0 \n", + "Basic_4 G C 130 3 2 \n", + "Bran_Chex R C 90 2 1 \n", + "Bran_Flakes P C 90 3 0 \n", + "Cap'n'Crunch Q C 120 1 2 \n", + "Cheerios G C 110 6 2 \n", + "Cinnamon_Toast_Crunch G C 120 1 3 \n", + "Clusters G C 110 3 2 \n", + "Cocoa_Puffs G C 110 1 1 \n", + "Corn_Chex R C 110 2 0 \n", + "Corn_Flakes K C 100 2 0 \n", + "Corn_Pops K C 110 1 0 \n", + "Count_Chocula G C 110 1 1 \n", + "Cracklin'_Oat_Bran K C 110 3 3 \n", + "Cream_of_Wheat_(Quick) N H 100 3 0 \n", + "Crispix K C 110 2 0 \n", + "Crispy_Wheat_&_Raisins G C 100 2 1 \n", + "Double_Chex R C 100 2 0 \n", + "Froot_Loops K C 110 2 1 \n", + "Frosted_Flakes K C 110 1 0 \n", + "Frosted_Mini-Wheats K C 100 3 0 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 \n", + "Fruitful_Bran K C 120 3 0 \n", + "Fruity_Pebbles P C 110 1 1 \n", + "... .. ... ... ... ... 
\n", + "Multi-Grain_Cheerios G C 100 2 1 \n", + "Nut&Honey_Crunch K C 120 2 1 \n", + "Nutri-Grain_Almond-Raisin K C 140 3 2 \n", + "Nutri-grain_Wheat K C 90 3 0 \n", + "Oatmeal_Raisin_Crisp G C 130 3 2 \n", + "Post_Nat._Raisin_Bran P C 120 3 1 \n", + "Product_19 K C 100 3 0 \n", + "Puffed_Rice Q C 50 1 0 \n", + "Puffed_Wheat Q C 50 2 0 \n", + "Quaker_Oat_Squares Q C 100 4 1 \n", + "Quaker_Oatmeal Q H 100 5 2 \n", + "Raisin_Bran K C 120 3 1 \n", + "Raisin_Nut_Bran G C 100 3 2 \n", + "Raisin_Squares K C 90 2 0 \n", + "Rice_Chex R C 110 1 0 \n", + "Rice_Krispies K C 110 2 0 \n", + "Shredded_Wheat N C 80 2 0 \n", + "Shredded_Wheat_'n'Bran N C 90 3 0 \n", + "Shredded_Wheat_spoon_size N C 90 3 0 \n", + "Smacks K C 110 2 1 \n", + "Special_K K C 110 6 0 \n", + "Strawberry_Fruit_Wheats N C 90 2 0 \n", + "Total_Corn_Flakes G C 110 2 1 \n", + "Total_Raisin_Bran G C 140 3 1 \n", + "Total_Whole_Grain G C 100 3 1 \n", + "Triples G C 110 2 1 \n", + "Trix G C 110 1 1 \n", + "Wheat_Chex R C 100 3 1 \n", + "Wheaties G C 100 3 1 \n", + "Wheaties_Honey_Gold G C 110 2 1 \n", + "\n", + " sodium fiber carbo sugars \\\n", + "name \n", + "100%_Bran 130 10.0 5.000000 6.000000 \n", + "100%_Natural_Bran 15 2.0 8.000000 8.000000 \n", + "All-Bran 260 9.0 7.000000 5.000000 \n", + "All-Bran_with_Extra_Fiber 140 14.0 8.000000 0.000000 \n", + "Almond_Delight 200 1.0 14.000000 8.000000 \n", + "Apple_Cinnamon_Cheerios 180 1.5 10.500000 10.000000 \n", + "Apple_Jacks 125 1.0 11.000000 14.000000 \n", + "Basic_4 210 2.0 18.000000 8.000000 \n", + "Bran_Chex 200 4.0 15.000000 6.000000 \n", + "Bran_Flakes 210 5.0 13.000000 5.000000 \n", + "Cap'n'Crunch 220 0.0 12.000000 12.000000 \n", + "Cheerios 290 2.0 17.000000 1.000000 \n", + "Cinnamon_Toast_Crunch 210 0.0 13.000000 9.000000 \n", + "Clusters 140 2.0 13.000000 7.000000 \n", + "Cocoa_Puffs 180 0.0 12.000000 13.000000 \n", + "Corn_Chex 280 0.0 22.000000 3.000000 \n", + "Corn_Flakes 290 1.0 21.000000 2.000000 \n", + "Corn_Pops 90 1.0 13.000000 12.000000 
\n", + "Count_Chocula 180 0.0 12.000000 13.000000 \n", + "Cracklin'_Oat_Bran 140 4.0 10.000000 7.000000 \n", + "Cream_of_Wheat_(Quick) 80 1.0 21.000000 0.000000 \n", + "Crispix 220 1.0 21.000000 3.000000 \n", + "Crispy_Wheat_&_Raisins 140 2.0 11.000000 10.000000 \n", + "Double_Chex 190 1.0 18.000000 5.000000 \n", + "Froot_Loops 125 1.0 11.000000 13.000000 \n", + "Frosted_Flakes 200 1.0 14.000000 11.000000 \n", + "Frosted_Mini-Wheats 0 3.0 14.000000 7.000000 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 160 5.0 12.000000 10.000000 \n", + "Fruitful_Bran 240 5.0 14.000000 12.000000 \n", + "Fruity_Pebbles 135 0.0 13.000000 12.000000 \n", + "... ... ... ... ... \n", + "Multi-Grain_Cheerios 220 2.0 15.000000 6.000000 \n", + "Nut&Honey_Crunch 190 0.0 15.000000 9.000000 \n", + "Nutri-Grain_Almond-Raisin 220 3.0 21.000000 7.000000 \n", + "Nutri-grain_Wheat 170 3.0 18.000000 2.000000 \n", + "Oatmeal_Raisin_Crisp 170 1.5 13.500000 10.000000 \n", + "Post_Nat._Raisin_Bran 200 6.0 11.000000 14.000000 \n", + "Product_19 320 1.0 20.000000 3.000000 \n", + "Puffed_Rice 0 0.0 13.000000 0.000000 \n", + "Puffed_Wheat 0 1.0 10.000000 0.000000 \n", + "Quaker_Oat_Squares 135 2.0 14.000000 6.000000 \n", + "Quaker_Oatmeal 0 2.7 14.802632 7.026316 \n", + "Raisin_Bran 210 5.0 14.000000 12.000000 \n", + "Raisin_Nut_Bran 140 2.5 10.500000 8.000000 \n", + "Raisin_Squares 0 2.0 15.000000 6.000000 \n", + "Rice_Chex 240 0.0 23.000000 2.000000 \n", + "Rice_Krispies 290 0.0 22.000000 3.000000 \n", + "Shredded_Wheat 0 3.0 16.000000 0.000000 \n", + "Shredded_Wheat_'n'Bran 0 4.0 19.000000 0.000000 \n", + "Shredded_Wheat_spoon_size 0 3.0 20.000000 0.000000 \n", + "Smacks 70 1.0 9.000000 15.000000 \n", + "Special_K 230 1.0 16.000000 3.000000 \n", + "Strawberry_Fruit_Wheats 15 3.0 15.000000 5.000000 \n", + "Total_Corn_Flakes 200 0.0 21.000000 3.000000 \n", + "Total_Raisin_Bran 190 4.0 15.000000 14.000000 \n", + "Total_Whole_Grain 200 3.0 16.000000 3.000000 \n", + "Triples 250 0.0 21.000000 3.000000 \n", + 
"Trix 140 0.0 13.000000 12.000000 \n", + "Wheat_Chex 230 3.0 17.000000 3.000000 \n", + "Wheaties 200 3.0 17.000000 3.000000 \n", + "Wheaties_Honey_Gold 200 1.0 16.000000 8.000000 \n", + "\n", + " potass vitamins shelf weight \\\n", + "name \n", + "100%_Bran 280.000000 25 3 1.00 \n", + "100%_Natural_Bran 135.000000 0 3 1.00 \n", + "All-Bran 320.000000 25 3 1.00 \n", + "All-Bran_with_Extra_Fiber 330.000000 25 3 1.00 \n", + "Almond_Delight 98.666667 25 3 1.00 \n", + "Apple_Cinnamon_Cheerios 70.000000 25 1 1.00 \n", + "Apple_Jacks 30.000000 25 2 1.00 \n", + "Basic_4 100.000000 25 3 1.33 \n", + "Bran_Chex 125.000000 25 1 1.00 \n", + "Bran_Flakes 190.000000 25 3 1.00 \n", + "Cap'n'Crunch 35.000000 25 2 1.00 \n", + "Cheerios 105.000000 25 1 1.00 \n", + "Cinnamon_Toast_Crunch 45.000000 25 2 1.00 \n", + "Clusters 105.000000 25 3 1.00 \n", + "Cocoa_Puffs 55.000000 25 2 1.00 \n", + "Corn_Chex 25.000000 25 1 1.00 \n", + "Corn_Flakes 35.000000 25 1 1.00 \n", + "Corn_Pops 20.000000 25 2 1.00 \n", + "Count_Chocula 65.000000 25 2 1.00 \n", + "Cracklin'_Oat_Bran 160.000000 25 3 1.00 \n", + "Cream_of_Wheat_(Quick) 98.666667 0 2 1.00 \n", + "Crispix 30.000000 25 3 1.00 \n", + "Crispy_Wheat_&_Raisins 120.000000 25 3 1.00 \n", + "Double_Chex 80.000000 25 3 1.00 \n", + "Froot_Loops 30.000000 25 2 1.00 \n", + "Frosted_Flakes 25.000000 25 1 1.00 \n", + "Frosted_Mini-Wheats 100.000000 25 2 1.00 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 200.000000 25 3 1.25 \n", + "Fruitful_Bran 190.000000 25 3 1.33 \n", + "Fruity_Pebbles 25.000000 25 2 1.00 \n", + "... ... ... ... ... 
\n", + "Multi-Grain_Cheerios 90.000000 25 1 1.00 \n", + "Nut&Honey_Crunch 40.000000 25 2 1.00 \n", + "Nutri-Grain_Almond-Raisin 130.000000 25 3 1.33 \n", + "Nutri-grain_Wheat 90.000000 25 3 1.00 \n", + "Oatmeal_Raisin_Crisp 120.000000 25 3 1.25 \n", + "Post_Nat._Raisin_Bran 260.000000 25 3 1.33 \n", + "Product_19 45.000000 100 3 1.00 \n", + "Puffed_Rice 15.000000 0 3 0.50 \n", + "Puffed_Wheat 50.000000 0 3 0.50 \n", + "Quaker_Oat_Squares 110.000000 25 3 1.00 \n", + "Quaker_Oatmeal 110.000000 0 1 1.00 \n", + "Raisin_Bran 240.000000 25 2 1.33 \n", + "Raisin_Nut_Bran 140.000000 25 3 1.00 \n", + "Raisin_Squares 110.000000 25 3 1.00 \n", + "Rice_Chex 30.000000 25 1 1.00 \n", + "Rice_Krispies 35.000000 25 1 1.00 \n", + "Shredded_Wheat 95.000000 0 1 0.83 \n", + "Shredded_Wheat_'n'Bran 140.000000 0 1 1.00 \n", + "Shredded_Wheat_spoon_size 120.000000 0 1 1.00 \n", + "Smacks 40.000000 25 2 1.00 \n", + "Special_K 55.000000 25 1 1.00 \n", + "Strawberry_Fruit_Wheats 90.000000 25 2 1.00 \n", + "Total_Corn_Flakes 35.000000 100 3 1.00 \n", + "Total_Raisin_Bran 230.000000 100 3 1.50 \n", + "Total_Whole_Grain 110.000000 100 3 1.00 \n", + "Triples 60.000000 25 3 1.00 \n", + "Trix 25.000000 25 2 1.00 \n", + "Wheat_Chex 115.000000 25 1 1.00 \n", + "Wheaties 110.000000 25 1 1.00 \n", + "Wheaties_Honey_Gold 60.000000 25 1 1.00 \n", + "\n", + " cups \n", + "name \n", + "100%_Bran 0.33 \n", + "100%_Natural_Bran 1.00 \n", + "All-Bran 0.33 \n", + "All-Bran_with_Extra_Fiber 0.50 \n", + "Almond_Delight 0.75 \n", + "Apple_Cinnamon_Cheerios 0.75 \n", + "Apple_Jacks 1.00 \n", + "Basic_4 0.75 \n", + "Bran_Chex 0.67 \n", + "Bran_Flakes 0.67 \n", + "Cap'n'Crunch 0.75 \n", + "Cheerios 1.25 \n", + "Cinnamon_Toast_Crunch 0.75 \n", + "Clusters 0.50 \n", + "Cocoa_Puffs 1.00 \n", + "Corn_Chex 1.00 \n", + "Corn_Flakes 1.00 \n", + "Corn_Pops 1.00 \n", + "Count_Chocula 1.00 \n", + "Cracklin'_Oat_Bran 0.50 \n", + "Cream_of_Wheat_(Quick) 1.00 \n", + "Crispix 1.00 \n", + "Crispy_Wheat_&_Raisins 0.75 \n", + 
"Double_Chex 0.75 \n", + "Froot_Loops 1.00 \n", + "Frosted_Flakes 0.75 \n", + "Frosted_Mini-Wheats 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 0.67 \n", + "Fruitful_Bran 0.67 \n", + "Fruity_Pebbles 0.75 \n", + "... ... \n", + "Multi-Grain_Cheerios 1.00 \n", + "Nut&Honey_Crunch 0.67 \n", + "Nutri-Grain_Almond-Raisin 0.67 \n", + "Nutri-grain_Wheat 1.00 \n", + "Oatmeal_Raisin_Crisp 0.50 \n", + "Post_Nat._Raisin_Bran 0.67 \n", + "Product_19 1.00 \n", + "Puffed_Rice 1.00 \n", + "Puffed_Wheat 1.00 \n", + "Quaker_Oat_Squares 0.50 \n", + "Quaker_Oatmeal 0.67 \n", + "Raisin_Bran 0.75 \n", + "Raisin_Nut_Bran 0.50 \n", + "Raisin_Squares 0.50 \n", + "Rice_Chex 1.13 \n", + "Rice_Krispies 1.00 \n", + "Shredded_Wheat 1.00 \n", + "Shredded_Wheat_'n'Bran 0.67 \n", + "Shredded_Wheat_spoon_size 0.67 \n", + "Smacks 0.75 \n", + "Special_K 1.00 \n", + "Strawberry_Fruit_Wheats 1.00 \n", + "Total_Corn_Flakes 1.00 \n", + "Total_Raisin_Bran 1.00 \n", + "Total_Whole_Grain 1.00 \n", + "Triples 0.75 \n", + "Trix 1.00 \n", + "Wheat_Chex 0.67 \n", + "Wheaties 1.00 \n", + "Wheaties_Honey_Gold 0.75 \n", + "\n", + "[77 rows x 14 columns]" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filled_in_df_removed = filled_in_df.drop(['mfr', 'type'], 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_Bran70411301056280.00000025310.33
100%_Natural_Bran1203515288135.0000000311.00
All-Bran7041260975320.00000025310.33
All-Bran_with_Extra_Fiber50401401480330.00000025310.50
Almond_Delight11022200114898.66666725310.75
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo \\\n", + "name \n", + "100%_Bran 70 4 1 130 10 5 \n", + "100%_Natural_Bran 120 3 5 15 2 8 \n", + "All-Bran 70 4 1 260 9 7 \n", + "All-Bran_with_Extra_Fiber 50 4 0 140 14 8 \n", + "Almond_Delight 110 2 2 200 1 14 \n", + "\n", + " sugars potass vitamins shelf weight cups \n", + "name \n", + "100%_Bran 6 280.000000 25 3 1 0.33 \n", + "100%_Natural_Bran 8 135.000000 0 3 1 1.00 \n", + "All-Bran 5 320.000000 25 3 1 0.33 \n", + "All-Bran_with_Extra_Fiber 0 330.000000 25 3 1 0.50 \n", + "Almond_Delight 8 98.666667 25 3 1 0.75 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df_removed.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "calories int64\n", + "protein int64\n", + "fat int64\n", + "sodium int64\n", + "fiber float64\n", + "carbo float64\n", + "sugars float64\n", + "potass float64\n", + "vitamins int64\n", + "shelf int64\n", + "weight float64\n", + "cups float64\n", + "dtype: object" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df_removed.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Normalizing:" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_mult = filled_in_df_removed*3" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_Bran21012339030151884075930.99
100%_Natural_Bran36091545624244050933.00
All-Bran21012378027211596075930.99
All-Bran_with_Extra_Fiber1501204204224099075931.50
Almond_Delight330666003422429675932.25
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo \\\n", + "name \n", + "100%_Bran 210 12 3 390 30 15 \n", + "100%_Natural_Bran 360 9 15 45 6 24 \n", + "All-Bran 210 12 3 780 27 21 \n", + "All-Bran_with_Extra_Fiber 150 12 0 420 42 24 \n", + "Almond_Delight 330 6 6 600 3 42 \n", + "\n", + " sugars potass vitamins shelf weight cups \n", + "name \n", + "100%_Bran 18 840 75 9 3 0.99 \n", + "100%_Natural_Bran 24 405 0 9 3 3.00 \n", + "All-Bran 15 960 75 9 3 0.99 \n", + "All-Bran_with_Extra_Fiber 0 990 75 9 3 1.50 \n", + "Almond_Delight 24 296 75 9 3 2.25 " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_mult.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3.0" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_mult.iloc[2, 10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def normalize(data_frame):\n", + " for i in range(len(data_frame)):\n", + " data_frame.iloc[i]*1/data_frame.iloc[i, 10]\n", + " return data_frame\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_Bran704113010.05.0000006.000000280.0000002531.000.33
100%_Natural_Bran12035152.08.0000008.000000135.000000031.001.00
All-Bran70412609.07.0000005.000000320.0000002531.000.33
All-Bran_with_Extra_Fiber504014014.08.0000000.000000330.0000002531.000.50
Almond_Delight110222001.014.0000008.00000098.6666672531.000.75
Apple_Cinnamon_Cheerios110221801.510.50000010.00000070.0000002511.000.75
Apple_Jacks110201251.011.00000014.00000030.0000002521.001.00
Basic_4130322102.018.0000008.000000100.0000002531.330.75
Bran_Chex90212004.015.0000006.000000125.0000002511.000.67
Bran_Flakes90302105.013.0000005.000000190.0000002531.000.67
Cap'n'Crunch120122200.012.00000012.00000035.0000002521.000.75
Cheerios110622902.017.0000001.000000105.0000002511.001.25
Cinnamon_Toast_Crunch120132100.013.0000009.00000045.0000002521.000.75
Clusters110321402.013.0000007.000000105.0000002531.000.50
Cocoa_Puffs110111800.012.00000013.00000055.0000002521.001.00
Corn_Chex110202800.022.0000003.00000025.0000002511.001.00
Corn_Flakes100202901.021.0000002.00000035.0000002511.001.00
Corn_Pops11010901.013.00000012.00000020.0000002521.001.00
Count_Chocula110111800.012.00000013.00000065.0000002521.001.00
Cracklin'_Oat_Bran110331404.010.0000007.000000160.0000002531.000.50
Cream_of_Wheat_(Quick)10030801.021.0000000.00000098.666667021.001.00
Crispix110202201.021.0000003.00000030.0000002531.001.00
Crispy_Wheat_&_Raisins100211402.011.00000010.000000120.0000002531.000.75
Double_Chex100201901.018.0000005.00000080.0000002531.000.75
Froot_Loops110211251.011.00000013.00000030.0000002521.001.00
Frosted_Flakes110102001.014.00000011.00000025.0000002511.000.75
Frosted_Mini-Wheats1003003.014.0000007.000000100.0000002521.000.80
Fruit_&_Fibre_Dates,_Walnuts,_and_Oats120321605.012.00000010.000000200.0000002531.250.67
Fruitful_Bran120302405.014.00000012.000000190.0000002531.330.67
Fruity_Pebbles110111350.013.00000012.00000025.0000002521.000.75
.......................................
Multi-Grain_Cheerios100212202.015.0000006.00000090.0000002511.001.00
Nut&Honey_Crunch120211900.015.0000009.00000040.0000002521.000.67
Nutri-Grain_Almond-Raisin140322203.021.0000007.000000130.0000002531.330.67
Nutri-grain_Wheat90301703.018.0000002.00000090.0000002531.001.00
Oatmeal_Raisin_Crisp130321701.513.50000010.000000120.0000002531.250.50
Post_Nat._Raisin_Bran120312006.011.00000014.000000260.0000002531.330.67
Product_19100303201.020.0000003.00000045.00000010031.001.00
Puffed_Rice501000.013.0000000.00000015.000000030.501.00
Puffed_Wheat502001.010.0000000.00000050.000000030.501.00
Quaker_Oat_Squares100411352.014.0000006.000000110.0000002531.000.50
Quaker_Oatmeal1005202.714.8026327.026316110.000000011.000.67
Raisin_Bran120312105.014.00000012.000000240.0000002521.330.75
Raisin_Nut_Bran100321402.510.5000008.000000140.0000002531.000.50
Raisin_Squares902002.015.0000006.000000110.0000002531.000.50
Rice_Chex110102400.023.0000002.00000030.0000002511.001.13
Rice_Krispies110202900.022.0000003.00000035.0000002511.001.00
Shredded_Wheat802003.016.0000000.00000095.000000010.831.00
Shredded_Wheat_'n'Bran903004.019.0000000.000000140.000000011.000.67
Shredded_Wheat_spoon_size903003.020.0000000.000000120.000000011.000.67
Smacks11021701.09.00000015.00000040.0000002521.000.75
Special_K110602301.016.0000003.00000055.0000002511.001.00
Strawberry_Fruit_Wheats9020153.015.0000005.00000090.0000002521.001.00
Total_Corn_Flakes110212000.021.0000003.00000035.00000010031.001.00
Total_Raisin_Bran140311904.015.00000014.000000230.00000010031.501.00
Total_Whole_Grain100312003.016.0000003.000000110.00000010031.001.00
Triples110212500.021.0000003.00000060.0000002531.000.75
Trix110111400.013.00000012.00000025.0000002521.001.00
Wheat_Chex100312303.017.0000003.000000115.0000002511.000.67
Wheaties100312003.017.0000003.000000110.0000002511.001.00
Wheaties_Honey_Gold110212001.016.0000008.00000060.0000002511.000.75
\n", + "

77 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber \\\n", + "name \n", + "100%_Bran 70 4 1 130 10.0 \n", + "100%_Natural_Bran 120 3 5 15 2.0 \n", + "All-Bran 70 4 1 260 9.0 \n", + "All-Bran_with_Extra_Fiber 50 4 0 140 14.0 \n", + "Almond_Delight 110 2 2 200 1.0 \n", + "Apple_Cinnamon_Cheerios 110 2 2 180 1.5 \n", + "Apple_Jacks 110 2 0 125 1.0 \n", + "Basic_4 130 3 2 210 2.0 \n", + "Bran_Chex 90 2 1 200 4.0 \n", + "Bran_Flakes 90 3 0 210 5.0 \n", + "Cap'n'Crunch 120 1 2 220 0.0 \n", + "Cheerios 110 6 2 290 2.0 \n", + "Cinnamon_Toast_Crunch 120 1 3 210 0.0 \n", + "Clusters 110 3 2 140 2.0 \n", + "Cocoa_Puffs 110 1 1 180 0.0 \n", + "Corn_Chex 110 2 0 280 0.0 \n", + "Corn_Flakes 100 2 0 290 1.0 \n", + "Corn_Pops 110 1 0 90 1.0 \n", + "Count_Chocula 110 1 1 180 0.0 \n", + "Cracklin'_Oat_Bran 110 3 3 140 4.0 \n", + "Cream_of_Wheat_(Quick) 100 3 0 80 1.0 \n", + "Crispix 110 2 0 220 1.0 \n", + "Crispy_Wheat_&_Raisins 100 2 1 140 2.0 \n", + "Double_Chex 100 2 0 190 1.0 \n", + "Froot_Loops 110 2 1 125 1.0 \n", + "Frosted_Flakes 110 1 0 200 1.0 \n", + "Frosted_Mini-Wheats 100 3 0 0 3.0 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 120 3 2 160 5.0 \n", + "Fruitful_Bran 120 3 0 240 5.0 \n", + "Fruity_Pebbles 110 1 1 135 0.0 \n", + "... ... ... ... ... ... 
\n", + "Multi-Grain_Cheerios 100 2 1 220 2.0 \n", + "Nut&Honey_Crunch 120 2 1 190 0.0 \n", + "Nutri-Grain_Almond-Raisin 140 3 2 220 3.0 \n", + "Nutri-grain_Wheat 90 3 0 170 3.0 \n", + "Oatmeal_Raisin_Crisp 130 3 2 170 1.5 \n", + "Post_Nat._Raisin_Bran 120 3 1 200 6.0 \n", + "Product_19 100 3 0 320 1.0 \n", + "Puffed_Rice 50 1 0 0 0.0 \n", + "Puffed_Wheat 50 2 0 0 1.0 \n", + "Quaker_Oat_Squares 100 4 1 135 2.0 \n", + "Quaker_Oatmeal 100 5 2 0 2.7 \n", + "Raisin_Bran 120 3 1 210 5.0 \n", + "Raisin_Nut_Bran 100 3 2 140 2.5 \n", + "Raisin_Squares 90 2 0 0 2.0 \n", + "Rice_Chex 110 1 0 240 0.0 \n", + "Rice_Krispies 110 2 0 290 0.0 \n", + "Shredded_Wheat 80 2 0 0 3.0 \n", + "Shredded_Wheat_'n'Bran 90 3 0 0 4.0 \n", + "Shredded_Wheat_spoon_size 90 3 0 0 3.0 \n", + "Smacks 110 2 1 70 1.0 \n", + "Special_K 110 6 0 230 1.0 \n", + "Strawberry_Fruit_Wheats 90 2 0 15 3.0 \n", + "Total_Corn_Flakes 110 2 1 200 0.0 \n", + "Total_Raisin_Bran 140 3 1 190 4.0 \n", + "Total_Whole_Grain 100 3 1 200 3.0 \n", + "Triples 110 2 1 250 0.0 \n", + "Trix 110 1 1 140 0.0 \n", + "Wheat_Chex 100 3 1 230 3.0 \n", + "Wheaties 100 3 1 200 3.0 \n", + "Wheaties_Honey_Gold 110 2 1 200 1.0 \n", + "\n", + " carbo sugars potass \\\n", + "name \n", + "100%_Bran 5.000000 6.000000 280.000000 \n", + "100%_Natural_Bran 8.000000 8.000000 135.000000 \n", + "All-Bran 7.000000 5.000000 320.000000 \n", + "All-Bran_with_Extra_Fiber 8.000000 0.000000 330.000000 \n", + "Almond_Delight 14.000000 8.000000 98.666667 \n", + "Apple_Cinnamon_Cheerios 10.500000 10.000000 70.000000 \n", + "Apple_Jacks 11.000000 14.000000 30.000000 \n", + "Basic_4 18.000000 8.000000 100.000000 \n", + "Bran_Chex 15.000000 6.000000 125.000000 \n", + "Bran_Flakes 13.000000 5.000000 190.000000 \n", + "Cap'n'Crunch 12.000000 12.000000 35.000000 \n", + "Cheerios 17.000000 1.000000 105.000000 \n", + "Cinnamon_Toast_Crunch 13.000000 9.000000 45.000000 \n", + "Clusters 13.000000 7.000000 105.000000 \n", + "Cocoa_Puffs 12.000000 13.000000 55.000000 \n", 
+ "Corn_Chex 22.000000 3.000000 25.000000 \n", + "Corn_Flakes 21.000000 2.000000 35.000000 \n", + "Corn_Pops 13.000000 12.000000 20.000000 \n", + "Count_Chocula 12.000000 13.000000 65.000000 \n", + "Cracklin'_Oat_Bran 10.000000 7.000000 160.000000 \n", + "Cream_of_Wheat_(Quick) 21.000000 0.000000 98.666667 \n", + "Crispix 21.000000 3.000000 30.000000 \n", + "Crispy_Wheat_&_Raisins 11.000000 10.000000 120.000000 \n", + "Double_Chex 18.000000 5.000000 80.000000 \n", + "Froot_Loops 11.000000 13.000000 30.000000 \n", + "Frosted_Flakes 14.000000 11.000000 25.000000 \n", + "Frosted_Mini-Wheats 14.000000 7.000000 100.000000 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 12.000000 10.000000 200.000000 \n", + "Fruitful_Bran 14.000000 12.000000 190.000000 \n", + "Fruity_Pebbles 13.000000 12.000000 25.000000 \n", + "... ... ... ... \n", + "Multi-Grain_Cheerios 15.000000 6.000000 90.000000 \n", + "Nut&Honey_Crunch 15.000000 9.000000 40.000000 \n", + "Nutri-Grain_Almond-Raisin 21.000000 7.000000 130.000000 \n", + "Nutri-grain_Wheat 18.000000 2.000000 90.000000 \n", + "Oatmeal_Raisin_Crisp 13.500000 10.000000 120.000000 \n", + "Post_Nat._Raisin_Bran 11.000000 14.000000 260.000000 \n", + "Product_19 20.000000 3.000000 45.000000 \n", + "Puffed_Rice 13.000000 0.000000 15.000000 \n", + "Puffed_Wheat 10.000000 0.000000 50.000000 \n", + "Quaker_Oat_Squares 14.000000 6.000000 110.000000 \n", + "Quaker_Oatmeal 14.802632 7.026316 110.000000 \n", + "Raisin_Bran 14.000000 12.000000 240.000000 \n", + "Raisin_Nut_Bran 10.500000 8.000000 140.000000 \n", + "Raisin_Squares 15.000000 6.000000 110.000000 \n", + "Rice_Chex 23.000000 2.000000 30.000000 \n", + "Rice_Krispies 22.000000 3.000000 35.000000 \n", + "Shredded_Wheat 16.000000 0.000000 95.000000 \n", + "Shredded_Wheat_'n'Bran 19.000000 0.000000 140.000000 \n", + "Shredded_Wheat_spoon_size 20.000000 0.000000 120.000000 \n", + "Smacks 9.000000 15.000000 40.000000 \n", + "Special_K 16.000000 3.000000 55.000000 \n", + "Strawberry_Fruit_Wheats 
15.000000 5.000000 90.000000 \n", + "Total_Corn_Flakes 21.000000 3.000000 35.000000 \n", + "Total_Raisin_Bran 15.000000 14.000000 230.000000 \n", + "Total_Whole_Grain 16.000000 3.000000 110.000000 \n", + "Triples 21.000000 3.000000 60.000000 \n", + "Trix 13.000000 12.000000 25.000000 \n", + "Wheat_Chex 17.000000 3.000000 115.000000 \n", + "Wheaties 17.000000 3.000000 110.000000 \n", + "Wheaties_Honey_Gold 16.000000 8.000000 60.000000 \n", + "\n", + " vitamins shelf weight cups \n", + "name \n", + "100%_Bran 25 3 1.00 0.33 \n", + "100%_Natural_Bran 0 3 1.00 1.00 \n", + "All-Bran 25 3 1.00 0.33 \n", + "All-Bran_with_Extra_Fiber 25 3 1.00 0.50 \n", + "Almond_Delight 25 3 1.00 0.75 \n", + "Apple_Cinnamon_Cheerios 25 1 1.00 0.75 \n", + "Apple_Jacks 25 2 1.00 1.00 \n", + "Basic_4 25 3 1.33 0.75 \n", + "Bran_Chex 25 1 1.00 0.67 \n", + "Bran_Flakes 25 3 1.00 0.67 \n", + "Cap'n'Crunch 25 2 1.00 0.75 \n", + "Cheerios 25 1 1.00 1.25 \n", + "Cinnamon_Toast_Crunch 25 2 1.00 0.75 \n", + "Clusters 25 3 1.00 0.50 \n", + "Cocoa_Puffs 25 2 1.00 1.00 \n", + "Corn_Chex 25 1 1.00 1.00 \n", + "Corn_Flakes 25 1 1.00 1.00 \n", + "Corn_Pops 25 2 1.00 1.00 \n", + "Count_Chocula 25 2 1.00 1.00 \n", + "Cracklin'_Oat_Bran 25 3 1.00 0.50 \n", + "Cream_of_Wheat_(Quick) 0 2 1.00 1.00 \n", + "Crispix 25 3 1.00 1.00 \n", + "Crispy_Wheat_&_Raisins 25 3 1.00 0.75 \n", + "Double_Chex 25 3 1.00 0.75 \n", + "Froot_Loops 25 2 1.00 1.00 \n", + "Frosted_Flakes 25 1 1.00 0.75 \n", + "Frosted_Mini-Wheats 25 2 1.00 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 25 3 1.25 0.67 \n", + "Fruitful_Bran 25 3 1.33 0.67 \n", + "Fruity_Pebbles 25 2 1.00 0.75 \n", + "... ... ... ... ... 
\n", + "Multi-Grain_Cheerios 25 1 1.00 1.00 \n", + "Nut&Honey_Crunch 25 2 1.00 0.67 \n", + "Nutri-Grain_Almond-Raisin 25 3 1.33 0.67 \n", + "Nutri-grain_Wheat 25 3 1.00 1.00 \n", + "Oatmeal_Raisin_Crisp 25 3 1.25 0.50 \n", + "Post_Nat._Raisin_Bran 25 3 1.33 0.67 \n", + "Product_19 100 3 1.00 1.00 \n", + "Puffed_Rice 0 3 0.50 1.00 \n", + "Puffed_Wheat 0 3 0.50 1.00 \n", + "Quaker_Oat_Squares 25 3 1.00 0.50 \n", + "Quaker_Oatmeal 0 1 1.00 0.67 \n", + "Raisin_Bran 25 2 1.33 0.75 \n", + "Raisin_Nut_Bran 25 3 1.00 0.50 \n", + "Raisin_Squares 25 3 1.00 0.50 \n", + "Rice_Chex 25 1 1.00 1.13 \n", + "Rice_Krispies 25 1 1.00 1.00 \n", + "Shredded_Wheat 0 1 0.83 1.00 \n", + "Shredded_Wheat_'n'Bran 0 1 1.00 0.67 \n", + "Shredded_Wheat_spoon_size 0 1 1.00 0.67 \n", + "Smacks 25 2 1.00 0.75 \n", + "Special_K 25 1 1.00 1.00 \n", + "Strawberry_Fruit_Wheats 25 2 1.00 1.00 \n", + "Total_Corn_Flakes 100 3 1.00 1.00 \n", + "Total_Raisin_Bran 100 3 1.50 1.00 \n", + "Total_Whole_Grain 100 3 1.00 1.00 \n", + "Triples 25 3 1.00 0.75 \n", + "Trix 25 2 1.00 1.00 \n", + "Wheat_Chex 25 1 1.00 0.67 \n", + "Wheaties 25 1 1.00 1.00 \n", + "Wheaties_Honey_Gold 25 1 1.00 0.75 \n", + "\n", + "[77 rows x 12 columns]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize(filled_in_df_removed)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid character in identifier (, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m df.div(df.cups, axis=“index”)`\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character in identifier\n" + ] + } + ], + "source": [ + "df.div(df.cups, axis=“index”)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": 
false + }, + "outputs": [], + "source": [ + "norm_df= filled_in_df_removed.div(df.cups, axis=\"index\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "norm_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "processed = preprocessing.scale(norm_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.14734117, 3.42380788, 0.97072001, 1.32384857, 4.72937882,\n", + " -0.52840366, 1.51635962, 4.19746977, 1.45991832, 2.76273944,\n", + " 2.67555812, 0. 
])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processed[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n", + " n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n", + " verbose=0)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k_means_method = KMeans(3)\n", + "k_means_method.fit(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "three_clusters = k_means_method.predict(processed)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0,\n", + " 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0,\n", + " 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 2, 0, 0, 0, 0, 0, 0], dtype=int32)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "three_clusters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "four_k_means = KMeans(4)\n", + "four_k_means.fit(processed)\n", + "four_clusters = four_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "five_k_means = KMeans(5)\n", + "five_k_means.fit(processed)\n", + "five_clusters = five_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": true + }, + 
"outputs": [], + "source": [ + "six_k_means = KMeans(6)\n", + "six_k_means.fit(processed)\n", + "six_clusters = six_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib\n", + "import numpy as np\n", + "import math\n", + "\n", + "from scipy.spatial import Voronoi, voronoi_plot_2d\n", + "%matplotlib inline\n", + "from sklearn.decomposition import PCA as PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'KMeans' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mcereal_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpreprocessing\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorm_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcentroids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclusters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKMeans\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocessed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: 'KMeans' object is not iterable" + ] + } + ], + "source": [ + "cereal_data = preprocessing.scale(norm_df)\n", + "centroids, clusters = KMeans(processed, 3)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Plotting (attempt)" + ] + 
}, + { + "cell_type": "code", + "execution_count": 93, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "operands could not be broadcast together with shapes (1,77) (12,) ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mpca\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPCA\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorm_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#pca = processed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mcentroids_2d\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mthree_clusters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mcolors\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"#E2415F\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"#7FAF1B\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"#090129\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mmarkers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"v\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"D\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"s\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/willflowers/Documents/The_Iron_Yard/python-assignments/honey-nut-clusters/.direnv/python-3.4.3/lib/python3.4/site-packages/sklearn/decomposition/pca.py\u001b[0m in 
\u001b[0;36mtransform\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean_\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 388\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 389\u001b[0m \u001b[0mX_transformed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfast_dot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomponents_\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 390\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwhiten\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: operands could not be broadcast together with shapes (1,77) (12,) " + ] + } + ], + "source": [ + "\n", + "pca = PCA(n_components=2).fit(cereals_data)\n", + "centroids_2d = pca.transform(centroids)\n", + "colors = [\"#E2415F\", \"#7FAF1B\", \"#090129\"]\n", + "markers = [\"v\", \"D\", \"s\"]\n", + "\n", + "plt.figure(figsize=(8, 6))\n", + "axes = plt.subplot(1,1,1)\n", + "\n", + "for i, cluster in enumerate(clusters):\n", + " cluster = np.array(cluster)\n", + " cluster_2d = pca.transform(cluster)\n", + " plt.scatter(cluster_2d[:,0], cluster_2d[:,1], color=colors[i])\n", + "\n", + "plt.scatter(centroids_2d[:, 0], centroids_2d[:, 1], color='g', alpha='0.5', marker='o', s=250)\n", + "\n", + "polys = 
voronoi_polygons(centroids_2d)\n", + "\n", + "for poly in polys:\n", + " p = matplotlib.patches.Polygon(poly, facecolor=np.random.rand(3,1), alpha=0.1)\n", + " axes.add_patch(p)\n", + " \n", + "plt.title(\"Iris dataset with three clusters\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}