From 4017ebfc24ec4dd9cc59ca5dbf6310e479e93f8f Mon Sep 17 00:00:00 2001 From: Will Flowers Date: Mon, 8 Jun 2015 21:33:38 -0400 Subject: [PATCH 1/2] cleaning data --- Honey_nut_clusters.ipynb | 3031 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 3031 insertions(+) create mode 100644 Honey_nut_clusters.ipynb diff --git a/Honey_nut_clusters.ipynb b/Honey_nut_clusters.ipynb new file mode 100644 index 0000000..0a09e4b --- /dev/null +++ b/Honey_nut_clusters.ipynb @@ -0,0 +1,3031 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df = pd.DataFrame.from_csv('cereals.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "df_nan = df.replace(to_replace=-1, value=np.nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsshelfweightcups
name
100%_BranNC7041130105628025310.33
100%_Natural_BranQC12035152881350311.00
All-BranKC704126097532025310.33
All-Bran_with_Extra_FiberKC5040140148033025310.50
Almond_DelightRC110222001148NaN25310.75
\n", + "
" + ], + "text/plain": [ + " mfr type calories protein fat sodium fiber \\\n", + "name \n", + "100%_Bran N C 70 4 1 130 10 \n", + "100%_Natural_Bran Q C 120 3 5 15 2 \n", + "All-Bran K C 70 4 1 260 9 \n", + "All-Bran_with_Extra_Fiber K C 50 4 0 140 14 \n", + "Almond_Delight R C 110 2 2 200 1 \n", + "\n", + " carbo sugars potass vitamins shelf weight \\\n", + "name \n", + "100%_Bran 5 6 280 25 3 1 \n", + "100%_Natural_Bran 8 8 135 0 3 1 \n", + "All-Bran 7 5 320 25 3 1 \n", + "All-Bran_with_Extra_Fiber 8 0 330 25 3 1 \n", + "Almond_Delight 14 8 NaN 25 3 1 \n", + "\n", + " cups \n", + "name \n", + "100%_Bran 0.33 \n", + "100%_Natural_Bran 1.00 \n", + "All-Bran 0.33 \n", + "All-Bran_with_Extra_Fiber 0.50 \n", + "Almond_Delight 0.75 " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_nan.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "calories 106.883117\n", + "protein 2.545455\n", + "fat 1.012987\n", + "sodium 159.675325\n", + "fiber 2.151948\n", + "carbo 14.802632\n", + "sugars 7.026316\n", + "potass 98.666667\n", + "vitamins 28.246753\n", + "shelf 2.207792\n", + "weight 1.029610\n", + "cups 0.821039\n", + "dtype: float64" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_nan.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filled_in_df = df_nan.fillna(df_nan.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mfrtypecaloriesproteinfatsodiumfibercarbosugarspotassvitaminsweightcups
name
100%_BranNC704113010.05.0000006.000000280.000000251.000.33
100%_Natural_BranQC12035152.08.0000008.000000135.00000001.001.00
All-BranKC70412609.07.0000005.000000320.000000251.000.33
All-Bran_with_Extra_FiberKC504014014.08.0000000.000000330.000000251.000.50
Almond_DelightRC110222001.014.0000008.00000098.666667251.000.75
Apple_Cinnamon_CheeriosGC110221801.510.50000010.00000070.000000251.000.75
Apple_JacksKC110201251.011.00000014.00000030.000000251.001.00
Basic_4GC130322102.018.0000008.000000100.000000251.330.75
Bran_ChexRC90212004.015.0000006.000000125.000000251.000.67
Bran_FlakesPC90302105.013.0000005.000000190.000000251.000.67
Cap'n'CrunchQC120122200.012.00000012.00000035.000000251.000.75
CheeriosGC110622902.017.0000001.000000105.000000251.001.25
Cinnamon_Toast_CrunchGC120132100.013.0000009.00000045.000000251.000.75
ClustersGC110321402.013.0000007.000000105.000000251.000.50
Cocoa_PuffsGC110111800.012.00000013.00000055.000000251.001.00
Corn_ChexRC110202800.022.0000003.00000025.000000251.001.00
Corn_FlakesKC100202901.021.0000002.00000035.000000251.001.00
Corn_PopsKC11010901.013.00000012.00000020.000000251.001.00
Count_ChoculaGC110111800.012.00000013.00000065.000000251.001.00
Cracklin'_Oat_BranKC110331404.010.0000007.000000160.000000251.000.50
Cream_of_Wheat_(Quick)NH10030801.021.0000000.00000098.66666701.001.00
CrispixKC110202201.021.0000003.00000030.000000251.001.00
Crispy_Wheat_&_RaisinsGC100211402.011.00000010.000000120.000000251.000.75
Double_ChexRC100201901.018.0000005.00000080.000000251.000.75
Froot_LoopsKC110211251.011.00000013.00000030.000000251.001.00
Frosted_FlakesKC110102001.014.00000011.00000025.000000251.000.75
Frosted_Mini-WheatsKC1003003.014.0000007.000000100.000000251.000.80
Fruit_&_Fibre_Dates,_Walnuts,_and_OatsPC120321605.012.00000010.000000200.000000251.250.67
Fruitful_BranKC120302405.014.00000012.000000190.000000251.330.67
Fruity_PebblesPC110111350.013.00000012.00000025.000000251.000.75
..........................................
Multi-Grain_CheeriosGC100212202.015.0000006.00000090.000000251.001.00
Nut&Honey_CrunchKC120211900.015.0000009.00000040.000000251.000.67
Nutri-Grain_Almond-RaisinKC140322203.021.0000007.000000130.000000251.330.67
Nutri-grain_WheatKC90301703.018.0000002.00000090.000000251.001.00
Oatmeal_Raisin_CrispGC130321701.513.50000010.000000120.000000251.250.50
Post_Nat._Raisin_BranPC120312006.011.00000014.000000260.000000251.330.67
Product_19KC100303201.020.0000003.00000045.0000001001.001.00
Puffed_RiceQC501000.013.0000000.00000015.00000000.501.00
Puffed_WheatQC502001.010.0000000.00000050.00000000.501.00
Quaker_Oat_SquaresQC100411352.014.0000006.000000110.000000251.000.50
Quaker_OatmealQH1005202.714.8026327.026316110.00000001.000.67
Raisin_BranKC120312105.014.00000012.000000240.000000251.330.75
Raisin_Nut_BranGC100321402.510.5000008.000000140.000000251.000.50
Raisin_SquaresKC902002.015.0000006.000000110.000000251.000.50
Rice_ChexRC110102400.023.0000002.00000030.000000251.001.13
Rice_KrispiesKC110202900.022.0000003.00000035.000000251.001.00
Shredded_WheatNC802003.016.0000000.00000095.00000000.831.00
Shredded_Wheat_'n'BranNC903004.019.0000000.000000140.00000001.000.67
Shredded_Wheat_spoon_sizeNC903003.020.0000000.000000120.00000001.000.67
SmacksKC11021701.09.00000015.00000040.000000251.000.75
Special_KKC110602301.016.0000003.00000055.000000251.001.00
Strawberry_Fruit_WheatsNC9020153.015.0000005.00000090.000000251.001.00
Total_Corn_FlakesGC110212000.021.0000003.00000035.0000001001.001.00
Total_Raisin_BranGC140311904.015.00000014.000000230.0000001001.501.00
Total_Whole_GrainGC100312003.016.0000003.000000110.0000001001.001.00
TriplesGC110212500.021.0000003.00000060.000000251.000.75
TrixGC110111400.013.00000012.00000025.000000251.001.00
Wheat_ChexRC100312303.017.0000003.000000115.000000251.000.67
WheatiesGC100312003.017.0000003.000000110.000000251.001.00
Wheaties_Honey_GoldGC110212001.016.0000008.00000060.000000251.000.75
\n", + "

77 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " mfr type calories protein fat \\\n", + "name \n", + "100%_Bran N C 70 4 1 \n", + "100%_Natural_Bran Q C 120 3 5 \n", + "All-Bran K C 70 4 1 \n", + "All-Bran_with_Extra_Fiber K C 50 4 0 \n", + "Almond_Delight R C 110 2 2 \n", + "Apple_Cinnamon_Cheerios G C 110 2 2 \n", + "Apple_Jacks K C 110 2 0 \n", + "Basic_4 G C 130 3 2 \n", + "Bran_Chex R C 90 2 1 \n", + "Bran_Flakes P C 90 3 0 \n", + "Cap'n'Crunch Q C 120 1 2 \n", + "Cheerios G C 110 6 2 \n", + "Cinnamon_Toast_Crunch G C 120 1 3 \n", + "Clusters G C 110 3 2 \n", + "Cocoa_Puffs G C 110 1 1 \n", + "Corn_Chex R C 110 2 0 \n", + "Corn_Flakes K C 100 2 0 \n", + "Corn_Pops K C 110 1 0 \n", + "Count_Chocula G C 110 1 1 \n", + "Cracklin'_Oat_Bran K C 110 3 3 \n", + "Cream_of_Wheat_(Quick) N H 100 3 0 \n", + "Crispix K C 110 2 0 \n", + "Crispy_Wheat_&_Raisins G C 100 2 1 \n", + "Double_Chex R C 100 2 0 \n", + "Froot_Loops K C 110 2 1 \n", + "Frosted_Flakes K C 110 1 0 \n", + "Frosted_Mini-Wheats K C 100 3 0 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats P C 120 3 2 \n", + "Fruitful_Bran K C 120 3 0 \n", + "Fruity_Pebbles P C 110 1 1 \n", + "... .. ... ... ... ... 
\n", + "Multi-Grain_Cheerios G C 100 2 1 \n", + "Nut&Honey_Crunch K C 120 2 1 \n", + "Nutri-Grain_Almond-Raisin K C 140 3 2 \n", + "Nutri-grain_Wheat K C 90 3 0 \n", + "Oatmeal_Raisin_Crisp G C 130 3 2 \n", + "Post_Nat._Raisin_Bran P C 120 3 1 \n", + "Product_19 K C 100 3 0 \n", + "Puffed_Rice Q C 50 1 0 \n", + "Puffed_Wheat Q C 50 2 0 \n", + "Quaker_Oat_Squares Q C 100 4 1 \n", + "Quaker_Oatmeal Q H 100 5 2 \n", + "Raisin_Bran K C 120 3 1 \n", + "Raisin_Nut_Bran G C 100 3 2 \n", + "Raisin_Squares K C 90 2 0 \n", + "Rice_Chex R C 110 1 0 \n", + "Rice_Krispies K C 110 2 0 \n", + "Shredded_Wheat N C 80 2 0 \n", + "Shredded_Wheat_'n'Bran N C 90 3 0 \n", + "Shredded_Wheat_spoon_size N C 90 3 0 \n", + "Smacks K C 110 2 1 \n", + "Special_K K C 110 6 0 \n", + "Strawberry_Fruit_Wheats N C 90 2 0 \n", + "Total_Corn_Flakes G C 110 2 1 \n", + "Total_Raisin_Bran G C 140 3 1 \n", + "Total_Whole_Grain G C 100 3 1 \n", + "Triples G C 110 2 1 \n", + "Trix G C 110 1 1 \n", + "Wheat_Chex R C 100 3 1 \n", + "Wheaties G C 100 3 1 \n", + "Wheaties_Honey_Gold G C 110 2 1 \n", + "\n", + " sodium fiber carbo sugars \\\n", + "name \n", + "100%_Bran 130 10.0 5.000000 6.000000 \n", + "100%_Natural_Bran 15 2.0 8.000000 8.000000 \n", + "All-Bran 260 9.0 7.000000 5.000000 \n", + "All-Bran_with_Extra_Fiber 140 14.0 8.000000 0.000000 \n", + "Almond_Delight 200 1.0 14.000000 8.000000 \n", + "Apple_Cinnamon_Cheerios 180 1.5 10.500000 10.000000 \n", + "Apple_Jacks 125 1.0 11.000000 14.000000 \n", + "Basic_4 210 2.0 18.000000 8.000000 \n", + "Bran_Chex 200 4.0 15.000000 6.000000 \n", + "Bran_Flakes 210 5.0 13.000000 5.000000 \n", + "Cap'n'Crunch 220 0.0 12.000000 12.000000 \n", + "Cheerios 290 2.0 17.000000 1.000000 \n", + "Cinnamon_Toast_Crunch 210 0.0 13.000000 9.000000 \n", + "Clusters 140 2.0 13.000000 7.000000 \n", + "Cocoa_Puffs 180 0.0 12.000000 13.000000 \n", + "Corn_Chex 280 0.0 22.000000 3.000000 \n", + "Corn_Flakes 290 1.0 21.000000 2.000000 \n", + "Corn_Pops 90 1.0 13.000000 12.000000 
\n", + "Count_Chocula 180 0.0 12.000000 13.000000 \n", + "Cracklin'_Oat_Bran 140 4.0 10.000000 7.000000 \n", + "Cream_of_Wheat_(Quick) 80 1.0 21.000000 0.000000 \n", + "Crispix 220 1.0 21.000000 3.000000 \n", + "Crispy_Wheat_&_Raisins 140 2.0 11.000000 10.000000 \n", + "Double_Chex 190 1.0 18.000000 5.000000 \n", + "Froot_Loops 125 1.0 11.000000 13.000000 \n", + "Frosted_Flakes 200 1.0 14.000000 11.000000 \n", + "Frosted_Mini-Wheats 0 3.0 14.000000 7.000000 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 160 5.0 12.000000 10.000000 \n", + "Fruitful_Bran 240 5.0 14.000000 12.000000 \n", + "Fruity_Pebbles 135 0.0 13.000000 12.000000 \n", + "... ... ... ... ... \n", + "Multi-Grain_Cheerios 220 2.0 15.000000 6.000000 \n", + "Nut&Honey_Crunch 190 0.0 15.000000 9.000000 \n", + "Nutri-Grain_Almond-Raisin 220 3.0 21.000000 7.000000 \n", + "Nutri-grain_Wheat 170 3.0 18.000000 2.000000 \n", + "Oatmeal_Raisin_Crisp 170 1.5 13.500000 10.000000 \n", + "Post_Nat._Raisin_Bran 200 6.0 11.000000 14.000000 \n", + "Product_19 320 1.0 20.000000 3.000000 \n", + "Puffed_Rice 0 0.0 13.000000 0.000000 \n", + "Puffed_Wheat 0 1.0 10.000000 0.000000 \n", + "Quaker_Oat_Squares 135 2.0 14.000000 6.000000 \n", + "Quaker_Oatmeal 0 2.7 14.802632 7.026316 \n", + "Raisin_Bran 210 5.0 14.000000 12.000000 \n", + "Raisin_Nut_Bran 140 2.5 10.500000 8.000000 \n", + "Raisin_Squares 0 2.0 15.000000 6.000000 \n", + "Rice_Chex 240 0.0 23.000000 2.000000 \n", + "Rice_Krispies 290 0.0 22.000000 3.000000 \n", + "Shredded_Wheat 0 3.0 16.000000 0.000000 \n", + "Shredded_Wheat_'n'Bran 0 4.0 19.000000 0.000000 \n", + "Shredded_Wheat_spoon_size 0 3.0 20.000000 0.000000 \n", + "Smacks 70 1.0 9.000000 15.000000 \n", + "Special_K 230 1.0 16.000000 3.000000 \n", + "Strawberry_Fruit_Wheats 15 3.0 15.000000 5.000000 \n", + "Total_Corn_Flakes 200 0.0 21.000000 3.000000 \n", + "Total_Raisin_Bran 190 4.0 15.000000 14.000000 \n", + "Total_Whole_Grain 200 3.0 16.000000 3.000000 \n", + "Triples 250 0.0 21.000000 3.000000 \n", + 
"Trix 140 0.0 13.000000 12.000000 \n", + "Wheat_Chex 230 3.0 17.000000 3.000000 \n", + "Wheaties 200 3.0 17.000000 3.000000 \n", + "Wheaties_Honey_Gold 200 1.0 16.000000 8.000000 \n", + "\n", + " potass vitamins weight cups \n", + "name \n", + "100%_Bran 280.000000 25 1.00 0.33 \n", + "100%_Natural_Bran 135.000000 0 1.00 1.00 \n", + "All-Bran 320.000000 25 1.00 0.33 \n", + "All-Bran_with_Extra_Fiber 330.000000 25 1.00 0.50 \n", + "Almond_Delight 98.666667 25 1.00 0.75 \n", + "Apple_Cinnamon_Cheerios 70.000000 25 1.00 0.75 \n", + "Apple_Jacks 30.000000 25 1.00 1.00 \n", + "Basic_4 100.000000 25 1.33 0.75 \n", + "Bran_Chex 125.000000 25 1.00 0.67 \n", + "Bran_Flakes 190.000000 25 1.00 0.67 \n", + "Cap'n'Crunch 35.000000 25 1.00 0.75 \n", + "Cheerios 105.000000 25 1.00 1.25 \n", + "Cinnamon_Toast_Crunch 45.000000 25 1.00 0.75 \n", + "Clusters 105.000000 25 1.00 0.50 \n", + "Cocoa_Puffs 55.000000 25 1.00 1.00 \n", + "Corn_Chex 25.000000 25 1.00 1.00 \n", + "Corn_Flakes 35.000000 25 1.00 1.00 \n", + "Corn_Pops 20.000000 25 1.00 1.00 \n", + "Count_Chocula 65.000000 25 1.00 1.00 \n", + "Cracklin'_Oat_Bran 160.000000 25 1.00 0.50 \n", + "Cream_of_Wheat_(Quick) 98.666667 0 1.00 1.00 \n", + "Crispix 30.000000 25 1.00 1.00 \n", + "Crispy_Wheat_&_Raisins 120.000000 25 1.00 0.75 \n", + "Double_Chex 80.000000 25 1.00 0.75 \n", + "Froot_Loops 30.000000 25 1.00 1.00 \n", + "Frosted_Flakes 25.000000 25 1.00 0.75 \n", + "Frosted_Mini-Wheats 100.000000 25 1.00 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 200.000000 25 1.25 0.67 \n", + "Fruitful_Bran 190.000000 25 1.33 0.67 \n", + "Fruity_Pebbles 25.000000 25 1.00 0.75 \n", + "... ... ... ... ... 
\n", + "Multi-Grain_Cheerios 90.000000 25 1.00 1.00 \n", + "Nut&Honey_Crunch 40.000000 25 1.00 0.67 \n", + "Nutri-Grain_Almond-Raisin 130.000000 25 1.33 0.67 \n", + "Nutri-grain_Wheat 90.000000 25 1.00 1.00 \n", + "Oatmeal_Raisin_Crisp 120.000000 25 1.25 0.50 \n", + "Post_Nat._Raisin_Bran 260.000000 25 1.33 0.67 \n", + "Product_19 45.000000 100 1.00 1.00 \n", + "Puffed_Rice 15.000000 0 0.50 1.00 \n", + "Puffed_Wheat 50.000000 0 0.50 1.00 \n", + "Quaker_Oat_Squares 110.000000 25 1.00 0.50 \n", + "Quaker_Oatmeal 110.000000 0 1.00 0.67 \n", + "Raisin_Bran 240.000000 25 1.33 0.75 \n", + "Raisin_Nut_Bran 140.000000 25 1.00 0.50 \n", + "Raisin_Squares 110.000000 25 1.00 0.50 \n", + "Rice_Chex 30.000000 25 1.00 1.13 \n", + "Rice_Krispies 35.000000 25 1.00 1.00 \n", + "Shredded_Wheat 95.000000 0 0.83 1.00 \n", + "Shredded_Wheat_'n'Bran 140.000000 0 1.00 0.67 \n", + "Shredded_Wheat_spoon_size 120.000000 0 1.00 0.67 \n", + "Smacks 40.000000 25 1.00 0.75 \n", + "Special_K 55.000000 25 1.00 1.00 \n", + "Strawberry_Fruit_Wheats 90.000000 25 1.00 1.00 \n", + "Total_Corn_Flakes 35.000000 100 1.00 1.00 \n", + "Total_Raisin_Bran 230.000000 100 1.50 1.00 \n", + "Total_Whole_Grain 110.000000 100 1.00 1.00 \n", + "Triples 60.000000 25 1.00 0.75 \n", + "Trix 25.000000 25 1.00 1.00 \n", + "Wheat_Chex 115.000000 25 1.00 0.67 \n", + "Wheaties 110.000000 25 1.00 1.00 \n", + "Wheaties_Honey_Gold 60.000000 25 1.00 0.75 \n", + "\n", + "[77 rows x 13 columns]" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "filled_in_df_removed = filled_in_df.drop(['mfr', 'type'], 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsweightcups
name
100%_Bran70411301056280.0000002510.33
100%_Natural_Bran1203515288135.000000011.00
All-Bran7041260975320.0000002510.33
All-Bran_with_Extra_Fiber50401401480330.0000002510.50
Almond_Delight11022200114898.6666672510.75
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo \\\n", + "name \n", + "100%_Bran 70 4 1 130 10 5 \n", + "100%_Natural_Bran 120 3 5 15 2 8 \n", + "All-Bran 70 4 1 260 9 7 \n", + "All-Bran_with_Extra_Fiber 50 4 0 140 14 8 \n", + "Almond_Delight 110 2 2 200 1 14 \n", + "\n", + " sugars potass vitamins weight cups \n", + "name \n", + "100%_Bran 6 280.000000 25 1 0.33 \n", + "100%_Natural_Bran 8 135.000000 0 1 1.00 \n", + "All-Bran 5 320.000000 25 1 0.33 \n", + "All-Bran_with_Extra_Fiber 0 330.000000 25 1 0.50 \n", + "Almond_Delight 8 98.666667 25 1 0.75 " + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df_removed.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "calories int64\n", + "protein int64\n", + "fat int64\n", + "sodium int64\n", + "fiber float64\n", + "carbo float64\n", + "sugars float64\n", + "potass float64\n", + "vitamins int64\n", + "weight float64\n", + "cups float64\n", + "dtype: object" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filled_in_df_removed.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Normalizing:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_mult = filled_in_df_removed*3" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsweightcups
name
100%_Bran2101233903015188407530.99
100%_Natural_Bran3609154562424405033.00
All-Bran2101237802721159607530.99
All-Bran_with_Extra_Fiber150120420422409907531.50
Almond_Delight33066600342242967532.25
\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber carbo \\\n", + "name \n", + "100%_Bran 210 12 3 390 30 15 \n", + "100%_Natural_Bran 360 9 15 45 6 24 \n", + "All-Bran 210 12 3 780 27 21 \n", + "All-Bran_with_Extra_Fiber 150 12 0 420 42 24 \n", + "Almond_Delight 330 6 6 600 3 42 \n", + "\n", + " sugars potass vitamins weight cups \n", + "name \n", + "100%_Bran 18 840 75 3 0.99 \n", + "100%_Natural_Bran 24 405 0 3 3.00 \n", + "All-Bran 15 960 75 3 0.99 \n", + "All-Bran_with_Extra_Fiber 0 990 75 3 1.50 \n", + "Almond_Delight 24 296 75 3 2.25 " + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_mult.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.98999999999999999" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_mult.iloc[2, 10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def normalize(data_frame):\n", + " for i in range(len(data_frame)):\n", + " data_frame.iloc[i]*1/data_frame.iloc[i, 10]\n", + " return data_frame\n" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caloriesproteinfatsodiumfibercarbosugarspotassvitaminsweightcups
name
100%_Bran704113010.05.0000006.000000280.000000251.000.33
100%_Natural_Bran12035152.08.0000008.000000135.00000001.001.00
All-Bran70412609.07.0000005.000000320.000000251.000.33
All-Bran_with_Extra_Fiber504014014.08.0000000.000000330.000000251.000.50
Almond_Delight110222001.014.0000008.00000098.666667251.000.75
Apple_Cinnamon_Cheerios110221801.510.50000010.00000070.000000251.000.75
Apple_Jacks110201251.011.00000014.00000030.000000251.001.00
Basic_4130322102.018.0000008.000000100.000000251.330.75
Bran_Chex90212004.015.0000006.000000125.000000251.000.67
Bran_Flakes90302105.013.0000005.000000190.000000251.000.67
Cap'n'Crunch120122200.012.00000012.00000035.000000251.000.75
Cheerios110622902.017.0000001.000000105.000000251.001.25
Cinnamon_Toast_Crunch120132100.013.0000009.00000045.000000251.000.75
Clusters110321402.013.0000007.000000105.000000251.000.50
Cocoa_Puffs110111800.012.00000013.00000055.000000251.001.00
Corn_Chex110202800.022.0000003.00000025.000000251.001.00
Corn_Flakes100202901.021.0000002.00000035.000000251.001.00
Corn_Pops11010901.013.00000012.00000020.000000251.001.00
Count_Chocula110111800.012.00000013.00000065.000000251.001.00
Cracklin'_Oat_Bran110331404.010.0000007.000000160.000000251.000.50
Cream_of_Wheat_(Quick)10030801.021.0000000.00000098.66666701.001.00
Crispix110202201.021.0000003.00000030.000000251.001.00
Crispy_Wheat_&_Raisins100211402.011.00000010.000000120.000000251.000.75
Double_Chex100201901.018.0000005.00000080.000000251.000.75
Froot_Loops110211251.011.00000013.00000030.000000251.001.00
Frosted_Flakes110102001.014.00000011.00000025.000000251.000.75
Frosted_Mini-Wheats1003003.014.0000007.000000100.000000251.000.80
Fruit_&_Fibre_Dates,_Walnuts,_and_Oats120321605.012.00000010.000000200.000000251.250.67
Fruitful_Bran120302405.014.00000012.000000190.000000251.330.67
Fruity_Pebbles110111350.013.00000012.00000025.000000251.000.75
....................................
Multi-Grain_Cheerios100212202.015.0000006.00000090.000000251.001.00
Nut&Honey_Crunch120211900.015.0000009.00000040.000000251.000.67
Nutri-Grain_Almond-Raisin140322203.021.0000007.000000130.000000251.330.67
Nutri-grain_Wheat90301703.018.0000002.00000090.000000251.001.00
Oatmeal_Raisin_Crisp130321701.513.50000010.000000120.000000251.250.50
Post_Nat._Raisin_Bran120312006.011.00000014.000000260.000000251.330.67
Product_19100303201.020.0000003.00000045.0000001001.001.00
Puffed_Rice501000.013.0000000.00000015.00000000.501.00
Puffed_Wheat502001.010.0000000.00000050.00000000.501.00
Quaker_Oat_Squares100411352.014.0000006.000000110.000000251.000.50
Quaker_Oatmeal1005202.714.8026327.026316110.00000001.000.67
Raisin_Bran120312105.014.00000012.000000240.000000251.330.75
Raisin_Nut_Bran100321402.510.5000008.000000140.000000251.000.50
Raisin_Squares902002.015.0000006.000000110.000000251.000.50
Rice_Chex110102400.023.0000002.00000030.000000251.001.13
Rice_Krispies110202900.022.0000003.00000035.000000251.001.00
Shredded_Wheat802003.016.0000000.00000095.00000000.831.00
Shredded_Wheat_'n'Bran903004.019.0000000.000000140.00000001.000.67
Shredded_Wheat_spoon_size903003.020.0000000.000000120.00000001.000.67
Smacks11021701.09.00000015.00000040.000000251.000.75
Special_K110602301.016.0000003.00000055.000000251.001.00
Strawberry_Fruit_Wheats9020153.015.0000005.00000090.000000251.001.00
Total_Corn_Flakes110212000.021.0000003.00000035.0000001001.001.00
Total_Raisin_Bran140311904.015.00000014.000000230.0000001001.501.00
Total_Whole_Grain100312003.016.0000003.000000110.0000001001.001.00
Triples110212500.021.0000003.00000060.000000251.000.75
Trix110111400.013.00000012.00000025.000000251.001.00
Wheat_Chex100312303.017.0000003.000000115.000000251.000.67
Wheaties100312003.017.0000003.000000110.000000251.001.00
Wheaties_Honey_Gold110212001.016.0000008.00000060.000000251.000.75
\n", + "

77 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " calories protein fat sodium fiber \\\n", + "name \n", + "100%_Bran 70 4 1 130 10.0 \n", + "100%_Natural_Bran 120 3 5 15 2.0 \n", + "All-Bran 70 4 1 260 9.0 \n", + "All-Bran_with_Extra_Fiber 50 4 0 140 14.0 \n", + "Almond_Delight 110 2 2 200 1.0 \n", + "Apple_Cinnamon_Cheerios 110 2 2 180 1.5 \n", + "Apple_Jacks 110 2 0 125 1.0 \n", + "Basic_4 130 3 2 210 2.0 \n", + "Bran_Chex 90 2 1 200 4.0 \n", + "Bran_Flakes 90 3 0 210 5.0 \n", + "Cap'n'Crunch 120 1 2 220 0.0 \n", + "Cheerios 110 6 2 290 2.0 \n", + "Cinnamon_Toast_Crunch 120 1 3 210 0.0 \n", + "Clusters 110 3 2 140 2.0 \n", + "Cocoa_Puffs 110 1 1 180 0.0 \n", + "Corn_Chex 110 2 0 280 0.0 \n", + "Corn_Flakes 100 2 0 290 1.0 \n", + "Corn_Pops 110 1 0 90 1.0 \n", + "Count_Chocula 110 1 1 180 0.0 \n", + "Cracklin'_Oat_Bran 110 3 3 140 4.0 \n", + "Cream_of_Wheat_(Quick) 100 3 0 80 1.0 \n", + "Crispix 110 2 0 220 1.0 \n", + "Crispy_Wheat_&_Raisins 100 2 1 140 2.0 \n", + "Double_Chex 100 2 0 190 1.0 \n", + "Froot_Loops 110 2 1 125 1.0 \n", + "Frosted_Flakes 110 1 0 200 1.0 \n", + "Frosted_Mini-Wheats 100 3 0 0 3.0 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 120 3 2 160 5.0 \n", + "Fruitful_Bran 120 3 0 240 5.0 \n", + "Fruity_Pebbles 110 1 1 135 0.0 \n", + "... ... ... ... ... ... 
\n", + "Multi-Grain_Cheerios 100 2 1 220 2.0 \n", + "Nut&Honey_Crunch 120 2 1 190 0.0 \n", + "Nutri-Grain_Almond-Raisin 140 3 2 220 3.0 \n", + "Nutri-grain_Wheat 90 3 0 170 3.0 \n", + "Oatmeal_Raisin_Crisp 130 3 2 170 1.5 \n", + "Post_Nat._Raisin_Bran 120 3 1 200 6.0 \n", + "Product_19 100 3 0 320 1.0 \n", + "Puffed_Rice 50 1 0 0 0.0 \n", + "Puffed_Wheat 50 2 0 0 1.0 \n", + "Quaker_Oat_Squares 100 4 1 135 2.0 \n", + "Quaker_Oatmeal 100 5 2 0 2.7 \n", + "Raisin_Bran 120 3 1 210 5.0 \n", + "Raisin_Nut_Bran 100 3 2 140 2.5 \n", + "Raisin_Squares 90 2 0 0 2.0 \n", + "Rice_Chex 110 1 0 240 0.0 \n", + "Rice_Krispies 110 2 0 290 0.0 \n", + "Shredded_Wheat 80 2 0 0 3.0 \n", + "Shredded_Wheat_'n'Bran 90 3 0 0 4.0 \n", + "Shredded_Wheat_spoon_size 90 3 0 0 3.0 \n", + "Smacks 110 2 1 70 1.0 \n", + "Special_K 110 6 0 230 1.0 \n", + "Strawberry_Fruit_Wheats 90 2 0 15 3.0 \n", + "Total_Corn_Flakes 110 2 1 200 0.0 \n", + "Total_Raisin_Bran 140 3 1 190 4.0 \n", + "Total_Whole_Grain 100 3 1 200 3.0 \n", + "Triples 110 2 1 250 0.0 \n", + "Trix 110 1 1 140 0.0 \n", + "Wheat_Chex 100 3 1 230 3.0 \n", + "Wheaties 100 3 1 200 3.0 \n", + "Wheaties_Honey_Gold 110 2 1 200 1.0 \n", + "\n", + " carbo sugars potass \\\n", + "name \n", + "100%_Bran 5.000000 6.000000 280.000000 \n", + "100%_Natural_Bran 8.000000 8.000000 135.000000 \n", + "All-Bran 7.000000 5.000000 320.000000 \n", + "All-Bran_with_Extra_Fiber 8.000000 0.000000 330.000000 \n", + "Almond_Delight 14.000000 8.000000 98.666667 \n", + "Apple_Cinnamon_Cheerios 10.500000 10.000000 70.000000 \n", + "Apple_Jacks 11.000000 14.000000 30.000000 \n", + "Basic_4 18.000000 8.000000 100.000000 \n", + "Bran_Chex 15.000000 6.000000 125.000000 \n", + "Bran_Flakes 13.000000 5.000000 190.000000 \n", + "Cap'n'Crunch 12.000000 12.000000 35.000000 \n", + "Cheerios 17.000000 1.000000 105.000000 \n", + "Cinnamon_Toast_Crunch 13.000000 9.000000 45.000000 \n", + "Clusters 13.000000 7.000000 105.000000 \n", + "Cocoa_Puffs 12.000000 13.000000 55.000000 \n", 
+ "Corn_Chex 22.000000 3.000000 25.000000 \n", + "Corn_Flakes 21.000000 2.000000 35.000000 \n", + "Corn_Pops 13.000000 12.000000 20.000000 \n", + "Count_Chocula 12.000000 13.000000 65.000000 \n", + "Cracklin'_Oat_Bran 10.000000 7.000000 160.000000 \n", + "Cream_of_Wheat_(Quick) 21.000000 0.000000 98.666667 \n", + "Crispix 21.000000 3.000000 30.000000 \n", + "Crispy_Wheat_&_Raisins 11.000000 10.000000 120.000000 \n", + "Double_Chex 18.000000 5.000000 80.000000 \n", + "Froot_Loops 11.000000 13.000000 30.000000 \n", + "Frosted_Flakes 14.000000 11.000000 25.000000 \n", + "Frosted_Mini-Wheats 14.000000 7.000000 100.000000 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 12.000000 10.000000 200.000000 \n", + "Fruitful_Bran 14.000000 12.000000 190.000000 \n", + "Fruity_Pebbles 13.000000 12.000000 25.000000 \n", + "... ... ... ... \n", + "Multi-Grain_Cheerios 15.000000 6.000000 90.000000 \n", + "Nut&Honey_Crunch 15.000000 9.000000 40.000000 \n", + "Nutri-Grain_Almond-Raisin 21.000000 7.000000 130.000000 \n", + "Nutri-grain_Wheat 18.000000 2.000000 90.000000 \n", + "Oatmeal_Raisin_Crisp 13.500000 10.000000 120.000000 \n", + "Post_Nat._Raisin_Bran 11.000000 14.000000 260.000000 \n", + "Product_19 20.000000 3.000000 45.000000 \n", + "Puffed_Rice 13.000000 0.000000 15.000000 \n", + "Puffed_Wheat 10.000000 0.000000 50.000000 \n", + "Quaker_Oat_Squares 14.000000 6.000000 110.000000 \n", + "Quaker_Oatmeal 14.802632 7.026316 110.000000 \n", + "Raisin_Bran 14.000000 12.000000 240.000000 \n", + "Raisin_Nut_Bran 10.500000 8.000000 140.000000 \n", + "Raisin_Squares 15.000000 6.000000 110.000000 \n", + "Rice_Chex 23.000000 2.000000 30.000000 \n", + "Rice_Krispies 22.000000 3.000000 35.000000 \n", + "Shredded_Wheat 16.000000 0.000000 95.000000 \n", + "Shredded_Wheat_'n'Bran 19.000000 0.000000 140.000000 \n", + "Shredded_Wheat_spoon_size 20.000000 0.000000 120.000000 \n", + "Smacks 9.000000 15.000000 40.000000 \n", + "Special_K 16.000000 3.000000 55.000000 \n", + "Strawberry_Fruit_Wheats 
15.000000 5.000000 90.000000 \n", + "Total_Corn_Flakes 21.000000 3.000000 35.000000 \n", + "Total_Raisin_Bran 15.000000 14.000000 230.000000 \n", + "Total_Whole_Grain 16.000000 3.000000 110.000000 \n", + "Triples 21.000000 3.000000 60.000000 \n", + "Trix 13.000000 12.000000 25.000000 \n", + "Wheat_Chex 17.000000 3.000000 115.000000 \n", + "Wheaties 17.000000 3.000000 110.000000 \n", + "Wheaties_Honey_Gold 16.000000 8.000000 60.000000 \n", + "\n", + " vitamins weight cups \n", + "name \n", + "100%_Bran 25 1.00 0.33 \n", + "100%_Natural_Bran 0 1.00 1.00 \n", + "All-Bran 25 1.00 0.33 \n", + "All-Bran_with_Extra_Fiber 25 1.00 0.50 \n", + "Almond_Delight 25 1.00 0.75 \n", + "Apple_Cinnamon_Cheerios 25 1.00 0.75 \n", + "Apple_Jacks 25 1.00 1.00 \n", + "Basic_4 25 1.33 0.75 \n", + "Bran_Chex 25 1.00 0.67 \n", + "Bran_Flakes 25 1.00 0.67 \n", + "Cap'n'Crunch 25 1.00 0.75 \n", + "Cheerios 25 1.00 1.25 \n", + "Cinnamon_Toast_Crunch 25 1.00 0.75 \n", + "Clusters 25 1.00 0.50 \n", + "Cocoa_Puffs 25 1.00 1.00 \n", + "Corn_Chex 25 1.00 1.00 \n", + "Corn_Flakes 25 1.00 1.00 \n", + "Corn_Pops 25 1.00 1.00 \n", + "Count_Chocula 25 1.00 1.00 \n", + "Cracklin'_Oat_Bran 25 1.00 0.50 \n", + "Cream_of_Wheat_(Quick) 0 1.00 1.00 \n", + "Crispix 25 1.00 1.00 \n", + "Crispy_Wheat_&_Raisins 25 1.00 0.75 \n", + "Double_Chex 25 1.00 0.75 \n", + "Froot_Loops 25 1.00 1.00 \n", + "Frosted_Flakes 25 1.00 0.75 \n", + "Frosted_Mini-Wheats 25 1.00 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 25 1.25 0.67 \n", + "Fruitful_Bran 25 1.33 0.67 \n", + "Fruity_Pebbles 25 1.00 0.75 \n", + "... ... ... ... 
\n", + "Multi-Grain_Cheerios 25 1.00 1.00 \n", + "Nut&Honey_Crunch 25 1.00 0.67 \n", + "Nutri-Grain_Almond-Raisin 25 1.33 0.67 \n", + "Nutri-grain_Wheat 25 1.00 1.00 \n", + "Oatmeal_Raisin_Crisp 25 1.25 0.50 \n", + "Post_Nat._Raisin_Bran 25 1.33 0.67 \n", + "Product_19 100 1.00 1.00 \n", + "Puffed_Rice 0 0.50 1.00 \n", + "Puffed_Wheat 0 0.50 1.00 \n", + "Quaker_Oat_Squares 25 1.00 0.50 \n", + "Quaker_Oatmeal 0 1.00 0.67 \n", + "Raisin_Bran 25 1.33 0.75 \n", + "Raisin_Nut_Bran 25 1.00 0.50 \n", + "Raisin_Squares 25 1.00 0.50 \n", + "Rice_Chex 25 1.00 1.13 \n", + "Rice_Krispies 25 1.00 1.00 \n", + "Shredded_Wheat 0 0.83 1.00 \n", + "Shredded_Wheat_'n'Bran 0 1.00 0.67 \n", + "Shredded_Wheat_spoon_size 0 1.00 0.67 \n", + "Smacks 25 1.00 0.75 \n", + "Special_K 25 1.00 1.00 \n", + "Strawberry_Fruit_Wheats 25 1.00 1.00 \n", + "Total_Corn_Flakes 100 1.00 1.00 \n", + "Total_Raisin_Bran 100 1.50 1.00 \n", + "Total_Whole_Grain 100 1.00 1.00 \n", + "Triples 25 1.00 0.75 \n", + "Trix 25 1.00 1.00 \n", + "Wheat_Chex 25 1.00 0.67 \n", + "Wheaties 25 1.00 1.00 \n", + "Wheaties_Honey_Gold 25 1.00 0.75 \n", + "\n", + "[77 rows x 11 columns]" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normalize(filled_in_df_removed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} From e157f672f6490914d85585397f8b89b42c083e8e Mon Sep 17 00:00:00 2001 From: Will Flowers Date: Tue, 9 Jun 2015 00:06:03 -0400 Subject: [PATCH 2/2] 
Initial attempt and some data manipulation --- Honey_nut_clusters.ipynb | 847 +++++++++++++++++++++++++++++++-------- 1 file changed, 677 insertions(+), 170 deletions(-) diff --git a/Honey_nut_clusters.ipynb b/Honey_nut_clusters.ipynb index 0a09e4b..e4a017b 100644 --- a/Honey_nut_clusters.ipynb +++ b/Honey_nut_clusters.ipynb @@ -2,19 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 39, + "execution_count": 46, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "from sklearn.cluster import KMeans\n", + "from sklearn import preprocessing\n" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 47, "metadata": { "collapsed": false }, @@ -34,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 48, "metadata": { "collapsed": false }, @@ -45,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 49, "metadata": { "collapsed": false }, @@ -207,7 +209,7 @@ "Almond_Delight 0.75 " ] }, - "execution_count": 42, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -218,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 50, "metadata": { "collapsed": false }, @@ -241,7 +243,7 @@ "dtype: float64" ] }, - "execution_count": 43, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -252,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 51, "metadata": { "collapsed": false }, @@ -263,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 52, "metadata": { "collapsed": false }, @@ -287,6 +289,7 @@ " sugars\n", " potass\n", " vitamins\n", + " shelf\n", " weight\n", " cups\n", " \n", @@ -305,6 +308,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -321,6 +325,7 @@ " 6.000000\n", " 280.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.33\n", 
" \n", @@ -337,6 +342,7 @@ " 8.000000\n", " 135.000000\n", " 0\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -353,6 +359,7 @@ " 5.000000\n", " 320.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.33\n", " \n", @@ -369,6 +376,7 @@ " 0.000000\n", " 330.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -385,6 +393,7 @@ " 8.000000\n", " 98.666667\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -401,6 +410,7 @@ " 10.000000\n", " 70.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.75\n", " \n", @@ -417,6 +427,7 @@ " 14.000000\n", " 30.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -433,6 +444,7 @@ " 8.000000\n", " 100.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.75\n", " \n", @@ -449,6 +461,7 @@ " 6.000000\n", " 125.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -465,6 +478,7 @@ " 5.000000\n", " 190.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.67\n", " \n", @@ -481,6 +495,7 @@ " 12.000000\n", " 35.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -497,6 +512,7 @@ " 1.000000\n", " 105.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.25\n", " \n", @@ -513,6 +529,7 @@ " 9.000000\n", " 45.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -529,6 +546,7 @@ " 7.000000\n", " 105.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -545,6 +563,7 @@ " 13.000000\n", " 55.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -561,6 +580,7 @@ " 3.000000\n", " 25.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -577,6 +597,7 @@ " 2.000000\n", " 35.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -593,6 +614,7 @@ " 12.000000\n", " 20.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -609,6 +631,7 @@ " 13.000000\n", " 65.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -625,6 +648,7 @@ " 7.000000\n", " 160.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -641,6 +665,7 @@ " 0.000000\n", " 98.666667\n", " 0\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", 
@@ -657,6 +682,7 @@ " 3.000000\n", " 30.000000\n", " 25\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -673,6 +699,7 @@ " 10.000000\n", " 120.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -689,6 +716,7 @@ " 5.000000\n", " 80.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -705,6 +733,7 @@ " 13.000000\n", " 30.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -721,6 +750,7 @@ " 11.000000\n", " 25.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.75\n", " \n", @@ -737,6 +767,7 @@ " 7.000000\n", " 100.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.80\n", " \n", @@ -753,6 +784,7 @@ " 10.000000\n", " 200.000000\n", " 25\n", + " 3\n", " 1.25\n", " 0.67\n", " \n", @@ -769,6 +801,7 @@ " 12.000000\n", " 190.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -785,6 +818,7 @@ " 12.000000\n", " 25.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -803,6 +837,7 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", " Multi-Grain_Cheerios\n", @@ -817,6 +852,7 @@ " 6.000000\n", " 90.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -833,6 +869,7 @@ " 9.000000\n", " 40.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.67\n", " \n", @@ -849,6 +886,7 @@ " 7.000000\n", " 130.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -865,6 +903,7 @@ " 2.000000\n", " 90.000000\n", " 25\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -881,6 +920,7 @@ " 10.000000\n", " 120.000000\n", " 25\n", + " 3\n", " 1.25\n", " 0.50\n", " \n", @@ -897,6 +937,7 @@ " 14.000000\n", " 260.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -913,6 +954,7 @@ " 3.000000\n", " 45.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -929,6 +971,7 @@ " 0.000000\n", " 15.000000\n", " 0\n", + " 3\n", " 0.50\n", " 1.00\n", " \n", @@ -945,6 +988,7 @@ " 0.000000\n", " 50.000000\n", " 0\n", + " 3\n", " 0.50\n", " 1.00\n", " \n", @@ -961,6 +1005,7 @@ " 6.000000\n", " 110.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", 
@@ -977,6 +1022,7 @@ " 7.026316\n", " 110.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -993,6 +1039,7 @@ " 12.000000\n", " 240.000000\n", " 25\n", + " 2\n", " 1.33\n", " 0.75\n", " \n", @@ -1009,6 +1056,7 @@ " 8.000000\n", " 140.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -1025,6 +1073,7 @@ " 6.000000\n", " 110.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -1041,6 +1090,7 @@ " 2.000000\n", " 30.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.13\n", " \n", @@ -1057,6 +1107,7 @@ " 3.000000\n", " 35.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -1073,6 +1124,7 @@ " 0.000000\n", " 95.000000\n", " 0\n", + " 1\n", " 0.83\n", " 1.00\n", " \n", @@ -1089,6 +1141,7 @@ " 0.000000\n", " 140.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -1105,6 +1158,7 @@ " 0.000000\n", " 120.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -1121,6 +1175,7 @@ " 15.000000\n", " 40.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -1137,6 +1192,7 @@ " 3.000000\n", " 55.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -1153,6 +1209,7 @@ " 5.000000\n", " 90.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -1169,6 +1226,7 @@ " 3.000000\n", " 35.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -1185,6 +1243,7 @@ " 14.000000\n", " 230.000000\n", " 100\n", + " 3\n", " 1.50\n", " 1.00\n", " \n", @@ -1201,6 +1260,7 @@ " 3.000000\n", " 110.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -1217,6 +1277,7 @@ " 3.000000\n", " 60.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -1233,6 +1294,7 @@ " 12.000000\n", " 25.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -1249,6 +1311,7 @@ " 3.000000\n", " 115.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -1265,6 +1328,7 @@ " 3.000000\n", " 110.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -1281,12 +1345,13 @@ " 8.000000\n", " 60.000000\n", " 25\n", + " 
1\n", " 1.00\n", " 0.75\n", " \n", " \n", "\n", - "

77 rows × 13 columns

\n", + "

77 rows × 14 columns

\n", "" ], "text/plain": [ @@ -1418,74 +1483,138 @@ "Wheaties 200 3.0 17.000000 3.000000 \n", "Wheaties_Honey_Gold 200 1.0 16.000000 8.000000 \n", "\n", - " potass vitamins weight cups \n", - "name \n", - "100%_Bran 280.000000 25 1.00 0.33 \n", - "100%_Natural_Bran 135.000000 0 1.00 1.00 \n", - "All-Bran 320.000000 25 1.00 0.33 \n", - "All-Bran_with_Extra_Fiber 330.000000 25 1.00 0.50 \n", - "Almond_Delight 98.666667 25 1.00 0.75 \n", - "Apple_Cinnamon_Cheerios 70.000000 25 1.00 0.75 \n", - "Apple_Jacks 30.000000 25 1.00 1.00 \n", - "Basic_4 100.000000 25 1.33 0.75 \n", - "Bran_Chex 125.000000 25 1.00 0.67 \n", - "Bran_Flakes 190.000000 25 1.00 0.67 \n", - "Cap'n'Crunch 35.000000 25 1.00 0.75 \n", - "Cheerios 105.000000 25 1.00 1.25 \n", - "Cinnamon_Toast_Crunch 45.000000 25 1.00 0.75 \n", - "Clusters 105.000000 25 1.00 0.50 \n", - "Cocoa_Puffs 55.000000 25 1.00 1.00 \n", - "Corn_Chex 25.000000 25 1.00 1.00 \n", - "Corn_Flakes 35.000000 25 1.00 1.00 \n", - "Corn_Pops 20.000000 25 1.00 1.00 \n", - "Count_Chocula 65.000000 25 1.00 1.00 \n", - "Cracklin'_Oat_Bran 160.000000 25 1.00 0.50 \n", - "Cream_of_Wheat_(Quick) 98.666667 0 1.00 1.00 \n", - "Crispix 30.000000 25 1.00 1.00 \n", - "Crispy_Wheat_&_Raisins 120.000000 25 1.00 0.75 \n", - "Double_Chex 80.000000 25 1.00 0.75 \n", - "Froot_Loops 30.000000 25 1.00 1.00 \n", - "Frosted_Flakes 25.000000 25 1.00 0.75 \n", - "Frosted_Mini-Wheats 100.000000 25 1.00 0.80 \n", - "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 200.000000 25 1.25 0.67 \n", - "Fruitful_Bran 190.000000 25 1.33 0.67 \n", - "Fruity_Pebbles 25.000000 25 1.00 0.75 \n", - "... ... ... ... ... 
\n", - "Multi-Grain_Cheerios 90.000000 25 1.00 1.00 \n", - "Nut&Honey_Crunch 40.000000 25 1.00 0.67 \n", - "Nutri-Grain_Almond-Raisin 130.000000 25 1.33 0.67 \n", - "Nutri-grain_Wheat 90.000000 25 1.00 1.00 \n", - "Oatmeal_Raisin_Crisp 120.000000 25 1.25 0.50 \n", - "Post_Nat._Raisin_Bran 260.000000 25 1.33 0.67 \n", - "Product_19 45.000000 100 1.00 1.00 \n", - "Puffed_Rice 15.000000 0 0.50 1.00 \n", - "Puffed_Wheat 50.000000 0 0.50 1.00 \n", - "Quaker_Oat_Squares 110.000000 25 1.00 0.50 \n", - "Quaker_Oatmeal 110.000000 0 1.00 0.67 \n", - "Raisin_Bran 240.000000 25 1.33 0.75 \n", - "Raisin_Nut_Bran 140.000000 25 1.00 0.50 \n", - "Raisin_Squares 110.000000 25 1.00 0.50 \n", - "Rice_Chex 30.000000 25 1.00 1.13 \n", - "Rice_Krispies 35.000000 25 1.00 1.00 \n", - "Shredded_Wheat 95.000000 0 0.83 1.00 \n", - "Shredded_Wheat_'n'Bran 140.000000 0 1.00 0.67 \n", - "Shredded_Wheat_spoon_size 120.000000 0 1.00 0.67 \n", - "Smacks 40.000000 25 1.00 0.75 \n", - "Special_K 55.000000 25 1.00 1.00 \n", - "Strawberry_Fruit_Wheats 90.000000 25 1.00 1.00 \n", - "Total_Corn_Flakes 35.000000 100 1.00 1.00 \n", - "Total_Raisin_Bran 230.000000 100 1.50 1.00 \n", - "Total_Whole_Grain 110.000000 100 1.00 1.00 \n", - "Triples 60.000000 25 1.00 0.75 \n", - "Trix 25.000000 25 1.00 1.00 \n", - "Wheat_Chex 115.000000 25 1.00 0.67 \n", - "Wheaties 110.000000 25 1.00 1.00 \n", - "Wheaties_Honey_Gold 60.000000 25 1.00 0.75 \n", + " potass vitamins shelf weight \\\n", + "name \n", + "100%_Bran 280.000000 25 3 1.00 \n", + "100%_Natural_Bran 135.000000 0 3 1.00 \n", + "All-Bran 320.000000 25 3 1.00 \n", + "All-Bran_with_Extra_Fiber 330.000000 25 3 1.00 \n", + "Almond_Delight 98.666667 25 3 1.00 \n", + "Apple_Cinnamon_Cheerios 70.000000 25 1 1.00 \n", + "Apple_Jacks 30.000000 25 2 1.00 \n", + "Basic_4 100.000000 25 3 1.33 \n", + "Bran_Chex 125.000000 25 1 1.00 \n", + "Bran_Flakes 190.000000 25 3 1.00 \n", + "Cap'n'Crunch 35.000000 25 2 1.00 \n", + "Cheerios 105.000000 25 1 1.00 \n", + 
"Cinnamon_Toast_Crunch 45.000000 25 2 1.00 \n", + "Clusters 105.000000 25 3 1.00 \n", + "Cocoa_Puffs 55.000000 25 2 1.00 \n", + "Corn_Chex 25.000000 25 1 1.00 \n", + "Corn_Flakes 35.000000 25 1 1.00 \n", + "Corn_Pops 20.000000 25 2 1.00 \n", + "Count_Chocula 65.000000 25 2 1.00 \n", + "Cracklin'_Oat_Bran 160.000000 25 3 1.00 \n", + "Cream_of_Wheat_(Quick) 98.666667 0 2 1.00 \n", + "Crispix 30.000000 25 3 1.00 \n", + "Crispy_Wheat_&_Raisins 120.000000 25 3 1.00 \n", + "Double_Chex 80.000000 25 3 1.00 \n", + "Froot_Loops 30.000000 25 2 1.00 \n", + "Frosted_Flakes 25.000000 25 1 1.00 \n", + "Frosted_Mini-Wheats 100.000000 25 2 1.00 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 200.000000 25 3 1.25 \n", + "Fruitful_Bran 190.000000 25 3 1.33 \n", + "Fruity_Pebbles 25.000000 25 2 1.00 \n", + "... ... ... ... ... \n", + "Multi-Grain_Cheerios 90.000000 25 1 1.00 \n", + "Nut&Honey_Crunch 40.000000 25 2 1.00 \n", + "Nutri-Grain_Almond-Raisin 130.000000 25 3 1.33 \n", + "Nutri-grain_Wheat 90.000000 25 3 1.00 \n", + "Oatmeal_Raisin_Crisp 120.000000 25 3 1.25 \n", + "Post_Nat._Raisin_Bran 260.000000 25 3 1.33 \n", + "Product_19 45.000000 100 3 1.00 \n", + "Puffed_Rice 15.000000 0 3 0.50 \n", + "Puffed_Wheat 50.000000 0 3 0.50 \n", + "Quaker_Oat_Squares 110.000000 25 3 1.00 \n", + "Quaker_Oatmeal 110.000000 0 1 1.00 \n", + "Raisin_Bran 240.000000 25 2 1.33 \n", + "Raisin_Nut_Bran 140.000000 25 3 1.00 \n", + "Raisin_Squares 110.000000 25 3 1.00 \n", + "Rice_Chex 30.000000 25 1 1.00 \n", + "Rice_Krispies 35.000000 25 1 1.00 \n", + "Shredded_Wheat 95.000000 0 1 0.83 \n", + "Shredded_Wheat_'n'Bran 140.000000 0 1 1.00 \n", + "Shredded_Wheat_spoon_size 120.000000 0 1 1.00 \n", + "Smacks 40.000000 25 2 1.00 \n", + "Special_K 55.000000 25 1 1.00 \n", + "Strawberry_Fruit_Wheats 90.000000 25 2 1.00 \n", + "Total_Corn_Flakes 35.000000 100 3 1.00 \n", + "Total_Raisin_Bran 230.000000 100 3 1.50 \n", + "Total_Whole_Grain 110.000000 100 3 1.00 \n", + "Triples 60.000000 25 3 1.00 \n", + "Trix 
25.000000 25 2 1.00 \n", + "Wheat_Chex 115.000000 25 1 1.00 \n", + "Wheaties 110.000000 25 1 1.00 \n", + "Wheaties_Honey_Gold 60.000000 25 1 1.00 \n", + "\n", + " cups \n", + "name \n", + "100%_Bran 0.33 \n", + "100%_Natural_Bran 1.00 \n", + "All-Bran 0.33 \n", + "All-Bran_with_Extra_Fiber 0.50 \n", + "Almond_Delight 0.75 \n", + "Apple_Cinnamon_Cheerios 0.75 \n", + "Apple_Jacks 1.00 \n", + "Basic_4 0.75 \n", + "Bran_Chex 0.67 \n", + "Bran_Flakes 0.67 \n", + "Cap'n'Crunch 0.75 \n", + "Cheerios 1.25 \n", + "Cinnamon_Toast_Crunch 0.75 \n", + "Clusters 0.50 \n", + "Cocoa_Puffs 1.00 \n", + "Corn_Chex 1.00 \n", + "Corn_Flakes 1.00 \n", + "Corn_Pops 1.00 \n", + "Count_Chocula 1.00 \n", + "Cracklin'_Oat_Bran 0.50 \n", + "Cream_of_Wheat_(Quick) 1.00 \n", + "Crispix 1.00 \n", + "Crispy_Wheat_&_Raisins 0.75 \n", + "Double_Chex 0.75 \n", + "Froot_Loops 1.00 \n", + "Frosted_Flakes 0.75 \n", + "Frosted_Mini-Wheats 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 0.67 \n", + "Fruitful_Bran 0.67 \n", + "Fruity_Pebbles 0.75 \n", + "... ... 
\n", + "Multi-Grain_Cheerios 1.00 \n", + "Nut&Honey_Crunch 0.67 \n", + "Nutri-Grain_Almond-Raisin 0.67 \n", + "Nutri-grain_Wheat 1.00 \n", + "Oatmeal_Raisin_Crisp 0.50 \n", + "Post_Nat._Raisin_Bran 0.67 \n", + "Product_19 1.00 \n", + "Puffed_Rice 1.00 \n", + "Puffed_Wheat 1.00 \n", + "Quaker_Oat_Squares 0.50 \n", + "Quaker_Oatmeal 0.67 \n", + "Raisin_Bran 0.75 \n", + "Raisin_Nut_Bran 0.50 \n", + "Raisin_Squares 0.50 \n", + "Rice_Chex 1.13 \n", + "Rice_Krispies 1.00 \n", + "Shredded_Wheat 1.00 \n", + "Shredded_Wheat_'n'Bran 0.67 \n", + "Shredded_Wheat_spoon_size 0.67 \n", + "Smacks 0.75 \n", + "Special_K 1.00 \n", + "Strawberry_Fruit_Wheats 1.00 \n", + "Total_Corn_Flakes 1.00 \n", + "Total_Raisin_Bran 1.00 \n", + "Total_Whole_Grain 1.00 \n", + "Triples 0.75 \n", + "Trix 1.00 \n", + "Wheat_Chex 0.67 \n", + "Wheaties 1.00 \n", + "Wheaties_Honey_Gold 0.75 \n", "\n", - "[77 rows x 13 columns]" + "[77 rows x 14 columns]" ] }, - "execution_count": 56, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -1496,7 +1625,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 53, "metadata": { "collapsed": false }, @@ -1507,7 +1636,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 54, "metadata": { "collapsed": false, "scrolled": true @@ -1530,6 +1659,7 @@ " sugars\n", " potass\n", " vitamins\n", + " shelf\n", " weight\n", " cups\n", " \n", @@ -1546,6 +1676,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -1560,6 +1691,7 @@ " 6\n", " 280.000000\n", " 25\n", + " 3\n", " 1\n", " 0.33\n", " \n", @@ -1574,6 +1706,7 @@ " 8\n", " 135.000000\n", " 0\n", + " 3\n", " 1\n", " 1.00\n", " \n", @@ -1588,6 +1721,7 @@ " 5\n", " 320.000000\n", " 25\n", + " 3\n", " 1\n", " 0.33\n", " \n", @@ -1602,6 +1736,7 @@ " 0\n", " 330.000000\n", " 25\n", + " 3\n", " 1\n", " 0.50\n", " \n", @@ -1616,6 +1751,7 @@ " 8\n", " 98.666667\n", " 25\n", + " 3\n", " 1\n", " 0.75\n", " \n", @@ -1632,16 +1768,16 @@ 
"All-Bran_with_Extra_Fiber 50 4 0 140 14 8 \n", "Almond_Delight 110 2 2 200 1 14 \n", "\n", - " sugars potass vitamins weight cups \n", - "name \n", - "100%_Bran 6 280.000000 25 1 0.33 \n", - "100%_Natural_Bran 8 135.000000 0 1 1.00 \n", - "All-Bran 5 320.000000 25 1 0.33 \n", - "All-Bran_with_Extra_Fiber 0 330.000000 25 1 0.50 \n", - "Almond_Delight 8 98.666667 25 1 0.75 " + " sugars potass vitamins shelf weight cups \n", + "name \n", + "100%_Bran 6 280.000000 25 3 1 0.33 \n", + "100%_Natural_Bran 8 135.000000 0 3 1 1.00 \n", + "All-Bran 5 320.000000 25 3 1 0.33 \n", + "All-Bran_with_Extra_Fiber 0 330.000000 25 3 1 0.50 \n", + "Almond_Delight 8 98.666667 25 3 1 0.75 " ] }, - "execution_count": 58, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -1652,7 +1788,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 55, "metadata": { "collapsed": false }, @@ -1669,12 +1805,13 @@ "sugars float64\n", "potass float64\n", "vitamins int64\n", + "shelf int64\n", "weight float64\n", "cups float64\n", "dtype: object" ] }, - "execution_count": 59, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1692,7 +1829,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 56, "metadata": { "collapsed": true }, @@ -1703,7 +1840,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 57, "metadata": { "collapsed": false }, @@ -1725,6 +1862,7 @@ " sugars\n", " potass\n", " vitamins\n", + " shelf\n", " weight\n", " cups\n", " \n", @@ -1741,6 +1879,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -1755,6 +1894,7 @@ " 18\n", " 840\n", " 75\n", + " 9\n", " 3\n", " 0.99\n", " \n", @@ -1769,6 +1909,7 @@ " 24\n", " 405\n", " 0\n", + " 9\n", " 3\n", " 3.00\n", " \n", @@ -1783,6 +1924,7 @@ " 15\n", " 960\n", " 75\n", + " 9\n", " 3\n", " 0.99\n", " \n", @@ -1797,6 +1939,7 @@ " 0\n", " 990\n", " 75\n", + " 9\n", " 3\n", " 1.50\n", " \n", @@ -1811,6 +1954,7 @@ " 24\n", 
" 296\n", " 75\n", + " 9\n", " 3\n", " 2.25\n", " \n", @@ -1827,16 +1971,16 @@ "All-Bran_with_Extra_Fiber 150 12 0 420 42 24 \n", "Almond_Delight 330 6 6 600 3 42 \n", "\n", - " sugars potass vitamins weight cups \n", - "name \n", - "100%_Bran 18 840 75 3 0.99 \n", - "100%_Natural_Bran 24 405 0 3 3.00 \n", - "All-Bran 15 960 75 3 0.99 \n", - "All-Bran_with_Extra_Fiber 0 990 75 3 1.50 \n", - "Almond_Delight 24 296 75 3 2.25 " + " sugars potass vitamins shelf weight cups \n", + "name \n", + "100%_Bran 18 840 75 9 3 0.99 \n", + "100%_Natural_Bran 24 405 0 9 3 3.00 \n", + "All-Bran 15 960 75 9 3 0.99 \n", + "All-Bran_with_Extra_Fiber 0 990 75 9 3 1.50 \n", + "Almond_Delight 24 296 75 9 3 2.25 " ] }, - "execution_count": 71, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -1847,7 +1991,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 58, "metadata": { "collapsed": false }, @@ -1855,10 +1999,10 @@ { "data": { "text/plain": [ - "0.98999999999999999" + "3.0" ] }, - "execution_count": 82, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -1878,7 +2022,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 59, "metadata": { "collapsed": false }, @@ -1892,7 +2036,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 60, "metadata": { "collapsed": false }, @@ -1914,6 +2058,7 @@ " sugars\n", " potass\n", " vitamins\n", + " shelf\n", " weight\n", " cups\n", " \n", @@ -1930,6 +2075,7 @@ " \n", " \n", " \n", + " \n", " \n", " \n", " \n", @@ -1944,6 +2090,7 @@ " 6.000000\n", " 280.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.33\n", " \n", @@ -1958,6 +2105,7 @@ " 8.000000\n", " 135.000000\n", " 0\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -1972,6 +2120,7 @@ " 5.000000\n", " 320.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.33\n", " \n", @@ -1986,6 +2135,7 @@ " 0.000000\n", " 330.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2000,6 
+2150,7 @@ " 8.000000\n", " 98.666667\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -2014,6 +2165,7 @@ " 10.000000\n", " 70.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.75\n", " \n", @@ -2028,6 +2180,7 @@ " 14.000000\n", " 30.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2042,6 +2195,7 @@ " 8.000000\n", " 100.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.75\n", " \n", @@ -2056,6 +2210,7 @@ " 6.000000\n", " 125.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -2070,6 +2225,7 @@ " 5.000000\n", " 190.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.67\n", " \n", @@ -2084,6 +2240,7 @@ " 12.000000\n", " 35.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -2098,6 +2255,7 @@ " 1.000000\n", " 105.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.25\n", " \n", @@ -2112,6 +2270,7 @@ " 9.000000\n", " 45.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -2126,6 +2285,7 @@ " 7.000000\n", " 105.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2140,6 +2300,7 @@ " 13.000000\n", " 55.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2154,6 +2315,7 @@ " 3.000000\n", " 25.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2168,6 +2330,7 @@ " 2.000000\n", " 35.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2182,6 +2345,7 @@ " 12.000000\n", " 20.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2196,6 +2360,7 @@ " 13.000000\n", " 65.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2210,6 +2375,7 @@ " 7.000000\n", " 160.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2224,6 +2390,7 @@ " 0.000000\n", " 98.666667\n", " 0\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2238,6 +2405,7 @@ " 3.000000\n", " 30.000000\n", " 25\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -2252,6 +2420,7 @@ " 10.000000\n", " 120.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -2266,6 +2435,7 @@ " 5.000000\n", " 80.000000\n", " 25\n", + " 3\n", " 
1.00\n", " 0.75\n", " \n", @@ -2280,6 +2450,7 @@ " 13.000000\n", " 30.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2294,6 +2465,7 @@ " 11.000000\n", " 25.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.75\n", " \n", @@ -2308,6 +2480,7 @@ " 7.000000\n", " 100.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.80\n", " \n", @@ -2322,6 +2495,7 @@ " 10.000000\n", " 200.000000\n", " 25\n", + " 3\n", " 1.25\n", " 0.67\n", " \n", @@ -2336,6 +2510,7 @@ " 12.000000\n", " 190.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -2350,6 +2525,7 @@ " 12.000000\n", " 25.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -2366,6 +2542,7 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", " Multi-Grain_Cheerios\n", @@ -2378,6 +2555,7 @@ " 6.000000\n", " 90.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2392,6 +2570,7 @@ " 9.000000\n", " 40.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.67\n", " \n", @@ -2406,6 +2585,7 @@ " 7.000000\n", " 130.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -2420,6 +2600,7 @@ " 2.000000\n", " 90.000000\n", " 25\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -2434,6 +2615,7 @@ " 10.000000\n", " 120.000000\n", " 25\n", + " 3\n", " 1.25\n", " 0.50\n", " \n", @@ -2448,6 +2630,7 @@ " 14.000000\n", " 260.000000\n", " 25\n", + " 3\n", " 1.33\n", " 0.67\n", " \n", @@ -2462,6 +2645,7 @@ " 3.000000\n", " 45.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -2476,6 +2660,7 @@ " 0.000000\n", " 15.000000\n", " 0\n", + " 3\n", " 0.50\n", " 1.00\n", " \n", @@ -2490,6 +2675,7 @@ " 0.000000\n", " 50.000000\n", " 0\n", + " 3\n", " 0.50\n", " 1.00\n", " \n", @@ -2504,6 +2690,7 @@ " 6.000000\n", " 110.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2518,6 +2705,7 @@ " 7.026316\n", " 110.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -2532,6 +2720,7 @@ " 12.000000\n", " 240.000000\n", " 25\n", + " 2\n", " 1.33\n", " 0.75\n", " \n", @@ -2546,6 +2735,7 @@ " 
8.000000\n", " 140.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2560,6 +2750,7 @@ " 6.000000\n", " 110.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.50\n", " \n", @@ -2574,6 +2765,7 @@ " 2.000000\n", " 30.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.13\n", " \n", @@ -2588,6 +2780,7 @@ " 3.000000\n", " 35.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2602,6 +2795,7 @@ " 0.000000\n", " 95.000000\n", " 0\n", + " 1\n", " 0.83\n", " 1.00\n", " \n", @@ -2616,6 +2810,7 @@ " 0.000000\n", " 140.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -2630,6 +2825,7 @@ " 0.000000\n", " 120.000000\n", " 0\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -2644,6 +2840,7 @@ " 15.000000\n", " 40.000000\n", " 25\n", + " 2\n", " 1.00\n", " 0.75\n", " \n", @@ -2658,6 +2855,7 @@ " 3.000000\n", " 55.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2672,6 +2870,7 @@ " 5.000000\n", " 90.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2686,6 +2885,7 @@ " 3.000000\n", " 35.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -2700,6 +2900,7 @@ " 14.000000\n", " 230.000000\n", " 100\n", + " 3\n", " 1.50\n", " 1.00\n", " \n", @@ -2714,6 +2915,7 @@ " 3.000000\n", " 110.000000\n", " 100\n", + " 3\n", " 1.00\n", " 1.00\n", " \n", @@ -2728,6 +2930,7 @@ " 3.000000\n", " 60.000000\n", " 25\n", + " 3\n", " 1.00\n", " 0.75\n", " \n", @@ -2742,6 +2945,7 @@ " 12.000000\n", " 25.000000\n", " 25\n", + " 2\n", " 1.00\n", " 1.00\n", " \n", @@ -2756,6 +2960,7 @@ " 3.000000\n", " 115.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.67\n", " \n", @@ -2770,6 +2975,7 @@ " 3.000000\n", " 110.000000\n", " 25\n", + " 1\n", " 1.00\n", " 1.00\n", " \n", @@ -2784,12 +2990,13 @@ " 8.000000\n", " 60.000000\n", " 25\n", + " 1\n", " 1.00\n", " 0.75\n", " \n", " \n", "\n", - "

77 rows × 11 columns

\n", + "

77 rows × 12 columns

\n", "" ], "text/plain": [ @@ -2921,74 +3128,74 @@ "Wheaties 17.000000 3.000000 110.000000 \n", "Wheaties_Honey_Gold 16.000000 8.000000 60.000000 \n", "\n", - " vitamins weight cups \n", - "name \n", - "100%_Bran 25 1.00 0.33 \n", - "100%_Natural_Bran 0 1.00 1.00 \n", - "All-Bran 25 1.00 0.33 \n", - "All-Bran_with_Extra_Fiber 25 1.00 0.50 \n", - "Almond_Delight 25 1.00 0.75 \n", - "Apple_Cinnamon_Cheerios 25 1.00 0.75 \n", - "Apple_Jacks 25 1.00 1.00 \n", - "Basic_4 25 1.33 0.75 \n", - "Bran_Chex 25 1.00 0.67 \n", - "Bran_Flakes 25 1.00 0.67 \n", - "Cap'n'Crunch 25 1.00 0.75 \n", - "Cheerios 25 1.00 1.25 \n", - "Cinnamon_Toast_Crunch 25 1.00 0.75 \n", - "Clusters 25 1.00 0.50 \n", - "Cocoa_Puffs 25 1.00 1.00 \n", - "Corn_Chex 25 1.00 1.00 \n", - "Corn_Flakes 25 1.00 1.00 \n", - "Corn_Pops 25 1.00 1.00 \n", - "Count_Chocula 25 1.00 1.00 \n", - "Cracklin'_Oat_Bran 25 1.00 0.50 \n", - "Cream_of_Wheat_(Quick) 0 1.00 1.00 \n", - "Crispix 25 1.00 1.00 \n", - "Crispy_Wheat_&_Raisins 25 1.00 0.75 \n", - "Double_Chex 25 1.00 0.75 \n", - "Froot_Loops 25 1.00 1.00 \n", - "Frosted_Flakes 25 1.00 0.75 \n", - "Frosted_Mini-Wheats 25 1.00 0.80 \n", - "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 25 1.25 0.67 \n", - "Fruitful_Bran 25 1.33 0.67 \n", - "Fruity_Pebbles 25 1.00 0.75 \n", - "... ... ... ... 
\n", - "Multi-Grain_Cheerios 25 1.00 1.00 \n", - "Nut&Honey_Crunch 25 1.00 0.67 \n", - "Nutri-Grain_Almond-Raisin 25 1.33 0.67 \n", - "Nutri-grain_Wheat 25 1.00 1.00 \n", - "Oatmeal_Raisin_Crisp 25 1.25 0.50 \n", - "Post_Nat._Raisin_Bran 25 1.33 0.67 \n", - "Product_19 100 1.00 1.00 \n", - "Puffed_Rice 0 0.50 1.00 \n", - "Puffed_Wheat 0 0.50 1.00 \n", - "Quaker_Oat_Squares 25 1.00 0.50 \n", - "Quaker_Oatmeal 0 1.00 0.67 \n", - "Raisin_Bran 25 1.33 0.75 \n", - "Raisin_Nut_Bran 25 1.00 0.50 \n", - "Raisin_Squares 25 1.00 0.50 \n", - "Rice_Chex 25 1.00 1.13 \n", - "Rice_Krispies 25 1.00 1.00 \n", - "Shredded_Wheat 0 0.83 1.00 \n", - "Shredded_Wheat_'n'Bran 0 1.00 0.67 \n", - "Shredded_Wheat_spoon_size 0 1.00 0.67 \n", - "Smacks 25 1.00 0.75 \n", - "Special_K 25 1.00 1.00 \n", - "Strawberry_Fruit_Wheats 25 1.00 1.00 \n", - "Total_Corn_Flakes 100 1.00 1.00 \n", - "Total_Raisin_Bran 100 1.50 1.00 \n", - "Total_Whole_Grain 100 1.00 1.00 \n", - "Triples 25 1.00 0.75 \n", - "Trix 25 1.00 1.00 \n", - "Wheat_Chex 25 1.00 0.67 \n", - "Wheaties 25 1.00 1.00 \n", - "Wheaties_Honey_Gold 25 1.00 0.75 \n", + " vitamins shelf weight cups \n", + "name \n", + "100%_Bran 25 3 1.00 0.33 \n", + "100%_Natural_Bran 0 3 1.00 1.00 \n", + "All-Bran 25 3 1.00 0.33 \n", + "All-Bran_with_Extra_Fiber 25 3 1.00 0.50 \n", + "Almond_Delight 25 3 1.00 0.75 \n", + "Apple_Cinnamon_Cheerios 25 1 1.00 0.75 \n", + "Apple_Jacks 25 2 1.00 1.00 \n", + "Basic_4 25 3 1.33 0.75 \n", + "Bran_Chex 25 1 1.00 0.67 \n", + "Bran_Flakes 25 3 1.00 0.67 \n", + "Cap'n'Crunch 25 2 1.00 0.75 \n", + "Cheerios 25 1 1.00 1.25 \n", + "Cinnamon_Toast_Crunch 25 2 1.00 0.75 \n", + "Clusters 25 3 1.00 0.50 \n", + "Cocoa_Puffs 25 2 1.00 1.00 \n", + "Corn_Chex 25 1 1.00 1.00 \n", + "Corn_Flakes 25 1 1.00 1.00 \n", + "Corn_Pops 25 2 1.00 1.00 \n", + "Count_Chocula 25 2 1.00 1.00 \n", + "Cracklin'_Oat_Bran 25 3 1.00 0.50 \n", + "Cream_of_Wheat_(Quick) 0 2 1.00 1.00 \n", + "Crispix 25 3 1.00 1.00 \n", + "Crispy_Wheat_&_Raisins 25 3 
1.00 0.75 \n", + "Double_Chex 25 3 1.00 0.75 \n", + "Froot_Loops 25 2 1.00 1.00 \n", + "Frosted_Flakes 25 1 1.00 0.75 \n", + "Frosted_Mini-Wheats 25 2 1.00 0.80 \n", + "Fruit_&_Fibre_Dates,_Walnuts,_and_Oats 25 3 1.25 0.67 \n", + "Fruitful_Bran 25 3 1.33 0.67 \n", + "Fruity_Pebbles 25 2 1.00 0.75 \n", + "... ... ... ... ... \n", + "Multi-Grain_Cheerios 25 1 1.00 1.00 \n", + "Nut&Honey_Crunch 25 2 1.00 0.67 \n", + "Nutri-Grain_Almond-Raisin 25 3 1.33 0.67 \n", + "Nutri-grain_Wheat 25 3 1.00 1.00 \n", + "Oatmeal_Raisin_Crisp 25 3 1.25 0.50 \n", + "Post_Nat._Raisin_Bran 25 3 1.33 0.67 \n", + "Product_19 100 3 1.00 1.00 \n", + "Puffed_Rice 0 3 0.50 1.00 \n", + "Puffed_Wheat 0 3 0.50 1.00 \n", + "Quaker_Oat_Squares 25 3 1.00 0.50 \n", + "Quaker_Oatmeal 0 1 1.00 0.67 \n", + "Raisin_Bran 25 2 1.33 0.75 \n", + "Raisin_Nut_Bran 25 3 1.00 0.50 \n", + "Raisin_Squares 25 3 1.00 0.50 \n", + "Rice_Chex 25 1 1.00 1.13 \n", + "Rice_Krispies 25 1 1.00 1.00 \n", + "Shredded_Wheat 0 1 0.83 1.00 \n", + "Shredded_Wheat_'n'Bran 0 1 1.00 0.67 \n", + "Shredded_Wheat_spoon_size 0 1 1.00 0.67 \n", + "Smacks 25 2 1.00 0.75 \n", + "Special_K 25 1 1.00 1.00 \n", + "Strawberry_Fruit_Wheats 25 2 1.00 1.00 \n", + "Total_Corn_Flakes 100 3 1.00 1.00 \n", + "Total_Raisin_Bran 100 3 1.50 1.00 \n", + "Total_Whole_Grain 100 3 1.00 1.00 \n", + "Triples 25 3 1.00 0.75 \n", + "Trix 25 2 1.00 1.00 \n", + "Wheat_Chex 25 1 1.00 0.67 \n", + "Wheaties 25 1 1.00 1.00 \n", + "Wheaties_Honey_Gold 25 1 1.00 0.75 \n", "\n", - "[77 rows x 11 columns]" + "[77 rows x 12 columns]" ] }, - "execution_count": 90, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -2997,6 +3204,306 @@ "normalize(filled_in_df_removed)" ] }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid character in identifier (, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m File 
\u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m df.div(df.cups, axis=“index”)`\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid character in identifier\n" + ] + } + ], + "source": [ + "df.div(df.cups, axis=“index”)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "norm_df= filled_in_df_removed.div(df.cups, axis=\"index\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "norm_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "processed = preprocessing.scale(norm_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.14734117, 3.42380788, 0.97072001, 1.32384857, 4.72937882,\n", + " -0.52840366, 1.51635962, 4.19746977, 1.45991832, 2.76273944,\n", + " 2.67555812, 0. 
])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "processed[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,\n", + " n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,\n", + " verbose=0)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k_means_method = KMeans(3)\n", + "k_means_method.fit(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "three_clusters = k_means_method.predict(processed)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 0, 1, 1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0,\n", + " 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0,\n", + " 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 2, 0, 0, 0, 0, 0, 0], dtype=int32)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "three_clusters\n" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "four_k_means = KMeans(4)\n", + "four_k_means.fit(processed)\n", + "four_clusters = four_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "five_k_means = KMeans(5)\n", + "five_k_means.fit(processed)\n", + "five_clusters = five_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "collapsed": true + }, + 
"outputs": [], + "source": [ + "six_k_means = KMeans(6)\n", + "six_k_means.fit(processed)\n", + "six_clusters = six_k_means.predict(processed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import matplotlib\n", + "import numpy as np\n", + "import math\n", + "\n", + "from scipy.spatial import Voronoi, voronoi_plot_2d\n", + "%matplotlib inline\n", + "from sklearn.decomposition import PCA as PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'KMeans' object is not iterable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mcereal_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpreprocessing\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnorm_df\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mcentroids\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclusters\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mKMeans\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprocessed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: 'KMeans' object is not iterable" + ] + } + ], + "source": [ + "cereal_data = preprocessing.scale(norm_df)\n", + "centroids, clusters = KMeans(processed, 3)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#Plotting (attempt)" + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ +
"# Cluster the rows of `processed` into three groups and draw them in two dimensions.\n", +
"# The model is fitted in this cell so that the centroids, the labels and the PCA\n", +
"# projection are all computed from the same feature matrix.\n", +
"plot_k_means = KMeans(n_clusters=3).fit(processed)\n", +
"centroids = plot_k_means.cluster_centers_\n", +
"labels = plot_k_means.labels_\n", +
"\n", +
"# Project both the samples and the centroids onto the first two principal components.\n", +
"pca = PCA(n_components=2).fit(processed)\n", +
"points_2d = pca.transform(processed)\n", +
"centroids_2d = pca.transform(centroids)\n", +
"colors = [\"#E2415F\", \"#7FAF1B\", \"#090129\"]\n", +
"markers = [\"v\", \"D\", \"s\"]\n", +
"\n", +
"plt.figure(figsize=(8, 6))\n", +
"axes = plt.subplot(1, 1, 1)\n", +
"\n", +
"# One scatter call per cluster so every cluster gets its own colour and marker.\n", +
"for i, (color, marker) in enumerate(zip(colors, markers)):\n", +
"    members = points_2d[labels == i]\n", +
"    plt.scatter(members[:, 0], members[:, 1], color=color, marker=marker)\n", +
"\n", +
"# alpha must be a number, not the string '0.5'.\n", +
"plt.scatter(centroids_2d[:, 0], centroids_2d[:, 1], color='g', alpha=0.5, marker='o', s=250)\n", +
"\n", +
"# NOTE(review): voronoi_polygons is not defined in the cells shown here -- confirm it\n", +
"# is defined or imported elsewhere in the notebook before running this cell.\n", +
"polys = voronoi_polygons(centroids_2d)\n", +
"\n", +
"for poly in polys:\n", +
"    # facecolor expects a flat RGB triple, i.e. shape (3,) rather than (3, 1).\n", +
"    p = matplotlib.patches.Polygon(poly, facecolor=np.random.rand(3), alpha=0.1)\n", +
"    axes.add_patch(p)\n", +
"\n", +
"plt.title(\"Cereal dataset with three clusters\")\n", +
"plt.show()\n" + ] + }, { "cell_type": "code", "execution_count": null,