From f10a94884e597931052145526a3ddc006d90f74b Mon Sep 17 00:00:00 2001
From: Bill Duncan <wdduncan@gmail.com>
Date: Wed, 7 Nov 2018 22:53:53 -0500
Subject: [PATCH] update notebook

---
 src/analysis/surival_01.ipynb | 1216 +++++++--------------------------
 1 file changed, 259 insertions(+), 957 deletions(-)
diff --git a/src/analysis/surival_01.ipynb b/src/analysis/surival_01.ipynb
index 0111215..4cef3db 100644
--- a/src/analysis/surival_01.ipynb
+++ b/src/analysis/surival_01.ipynb
@@ -75,13 +75,6 @@
     "# df_tooth_surface_procedures.head() # good"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 6,
@@ -111,13 +104,6 @@
     "len(df_patients_teeth) # 1,231,726"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 8,
@@ -159,13 +145,6 @@
     "plt.show()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 10,
@@ -234,225 +213,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 198,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tooth_id</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>tooth_num</th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>953</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2098</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1831</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1373</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>1226</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>624</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>842</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>740</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>772</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>817</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>603</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>1226</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>1354</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>1819</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>2275</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>962</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>898</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>2055</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>1941</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>1022</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>551</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>236</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>321</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>336</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25</th>\n",
-       "      <td>368</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>316</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>240</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>598</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>29</th>\n",
-       "      <td>1051</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>30</th>\n",
-       "      <td>1968</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>31</th>\n",
-       "      <td>2186</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>32</th>\n",
-       "      <td>875</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           tooth_id\n",
-       "tooth_num          \n",
-       "1               953\n",
-       "2              2098\n",
-       "3              1831\n",
-       "4              1373\n",
-       "5              1226\n",
-       "6               624\n",
-       "7               842\n",
-       "8               740\n",
-       "9               772\n",
-       "10              817\n",
-       "11              603\n",
-       "12             1226\n",
-       "13             1354\n",
-       "14             1819\n",
-       "15             2275\n",
-       "16              962\n",
-       "17              898\n",
-       "18             2055\n",
-       "19             1941\n",
-       "20             1022\n",
-       "21              551\n",
-       "22              236\n",
-       "23              321\n",
-       "24              336\n",
-       "25              368\n",
-       "26              316\n",
-       "27              240\n",
-       "28              598\n",
-       "29             1051\n",
-       "30             1968\n",
-       "31             2186\n",
-       "32              875"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df_extracted = df_obs[df_obs.extract_date.notnull()][['tooth_id','tooth_num']]\n",
     "# df_extracted = df_obs[df_obs.extract_date.notna()][['tooth_id','tooth_num']]\n",
     "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
     "df_extracted.drop_duplicates(inplace=True)\n",
-    "df_extracted.head()\n",
-    "df_extracted.groupby('tooth_num').count()\n",
+    "# df_extracted.head()\n",
+    "# df_extracted.groupby('tooth_num').count()\n",
     "# len(df_extracted) # isnull -> 1,617,783; isna -> 1,617,783; notnull -> 41,331; notna -> 41,331"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 199,
    "metadata": {},
    "outputs": [
     {
@@ -472,48 +248,6 @@
       "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
       "  # Remove the CWD from sys.path while we load stuff.\n"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "15    2275\n",
-       "31    2186\n",
-       "2     2098\n",
-       "18    2055\n",
-       "30    1968\n",
-       "19    1941\n",
-       "3     1831\n",
-       "14    1819\n",
-       "4     1373\n",
-       "13    1354\n",
-       "5     1226\n",
-       "12    1226\n",
-       "29    1051\n",
-       "20    1022\n",
-       "16     962\n",
-       "1      953\n",
-       "17     898\n",
-       "32     875\n",
-       "7      842\n",
-       "10     817\n",
-       "9      772\n",
-       "8      740\n",
-       "6      624\n",
-       "11     603\n",
-       "28     598\n",
-       "21     551\n",
-       "25     368\n",
-       "24     336\n",
-       "23     321\n",
-       "26     316\n",
-       "27     240\n",
-       "22     236\n",
-       "Name: tooth_num, dtype: int64"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -527,7 +261,7 @@
     "print(len(temp3))\n",
     "# temp3.groupby('tooth_num').size()\n",
     "temp3.drop_duplicates(inplace=True)\n",
-    "temp3.tooth_num.value_counts()\n",
+    "# temp3.tooth_num.value_counts()\n",
     "# print(len(temp3))\n",
     "# temp2.head()\n",
     "# temp3.groupby('tooth_num').size()\n",
@@ -539,100 +273,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 200,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tooth_num</th>\n",
-       "      <th>tooth_id</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>275</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_1_1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1084</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_149_1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8836</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_2946_1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13571</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_3302_1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15481</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_3046_1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "      tooth_num      tooth_id\n",
-       "275           1     A_1_1_1_1\n",
-       "1084          1   A_1_1_149_1\n",
-       "8836          1  A_1_1_2946_1\n",
-       "13571         1  A_1_1_3302_1\n",
-       "15481         1  A_1_1_3046_1"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "temp4 = temp3.query(\"tooth_num == '1'\")\n",
-    "temp4.head()"
+    "# temp4 = temp3.query(\"tooth_num == '1'\")\n",
+    "# temp4.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 201,
    "metadata": {},
    "outputs": [
     {
@@ -642,7 +293,7 @@
       "Index(['practice', 'patient_id', 'gender', 'dob', 'first_visit', 'last_visit',\n",
       "       'tooth_id', 'tooth_num', 'first_PCR', 'first_RCT', 'extract_date',\n",
       "       'missing_date', 'event_id', 'event_name', 'event_date', 'ada_code', 'm',\n",
-      "       'o', 'd', 'b', 'l', 'f', 'i'],\n",
+      "       'o', 'd', 'b', 'l', 'f', 'i', 'patient_age', 'missing_flag'],\n",
       "      dtype='object')\n"
      ]
     }
@@ -726,27 +377,6 @@
     "# df_demographics.head()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 23,
@@ -858,161 +488,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 202,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
-      "  self[name] = value\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tooth_id</th>\n",
-       "      <th>tooth_num</th>\n",
-       "      <th>ada_code</th>\n",
-       "      <th>event_name</th>\n",
-       "      <th>m</th>\n",
-       "      <th>o</th>\n",
-       "      <th>d</th>\n",
-       "      <th>b</th>\n",
-       "      <th>l</th>\n",
-       "      <th>f</th>\n",
-       "      <th>i</th>\n",
-       "      <th>num_restored_surfaces</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>A_1_1_155_15</td>\n",
-       "      <td>15</td>\n",
-       "      <td>D2150</td>\n",
-       "      <td>amalgam filling restoration procedure</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>A_1_1_155_31</td>\n",
-       "      <td>31</td>\n",
-       "      <td>D2391</td>\n",
-       "      <td>resin filling restoration procedure</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>A_1_1_155_30</td>\n",
-       "      <td>30</td>\n",
-       "      <td>D2391</td>\n",
-       "      <td>resin filling restoration procedure</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>A_1_1_155_20</td>\n",
-       "      <td>20</td>\n",
-       "      <td>D2150</td>\n",
-       "      <td>amalgam filling restoration procedure</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>A_1_1_155_8</td>\n",
-       "      <td>8</td>\n",
-       "      <td>D2330</td>\n",
-       "      <td>resin filling restoration procedure</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "        tooth_id tooth_num ada_code                             event_name  \\\n",
-       "10  A_1_1_155_15        15    D2150  amalgam filling restoration procedure   \n",
-       "14  A_1_1_155_31        31    D2391    resin filling restoration procedure   \n",
-       "18  A_1_1_155_30        30    D2391    resin filling restoration procedure   \n",
-       "26  A_1_1_155_20        20    D2150  amalgam filling restoration procedure   \n",
-       "27   A_1_1_155_8         8    D2330    resin filling restoration procedure   \n",
-       "\n",
-       "      m    o    d    b    l    f    i  num_restored_surfaces  \n",
-       "10  1.0  1.0  0.0  0.0  0.0  0.0  0.0                    2.0  \n",
-       "14  0.0  1.0  0.0  0.0  0.0  0.0  0.0                    1.0  \n",
-       "18  0.0  1.0  0.0  0.0  0.0  0.0  0.0                    1.0  \n",
-       "26  0.0  1.0  1.0  0.0  0.0  0.0  0.0                    2.0  \n",
-       "27  0.0  0.0  0.0  0.0  0.0  0.0  1.0                    1.0  "
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "df_fillings.num_restored_surfaces = pds.to_numeric(df_fillings.num_restored_surfaces)\n",
     "df_fillings = df_fillings[df_fillings.num_restored_surfaces > 0]\n",
-    "df_fillings.head()"
+    "# df_fillings.head()"
    ]
   },
   {
@@ -1026,53 +508,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 204,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tooth_num\n",
-       "1      5557\n",
-       "2     70930\n",
-       "3     83304\n",
-       "4     47524\n",
-       "5     41943\n",
-       "6     26819\n",
-       "7     31896\n",
-       "8     41835\n",
-       "9     40930\n",
-       "10    31771\n",
-       "11    27314\n",
-       "12    41213\n",
-       "13    45505\n",
-       "14    82109\n",
-       "15    73201\n",
-       "16     5720\n",
-       "17     8883\n",
-       "18    77981\n",
-       "19    80683\n",
-       "20    44953\n",
-       "21    27576\n",
-       "22    13604\n",
-       "23    10007\n",
-       "24    12255\n",
-       "25    11986\n",
-       "26     9916\n",
-       "27    13670\n",
-       "28    27844\n",
-       "29    45195\n",
-       "30    82534\n",
-       "31    77107\n",
-       "32     8840\n",
-       "Name: tooth_num, dtype: int64"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "# find number of procedures per tooth\n",
     "# tooth_counts = pds.DataFrame(df_fillings.groupby('tooth_num')['tooth_num'].count())\n",
@@ -1081,12 +519,12 @@
     "tooth_counts.columns = ['count']\n",
     "tooth_counts.index = pds.to_numeric(tooth_counts.index)\n",
     "tooth_counts.sort_index(inplace=True) # sort data by tooth number 1 -> 32\n",
-    "tooth_counts"
+    "# tooth_counts"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 205,
    "metadata": {},
    "outputs": [
     {
@@ -1202,97 +640,22 @@
     "# note the use of engine=\"python\"\n",
     "# df_extracted = df_obs[df_obs.extract_date.notnull()]\n",
     "df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num','extract_date']]\n",
-    "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n",
-    "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
-    "df_extracted.drop_duplicates(inplace=True)\n",
-    "df_extracted.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tooth_num</th>\n",
-       "      <th>tooth_id</th>\n",
-       "      <th>extract_date</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_1_1</td>\n",
-       "      <td>2003-05-16T00:00:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_2946_1</td>\n",
-       "      <td>2012-01-30T00:00:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_149_1</td>\n",
-       "      <td>2015-01-06T00:00:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_3046_1</td>\n",
-       "      <td>2014-11-26T00:00:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>A_1_1_3302_1</td>\n",
-       "      <td>2010-08-30T00:00:00</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   tooth_num      tooth_id         extract_date\n",
-       "0          1     A_1_1_1_1  2003-05-16T00:00:00\n",
-       "1          1  A_1_1_2946_1  2012-01-30T00:00:00\n",
-       "2          1   A_1_1_149_1  2015-01-06T00:00:00\n",
-       "3          1  A_1_1_3046_1  2014-11-26T00:00:00\n",
-       "4          1  A_1_1_3302_1  2010-08-30T00:00:00"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+    "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n",
+    "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
+    "df_extracted.drop_duplicates(inplace=True)\n",
+    "df_extracted.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 206,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "# query results from triplestore\n",
     "df_tooth1 = pds.read_csv('extracted_tooth_1_no_es.csv')\n",
     "df_tooth1.drop_duplicates(inplace=True)\n",
-    "df_tooth1.head()"
+    "# df_tooth1.head()"
    ]
   },
   {
@@ -1318,7 +681,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 207,
    "metadata": {},
    "outputs": [
     {
@@ -1328,80 +691,6 @@
       "/usr/local/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
       "  This is separate from the ipykernel package so we can avoid doing imports until\n"
      ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tooth_id</th>\n",
-       "      <th>tooth_num</th>\n",
-       "      <th>extract_date</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>275</th>\n",
-       "      <td>A_1_1_1_1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2003-05-16</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1084</th>\n",
-       "      <td>A_1_1_149_1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2015-01-06</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8836</th>\n",
-       "      <td>A_1_1_2946_1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2012-01-30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13571</th>\n",
-       "      <td>A_1_1_3302_1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2010-08-30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15481</th>\n",
-       "      <td>A_1_1_3046_1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2014-11-26</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "           tooth_id tooth_num extract_date\n",
-       "275       A_1_1_1_1         1   2003-05-16\n",
-       "1084    A_1_1_149_1         1   2015-01-06\n",
-       "8836   A_1_1_2946_1         1   2012-01-30\n",
-       "13571  A_1_1_3302_1         1   2010-08-30\n",
-       "15481  A_1_1_3046_1         1   2014-11-26"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -1409,7 +698,7 @@
     "temp = df_obs.query(\"tooth_num == '1'\")\n",
     "extract = temp[df_obs.extract_date.notnull()][['tooth_id','tooth_num', 'extract_date']]\n",
     "extract.drop_duplicates(inplace=True)\n",
-    "extract.head()"
+    "# extract.head()"
    ]
   },
   {
@@ -1770,20 +1059,6 @@
     "df_missing[df_missing.missing_flag == 0].head() # should be none ..."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 49,
@@ -1839,7 +1114,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 208,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1852,7 +1127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 210,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1877,7 +1152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 211,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1895,7 +1170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 212,
    "metadata": {},
    "outputs": [
     {
@@ -1925,7 +1200,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 213,
    "metadata": {},
    "outputs": [
     {
@@ -1958,7 +1233,129 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 188,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add some meta information about teeth\n",
+    "# tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n",
+    "# tooth_info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 190,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# add meta info about tooth type and region in mouth\n",
+    "# posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n",
+    "# anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n",
+    "\n",
+    "# tooth_info['molar'] =  tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n",
+    "# tooth_info['premolar'] =  tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n",
+    "# tooth_info['canine'] =  tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n",
+    "# tooth_info['incisor'] =  tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n",
+    "# tooth_info['upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n",
+    "# tooth_info['lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n",
+    "# tooth_info['right'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n",
+    "# tooth_info['right upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n",
+    "# tooth_info['right lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n",
+    "# tooth_info['left'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n",
+    "# tooth_info['left upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n",
+    "# tooth_info['left lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n",
+    "# tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n",
+    "# tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n",
+    "# tooth_info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 214,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 192,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\", index_col='tooth')\n",
+    "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\")\n",
+    "# tooth_info.set_index('tooth', inplace=True)\n",
+    "# tooth_info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 222,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts.reset_index(inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 228,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts.drop(columns='index', inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 231,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts.columns.name = None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 235,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts.tooth_num = restored_surface_counts.tooth_num.astype(int)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 239,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts = restored_surface_counts.merge(tooth_info, left_on='tooth_num', right_on='tooth', how='left')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 241,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# restored_surface_counts"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 245,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "incisor_surface_counts = restored_surface_counts.query('incisor == 1')[['tooth_num', '1', '2', '3', '4', '5', '6']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 249,
    "metadata": {},
    "outputs": [
     {
@@ -1982,256 +1379,161 @@
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
-       "      <th>tooth</th>\n",
-       "      <th>tooth_label</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Right upper third secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
+       "      <th>tooth_num</th>\n",
        "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Right upper second secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Right upper first secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Right upper second secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>Right upper first secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>6</td>\n",
-       "      <td>Right upper secondary canine tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>7</td>\n",
-       "      <td>Right upper lateral secondary incisor tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Right upper central secondary incisor tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>9</td>\n",
-       "      <td>Left upper central secondary incisor tooth</td>\n",
        "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
        "    <tr>\n",
-       "      <th>9</th>\n",
+       "      <th>1</th>\n",
        "      <td>10</td>\n",
-       "      <td>Left upper lateral secondary incisor tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>11</td>\n",
-       "      <td>Left upper secondary canine tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>12</td>\n",
-       "      <td>Left upper first secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Left upper second secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>14</td>\n",
-       "      <td>Left upper first secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>15</td>\n",
-       "      <td>Left upper second secondary molar tooth</td>\n",
+       "      <td>11438</td>\n",
+       "      <td>10234</td>\n",
+       "      <td>6438</td>\n",
+       "      <td>3140</td>\n",
+       "      <td>521</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>15</th>\n",
-       "      <td>16</td>\n",
-       "      <td>Left upper third secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>17</td>\n",
-       "      <td>Left lower third secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>18</td>\n",
-       "      <td>Left lower second secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>19</td>\n",
-       "      <td>Left lower first secondary molar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>20</td>\n",
-       "      <td>Left lower second secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>21</td>\n",
-       "      <td>Left lower first secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>22</td>\n",
-       "      <td>Left lower secondary canine tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
        "      <td>23</td>\n",
-       "      <td>Left lower lateral secondary incisor tooth</td>\n",
+       "      <td>4365</td>\n",
+       "      <td>3089</td>\n",
+       "      <td>1720</td>\n",
+       "      <td>727</td>\n",
+       "      <td>106</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>23</th>\n",
+       "      <th>16</th>\n",
        "      <td>24</td>\n",
-       "      <td>Left lower central secondary incisor tooth</td>\n",
+       "      <td>5125</td>\n",
+       "      <td>3697</td>\n",
+       "      <td>2083</td>\n",
+       "      <td>1109</td>\n",
+       "      <td>240</td>\n",
+       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>24</th>\n",
+       "      <th>17</th>\n",
        "      <td>25</td>\n",
-       "      <td>Right lower central secondary incisor tooth</td>\n",
+       "      <td>4806</td>\n",
+       "      <td>3808</td>\n",
+       "      <td>2087</td>\n",
+       "      <td>1082</td>\n",
+       "      <td>203</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>25</th>\n",
+       "      <th>18</th>\n",
        "      <td>26</td>\n",
-       "      <td>Right lower lateral secondary incisor tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>27</td>\n",
-       "      <td>Right lower secondary canine tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>28</td>\n",
-       "      <td>Right lower first secondary premolar tooth</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>29</td>\n",
-       "      <td>Right lower second secondary premolar tooth</td>\n",
+       "      <td>4194</td>\n",
+       "      <td>3247</td>\n",
+       "      <td>1695</td>\n",
+       "      <td>665</td>\n",
+       "      <td>115</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>29</th>\n",
-       "      <td>30</td>\n",
-       "      <td>Right lower first secondary molar tooth</td>\n",
+       "      <td>7</td>\n",
+       "      <td>11249</td>\n",
+       "      <td>10399</td>\n",
+       "      <td>6536</td>\n",
+       "      <td>3214</td>\n",
+       "      <td>498</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>30</th>\n",
-       "      <td>31</td>\n",
-       "      <td>Right lower second secondary molar tooth</td>\n",
+       "      <td>8</td>\n",
+       "      <td>12742</td>\n",
+       "      <td>13514</td>\n",
+       "      <td>8559</td>\n",
+       "      <td>6064</td>\n",
+       "      <td>956</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>31</th>\n",
-       "      <td>32</td>\n",
-       "      <td>Right lower third secondary molar tooth</td>\n",
+       "      <td>9</td>\n",
+       "      <td>12488</td>\n",
+       "      <td>13227</td>\n",
+       "      <td>8316</td>\n",
+       "      <td>5976</td>\n",
+       "      <td>923</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "    tooth                                  tooth_label\n",
-       "0       1      Right upper third secondary molar tooth\n",
-       "1       2     Right upper second secondary molar tooth\n",
-       "2       3      Right upper first secondary molar tooth\n",
-       "3       4  Right upper second secondary premolar tooth\n",
-       "4       5   Right upper first secondary premolar tooth\n",
-       "5       6           Right upper secondary canine tooth\n",
-       "6       7  Right upper lateral secondary incisor tooth\n",
-       "7       8  Right upper central secondary incisor tooth\n",
-       "8       9   Left upper central secondary incisor tooth\n",
-       "9      10   Left upper lateral secondary incisor tooth\n",
-       "10     11            Left upper secondary canine tooth\n",
-       "11     12    Left upper first secondary premolar tooth\n",
-       "12     13   Left upper second secondary premolar tooth\n",
-       "13     14       Left upper first secondary molar tooth\n",
-       "14     15      Left upper second secondary molar tooth\n",
-       "15     16       Left upper third secondary molar tooth\n",
-       "16     17       Left lower third secondary molar tooth\n",
-       "17     18      Left lower second secondary molar tooth\n",
-       "18     19       Left lower first secondary molar tooth\n",
-       "19     20   Left lower second secondary premolar tooth\n",
-       "20     21    Left lower first secondary premolar tooth\n",
-       "21     22            Left lower secondary canine tooth\n",
-       "22     23   Left lower lateral secondary incisor tooth\n",
-       "23     24   Left lower central secondary incisor tooth\n",
-       "24     25  Right lower central secondary incisor tooth\n",
-       "25     26  Right lower lateral secondary incisor tooth\n",
-       "26     27           Right lower secondary canine tooth\n",
-       "27     28   Right lower first secondary premolar tooth\n",
-       "28     29  Right lower second secondary premolar tooth\n",
-       "29     30      Right lower first secondary molar tooth\n",
-       "30     31     Right lower second secondary molar tooth\n",
-       "31     32      Right lower third secondary molar tooth"
+       "    tooth_num      1      2     3     4    5  6\n",
+       "1          10  11438  10234  6438  3140  521  0\n",
+       "15         23   4365   3089  1720   727  106  0\n",
+       "16         24   5125   3697  2083  1109  240  1\n",
+       "17         25   4806   3808  2087  1082  203  0\n",
+       "18         26   4194   3247  1695   665  115  0\n",
+       "29          7  11249  10399  6536  3214  498  0\n",
+       "30          8  12742  13514  8559  6064  956  0\n",
+       "31          9  12488  13227  8316  5976  923  0"
       ]
      },
-     "execution_count": 95,
+     "execution_count": 249,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# add some meta information about teeth\n",
-    "tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n",
-    "tooth_info"
+    "incisor_surface_counts"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 252,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# add meta info about tooth type and region in mouth\n",
-    "posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n",
-    "anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n",
-    "\n",
-    "tooth_info['molar'] =  tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n",
-    "tooth_info['premolar'] =  tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n",
-    "tooth_info['canine'] =  tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n",
-    "tooth_info['incisor'] =  tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n",
-    "tooth_info['upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n",
-    "tooth_info['lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n",
-    "tooth_info['right'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n",
-    "tooth_info['right upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n",
-    "tooth_info['right lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n",
-    "tooth_info['left'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n",
-    "tooth_info['left upper'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n",
-    "tooth_info['left lower'] =  tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n",
-    "tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n",
-    "tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n",
-    "# tooth_info"
+    "incisor_surface_counts.set_index('tooth_num', inplace=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 253,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1    66407\n",
+       "2    61215\n",
+       "3    37434\n",
+       "4    21977\n",
+       "5     3562\n",
+       "6        1\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 253,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe"
+    "incisor_surface_counts.sum()"
    ]
   },
   {

	tooth_id
tooth_num
1	953
2	2098
3	1831
4	1373
5	1226
6	624
7	842
8	740
9	772
10	817
11	603
12	1226
13	1354
14	1819
15	2275
16	962
17	898
18	2055
19	1941
20	1022
21	551
22	236
23	321
24	336
25	368
26	316
27	240
28	598
29	1051
30	1968
31	2186
32	875
	tooth_num	tooth_id
275	1	A_1_1_1_1
1084	1	A_1_1_149_1
8836	1	A_1_1_2946_1
13571	1	A_1_1_3302_1
15481	1	A_1_1_3046_1
	tooth_id	tooth_num	ada_code	event_name	m	o	d	i	num_restored_surfaces
10	A_1_1_155_15	15	D2150	amalgam filling restoration procedure	1.0	1.0	0.0	0.0	2.0
14	A_1_1_155_31	31	D2391	resin filling restoration procedure	0.0	1.0	0.0	0.0	1.0
18	A_1_1_155_30	30	D2391	resin filling restoration procedure	0.0	1.0	0.0	0.0	1.0
26	A_1_1_155_20	20	D2150	amalgam filling restoration procedure	0.0	1.0	1.0	0.0	2.0
27	A_1_1_155_8	8	D2330	resin filling restoration procedure	0.0	0.0	0.0	1.0	1.0
	tooth_num	tooth_id	extract_date
0	1	A_1_1_1_1	2003-05-16T00:00:00
1	1	A_1_1_2946_1	2012-01-30T00:00:00
2	1	A_1_1_149_1	2015-01-06T00:00:00
3	1	A_1_1_3046_1	2014-11-26T00:00:00
4	1	A_1_1_3302_1	2010-08-30T00:00:00
	tooth_id	tooth_num	extract_date
275	A_1_1_1_1	1	2003-05-16
1084	A_1_1_149_1	1	2015-01-06
8836	A_1_1_2946_1	1	2012-01-30
13571	A_1_1_3302_1	1	2010-08-30
15481	A_1_1_3046_1	1	2014-11-26