From f10a94884e597931052145526a3ddc006d90f74b Mon Sep 17 00:00:00 2001 From: Bill Duncan Date: Wed, 7 Nov 2018 22:53:53 -0500 Subject: [PATCH] update notebook --- src/analysis/surival_01.ipynb | 1216 +++++++-------------------------- 1 file changed, 259 insertions(+), 957 deletions(-) diff --git a/src/analysis/surival_01.ipynb b/src/analysis/surival_01.ipynb index 0111215..4cef3db 100644 --- a/src/analysis/surival_01.ipynb +++ b/src/analysis/surival_01.ipynb @@ -75,13 +75,6 @@ "# df_tooth_surface_procedures.head() # good" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 6, @@ -111,13 +104,6 @@ "len(df_patients_teeth) # 1,231,726" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 8, @@ -159,13 +145,6 @@ "plt.show()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 10, @@ -234,225 +213,22 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 198, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tooth_id
tooth_num
1953
22098
31831
41373
51226
6624
7842
8740
9772
10817
11603
121226
131354
141819
152275
16962
17898
182055
191941
201022
21551
22236
23321
24336
25368
26316
27240
28598
291051
301968
312186
32875
\n", - "
" - ], - "text/plain": [ - " tooth_id\n", - "tooth_num \n", - "1 953\n", - "2 2098\n", - "3 1831\n", - "4 1373\n", - "5 1226\n", - "6 624\n", - "7 842\n", - "8 740\n", - "9 772\n", - "10 817\n", - "11 603\n", - "12 1226\n", - "13 1354\n", - "14 1819\n", - "15 2275\n", - "16 962\n", - "17 898\n", - "18 2055\n", - "19 1941\n", - "20 1022\n", - "21 551\n", - "22 236\n", - "23 321\n", - "24 336\n", - "25 368\n", - "26 316\n", - "27 240\n", - "28 598\n", - "29 1051\n", - "30 1968\n", - "31 2186\n", - "32 875" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_extracted = df_obs[df_obs.extract_date.notnull()][['tooth_id','tooth_num']]\n", "# df_extracted = df_obs[df_obs.extract_date.notna()][['tooth_id','tooth_num']]\n", "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n", "df_extracted.drop_duplicates(inplace=True)\n", - "df_extracted.head()\n", - "df_extracted.groupby('tooth_num').count()\n", + "# df_extracted.head()\n", + "# df_extracted.groupby('tooth_num').count()\n", "# len(df_extracted) # isnull -> 1,617,783; isna -> 1,617,783; notnull -> 41,331; notna -> 41,331" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 199, "metadata": {}, "outputs": [ { @@ -472,48 +248,6 @@ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " # Remove the CWD from sys.path while we load stuff.\n" ] - }, - { - "data": { - "text/plain": [ - "15 2275\n", - "31 2186\n", - "2 2098\n", - "18 2055\n", - "30 1968\n", - "19 1941\n", - "3 1831\n", - "14 1819\n", - "4 1373\n", - "13 1354\n", - "5 1226\n", - "12 1226\n", - "29 1051\n", - "20 1022\n", - "16 962\n", - "1 953\n", - "17 898\n", - "32 875\n", - "7 842\n", - "10 817\n", - "9 772\n", - "8 740\n", - "6 624\n", - "11 603\n", - "28 598\n", - "21 551\n", - "25 368\n", - "24 336\n", - "23 321\n", - "26 316\n", - "27 240\n", - "22 236\n", - "Name: tooth_num, dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -527,7 +261,7 @@ "print(len(temp3))\n", "# temp3.groupby('tooth_num').size()\n", "temp3.drop_duplicates(inplace=True)\n", - "temp3.tooth_num.value_counts()\n", + "# temp3.tooth_num.value_counts()\n", "# print(len(temp3))\n", "# temp2.head()\n", "# temp3.groupby('tooth_num').size()\n", @@ -539,100 +273,17 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 200, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tooth_numtooth_id
2751A_1_1_1_1
10841A_1_1_149_1
88361A_1_1_2946_1
135711A_1_1_3302_1
154811A_1_1_3046_1
\n", - "
" - ], - "text/plain": [ - " tooth_num tooth_id\n", - "275 1 A_1_1_1_1\n", - "1084 1 A_1_1_149_1\n", - "8836 1 A_1_1_2946_1\n", - "13571 1 A_1_1_3302_1\n", - "15481 1 A_1_1_3046_1" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "temp4 = temp3.query(\"tooth_num == '1'\")\n", - "temp4.head()" + "# temp4 = temp3.query(\"tooth_num == '1'\")\n", + "# temp4.head()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 17, + "execution_count": 201, "metadata": {}, "outputs": [ { @@ -642,7 +293,7 @@ "Index(['practice', 'patient_id', 'gender', 'dob', 'first_visit', 'last_visit',\n", " 'tooth_id', 'tooth_num', 'first_PCR', 'first_RCT', 'extract_date',\n", " 'missing_date', 'event_id', 'event_name', 'event_date', 'ada_code', 'm',\n", - " 'o', 'd', 'b', 'l', 'f', 'i'],\n", + " 'o', 'd', 'b', 'l', 'f', 'i', 'patient_age', 'missing_flag'],\n", " dtype='object')\n" ] } @@ -726,27 +377,6 @@ "# df_demographics.head()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 23, @@ -858,161 +488,13 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 202, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " self[name] = value\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tooth_idtooth_numada_codeevent_namemodblfinum_restored_surfaces
10A_1_1_155_1515D2150amalgam filling restoration procedure1.01.00.00.00.00.00.02.0
14A_1_1_155_3131D2391resin filling restoration procedure0.01.00.00.00.00.00.01.0
18A_1_1_155_3030D2391resin filling restoration procedure0.01.00.00.00.00.00.01.0
26A_1_1_155_2020D2150amalgam filling restoration procedure0.01.01.00.00.00.00.02.0
27A_1_1_155_88D2330resin filling restoration procedure0.00.00.00.00.00.01.01.0
\n", - "
" - ], - "text/plain": [ - " tooth_id tooth_num ada_code event_name \\\n", - "10 A_1_1_155_15 15 D2150 amalgam filling restoration procedure \n", - "14 A_1_1_155_31 31 D2391 resin filling restoration procedure \n", - "18 A_1_1_155_30 30 D2391 resin filling restoration procedure \n", - "26 A_1_1_155_20 20 D2150 amalgam filling restoration procedure \n", - "27 A_1_1_155_8 8 D2330 resin filling restoration procedure \n", - "\n", - " m o d b l f i num_restored_surfaces \n", - "10 1.0 1.0 0.0 0.0 0.0 0.0 0.0 2.0 \n", - "14 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", - "18 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", - "26 0.0 1.0 1.0 0.0 0.0 0.0 0.0 2.0 \n", - "27 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_fillings.num_restored_surfaces = pds.to_numeric(df_fillings.num_restored_surfaces)\n", "df_fillings = df_fillings[df_fillings.num_restored_surfaces > 0]\n", - "df_fillings.head()" + "# df_fillings.head()" ] }, { @@ -1026,53 +508,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 204, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tooth_num\n", - "1 5557\n", - "2 70930\n", - "3 83304\n", - "4 47524\n", - "5 41943\n", - "6 26819\n", - "7 31896\n", - "8 41835\n", - "9 40930\n", - "10 31771\n", - "11 27314\n", - "12 41213\n", - "13 45505\n", - "14 82109\n", - "15 73201\n", - "16 5720\n", - "17 8883\n", - "18 77981\n", - "19 80683\n", - "20 44953\n", - "21 27576\n", - "22 13604\n", - "23 10007\n", - "24 12255\n", - "25 11986\n", - "26 9916\n", - "27 13670\n", - "28 27844\n", - "29 45195\n", - "30 82534\n", - "31 77107\n", - "32 8840\n", - "Name: tooth_num, dtype: int64" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# find number of procedures per tooth\n", "# tooth_counts = pds.DataFrame(df_fillings.groupby('tooth_num')['tooth_num'].count())\n", @@ -1081,12 +519,12 @@ "tooth_counts.columns = ['count']\n", "tooth_counts.index = pds.to_numeric(tooth_counts.index)\n", "tooth_counts.sort_index(inplace=True) # sort data by tooth number 1 -> 32\n", - "tooth_counts" + "# tooth_counts" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 205, "metadata": {}, "outputs": [ { @@ -1202,97 +640,22 @@ "# note the use of engine=\"python\"\n", "# df_extracted = df_obs[df_obs.extract_date.notnull()]\n", "df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num','extract_date']]\n", - "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n", - "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n", - "df_extracted.drop_duplicates(inplace=True)\n", - "df_extracted.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tooth_numtooth_idextract_date
01A_1_1_1_12003-05-16T00:00:00
11A_1_1_2946_12012-01-30T00:00:00
21A_1_1_149_12015-01-06T00:00:00
31A_1_1_3046_12014-11-26T00:00:00
41A_1_1_3302_12010-08-30T00:00:00
\n", - "
" - ], - "text/plain": [ - " tooth_num tooth_id extract_date\n", - "0 1 A_1_1_1_1 2003-05-16T00:00:00\n", - "1 1 A_1_1_2946_1 2012-01-30T00:00:00\n", - "2 1 A_1_1_149_1 2015-01-06T00:00:00\n", - "3 1 A_1_1_3046_1 2014-11-26T00:00:00\n", - "4 1 A_1_1_3302_1 2010-08-30T00:00:00" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n", + "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n", + "df_extracted.drop_duplicates(inplace=True)\n", + "df_extracted.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "metadata": {}, + "outputs": [], "source": [ "# query results from triplestore\n", "df_tooth1 = pds.read_csv('extracted_tooth_1_no_es.csv')\n", "df_tooth1.drop_duplicates(inplace=True)\n", - "df_tooth1.head()" + "# df_tooth1.head()" ] }, { @@ -1318,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 207, "metadata": {}, "outputs": [ { @@ -1328,80 +691,6 @@ "/usr/local/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tooth_idtooth_numextract_date
275A_1_1_1_112003-05-16
1084A_1_1_149_112015-01-06
8836A_1_1_2946_112012-01-30
13571A_1_1_3302_112010-08-30
15481A_1_1_3046_112014-11-26
\n", - "
" - ], - "text/plain": [ - " tooth_id tooth_num extract_date\n", - "275 A_1_1_1_1 1 2003-05-16\n", - "1084 A_1_1_149_1 1 2015-01-06\n", - "8836 A_1_1_2946_1 1 2012-01-30\n", - "13571 A_1_1_3302_1 1 2010-08-30\n", - "15481 A_1_1_3046_1 1 2014-11-26" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1409,7 +698,7 @@ "temp = df_obs.query(\"tooth_num == '1'\")\n", "extract = temp[df_obs.extract_date.notnull()][['tooth_id','tooth_num', 'extract_date']]\n", "extract.drop_duplicates(inplace=True)\n", - "extract.head()" + "# extract.head()" ] }, { @@ -1770,20 +1059,6 @@ "df_missing[df_missing.missing_flag == 0].head() # should be none ..." ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": 49, @@ -1839,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 208, "metadata": {}, "outputs": [], "source": [ @@ -1852,7 +1127,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 210, "metadata": {}, "outputs": [], "source": [ @@ -1877,7 +1152,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 211, "metadata": {}, "outputs": [], "source": [ @@ -1895,7 +1170,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 212, "metadata": {}, "outputs": [ { @@ -1925,7 +1200,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 213, "metadata": {}, "outputs": [ { @@ -1958,7 +1233,129 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 188, + "metadata": {}, + "outputs": [], + "source": [ + "# add some meta information about teeth\n", + "# tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n", + "# tooth_info" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": {}, + "outputs": [], + "source": [ + "# add meta info about tooth type and region in mouth\n", + "# posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n", + "# anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n", + "\n", + "# tooth_info['molar'] = tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n", + "# tooth_info['premolar'] = tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n", + "# tooth_info['canine'] = tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n", + "# tooth_info['incisor'] = tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n", + "# tooth_info['upper'] = tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n", + "# tooth_info['lower'] = tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n", + "# tooth_info['right'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n", + "# tooth_info['right upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n", + "# tooth_info['right lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n", + "# tooth_info['left'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n", + "# tooth_info['left upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n", + "# tooth_info['left lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n", + "# tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n", + "# tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n", + "# tooth_info" + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "metadata": {}, + "outputs": [], + "source": [ + "# tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": {}, + "outputs": [], + "source": [ + "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\", index_col='tooth')\n", + "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\")\n", + "# tooth_info.set_index('tooth', inplace=True)\n", + "# tooth_info" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts.reset_index(inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 228, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts.drop(columns='index', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts.columns.name = None" + ] + }, + { + "cell_type": "code", + "execution_count": 235, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts.tooth_num = restored_surface_counts.tooth_num.astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts = restored_surface_counts.merge(tooth_info, left_on='tooth_num', right_on='tooth', how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [], + "source": [ + "# restored_surface_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": {}, + "outputs": [], + "source": [ + "incisor_surface_counts = restored_surface_counts.query('incisor == 1')[['tooth_num', '1', '2', '3', '4', '5', '6']]" + ] + }, + { + "cell_type": "code", + "execution_count": 249, "metadata": {}, "outputs": [ { @@ -1982,256 +1379,161 @@ " \n", " \n", " \n", - " tooth\n", - " tooth_label\n", - " \n", - " \n", - " \n", - " \n", - " 0\n", - " 1\n", - " Right upper third secondary molar tooth\n", - " \n", - " \n", + " tooth_num\n", " 1\n", - " 2\n", - " Right upper second secondary molar tooth\n", - " \n", - " \n", " 2\n", - " 3\n", - " Right upper first secondary molar tooth\n", - " \n", - " \n", " 3\n", - " 4\n", - " Right upper second secondary premolar tooth\n", - " \n", - " \n", " 4\n", - " 5\n", - " Right upper first secondary premolar tooth\n", - " \n", - " \n", " 5\n", - " 6\n", - " Right upper secondary canine tooth\n", - " \n", - " \n", " 6\n", - " 7\n", - " Right upper lateral secondary incisor tooth\n", - " \n", - " \n", - " 7\n", - " 8\n", - " Right upper central secondary incisor tooth\n", - " \n", - " \n", - " 8\n", - " 9\n", - " Left upper central secondary incisor tooth\n", " \n", + " \n", + " \n", " \n", - " 9\n", + " 1\n", " 10\n", - " Left upper lateral secondary incisor tooth\n", - " \n", - " \n", - " 10\n", - " 11\n", - " Left upper secondary canine tooth\n", - " \n", - " \n", - " 11\n", - " 12\n", - " Left upper first secondary premolar tooth\n", - " \n", - " \n", - " 12\n", - " 13\n", - " Left upper second secondary premolar tooth\n", - " \n", - " \n", - " 13\n", - " 14\n", - " Left upper first secondary molar tooth\n", - " \n", - " \n", - " 14\n", - " 15\n", - " Left upper second secondary molar tooth\n", + " 11438\n", + " 10234\n", + " 6438\n", + " 3140\n", + " 521\n", + " 0\n", " \n", " \n", " 15\n", - " 16\n", - " Left upper third secondary molar tooth\n", - " \n", - " \n", - " 16\n", - " 17\n", - " Left lower third secondary molar tooth\n", - " \n", - " \n", - " 17\n", - " 18\n", - " Left lower second secondary molar tooth\n", - " \n", - " \n", - " 18\n", - " 19\n", - " Left lower first secondary molar tooth\n", - " \n", - " \n", - " 19\n", - " 20\n", - " Left lower second secondary premolar tooth\n", - " \n", - " \n", - " 20\n", - " 21\n", - " Left lower first secondary premolar tooth\n", - " \n", - " \n", - " 21\n", - " 22\n", - " Left lower secondary canine tooth\n", - " \n", - " \n", - " 22\n", " 23\n", - " Left lower lateral secondary incisor tooth\n", + " 4365\n", + " 3089\n", + " 1720\n", + " 727\n", + " 106\n", + " 0\n", " \n", " \n", - " 23\n", + " 16\n", " 24\n", - " Left lower central secondary incisor tooth\n", + " 5125\n", + " 3697\n", + " 2083\n", + " 1109\n", + " 240\n", + " 1\n", " \n", " \n", - " 24\n", + " 17\n", " 25\n", - " Right lower central secondary incisor tooth\n", + " 4806\n", + " 3808\n", + " 2087\n", + " 1082\n", + " 203\n", + " 0\n", " \n", " \n", - " 25\n", + " 18\n", " 26\n", - " Right lower lateral secondary incisor tooth\n", - " \n", - " \n", - " 26\n", - " 27\n", - " Right lower secondary canine tooth\n", - " \n", - " \n", - " 27\n", - " 28\n", - " Right lower first secondary premolar tooth\n", - " \n", - " \n", - " 28\n", - " 29\n", - " Right lower second secondary premolar tooth\n", + " 4194\n", + " 3247\n", + " 1695\n", + " 665\n", + " 115\n", + " 0\n", " \n", " \n", " 29\n", - " 30\n", - " Right lower first secondary molar tooth\n", + " 7\n", + " 11249\n", + " 10399\n", + " 6536\n", + " 3214\n", + " 498\n", + " 0\n", " \n", " \n", " 30\n", - " 31\n", - " Right lower second secondary molar tooth\n", + " 8\n", + " 12742\n", + " 13514\n", + " 8559\n", + " 6064\n", + " 956\n", + " 0\n", " \n", " \n", " 31\n", - " 32\n", - " Right lower third secondary molar tooth\n", + " 9\n", + " 12488\n", + " 13227\n", + " 8316\n", + " 5976\n", + " 923\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " tooth tooth_label\n", - "0 1 Right upper third secondary molar tooth\n", - "1 2 Right upper second secondary molar tooth\n", - "2 3 Right upper first secondary molar tooth\n", - "3 4 Right upper second secondary premolar tooth\n", - "4 5 Right upper first secondary premolar tooth\n", - "5 6 Right upper secondary canine tooth\n", - "6 7 Right upper lateral secondary incisor tooth\n", - "7 8 Right upper central secondary incisor tooth\n", - "8 9 Left upper central secondary incisor tooth\n", - "9 10 Left upper lateral secondary incisor tooth\n", - "10 11 Left upper secondary canine tooth\n", - "11 12 Left upper first secondary premolar tooth\n", - "12 13 Left upper second secondary premolar tooth\n", - "13 14 Left upper first secondary molar tooth\n", - "14 15 Left upper second secondary molar tooth\n", - "15 16 Left upper third secondary molar tooth\n", - "16 17 Left lower third secondary molar tooth\n", - "17 18 Left lower second secondary molar tooth\n", - "18 19 Left lower first secondary molar tooth\n", - "19 20 Left lower second secondary premolar tooth\n", - "20 21 Left lower first secondary premolar tooth\n", - "21 22 Left lower secondary canine tooth\n", - "22 23 Left lower lateral secondary incisor tooth\n", - "23 24 Left lower central secondary incisor tooth\n", - "24 25 Right lower central secondary incisor tooth\n", - "25 26 Right lower lateral secondary incisor tooth\n", - "26 27 Right lower secondary canine tooth\n", - "27 28 Right lower first secondary premolar tooth\n", - "28 29 Right lower second secondary premolar tooth\n", - "29 30 Right lower first secondary molar tooth\n", - "30 31 Right lower second secondary molar tooth\n", - "31 32 Right lower third secondary molar tooth" + " tooth_num 1 2 3 4 5 6\n", + "1 10 11438 10234 6438 3140 521 0\n", + "15 23 4365 3089 1720 727 106 0\n", + "16 24 5125 3697 2083 1109 240 1\n", + "17 25 4806 3808 2087 1082 203 0\n", + "18 26 4194 3247 1695 665 115 0\n", + "29 7 11249 10399 6536 3214 498 0\n", + "30 8 12742 13514 8559 6064 956 0\n", + "31 9 12488 13227 8316 5976 923 0" ] }, - "execution_count": 95, + "execution_count": 249, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# add some meta information about teeth\n", - "tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n", - "tooth_info" + "incisor_surface_counts" ] }, { "cell_type": "code", - "execution_count": 109, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 252, "metadata": {}, "outputs": [], "source": [ - "# add meta info about tooth type and region in mouth\n", - "posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n", - "anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n", - "\n", - "tooth_info['molar'] = tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n", - "tooth_info['premolar'] = tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n", - "tooth_info['canine'] = tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n", - "tooth_info['incisor'] = tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n", - "tooth_info['upper'] = tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n", - "tooth_info['lower'] = tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n", - "tooth_info['right'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n", - "tooth_info['right upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n", - "tooth_info['right lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n", - "tooth_info['left'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n", - "tooth_info['left upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n", - "tooth_info['left lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n", - "tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n", - "tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n", - "# tooth_info" + "incisor_surface_counts.set_index('tooth_num', inplace=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 253, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "1 66407\n", + "2 61215\n", + "3 37434\n", + "4 21977\n", + "5 3562\n", + "6 1\n", + "dtype: int64" + ] + }, + "execution_count": 253, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe" + "incisor_surface_counts.sum()" ] }, {