diff --git a/src/analysis/surival_01.ipynb b/src/analysis/surival_01.ipynb
index 0111215..4cef3db 100644
--- a/src/analysis/surival_01.ipynb
+++ b/src/analysis/surival_01.ipynb
@@ -75,13 +75,6 @@
"# df_tooth_surface_procedures.head() # good"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 6,
@@ -111,13 +104,6 @@
"len(df_patients_teeth) # 1,231,726"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 8,
@@ -159,13 +145,6 @@
"plt.show()"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 10,
@@ -234,225 +213,22 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 198,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " tooth_id | \n",
- "
\n",
- " \n",
- " tooth_num | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1 | \n",
- " 953 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 2098 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1831 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 1373 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " 1226 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " 624 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 842 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 740 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " 772 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 817 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 603 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 1226 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 1354 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 1819 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " 2275 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 962 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 898 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 2055 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 1941 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 1022 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 551 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " 236 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " 321 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " 336 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " 368 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 316 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 240 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 598 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " 1051 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " 1968 | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " 2186 | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " 875 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " tooth_id\n",
- "tooth_num \n",
- "1 953\n",
- "2 2098\n",
- "3 1831\n",
- "4 1373\n",
- "5 1226\n",
- "6 624\n",
- "7 842\n",
- "8 740\n",
- "9 772\n",
- "10 817\n",
- "11 603\n",
- "12 1226\n",
- "13 1354\n",
- "14 1819\n",
- "15 2275\n",
- "16 962\n",
- "17 898\n",
- "18 2055\n",
- "19 1941\n",
- "20 1022\n",
- "21 551\n",
- "22 236\n",
- "23 321\n",
- "24 336\n",
- "25 368\n",
- "26 316\n",
- "27 240\n",
- "28 598\n",
- "29 1051\n",
- "30 1968\n",
- "31 2186\n",
- "32 875"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_extracted = df_obs[df_obs.extract_date.notnull()][['tooth_id','tooth_num']]\n",
"# df_extracted = df_obs[df_obs.extract_date.notna()][['tooth_id','tooth_num']]\n",
"df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
"df_extracted.drop_duplicates(inplace=True)\n",
- "df_extracted.head()\n",
- "df_extracted.groupby('tooth_num').count()\n",
+ "# df_extracted.head()\n",
+ "# df_extracted.groupby('tooth_num').count()\n",
"# len(df_extracted) # isnull -> 1,617,783; isna -> 1,617,783; notnull -> 41,331; notna -> 41,331"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 199,
"metadata": {},
"outputs": [
{
@@ -472,48 +248,6 @@
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" # Remove the CWD from sys.path while we load stuff.\n"
]
- },
- {
- "data": {
- "text/plain": [
- "15 2275\n",
- "31 2186\n",
- "2 2098\n",
- "18 2055\n",
- "30 1968\n",
- "19 1941\n",
- "3 1831\n",
- "14 1819\n",
- "4 1373\n",
- "13 1354\n",
- "5 1226\n",
- "12 1226\n",
- "29 1051\n",
- "20 1022\n",
- "16 962\n",
- "1 953\n",
- "17 898\n",
- "32 875\n",
- "7 842\n",
- "10 817\n",
- "9 772\n",
- "8 740\n",
- "6 624\n",
- "11 603\n",
- "28 598\n",
- "21 551\n",
- "25 368\n",
- "24 336\n",
- "23 321\n",
- "26 316\n",
- "27 240\n",
- "22 236\n",
- "Name: tooth_num, dtype: int64"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
}
],
"source": [
@@ -527,7 +261,7 @@
"print(len(temp3))\n",
"# temp3.groupby('tooth_num').size()\n",
"temp3.drop_duplicates(inplace=True)\n",
- "temp3.tooth_num.value_counts()\n",
+ "# temp3.tooth_num.value_counts()\n",
"# print(len(temp3))\n",
"# temp2.head()\n",
"# temp3.groupby('tooth_num').size()\n",
@@ -539,100 +273,17 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 200,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " tooth_num | \n",
- " tooth_id | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 275 | \n",
- " 1 | \n",
- " A_1_1_1_1 | \n",
- "
\n",
- " \n",
- " 1084 | \n",
- " 1 | \n",
- " A_1_1_149_1 | \n",
- "
\n",
- " \n",
- " 8836 | \n",
- " 1 | \n",
- " A_1_1_2946_1 | \n",
- "
\n",
- " \n",
- " 13571 | \n",
- " 1 | \n",
- " A_1_1_3302_1 | \n",
- "
\n",
- " \n",
- " 15481 | \n",
- " 1 | \n",
- " A_1_1_3046_1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " tooth_num tooth_id\n",
- "275 1 A_1_1_1_1\n",
- "1084 1 A_1_1_149_1\n",
- "8836 1 A_1_1_2946_1\n",
- "13571 1 A_1_1_3302_1\n",
- "15481 1 A_1_1_3046_1"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
- "temp4 = temp3.query(\"tooth_num == '1'\")\n",
- "temp4.head()"
+ "# temp4 = temp3.query(\"tooth_num == '1'\")\n",
+ "# temp4.head()"
]
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 17,
+ "execution_count": 201,
"metadata": {},
"outputs": [
{
@@ -642,7 +293,7 @@
"Index(['practice', 'patient_id', 'gender', 'dob', 'first_visit', 'last_visit',\n",
" 'tooth_id', 'tooth_num', 'first_PCR', 'first_RCT', 'extract_date',\n",
" 'missing_date', 'event_id', 'event_name', 'event_date', 'ada_code', 'm',\n",
- " 'o', 'd', 'b', 'l', 'f', 'i'],\n",
+ " 'o', 'd', 'b', 'l', 'f', 'i', 'patient_age', 'missing_flag'],\n",
" dtype='object')\n"
]
}
@@ -726,27 +377,6 @@
"# df_demographics.head()"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 23,
@@ -858,161 +488,13 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 202,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/usr/local/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: \n",
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
- "\n",
- "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
- " self[name] = value\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " tooth_id | \n",
- " tooth_num | \n",
- " ada_code | \n",
- " event_name | \n",
- " m | \n",
- " o | \n",
- " d | \n",
- " b | \n",
- " l | \n",
- " f | \n",
- " i | \n",
- " num_restored_surfaces | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 10 | \n",
- " A_1_1_155_15 | \n",
- " 15 | \n",
- " D2150 | \n",
- " amalgam filling restoration procedure | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 2.0 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " A_1_1_155_31 | \n",
- " 31 | \n",
- " D2391 | \n",
- " resin filling restoration procedure | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " A_1_1_155_30 | \n",
- " 30 | \n",
- " D2391 | \n",
- " resin filling restoration procedure | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " A_1_1_155_20 | \n",
- " 20 | \n",
- " D2150 | \n",
- " amalgam filling restoration procedure | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 2.0 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " A_1_1_155_8 | \n",
- " 8 | \n",
- " D2330 | \n",
- " resin filling restoration procedure | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- " 1.0 | \n",
- " 1.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " tooth_id tooth_num ada_code event_name \\\n",
- "10 A_1_1_155_15 15 D2150 amalgam filling restoration procedure \n",
- "14 A_1_1_155_31 31 D2391 resin filling restoration procedure \n",
- "18 A_1_1_155_30 30 D2391 resin filling restoration procedure \n",
- "26 A_1_1_155_20 20 D2150 amalgam filling restoration procedure \n",
- "27 A_1_1_155_8 8 D2330 resin filling restoration procedure \n",
- "\n",
- " m o d b l f i num_restored_surfaces \n",
- "10 1.0 1.0 0.0 0.0 0.0 0.0 0.0 2.0 \n",
- "14 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
- "18 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
- "26 0.0 1.0 1.0 0.0 0.0 0.0 0.0 2.0 \n",
- "27 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 "
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df_fillings.num_restored_surfaces = pds.to_numeric(df_fillings.num_restored_surfaces)\n",
"df_fillings = df_fillings[df_fillings.num_restored_surfaces > 0]\n",
- "df_fillings.head()"
+ "# df_fillings.head()"
]
},
{
@@ -1026,53 +508,9 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 204,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "tooth_num\n",
- "1 5557\n",
- "2 70930\n",
- "3 83304\n",
- "4 47524\n",
- "5 41943\n",
- "6 26819\n",
- "7 31896\n",
- "8 41835\n",
- "9 40930\n",
- "10 31771\n",
- "11 27314\n",
- "12 41213\n",
- "13 45505\n",
- "14 82109\n",
- "15 73201\n",
- "16 5720\n",
- "17 8883\n",
- "18 77981\n",
- "19 80683\n",
- "20 44953\n",
- "21 27576\n",
- "22 13604\n",
- "23 10007\n",
- "24 12255\n",
- "25 11986\n",
- "26 9916\n",
- "27 13670\n",
- "28 27844\n",
- "29 45195\n",
- "30 82534\n",
- "31 77107\n",
- "32 8840\n",
- "Name: tooth_num, dtype: int64"
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"# find number of procedures per tooth\n",
"# tooth_counts = pds.DataFrame(df_fillings.groupby('tooth_num')['tooth_num'].count())\n",
@@ -1081,12 +519,12 @@
"tooth_counts.columns = ['count']\n",
"tooth_counts.index = pds.to_numeric(tooth_counts.index)\n",
"tooth_counts.sort_index(inplace=True) # sort data by tooth number 1 -> 32\n",
- "tooth_counts"
+ "# tooth_counts"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 205,
"metadata": {},
"outputs": [
{
@@ -1202,97 +640,22 @@
"# note the use of engine=\"python\"\n",
"# df_extracted = df_obs[df_obs.extract_date.notnull()]\n",
"df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num','extract_date']]\n",
- "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n",
- "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
- "df_extracted.drop_duplicates(inplace=True)\n",
- "df_extracted.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " tooth_num | \n",
- " tooth_id | \n",
- " extract_date | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " A_1_1_1_1 | \n",
- " 2003-05-16T00:00:00 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 1 | \n",
- " A_1_1_2946_1 | \n",
- " 2012-01-30T00:00:00 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 1 | \n",
- " A_1_1_149_1 | \n",
- " 2015-01-06T00:00:00 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 1 | \n",
- " A_1_1_3046_1 | \n",
- " 2014-11-26T00:00:00 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 1 | \n",
- " A_1_1_3302_1 | \n",
- " 2010-08-30T00:00:00 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " tooth_num tooth_id extract_date\n",
- "0 1 A_1_1_1_1 2003-05-16T00:00:00\n",
- "1 1 A_1_1_2946_1 2012-01-30T00:00:00\n",
- "2 1 A_1_1_149_1 2015-01-06T00:00:00\n",
- "3 1 A_1_1_3046_1 2014-11-26T00:00:00\n",
- "4 1 A_1_1_3302_1 2010-08-30T00:00:00"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "# df_extracted = df_obs[~np.isnat(df_obs.extract_date)][['tooth_id','tooth_num']]\n",
+ "df_extracted.tooth_num = pds.to_numeric(df_extracted.tooth_num)\n",
+ "df_extracted.drop_duplicates(inplace=True)\n",
+ "df_extracted.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 206,
+ "metadata": {},
+ "outputs": [],
"source": [
"# query results from triplestore\n",
"df_tooth1 = pds.read_csv('extracted_tooth_1_no_es.csv')\n",
"df_tooth1.drop_duplicates(inplace=True)\n",
- "df_tooth1.head()"
+ "# df_tooth1.head()"
]
},
{
@@ -1318,7 +681,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 207,
"metadata": {},
"outputs": [
{
@@ -1328,80 +691,6 @@
"/usr/local/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " tooth_id | \n",
- " tooth_num | \n",
- " extract_date | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 275 | \n",
- " A_1_1_1_1 | \n",
- " 1 | \n",
- " 2003-05-16 | \n",
- "
\n",
- " \n",
- " 1084 | \n",
- " A_1_1_149_1 | \n",
- " 1 | \n",
- " 2015-01-06 | \n",
- "
\n",
- " \n",
- " 8836 | \n",
- " A_1_1_2946_1 | \n",
- " 1 | \n",
- " 2012-01-30 | \n",
- "
\n",
- " \n",
- " 13571 | \n",
- " A_1_1_3302_1 | \n",
- " 1 | \n",
- " 2010-08-30 | \n",
- "
\n",
- " \n",
- " 15481 | \n",
- " A_1_1_3046_1 | \n",
- " 1 | \n",
- " 2014-11-26 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " tooth_id tooth_num extract_date\n",
- "275 A_1_1_1_1 1 2003-05-16\n",
- "1084 A_1_1_149_1 1 2015-01-06\n",
- "8836 A_1_1_2946_1 1 2012-01-30\n",
- "13571 A_1_1_3302_1 1 2010-08-30\n",
- "15481 A_1_1_3046_1 1 2014-11-26"
- ]
- },
- "execution_count": 34,
- "metadata": {},
- "output_type": "execute_result"
}
],
"source": [
@@ -1409,7 +698,7 @@
"temp = df_obs.query(\"tooth_num == '1'\")\n",
"extract = temp[df_obs.extract_date.notnull()][['tooth_id','tooth_num', 'extract_date']]\n",
"extract.drop_duplicates(inplace=True)\n",
- "extract.head()"
+ "# extract.head()"
]
},
{
@@ -1770,20 +1059,6 @@
"df_missing[df_missing.missing_flag == 0].head() # should be none ..."
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 49,
@@ -1839,7 +1114,7 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 208,
"metadata": {},
"outputs": [],
"source": [
@@ -1852,7 +1127,7 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 210,
"metadata": {},
"outputs": [],
"source": [
@@ -1877,7 +1152,7 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 211,
"metadata": {},
"outputs": [],
"source": [
@@ -1895,7 +1170,7 @@
},
{
"cell_type": "code",
- "execution_count": 90,
+ "execution_count": 212,
"metadata": {},
"outputs": [
{
@@ -1925,7 +1200,7 @@
},
{
"cell_type": "code",
- "execution_count": 91,
+ "execution_count": 213,
"metadata": {},
"outputs": [
{
@@ -1958,7 +1233,129 @@
},
{
"cell_type": "code",
- "execution_count": 95,
+ "execution_count": 188,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# add some meta information about teeth\n",
+ "# tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n",
+ "# tooth_info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 190,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# add meta info about tooth type and region in mouth\n",
+ "# posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n",
+ "# anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n",
+ "\n",
+ "# tooth_info['molar'] = tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n",
+ "# tooth_info['premolar'] = tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n",
+ "# tooth_info['canine'] = tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n",
+ "# tooth_info['incisor'] = tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n",
+ "# tooth_info['upper'] = tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n",
+ "# tooth_info['lower'] = tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n",
+ "# tooth_info['right'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n",
+ "# tooth_info['right upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n",
+ "# tooth_info['right lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n",
+ "# tooth_info['left'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n",
+ "# tooth_info['left upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n",
+ "# tooth_info['left lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n",
+ "# tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n",
+ "# tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n",
+ "# tooth_info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 214,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 192,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\", index_col='tooth')\n",
+ "# tooth_info = pds.read_csv(\"tooth_meta_info.csv\")\n",
+ "# tooth_info.set_index('tooth', inplace=True)\n",
+ "# tooth_info"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 222,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts.reset_index(inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 228,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts.drop(columns='index', inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 231,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts.columns.name = None"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 235,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts.tooth_num = restored_surface_counts.tooth_num.astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 239,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts = restored_surface_counts.merge(tooth_info, left_on='tooth_num', right_on='tooth', how='left')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 241,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# restored_surface_counts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 245,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "incisor_surface_counts = restored_surface_counts.query('incisor == 1')[['tooth_num', '1', '2', '3', '4', '5', '6']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 249,
"metadata": {},
"outputs": [
{
@@ -1982,256 +1379,161 @@
" \n",
" \n",
" | \n",
- " tooth | \n",
- " tooth_label | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " 1 | \n",
- " Right upper third secondary molar tooth | \n",
- "
\n",
- " \n",
+ " tooth_num | \n",
" 1 | \n",
- " 2 | \n",
- " Right upper second secondary molar tooth | \n",
- "
\n",
- " \n",
" 2 | \n",
- " 3 | \n",
- " Right upper first secondary molar tooth | \n",
- "
\n",
- " \n",
" 3 | \n",
- " 4 | \n",
- " Right upper second secondary premolar tooth | \n",
- "
\n",
- " \n",
" 4 | \n",
- " 5 | \n",
- " Right upper first secondary premolar tooth | \n",
- "
\n",
- " \n",
" 5 | \n",
- " 6 | \n",
- " Right upper secondary canine tooth | \n",
- "
\n",
- " \n",
" 6 | \n",
- " 7 | \n",
- " Right upper lateral secondary incisor tooth | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " 8 | \n",
- " Right upper central secondary incisor tooth | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " 9 | \n",
- " Left upper central secondary incisor tooth | \n",
"
\n",
+ " \n",
+ " \n",
" \n",
- " 9 | \n",
+ " 1 | \n",
" 10 | \n",
- " Left upper lateral secondary incisor tooth | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " 11 | \n",
- " Left upper secondary canine tooth | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " 12 | \n",
- " Left upper first secondary premolar tooth | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " 13 | \n",
- " Left upper second secondary premolar tooth | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " 14 | \n",
- " Left upper first secondary molar tooth | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " 15 | \n",
- " Left upper second secondary molar tooth | \n",
+ " 11438 | \n",
+ " 10234 | \n",
+ " 6438 | \n",
+ " 3140 | \n",
+ " 521 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 15 | \n",
- " 16 | \n",
- " Left upper third secondary molar tooth | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " 17 | \n",
- " Left lower third secondary molar tooth | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " 18 | \n",
- " Left lower second secondary molar tooth | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " 19 | \n",
- " Left lower first secondary molar tooth | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " 20 | \n",
- " Left lower second secondary premolar tooth | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " 21 | \n",
- " Left lower first secondary premolar tooth | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " 22 | \n",
- " Left lower secondary canine tooth | \n",
- "
\n",
- " \n",
- " 22 | \n",
" 23 | \n",
- " Left lower lateral secondary incisor tooth | \n",
+ " 4365 | \n",
+ " 3089 | \n",
+ " 1720 | \n",
+ " 727 | \n",
+ " 106 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 23 | \n",
+ " 16 | \n",
" 24 | \n",
- " Left lower central secondary incisor tooth | \n",
+ " 5125 | \n",
+ " 3697 | \n",
+ " 2083 | \n",
+ " 1109 | \n",
+ " 240 | \n",
+ " 1 | \n",
"
\n",
" \n",
- " 24 | \n",
+ " 17 | \n",
" 25 | \n",
- " Right lower central secondary incisor tooth | \n",
+ " 4806 | \n",
+ " 3808 | \n",
+ " 2087 | \n",
+ " 1082 | \n",
+ " 203 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 25 | \n",
+ " 18 | \n",
" 26 | \n",
- " Right lower lateral secondary incisor tooth | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " 27 | \n",
- " Right lower secondary canine tooth | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " 28 | \n",
- " Right lower first secondary premolar tooth | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " 29 | \n",
- " Right lower second secondary premolar tooth | \n",
+ " 4194 | \n",
+ " 3247 | \n",
+ " 1695 | \n",
+ " 665 | \n",
+ " 115 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 29 | \n",
- " 30 | \n",
- " Right lower first secondary molar tooth | \n",
+ " 7 | \n",
+ " 11249 | \n",
+ " 10399 | \n",
+ " 6536 | \n",
+ " 3214 | \n",
+ " 498 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 30 | \n",
- " 31 | \n",
- " Right lower second secondary molar tooth | \n",
+ " 8 | \n",
+ " 12742 | \n",
+ " 13514 | \n",
+ " 8559 | \n",
+ " 6064 | \n",
+ " 956 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 31 | \n",
- " 32 | \n",
- " Right lower third secondary molar tooth | \n",
+ " 9 | \n",
+ " 12488 | \n",
+ " 13227 | \n",
+ " 8316 | \n",
+ " 5976 | \n",
+ " 923 | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " tooth tooth_label\n",
- "0 1 Right upper third secondary molar tooth\n",
- "1 2 Right upper second secondary molar tooth\n",
- "2 3 Right upper first secondary molar tooth\n",
- "3 4 Right upper second secondary premolar tooth\n",
- "4 5 Right upper first secondary premolar tooth\n",
- "5 6 Right upper secondary canine tooth\n",
- "6 7 Right upper lateral secondary incisor tooth\n",
- "7 8 Right upper central secondary incisor tooth\n",
- "8 9 Left upper central secondary incisor tooth\n",
- "9 10 Left upper lateral secondary incisor tooth\n",
- "10 11 Left upper secondary canine tooth\n",
- "11 12 Left upper first secondary premolar tooth\n",
- "12 13 Left upper second secondary premolar tooth\n",
- "13 14 Left upper first secondary molar tooth\n",
- "14 15 Left upper second secondary molar tooth\n",
- "15 16 Left upper third secondary molar tooth\n",
- "16 17 Left lower third secondary molar tooth\n",
- "17 18 Left lower second secondary molar tooth\n",
- "18 19 Left lower first secondary molar tooth\n",
- "19 20 Left lower second secondary premolar tooth\n",
- "20 21 Left lower first secondary premolar tooth\n",
- "21 22 Left lower secondary canine tooth\n",
- "22 23 Left lower lateral secondary incisor tooth\n",
- "23 24 Left lower central secondary incisor tooth\n",
- "24 25 Right lower central secondary incisor tooth\n",
- "25 26 Right lower lateral secondary incisor tooth\n",
- "26 27 Right lower secondary canine tooth\n",
- "27 28 Right lower first secondary premolar tooth\n",
- "28 29 Right lower second secondary premolar tooth\n",
- "29 30 Right lower first secondary molar tooth\n",
- "30 31 Right lower second secondary molar tooth\n",
- "31 32 Right lower third secondary molar tooth"
+ " tooth_num 1 2 3 4 5 6\n",
+ "1 10 11438 10234 6438 3140 521 0\n",
+ "15 23 4365 3089 1720 727 106 0\n",
+ "16 24 5125 3697 2083 1109 240 1\n",
+ "17 25 4806 3808 2087 1082 203 0\n",
+ "18 26 4194 3247 1695 665 115 0\n",
+ "29 7 11249 10399 6536 3214 498 0\n",
+ "30 8 12742 13514 8559 6064 956 0\n",
+ "31 9 12488 13227 8316 5976 923 0"
]
},
- "execution_count": 95,
+ "execution_count": 249,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "# add some meta information about teeth\n",
- "tooth_info = pds.read_csv(\"tooth_numbers_and_labels.csv\")\n",
- "tooth_info"
+ "incisor_surface_counts"
]
},
{
"cell_type": "code",
- "execution_count": 109,
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 252,
"metadata": {},
"outputs": [],
"source": [
- "# add meta info about tooth type and region in mouth\n",
- "posterior_tooth = [1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, 29, 30, 31, 32]\n",
- "anterior_tooth = [6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27]\n",
- "\n",
- "tooth_info['molar'] = tooth_info.tooth_label.map(lambda label: 1 if \" molar \" in label else 0)\n",
- "tooth_info['premolar'] = tooth_info.tooth_label.map(lambda label: 1 if \" premolar \" in label else 0)\n",
- "tooth_info['canine'] = tooth_info.tooth_label.map(lambda label: 1 if \" canine \" in label else 0)\n",
- "tooth_info['incisor'] = tooth_info.tooth_label.map(lambda label: 1 if \" incisor \" in label else 0)\n",
- "tooth_info['upper'] = tooth_info.tooth_label.map(lambda label: 1 if \" upper \" in label else 0)\n",
- "tooth_info['lower'] = tooth_info.tooth_label.map(lambda label: 1 if \" lower \" in label else 0)\n",
- "tooth_info['right'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right \" in label else 0)\n",
- "tooth_info['right upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right upper \" in label else 0)\n",
- "tooth_info['right lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Right lower \" in label else 0)\n",
- "tooth_info['left'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left \" in label else 0)\n",
- "tooth_info['left upper'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left upper \" in label else 0)\n",
- "tooth_info['left lower'] = tooth_info.tooth_label.map(lambda label: 1 if \"Left lower \" in label else 0)\n",
- "tooth_info['posterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in posterior_tooth else 0)\n",
- "tooth_info['anterior'] = tooth_info.tooth.map(lambda tooth: 1 if tooth in anterior_tooth else 0)\n",
- "# tooth_info"
+ "incisor_surface_counts.set_index('tooth_num', inplace=True)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 253,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1 66407\n",
+ "2 61215\n",
+ "3 37434\n",
+ "4 21977\n",
+ "5 3562\n",
+ "6 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 253,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "tooth_info.to_csv(\"tooth_meta_info.csv\", index=False) # save tooth info dataframe"
+ "incisor_surface_counts.sum()"
]
},
{