diff --git a/scripts/tp53_phenocopy.ipynb b/scripts/tp53_phenocopy.ipynb
new file mode 100644
index 0000000..9337f46
--- /dev/null
+++ b/scripts/tp53_phenocopy.ipynb
@@ -0,0 +1,808 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# TP53 Phenocopying Events\n",
+ "\n",
+ "In the following script, we visualize and determine if specific gene events phenocopy TP53 loss of function. We focus on _MDM2_, _MDM4_, and _PPM1D_ amplifications, _CDKN2A_ deletions, and _ATM_, _CHEK1_, _CHEK2_, _ATR_, and _RPS6KA3_ mutations. \n",
+ "\n",
+ "We first filter out samples with _TP53_ mutations and deep deletions as well as hypermutated samples. We then visualize the difference of the TP53 classifier score across tumors with the given events above. We perform t-tests to determine if the classifier scores are significantly different in the direction of a priori assumptions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from decimal import Decimal\n",
+ "from scipy.stats import ttest_ind\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "np.random.seed(123)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "sns.set(style='whitegrid')\n",
+ "sns.set_context('paper', rc={'font.size':11, 'axes.titlesize':11, 'axes.labelsize':16,\n",
+ " 'xtick.labelsize':11, 'ytick.labelsize':11})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# genes of interest\n",
+ "amplifications = ['MDM2', 'MDM4', 'PPM1D']\n",
+ "deletions = ['CDKN2A']\n",
+ "mutations = ['ATM', 'CHEK1', 'CHEK2', 'ATR', 'RPS6KA3']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Load mutation, copy number, and sample freeze data\n",
+ "mut_file = os.path.join('..', 'data', 'pancan_mutation_freeze.tsv')\n",
+ "gistic_file = os.path.join('..', 'data', 'raw', 'pancan_GISTIC_threshold.tsv')\n",
+ "sample_freeze_file = os.path.join('..', 'data', 'sample_freeze.tsv')\n",
+ "\n",
+ "mutation_df = pd.read_table(mut_file, index_col=0)\n",
+ "copy_df = pd.read_table(gistic_file, index_col=0)\n",
+ "sample_freeze_df = pd.read_table(sample_freeze_file)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " log10_mut | \n",
+ " total_status | \n",
+ " weight | \n",
+ " TP53 | \n",
+ " TP53_loss | \n",
+ " PATIENT_BARCODE | \n",
+ " DISEASE | \n",
+ " SUBTYPE | \n",
+ " hypermutated | \n",
+ " include | \n",
+ " Hugo_Symbol | \n",
+ " HGVSc | \n",
+ " HGVSp | \n",
+ " Variant_Classification | \n",
+ "
\n",
+ " \n",
+ " ID | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " TCGA-02-0047-01 | \n",
+ " 1.812913 | \n",
+ " 0.0 | \n",
+ " 0.723395 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " TCGA-02-0047 | \n",
+ " GBM | \n",
+ " IDHwt | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Wild-Type | \n",
+ "
\n",
+ " \n",
+ " TCGA-02-0055-01 | \n",
+ " 1.707570 | \n",
+ " 1.0 | \n",
+ " 0.698910 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " TCGA-02-0055 | \n",
+ " GBM | \n",
+ " IDHwt | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " TP53 | \n",
+ " c.646G>A | \n",
+ " p.Val216Met | \n",
+ " Missense_Mutation | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " log10_mut total_status weight TP53 TP53_loss \\\n",
+ "ID \n",
+ "TCGA-02-0047-01 1.812913 0.0 0.723395 0.0 0.0 \n",
+ "TCGA-02-0055-01 1.707570 1.0 0.698910 1.0 0.0 \n",
+ "\n",
+ " PATIENT_BARCODE DISEASE SUBTYPE hypermutated include \\\n",
+ "ID \n",
+ "TCGA-02-0047-01 TCGA-02-0047 GBM IDHwt 0.0 1.0 \n",
+ "TCGA-02-0055-01 TCGA-02-0055 GBM IDHwt 0.0 1.0 \n",
+ "\n",
+ " Hugo_Symbol HGVSc HGVSp Variant_Classification \n",
+ "ID \n",
+ "TCGA-02-0047-01 NaN NaN NaN Wild-Type \n",
+ "TCGA-02-0055-01 TP53 c.646G>A p.Val216Met Missense_Mutation "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The prediction file stores the TP53 classifier scores for all samples\n",
+ "prediction_file = os.path.join('..', 'classifiers', 'TP53', 'tables',\n",
+ " 'TP53_loss_prediction_scores.tsv')\n",
+ "prediction_df = pd.read_table(prediction_file, index_col=0)\n",
+ "prediction_df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " Gene Symbol | \n",
+ " ACAP3 | \n",
+ " ACTRT2 | \n",
+ " AGRN | \n",
+ " ANKRD65 | \n",
+ " ATAD3A | \n",
+ " ATAD3B | \n",
+ " ATAD3C | \n",
+ " AURKAIP1 | \n",
+ " B3GALT6 | \n",
+ " C1orf159 | \n",
+ " ... | \n",
+ " RP13-228J13.1 | \n",
+ " SMIM9 | \n",
+ " SNORA36A | \n",
+ " SNORA56 | \n",
+ " TMLHE | \n",
+ " VBP1 | \n",
+ " DDX11L16|chrX | \n",
+ " IL9R|chrX | \n",
+ " SPRY3|chrX | \n",
+ " VAMP7|chrX | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " TCGA-02-0047-01 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-02-0055-01 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " ... | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ " -1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 25128 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Gene Symbol ACAP3 ACTRT2 AGRN ANKRD65 ATAD3A ATAD3B ATAD3C \\\n",
+ "TCGA-02-0047-01 0 0 0 0 0 0 0 \n",
+ "TCGA-02-0055-01 1 1 1 1 1 1 1 \n",
+ "\n",
+ "Gene Symbol AURKAIP1 B3GALT6 C1orf159 ... RP13-228J13.1 \\\n",
+ "TCGA-02-0047-01 0 0 0 ... 0 \n",
+ "TCGA-02-0055-01 1 1 1 ... -1 \n",
+ "\n",
+ "Gene Symbol SMIM9 SNORA36A SNORA56 TMLHE VBP1 DDX11L16|chrX \\\n",
+ "TCGA-02-0047-01 0 0 0 0 0 0 \n",
+ "TCGA-02-0055-01 -1 -1 -1 -1 -1 -1 \n",
+ "\n",
+ "Gene Symbol IL9R|chrX SPRY3|chrX VAMP7|chrX \n",
+ "TCGA-02-0047-01 0 0 0 \n",
+ "TCGA-02-0055-01 -1 -1 -1 \n",
+ "\n",
+ "[2 rows x 25128 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Process and subset the GISTIC copy number data\n",
+ "# We do not use the preprocessed copy number data because we are also \n",
+ "# interested in intermediate copy number events\n",
+ "copy_df.drop(['Locus ID', 'Cytoband'], axis=1, inplace=True)\n",
+ "copy_df.columns = copy_df.columns.str[0:15]\n",
+ "\n",
+ "copy_df = copy_df.T\n",
+ "copy_df = copy_df.loc[sorted(sample_freeze_df['SAMPLE_BARCODE'])]\n",
+ "copy_df = copy_df.fillna(0)\n",
+ "copy_df = copy_df.astype(int)\n",
+ "copy_df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Subset corresponding data type according to predicted phenocopying mechanism"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "del_phenocopy_df = copy_df.loc[:, deletions]\n",
+ "del_phenocopy_df = del_phenocopy_df.reset_index().melt(id_vars=['index'])\n",
+ "del_phenocopy_df.columns = ['sample_id', 'gene', 'value']\n",
+ "del_phenocopy_df = del_phenocopy_df.assign(event = 'Deletion')\n",
+ "\n",
+ "amp_phenocopy_df = copy_df.loc[:, amplifications]\n",
+ "amp_phenocopy_df = amp_phenocopy_df.reset_index().melt(id_vars=['index'])\n",
+ "amp_phenocopy_df.columns = ['sample_id', 'gene', 'value']\n",
+ "amp_phenocopy_df = amp_phenocopy_df.assign(event = 'Amplification')\n",
+ "\n",
+ "mut_phenocopy_df = mutation_df.loc[:, mutations]\n",
+ "mut_phenocopy_df = mut_phenocopy_df.reset_index().melt(id_vars=['SAMPLE_BARCODE'])\n",
+ "mut_phenocopy_df.columns = ['sample_id', 'gene', 'value']\n",
+ "mut_phenocopy_df = mut_phenocopy_df.assign(event = 'Mutation')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(86994, 9)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample_id | \n",
+ " gene | \n",
+ " value | \n",
+ " event | \n",
+ " weight | \n",
+ " total_status | \n",
+ " DISEASE | \n",
+ " SUBTYPE | \n",
+ " hypermutated | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " TCGA-02-0047-01 | \n",
+ " CDKN2A | \n",
+ " -2 | \n",
+ " Deletion | \n",
+ " 0.723395 | \n",
+ " 0.0 | \n",
+ " GBM | \n",
+ " IDHwt | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TCGA-02-0055-01 | \n",
+ " CDKN2A | \n",
+ " 0 | \n",
+ " Deletion | \n",
+ " 0.698910 | \n",
+ " 1.0 | \n",
+ " GBM | \n",
+ " IDHwt | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample_id gene value event weight total_status DISEASE \\\n",
+ "0 TCGA-02-0047-01 CDKN2A -2 Deletion 0.723395 0.0 GBM \n",
+ "1 TCGA-02-0055-01 CDKN2A 0 Deletion 0.698910 1.0 GBM \n",
+ "\n",
+ " SUBTYPE hypermutated \n",
+ "0 IDHwt 0.0 \n",
+ "1 IDHwt 0.0 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Combine the gene events of interest\n",
+ "event_df = pd.concat([del_phenocopy_df, amp_phenocopy_df, mut_phenocopy_df], axis=0)\n",
+ "\n",
+ "# Add predictions and covariate information for each sample in the event_df\n",
+ "pred_cols = ['weight', 'total_status', 'DISEASE', 'SUBTYPE', 'hypermutated']\n",
+ "prediction_sub_df = prediction_df.loc[:, pred_cols]\n",
+ "combined_df = event_df.merge(prediction_sub_df, how='left', left_on='sample_id', right_index=True)\n",
+ "print(combined_df.shape)\n",
+ "combined_df.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(48690, 9)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Filter TP53 mutants and hypermutated samples\n",
+ "plot_ready_df = combined_df[combined_df['total_status'] == 0.0]\n",
+ "plot_ready_df = plot_ready_df[plot_ready_df['hypermutated'] == 0.0]\n",
+ "print(plot_ready_df.shape)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## How many samples were filtered?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RPS6KA3 9666\n",
+ "CHEK1 9666\n",
+ "ATR 9666\n",
+ "MDM4 9666\n",
+ "ATM 9666\n",
+ "CHEK2 9666\n",
+ "PPM1D 9666\n",
+ "MDM2 9666\n",
+ "CDKN2A 9666\n",
+ "Name: gene, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# The same samples have equivalent representation across genes\n",
+ "combined_df['gene'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Filter 4037 TP53 mutated or deep deleted samples out of 9666 total samples\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Subset to a gene and observe filtering TP53 inactive tumors\n",
+ "tp53_status_df = combined_df[combined_df['gene'] == 'CDKN2A']\n",
+ "tp53_status = tp53_status_df['total_status'].value_counts()\n",
+ "print('Filter {} TP53 mutated or deep deleted samples out of {} total samples'\n",
+ " .format(tp53_status[1], tp53_status_df.shape[0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Filter additional 219 hypermutated samples out of the remaining 5629 total samples\n",
+ "Final sample size: n = 5410\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Observe effects of hypermutated tumor filtering\n",
+ "hyper_df = tp53_status_df[tp53_status_df['total_status'] == 0.0]\n",
+ "hyper_status = hyper_df['hypermutated'].value_counts() \n",
+ "print('Filter additional {} hypermutated samples out of the remaining {} total samples'\n",
+ " .format(hyper_status[1], hyper_df.shape[0]))\n",
+ "\n",
+ "use_sample_df = hyper_df[hyper_df['hypermutated'] == 0]\n",
+ "print('Final sample size: n = {}'.format(use_sample_df.shape[0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RPS6KA3 5410\n",
+ "PPM1D 5410\n",
+ "CHEK1 5410\n",
+ "ATR 5410\n",
+ "MDM4 5410\n",
+ "ATM 5410\n",
+ "MDM2 5410\n",
+ "CDKN2A 5410\n",
+ "CHEK2 5410\n",
+ "Name: gene, dtype: int64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Confirm sample size\n",
+ "plot_ready_df['gene'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Perform visualizations and include t-test results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# Mutation plotting parameters\n",
+ "x1_mut, x2_mut = -0.2, 0.2\n",
+ "y_mut, h_mut = 1.01, 0.03\n",
+ "\n",
+ "for gene in set(plot_ready_df['gene']):\n",
+ " gene_df = plot_ready_df[plot_ready_df['gene'] == gene]\n",
+ " event = gene_df['event'].tolist()[0]\n",
+ " \n",
+ " if event == 'Mutation':\n",
+ " plt.rcParams['figure.figsize']=(3, 4)\n",
+ " use_palette = {0: '#d8b365', 1: '#5ab4ac'}\n",
+ " legend_label_key = {0: 'Wild-Type', 1: 'Mutant'}\n",
+ " \n",
+ " # Perform an independent t-test against:\n",
+ " # Mutant vs. Wild-type weights\n",
+ " mutant_weight = gene_df[gene_df['value'] == 1]\n",
+ "\n",
+ " else:\n",
+ " plt.rcParams['figure.figsize']=(6, 4)\n",
+ " use_palette = {-2: '#2c7bb6', -1: '#abd9e9', 0: '#fee08b', 1: '#fdae61', 2: '#d7191c'}\n",
+ " legend_label_key = {-2: 'Deep Deletion', -1: 'Mild Deletion', 0: 'Wild-Type',\n",
+ " 1: 'Mild Amplification', 2: 'High Amplification'}\n",
+ "\n",
+ " # Perform an independent t-test against:\n",
+ " # Amplification vs. Wild-type weights\n",
+ " if gene == 'CDKN2A':\n",
+ " mutant_weight = gene_df[(gene_df['value'] == -1) | (gene_df['value'] == -2)]\n",
+ " x1_amp, x2_amp = -0.25, 0\n",
+ " y_amp, h_mut = 1.01, 0.03\n",
+ " y_text_amp = -0.125\n",
+ " \n",
+ " \n",
+ " deep_del_weight = gene_df[gene_df['value'] == -2]\n",
+ " mild_del_weight = gene_df[gene_df['value'] == -1]\n",
+ " \n",
+ " t_del_results = ttest_ind(a = mild_del_weight['weight'],\n",
+ " b = deep_del_weight['weight'], equal_var = False)\n",
+ " \n",
+ " p_val_del = t_del_results.pvalue\n",
+ " if p_val_del > 0.05:\n",
+ " add_text_del = 'NS'\n",
+ " else:\n",
+ " add_text_del = \"{:.2E}\".format(Decimal(p_val_del))\n",
+ " \n",
+ " else:\n",
+ " mutant_weight = gene_df[(gene_df['value'] == 1) | (gene_df['value'] == 2)]\n",
+ " x1_amp, x2_amp = 0, 0.25\n",
+ " y_amp, h_mut = 1.01, 0.03\n",
+ " y_text_amp = 0.125\n",
+ " \n",
+ " if gene == 'MDM2':\n",
+ " x1_amp, x2_amp = -0.1, 0.2\n",
+ " y_text_amp = 0.05\n",
+ "\n",
+ " wt_weight = gene_df[gene_df['value'] == 0]\n",
+ "\n",
+ " # Output t-test results\n",
+ " t_results = ttest_ind(a = wt_weight['weight'],\n",
+ " b = mutant_weight['weight'], equal_var = False)\n",
+ "\n",
+ " p_val = t_results.pvalue\n",
+ " if p_val > 0.05:\n",
+ " add_text = 'NS'\n",
+ " else:\n",
+ " add_text = \"{:.2E}\".format(Decimal(p_val))\n",
+ " \n",
+ " # Generate Plots\n",
+ " ax = sns.boxplot(x=\"gene\", y=\"weight\", hue='value',\n",
+ " data=gene_df,\n",
+ " palette=use_palette,\n",
+ " fliersize=0)\n",
+ " \n",
+ " handles, labels = ax.get_legend_handles_labels()\n",
+ "\n",
+ " ax = sns.stripplot(x='gene', y='weight', hue='value',\n",
+ " data=gene_df, \n",
+ " dodge=True,\n",
+ " palette=use_palette,\n",
+ " edgecolor='black',\n",
+ " jitter=0.3,\n",
+ " size=2,\n",
+ " alpha=0.75)\n",
+ " \n",
+ " # Set Legend labels\n",
+ " use_labels = []\n",
+ " for lab in labels:\n",
+ " use_labels.append(legend_label_key[int(lab)])\n",
+ "\n",
+ " # Get x axis tick labels\n",
+ " x_axis_tick_labels = []\n",
+ " for key, value in legend_label_key.items():\n",
+ " n = gene_df['value'].value_counts()\n",
+ " try:\n",
+ " n = n[key]\n",
+ " result = 'n = {}\\n{}'.format(n, value)\n",
+ " x_axis_tick_labels.append(result)\n",
+ " except:\n",
+ " next\n",
+ " \n",
+ " l = plt.legend(handles, use_labels, borderaxespad=0.)\n",
+ " l.set_title('Status')\n",
+ " ax.axes.set_ylim(-0.003, 1.2)\n",
+ " ax.set_yticklabels(['', 0, 0.2, 0.4, 0.6, 0.8, 1, ''])\n",
+ " ax.set_xticklabels('')\n",
+ " ax.set_ylabel('TP53 Classifier Score')\n",
+ " ax.legend\n",
+ " ax.set_title(gene)\n",
+ " plt.axhline(0.5, color='black', linestyle='dashed', linewidth=1)\n",
+ " plt.tight_layout()\n",
+ " \n",
+ " if event == 'Mutation':\n",
+ " plt.plot([x1_mut, x1_mut, x2_mut, x2_mut], [y_mut, y_mut + h_mut, y_mut + h_mut, y_mut],\n",
+ " lw=1.2, c='black')\n",
+ " plt.text(0, y_mut + h_mut, add_text, ha='center', va='bottom', color=\"black\")\n",
+ " else:\n",
+ " plt.plot([x1_amp, x1_amp, x2_amp, x2_amp], [y_amp, y_amp + h_mut, y_amp + h_mut, y_amp],\n",
+ " lw=1.2, c='black')\n",
+ " plt.text(y_text_amp, y_amp + h_mut, add_text, ha='center', va='bottom', color=\"black\")\n",
+ " \n",
+ " if gene == 'CDKN2A':\n",
+ " y_add = 0.1\n",
+ " plt.plot([x1_amp - 0.075, x1_amp - 0.075, x2_amp - 0.16, x2_amp - 0.16],\n",
+ " [y_amp + y_add, y_amp + h_mut + y_add, y_amp + h_mut + y_add, y_amp + y_add],\n",
+ " lw=1.2, c='black')\n",
+ " plt.text(y_text_amp - 0.1, y_amp + h_mut + 0.1,\n",
+ " add_text_del, ha='center', va='bottom', color=\"black\")\n",
+ " \n",
+ " phenocopy_fig_file = os.path.join('..', 'classifiers', 'TP53', 'figures',\n",
+ " '{}_gene_phenocopy.pdf'.format(gene))\n",
+ " plt.savefig(phenocopy_fig_file)\n",
+ " plt.close()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python (pancancer-classifier)",
+ "language": "python",
+ "name": "pancancer-classifier"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}