From 5c7c9aa10285571c477e550d4bb4c6cfcd7749f5 Mon Sep 17 00:00:00 2001
From: Andrew Green <55896483+Green-Andrew@users.noreply.github.com>
Date: Sun, 22 Dec 2019 23:04:39 -0500
Subject: [PATCH] Add files via upload
---
nfl_kaggle.ipynb | 922 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 922 insertions(+)
create mode 100644 nfl_kaggle.ipynb
diff --git a/nfl_kaggle.ipynb b/nfl_kaggle.ipynb
new file mode 100644
index 0000000..d7c75e5
--- /dev/null
+++ b/nfl_kaggle.ipynb
@@ -0,0 +1,922 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# NFL 1st and Future - Analytics"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from scipy import stats\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "injury_record = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\injuryrecord.csv')\n",
+ "player_track_data = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\playertrackdata.csv')\n",
+ "play_list = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\playlist.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PlayerKey | \n",
+ " GameID | \n",
+ " PlayKey | \n",
+ " BodyPart | \n",
+ " Surface | \n",
+ " DM_M1 | \n",
+ " DM_M7 | \n",
+ " DM_M28 | \n",
+ " DM_M42 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 46074 | \n",
+ " 46074-7 | \n",
+ " 46074-7-26 | \n",
+ " Knee | \n",
+ " Natural | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 36557 | \n",
+ " 36557-1 | \n",
+ " 36557-1-70 | \n",
+ " Ankle | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 46646 | \n",
+ " 46646-3 | \n",
+ " 46646-3-30 | \n",
+ " Ankle | \n",
+ " Natural | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 43532 | \n",
+ " 43532-5 | \n",
+ " 43532-5-69 | \n",
+ " Ankle | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PlayerKey GameID PlayKey BodyPart Surface DM_M1 DM_M7 DM_M28 \\\n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "1 46074 46074-7 46074-7-26 Knee Natural 1 1 0 \n",
+ "2 36557 36557-1 36557-1-70 Ankle Synthetic 1 1 1 \n",
+ "3 46646 46646-3 46646-3-30 Ankle Natural 1 0 0 \n",
+ "4 43532 43532-5 43532-5-69 Ankle Synthetic 1 1 1 \n",
+ "\n",
+ " DM_M42 \n",
+ "0 1 \n",
+ "1 0 \n",
+ "2 1 \n",
+ "3 0 \n",
+ "4 1 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "injury_record.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "76"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "injury_record['PlayKey'].nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAbgUlEQVR4nO3deXRV5b3/8ffXMBbQH0P0MhTjskWFkgQJQ1UsQkHtT0WcEK2CVbGr4NAqLm/1KnX46XK4VLRFoCig0utPVMDeDk6gKCgkGAZB5VpTpaVMToBQGb73j70TDiEJB8g+J+H5vNbK4pzn7H329+wVPnnOs/d+trk7IiISjsOyXYCIiGSWgl9EJDAKfhGRwCj4RUQCo+AXEQlMg2wXkI42bdp4Xl5etssQEalXSkpKNrh7buX2ehH8eXl5FBcXZ7sMEZF6xcz+VlW7hnpERAKj4BcRCYyCX0QkMPVijF/CtH37dlavXs22bduyXUq91qRJEzp06EDDhg2zXYrUEQp+qbNWr15NixYtyMvLw8yyXU695O5s3LiR1atXc8wxx2S7HKkjNNQjdda2bdto3bq1Qv8gmBmtW7fWtybZQ2LBb2bfNrM5ZrbSzN4zs+vj9jFm9nczK41/fpRUDVL/KfQPnvahVJbkUM8O4EZ3X2xmLYASM3s5fm2suz+Y4LZFRKQaifX43X2Nuy+OH28CVgLtk9qeyP6655576NKlC/n5+RQWFvLOO+/s1/pDhw4lPz+fsWPHJlShSDIycnDXzPKAbsA7wMnAKDO7HCgm+lbweRXrjABGAHTs2DETZdZ73UdPy8h2Sh64PCPbSdKCBQv4wx/+wOLFi2ncuDEbNmzgm2++SWvdHTt2sGHDBubPn8/f/lblhZEidVriB3fNrDnwHHCDu38FjAeOBQqBNcBDVa3n7hPdvcjdi3Jz95pqQuSgrFmzhjZt2tC4cWMA2rRpQ7t27cjLy2PDhg0AFBcX07dvXwDGjBnDiBEjGDhwIJdffjkDBw5k3bp1FBYWMm/ePCZNmkSPHj0oKCjg/PPP5+uvvwZg7dq1DB48mIKCAgoKCpg/fz4ATz31FD179qSwsJBrrrmGnTt3Zn4nSLASDX4za0gU+k+7+/MA7r7W3Xe6+y5gEtAzyRpEqjJw4EA+/fRTOnXqxM9+9jNef/31fa5TUlLCrFmzmD59OrNnz+bYY4+ltLSUPn36cN5557Fo0SKWLFnCCSecwOTJkwG47rrr+MEPfsCSJUtYvHgxXbp0YeXKlTzzzDO89dZblJaWkpOTw9NPP530RxapkNhQj0WnEkwGVrr7f6a0t3X3NfHTwcDypGoQqU7z5s0pKSlh3rx5zJkzhyFDhnDffffVuM4555xD06ZNq3xt+fLl3HbbbXzxxRds3ryZ008/HYDXXnuNadOiIbicnByOOOIInnzySUpKSujRowcAW7du5cgjj6zFTydSsyTH+E8GLgOWmVlp3PZLYKiZFQIOlAHXJFiDSLVycnLo27cvffv2pWvXrkydOpUGDRqwa9cugL3OfW/WrFm17zV8+HBmzpxJQUEBU6ZMYe7cudUu6+4MGzaMe++9t1Y+h8j+SvKsnjfd3dw9390L458/uvtl7t41bj8npfcvkjEffPABq1atqnheWlrK0UcfTV5eHiUlJQA899xzab/fpk2baNu2Ldu3b99j2KZ///6MHz8egJ07d/LVV1/Rv39/ZsyYwbp16wD47LPPdJBYMkpX7kqQNm/ezLBhw+jcuTP5+fmsWLGCMWPGcMcdd3D99dfTp08fcnJy0n6/u+66i169ejFgwACOP/74ivaHH36YOXPm0LVrV7p37857771H586dufvuuxk4cCD5+fkMGDCANWvU/5HMMXfPdg37VFRU5LoRy74daqdzrly5khNOOCEj2zrUaV+GycxK3L2ocnu9n6QtU2H3QosHMrIdgI63L8vYtkQkPBrqEREJjIJfRCQwCn4RkcAo+EVEAqPgFxEJTL0/q0fCUdtncKVzWuo999zD9OnTycnJ4bDDDmPChAn06tVrv7Yzc+ZMOnXqROfOnQHo27cvDz74IEVFe51lV6WysjLmz5/PJZdcAkSTx02bNo1x48btVx0i5RT8ItU4mKmbU82cOZOzzjqrIvj3V1lZGdOnT68I/qKiorT/aIhURUM9ItWoaurmlStXMnjw4IplXn75Zc477zwgmvjt1ltvpaCggN69e7N27Vrmz5/P7NmzGT16NIWFhXz00UcAPPvss/Ts2ZNOnToxb948IJrSYfTo0fTo0YP8/HwmTJgAwC233MK8efMoLCxk7NixzJ07l7POOguIrkC+4oor6Nq1K/n5+fs1zYSES8EvUo2qpm7u168fK1euZP369QA88cQTXHHFFQBs2bKF3r17s2TJEk499VQmTZrESSedxDnnnMMDDzxAaWkpxx57LBDdzGXhwoX8+te/5le/+hUAkydP5ogjjmDRokUsWrSISZMm8fHHH3PffffRp08fSktL+fnPf75HjXfddRdHHHEEy5YtY+nSpfTr1y+De0jqKwW/SDXKp26eOHEiubm5DBkyhKlTp3LZZZfx1FNP8cUXX7BgwQLOPPNMABo1alTRE+/evTtlZWXVvnf5t4TU5V566SWmTZtGYWEhvXr1YuPGjXtMJFeVV155hZEjR1Y8b9my5UF8YgmFxvhFalDV1M0TJkzg7LPPpkmTJlx44YU0aBD9N2rYsCHRbSii9Xbs2FHt+5YPH6Uu5+488sgjFXP5l9vXFM/l2xRJl3r8ItWoburmdu3a0a5dO+6++26GDx++z/dp0aIFmzZt2udyp59+OuPHj2f79u0AfPjhh2zZsqXG9QcOHMijjz5a8fzzz/e6fbXIXtTjl3oj0zd537x5M9deey1ffPEFDRo04Dvf+Q4TJ04E4NJLL2X9+vVpnalz8cUXc/XVVzNu3DhmzJhR7XJXXXUVZWVlnHjiibg7ubm5zJw5k/z8fBo0aEBBQQHDhw+nW7duFevcdtttjBw5ku9973vk5ORwxx13VAwjiVSn3k/LrNk5d9O0zJkzatQounXrxpVXXpntUtJSl/elJOeQnZZZJNO6d+9Os2bNeOihh7JdisgBUfCL7KfyWzOK1Fc6uCsiEhgFv4hIYBT8IiKBUfCLiARGB3el3vjkzq61+n7pnDZrZvziF7+oOIPnwQcfZPPmzYwZM6badebOnUujRo046aSTaqtU8vLyKC4upk2bNrX2nhIu9fhFatC4cWOef/55NmzYkPY6c+fOZf78+fu1nZ07d+5vaSIHTMEvUoMGDRowYsQIxo4du9drL774Ir169aJbt2788Ic/ZO3atZSVlfHYY48xduxYCgsLmTdvHsOHD9/jit3mzZsD0R+I0047jUsuuYSuXaNvM+eeey7du3enS5cuFVcJi9Q2DfWI7MPIkSPJz8/n5ptv3qP9lFNO4e2338bM+N3vfsf999/PQw89xE9/+lOaN2/OTTfdBETTLVdn4cKFLF++nGOOOQaAxx9/nFatWrF161Z69OjB+eefT+vWrZP7cBIkBb/IPhx++OFcfvnljBs3jqZNm1a0r169miFDhrBmzRq++eabivDeHz179txjvXHjxvHCCy8A8Omnn7Jq1SoFv9Q6DfWIpOGGG25g8uTJbNmypaLt2muvZdSoUSxbtowJEyawbdu2Ktdt0KABu3btAqJplFNv39isWbOKx3PnzuWVV15hwYIFLFmyhG7dulX7niIHQ8EvkoZWrVpx0UUX7TFs8+WXX9K+fXsApk6dWtFeeRrlvLy8imkeZs2aVTHtcmVffvklLVu25Fvf+hbvv/8+b7/9dhIfRURDPVJ/HOispbXlxhtv3GPu+zFjxnDhhRfSvn17evfuzccffwzA2WefzQUXXMCsWbN45JFHuPrqqxk0aBA9e/akf//+e/TyU51xxhk89thj5Ofnc9xxx9G7d++MfC4Jj4JfpAabN2+ueHzUUUfx9ddfVzwfNGgQgwYN2mudTp06sXTp0j3aUnvv9957L0DFnb3KNW7cmD/96U9V1lHTbRxF9ldiQz1m9m0zm2NmK83sPTO7Pm5vZWYvm9mq+F/dJFREJIOSHOPfAdzo7icAvYGRZtYZuAV41d2/C7waPxcRkQxJLPjdfY27L44fbwJWAu2BQUD5kbCpwLlJ1SD1X324Q1xdp30olWXkrB4zywO6Ae8AR7n7Goj+OABHVrPOCDMrNrPi9evXZ6JMqWOaNGnCxo0bFVwHwd3ZuHEjTZo0yXYpUockfnDXzJoDzwE3uPtXZpbWeu4+EZgI0T13k6tQ6qoOHTqwevVq9If/4DRp0oQOHTpkuwypQxINfjNrSBT6T7v783HzWjNr6+5rzKwtsC7JGqT+atiw4QFdDSsiNUvyrB4DJgMr3f0/U16aDQyLHw8DZiVVg4iI7C3JHv/JwGXAMjMrjdt+CdwH/H8zuxL4BLgwwRpERKSSxILf3d8EqhvQ75/UdkVEpGaaq0dEJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCk+TN1uUQ9cmdXTO2rY63L8vYtkRCoR6/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISmMSC38weN7N1ZrY8pW2Mmf3dzErjnx8ltX0REalakj3+KcAZVbSPdffC+OePCW5fRESqkFjwu/sbwGdJvb+IiByYbIzxjzKzpfFQUMssbF9EJGiZDv7xwLFAIbAGeKi6Bc1shJkVm1nx+vXrM1WfiMghL6PB7+5r3X2nu+8CJgE9a1h2orsXuXtRbm5u5ooUETnEZTT4zaxtytPBwPLqlhURkWQkds9dM/s90BdoY2argTuAvmZWCDhQBlyT1PZFRKRqaQW/mb3q7v331ZbK3YdW0Tx5P+sTEZFaVmPwm1kT4FtEvfaWgMUvHQ60S7g2ERFJwL56/NcANxCFfAm7g/8r4DcJ1iUiIgmpMfjd/WHgYTO71t0fyVBNIiKSoLTG+N39ETM7CchLXcfdpyVUl4iIJCTdg7tPEl14VQrsjJsdUPCLiNQz6Z7OWQR0dndPshgREUleuhdwLQf+LclCREQkM9Lt8bcBVpjZQuBf5Y3ufk4iVYmISGLSDf4xSRYhIiKZk+5ZPa8nXYiIiGRGumf1bCI6iwegEdAQ2OLuhydVmIiIJCPdHn+L1Odmdi41TKksIiJ11wFNy+zuM4F+tVyLiIhkQLpDPeelPD2M6Lx+ndMvIlIPpXtWz9kpj3cQzaU/qNarERGRxKU7xn9F0oWIiEhmpDXGb2YdzOwFM1tnZmvN7Dkz65B0cSIiUvvSPbj7BDCbaF7+9sCLcZuIiNQz6QZ/rrs/4e474p8pQG6CdYmISELSDf4NZvZjM8uJf34MbEyyMBERSUa6wf8T4CLgn8Aa4AJAB3xFROqhdE/nvAsY5u6fA5hZK+BBoj8IIiJSj6Tb488vD30Ad/8M6JZMSSIikqR0g/8wM2tZ/iTu8af7bUFEROqQdMP7IWC+mc0gmqrhIuCexKoSEZHEpHvl7jQzKyaamM2A89x9RaKViYhIItIeromDXmEvIlLPHdC0zCIiUn8p+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEApNY8JvZ4/GNW5antLUys5fNbFX8b8ua3kNERGpfkj3+KcAZldpuAV519+8Cr8bPRUQkgxILfnd/A/isUvMgYGr8eCpwblLbFxGRqmV6jP8od18DEP97ZHULmtkIMys2s+L169dnrEARkUNdnT246+4T3b3I3Ytyc3WXRxGR2pLp4F9rZm0B4n/XZXj7IiLBy3TwzwaGxY+HAbMyvH0RkeAleTrn74EFwHFmttrMrgTuAwaY2SpgQPxcREQyKLG7aLn70Gpe6p/UNkVEZN/q7MFdERFJhoJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcA0yHYBIpKs7qOnZWQ7JQ9cnpHtyMFTj19EJDAKfhGRwCj4RUQCo+AXEQlMVg7umlkZsAnYCexw96Js1CEiEqJsntVzmrtvyOL2RUSCpKEeEZHAZKvH78BLZubABHefWHkBMxsBjADo2LFjhssTSc8nd3bN2LY63r4sY9uSQ1u2evwnu/uJwJnASDM7tfIC7j7R3YvcvSg3NzfzFYqIHKKyEvzu/o/433XAC0DPbNQhIhKijAe/mTUzsxblj4GBwPJM1yEiEqpsjPEfBbxgZuXbn+7uf85CHSIiQcp48Lv7X4GCTG9XREQiOp1TRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEApONm62LJK776GkZ2c4LLTKyGZFapR6/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISGF3AJSK14pM7u2ZsWx1vX5axbR2Iur4v1OMXEQmMgl9EJDAKfhGRwCj4RUQCk5XgN7MzzOwDM/sfM7slGzWIiIQq48FvZjnAb4Azgc7AUDPrnOk6RERClY0ef0/gf9z9r+7+DfBfwKAs1CEiEiRz98xu0OwC4Ax3vyp+fhnQy91HVVpuBDAifnoc8EFGC91bG2BDlmuoK7QvdtO+2E37Yre6si+Odvfcyo3ZuIDLqmjb66+Pu08EJiZfTnrMrNjdi7JdR12gfbGb9sVu2he71fV9kY2hntXAt1OedwD+kYU6RESClI3gXwR818yOMbNGwMXA7CzUISISpIwP9bj7DjMbBfwFyAEed/f3Ml3HAagzw051gPbFbtoXu2lf7Fan90XGD+6KiEh26cpdEZHAKPhFRAITdPCb2eaUxz8ys1Vm1jGbNWWDmQ02Mzez49NYtszM2lTRvrmq5eszM9tpZqUpP3kH8B6FZvaj2q8uM8ysdcrn/6eZ/T3leaNs15dplX/PzWy4mT16gO/V18z+UDuV7R/diAUws/7AI8BAd/8k2/VkwVDgTaIzrMZkt5Q6Zau7Fx7kexQCRcAfa6GejHP3jUSfATMbA2x29wezWpQctKB7/ABm1geYBPxfd/8obptiZuPMbL6Z/TW+2rh8+dFmtsjMlprZr1Laf2xmC+Oe0IR4TqI6z8yaAycDVxIFf3lPZK6ZzTCz983saTOzSus1NbM/m9nVVbxnlfvoUGBmTczsCTNbZmbvmtlp1bXHPeI7gSHx78WQ7FZfu8zsZjNbHv9cm9I+LOX/wm/N7DAza2BmT8b7Z7mZXZfN2pNgZrlm9lz8u7/IzE6O25uZ2eNx27tmlvUpakLv8TcGZgF93f39Sq+1BU4Bjie6zmCGmQ0Evks035ABs83sVGA9MAQ42d23m9lvgUuBaZn5GAflXODP7v6hmX1mZifG7d2ALkQX171F9Mfhzfi15kRzLE1z9z0+Y3X7yN3fSP6j1LqmZlYaP/7Y3QcDIwHcvWs8NPaSmXWqqh3oBNwOFFWekqS+M7OeRL/jPYlOy15oZq8Du4DBwEnxqdsTiToUHwFt3L1rvP7/yU7lBy31dwKgFbuvQ3oYGOvub8ZDxn8BTgBuBV5z95/En3uhmb2S0aorCT34twPziXq711d6baa77wJWmNlRcdvA+Ofd+HlzopDLB7oDi+KOcVNgXbKl15qhwK/jx/8VP/9vYKG7rwaIf9Hz2B38s4D73f3pKt6vun1UH4O/qqGeU4iGBXH3983sb0QBX137oaoP8Jy7fw1gZjOJ9kEjoAdQnPJ/4VOiEDzOzB4mGvZ6KRtF14I9fifMbDjRUB7AD4HOKV+ODzezFkT/H84xs5vi9iZAVo8lhh78u4CLgFfM7Jfu/v9SXvtXymNL+fded5+Q+ibx19yp7v7viVZby8ysNdAP+J6ZOVHPzYn+Y6Z+/p3s+bvyFnCmmU33vS8EqXIfHUKqmmuqpvZDVU374XF3/4+9XjDLJ5qO/TrgfHZPwnioOAz4vrtvTW2Mh0nPd/cPKrUfRZYEP8Yf91jOAi41syv3sfhfgJ/E4+KYWXszOxJ4FbggfoyZtTKzo5Osu5ZcQDRcc7S757n7t4GPiXpuNbkd2Aj8torXqttHh4o3iIY4iId4OhLNHFtd+yagRVYqTdYbwOD4WE9zoqnV5wGvABdZfOaXRWcFdTSzXKILRp8F7gBOrO6N67GXgIohPTMr/2bwF+Da8uNkZtYtC7XtIfjgB3D3z4AzgNtqOvDi7i8B04EFZrYMmAG0cPcVwG1E471LgZeJjhHUdUOBFyq1PQdcksa6NwBNzOz+1Mbq9lEt1FpX/BbIiT/bM8Bwd/9XDe1ziL7+H1IHd919IfB7orm33gbGu/syd18G/IroW/RSojA8imhixjfiYcNJwC+zU3mirgOK4pMaVgA/jdvvAhoCS81sefw8qzRlg4hIYNTjFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfgmG7Z9tcYmaLzeyk/Vx/iqXM21TNMmXxfDRLzOwlM/u3/dzGcDNrtz/riOwvBb+EZKu7F7p7AfDvwL0Jbee0eBvF7Mf56hZN7DccUPBLohT8EqrDgc8huqTezB6IZ41cVn6hVdz+qJmtMLP/BsqvzO5vZhUXvpnZADN7voptvAF8J15mvJkVm9l7tuesrmVmdruZvUl0QV0R8HT8zaRpQp9dAhf6XD0SlvKZFZsQXVndL24/j2jO+QKgDdFke28A3weOA7oSXX26AngceA34jZnluvt64ArgiSq2dxawLH58q7t/FvfqXzWzfHdfGr+2zd1PATCzq4Cb3L24Nj+4SCr1+CUk5UM9xxNN0TEtnj/lFOD37r7T3dcCrxPNMHlqSvs/iAKfeGK6J4Efx9Psfh/4U8p25sR/YA5n93DSRWa2mGjW0i5A55Tln0no84pUST1+CZK7L4gnEsul5pk1q5vT5AngRWAb8Ky770h57TR331D+xMyOAW4Cerj752Y2hehbR7ktB/ARRA6YevwSpPhmKTlEs4y+QXSXrJx4FslTgYVx+8Vxe1vgtPL1428A/yCanG/KPjZ3OFG4fxlPxXtmDcseqrN5Sh2iHr+EJPXuSQYMc/ed8YHa7wNLiHr4N7v7P+P2fkTj9B8SDQGlehrIjWdnrZa7LzGzd4H3gL8S3c+gOlOAx8xsK1XM7S5SGzQ7p8gBMrNHgXfdfXK2axHZHwp+kQNgZiVEwzcD4nn3ReoNBb+ISGB0cFdEJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDD/C3+wINftG2w9AAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "#quick look at the difference in number of injuries and surface\n",
+ "sns.countplot(x='BodyPart', hue='Surface', data = injury_record)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PlayKey | \n",
+ " time | \n",
+ " event | \n",
+ " x | \n",
+ " y | \n",
+ " dir | \n",
+ " dis | \n",
+ " o | \n",
+ " s | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 26624-1-1 | \n",
+ " 0.0 | \n",
+ " huddle_start_offense | \n",
+ " 87.46 | \n",
+ " 28.93 | \n",
+ " 288.24 | \n",
+ " 0.01 | \n",
+ " 262.33 | \n",
+ " 0.13 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 26624-1-1 | \n",
+ " 0.1 | \n",
+ " NaN | \n",
+ " 87.45 | \n",
+ " 28.92 | \n",
+ " 283.91 | \n",
+ " 0.01 | \n",
+ " 261.69 | \n",
+ " 0.12 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 26624-1-1 | \n",
+ " 0.2 | \n",
+ " NaN | \n",
+ " 87.44 | \n",
+ " 28.92 | \n",
+ " 280.40 | \n",
+ " 0.01 | \n",
+ " 261.17 | \n",
+ " 0.12 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 26624-1-1 | \n",
+ " 0.3 | \n",
+ " NaN | \n",
+ " 87.44 | \n",
+ " 28.92 | \n",
+ " 278.79 | \n",
+ " 0.01 | \n",
+ " 260.66 | \n",
+ " 0.10 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 26624-1-1 | \n",
+ " 0.4 | \n",
+ " NaN | \n",
+ " 87.44 | \n",
+ " 28.92 | \n",
+ " 275.44 | \n",
+ " 0.01 | \n",
+ " 260.27 | \n",
+ " 0.09 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PlayKey time event x y dir dis o \\\n",
+ "0 26624-1-1 0.0 huddle_start_offense 87.46 28.93 288.24 0.01 262.33 \n",
+ "1 26624-1-1 0.1 NaN 87.45 28.92 283.91 0.01 261.69 \n",
+ "2 26624-1-1 0.2 NaN 87.44 28.92 280.40 0.01 261.17 \n",
+ "3 26624-1-1 0.3 NaN 87.44 28.92 278.79 0.01 260.66 \n",
+ "4 26624-1-1 0.4 NaN 87.44 28.92 275.44 0.01 260.27 \n",
+ "\n",
+ " s \n",
+ "0 0.13 \n",
+ "1 0.12 \n",
+ "2 0.12 \n",
+ "3 0.10 \n",
+ "4 0.09 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "player_track_data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "266960"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#number of different plays in the dataframe\n",
+ "player_track_data['PlayKey'].nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "ball_snap 253271\n",
+ "line_set 247189\n",
+ "huddle_break_offense 175415\n",
+ "tackle 149626\n",
+ "first_contact 140437\n",
+ " ... \n",
+ "end_path 8\n",
+ "xp_fake 5\n",
+ "drop_kick 5\n",
+ "extra_point_fake 5\n",
+ "play_submit\\t 2\n",
+ "Name: event, Length: 80, dtype: int64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# number of different events(play types) in the data frame\n",
+ "player_track_data['event'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below, join injury_recrod and player_track record to process data so each bodypart is grouped individually"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "22223"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "record_track_merge = injury_record.join(player_track_data.set_index('PlayKey'),on='PlayKey')\n",
+ "record_track_merge.shape[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " PlayerKey | \n",
+ " GameID | \n",
+ " PlayKey | \n",
+ " BodyPart | \n",
+ " Surface | \n",
+ " DM_M1 | \n",
+ " DM_M7 | \n",
+ " DM_M28 | \n",
+ " DM_M42 | \n",
+ " time | \n",
+ " event | \n",
+ " x | \n",
+ " y | \n",
+ " dir | \n",
+ " dis | \n",
+ " o | \n",
+ " s | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.0 | \n",
+ " punt_play | \n",
+ " 56.73 | \n",
+ " 17.30 | \n",
+ " 356.14 | \n",
+ " 0.18 | \n",
+ " 249.95 | \n",
+ " 1.83 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.1 | \n",
+ " NaN | \n",
+ " 56.74 | \n",
+ " 17.48 | \n",
+ " 355.73 | \n",
+ " 0.18 | \n",
+ " 248.49 | \n",
+ " 1.82 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.2 | \n",
+ " NaN | \n",
+ " 56.72 | \n",
+ " 17.66 | \n",
+ " 355.56 | \n",
+ " 0.18 | \n",
+ " 246.95 | \n",
+ " 1.81 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.3 | \n",
+ " NaN | \n",
+ " 56.69 | \n",
+ " 17.84 | \n",
+ " 355.84 | \n",
+ " 0.18 | \n",
+ " 245.09 | \n",
+ " 1.79 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 39873 | \n",
+ " 39873-4 | \n",
+ " 39873-4-32 | \n",
+ " Knee | \n",
+ " Synthetic | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 0.4 | \n",
+ " NaN | \n",
+ " 56.68 | \n",
+ " 18.02 | \n",
+ " 356.01 | \n",
+ " 0.18 | \n",
+ " 243.02 | \n",
+ " 1.76 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " PlayerKey GameID PlayKey BodyPart Surface DM_M1 DM_M7 DM_M28 \\\n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n",
+ "\n",
+ " DM_M42 time event x y dir dis o s \n",
+ "0 1 0.0 punt_play 56.73 17.30 356.14 0.18 249.95 1.83 \n",
+ "0 1 0.1 NaN 56.74 17.48 355.73 0.18 248.49 1.82 \n",
+ "0 1 0.2 NaN 56.72 17.66 355.56 0.18 246.95 1.81 \n",
+ "0 1 0.3 NaN 56.69 17.84 355.84 0.18 245.09 1.79 \n",
+ "0 1 0.4 NaN 56.68 18.02 356.01 0.18 243.02 1.76 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#group plays by bodypart\n",
+ "record_track_merge.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Knee 10549\n",
+ "Ankle 9942\n",
+ "Foot 1724\n",
+ "Toes 7\n",
+ "Heel 1\n",
+ "Name: BodyPart, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "record_track_merge['BodyPart'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#remove toes and heel body part as they dont contain data from player_track_data\n",
+ "knee_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Knee']\n",
+ "ankle_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Ankle']\n",
+ "foot_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Foot']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#concatenate dataframes\n",
+ "injury_track_data = pd.concat([knee_inj, ankle_inj], ignore_index=True)\n",
+ "injury_track_data = pd.concat([injury_track_data, foot_inj], ignore_index=True) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Knee 10549\n",
+ "Ankle 9942\n",
+ "Foot 1724\n",
+ "Name: BodyPart, dtype: int64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "injury_track_data['BodyPart'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#join play file to injury track data\n",
+ "inj_data_full = injury_track_data.merge(play_list, on='PlayKey', how='left')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['PlayerKey_x',\n",
+ " 'GameID_x',\n",
+ " 'PlayKey',\n",
+ " 'BodyPart',\n",
+ " 'Surface',\n",
+ " 'DM_M1',\n",
+ " 'DM_M7',\n",
+ " 'DM_M28',\n",
+ " 'DM_M42',\n",
+ " 'time',\n",
+ " 'event',\n",
+ " 'x',\n",
+ " 'y',\n",
+ " 'dir',\n",
+ " 'dis',\n",
+ " 'o',\n",
+ " 's',\n",
+ " 'PlayerKey_y',\n",
+ " 'GameID_y',\n",
+ " 'RosterPosition',\n",
+ " 'PlayerDay',\n",
+ " 'PlayerGame',\n",
+ " 'StadiumType',\n",
+ " 'FieldType',\n",
+ " 'Temperature',\n",
+ " 'Weather',\n",
+ " 'PlayType',\n",
+ " 'PlayerGamePlay',\n",
+ " 'Position',\n",
+ " 'PositionGroup']"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(inj_data_full.columns)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Removing Columns that are repeated with different names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#remove playerkey_y and gameid_y\n",
+ "inj_data_full = inj_data_full.drop(['PlayerKey_y', 'GameID_y'], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inj_data_full = inj_data_full.drop(['event'], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inj_data_full = inj_data_full.drop(['FieldType'], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inj_data_full = inj_data_full.drop(['Position'], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "inj_data_full = inj_data_full.drop(['PositionGroup'], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Binary Logistic Regression"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn import metrics\n",
+ "import statsmodels.api as sm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "XY = inj_data_full[['x', 'y', 'dir', 'dis', 'o', 's','DM_M42']].dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "y = XY['DM_M42']\n",
+ "X = XY[['x', 'y', 'dir', 'dis', 'o', 's']]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Optimization terminated successfully.\n",
+ " Current function value: 0.602531\n",
+ " Iterations 5\n",
+ " Results: Logit\n",
+ "==================================================================\n",
+ "Model: Logit Pseudo R-squared: 0.039 \n",
+ "Dependent Variable: DM_M42 AIC: 26758.3622 \n",
+ "Date: 2019-12-22 22:40 BIC: 26806.4079 \n",
+ "No. Observations: 22195 Log-Likelihood: -13373. \n",
+ "Df Model: 5 LL-Null: -13913. \n",
+ "Df Residuals: 22189 LLR p-value: 3.2133e-231\n",
+ "Converged: 1.0000 Scale: 1.0000 \n",
+ "No. Iterations: 5.0000 \n",
+ "--------------------------------------------------------------------\n",
+ " Coef. Std.Err. z P>|z| [0.025 0.975]\n",
+ "--------------------------------------------------------------------\n",
+ "x -0.0154 0.0005 -30.0113 0.0000 -0.0164 -0.0144\n",
+ "y 0.0204 0.0011 18.4749 0.0000 0.0182 0.0225\n",
+ "dir -0.0012 0.0001 -9.1840 0.0000 -0.0015 -0.0010\n",
+ "dis 3.7890 0.4708 8.0474 0.0000 2.8662 4.7119\n",
+ "o -0.0008 0.0001 -6.3876 0.0000 -0.0011 -0.0006\n",
+ "s -0.4374 0.0486 -8.9946 0.0000 -0.5327 -0.3421\n",
+ "==================================================================\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "logit_model =sm.Logit(y,X)\n",
+ "result = logit_model.fit()\n",
+ "print(result.summary2())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\andre\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+ " FutureWarning)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
+ " intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
+ " multi_class='warn', n_jobs=None, penalty='l2',\n",
+ " random_state=None, solver='warn', tol=0.0001, verbose=0,\n",
+ " warm_start=False)"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model = LogisticRegression()\n",
+ "model.fit(X,y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}