From 5c7c9aa10285571c477e550d4bb4c6cfcd7749f5 Mon Sep 17 00:00:00 2001 From: Andrew Green <55896483+Green-Andrew@users.noreply.github.com> Date: Sun, 22 Dec 2019 23:04:39 -0500 Subject: [PATCH] Add files via upload --- nfl_kaggle.ipynb | 922 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 922 insertions(+) create mode 100644 nfl_kaggle.ipynb diff --git a/nfl_kaggle.ipynb b/nfl_kaggle.ipynb new file mode 100644 index 0000000..d7c75e5 --- /dev/null +++ b/nfl_kaggle.ipynb @@ -0,0 +1,922 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NFL 1st and Future - Analytics" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from scipy import stats\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "injury_record = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\injuryrecord.csv')\n", + "player_track_data = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\playertrackdata.csv')\n", + "play_list = pd.read_csv(r'C:\\Users\\andre\\Desktop\\Kaggle\\NFL Project\\playlist.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PlayerKeyGameIDPlayKeyBodyPartSurfaceDM_M1DM_M7DM_M28DM_M42
03987339873-439873-4-32KneeSynthetic1111
14607446074-746074-7-26KneeNatural1100
23655736557-136557-1-70AnkleSynthetic1111
34664646646-346646-3-30AnkleNatural1000
44353243532-543532-5-69AnkleSynthetic1111
\n", + "
" + ], + "text/plain": [ + " PlayerKey GameID PlayKey BodyPart Surface DM_M1 DM_M7 DM_M28 \\\n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "1 46074 46074-7 46074-7-26 Knee Natural 1 1 0 \n", + "2 36557 36557-1 36557-1-70 Ankle Synthetic 1 1 1 \n", + "3 46646 46646-3 46646-3-30 Ankle Natural 1 0 0 \n", + "4 43532 43532-5 43532-5-69 Ankle Synthetic 1 1 1 \n", + "\n", + " DM_M42 \n", + "0 1 \n", + "1 0 \n", + "2 1 \n", + "3 0 \n", + "4 1 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "injury_record.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "injury_record['PlayKey'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAbgUlEQVR4nO3deXRV5b3/8ffXMBbQH0P0MhTjskWFkgQJQ1UsQkHtT0WcEK2CVbGr4NAqLm/1KnX46XK4VLRFoCig0utPVMDeDk6gKCgkGAZB5VpTpaVMToBQGb73j70TDiEJB8g+J+H5vNbK4pzn7H329+wVPnnOs/d+trk7IiISjsOyXYCIiGSWgl9EJDAKfhGRwCj4RUQCo+AXEQlMg2wXkI42bdp4Xl5etssQEalXSkpKNrh7buX2ehH8eXl5FBcXZ7sMEZF6xcz+VlW7hnpERAKj4BcRCYyCX0QkMPVijF/CtH37dlavXs22bduyXUq91qRJEzp06EDDhg2zXYrUEQp+qbNWr15NixYtyMvLw8yyXU695O5s3LiR1atXc8wxx2S7HKkjNNQjdda2bdto3bq1Qv8gmBmtW7fWtybZQ2LBb2bfNrM5ZrbSzN4zs+vj9jFm9nczK41/fpRUDVL/KfQPnvahVJbkUM8O4EZ3X2xmLYASM3s5fm2suz+Y4LZFRKQaifX43X2Nuy+OH28CVgLtk9qeyP6655576NKlC/n5+RQWFvLOO+/s1/pDhw4lPz+fsWPHJlShSDIycnDXzPKAbsA7wMnAKDO7HCgm+lbweRXrjABGAHTs2DETZdZ73UdPy8h2Sh64PCPbSdKCBQv4wx/+wOLFi2ncuDEbNmzgm2++SWvdHTt2sGHDBubPn8/f/lblhZEidVriB3fNrDnwHHCDu38FjAeOBQqBNcBDVa3n7hPdvcjdi3Jz95pqQuSgrFmzhjZt2tC4cWMA2rRpQ7t27cjLy2PDhg0AFBcX07dvXwDGjBnDiBEjGDhwIJdffjkDBw5k3bp1FBYWMm/ePCZNmkSPHj0oKCjg/PPP5+uvvwZg7dq1DB48mIKCAgoKCpg/fz4ATz31FD179qSwsJBrrrmGnTt3Zn4nSLASDX4za0gU+k+7+/MA7r7W3Xe6+y5gEtAzyRpEqjJw4EA+/fRTOnXqxM9+9jNef/31fa5TUlLCrFmzmD59OrNnz+bYY4+ltLSUPn36cN5557Fo0SKWLFnCCSecwOTJkwG47rrr+MEPfsCSJUtYvHgxXbp0YeXKlTzzzDO89dZblJaWkpOTw9NPP530RxapkNhQj0WnEkwGVrr7f6a0t3X3NfHTwcDypGoQqU7z5s0pKSlh3rx5zJkzhyFDhnDffffVuM4555xD06ZNq3xt+fLl3HbbbXzxxRds3ryZ008/HYDXXnuNadOiIbicnByOOOIInnzySUpKSujRowcAW7du5cgjj6zFTydSsyTH+E8GLgOWmVlp3PZLYKiZFQIOlAHXJFiDSLVycnLo27cvffv2pWvXrkydOpUGDRqwa9cugL3OfW/WrFm17zV8+HBmzpxJQUEBU6ZMYe7cudUu6+4MGzaMe++9t1Y+h8j+SvKsnjfd3dw9390L458/uvtl7t41bj8npfcvkjEffPABq1atqnheWlrK0UcfTV5eHiUlJQA899xzab/fpk2baNu2Ldu3b99j2KZ///6MHz8egJ07d/LVV1/Rv39/ZsyYwbp16wD47LPPdJBYMkpX7kqQNm/ezLBhw+jcuTP5+fmsWLGCMWPGcMcdd3D99dfTp08fcnJy0n6/u+66i169ejFgwACOP/74ivaHH36YOXPm0LVrV7p37857771H586dufvuuxk4cCD5+fkMGDCANWvU/5HMMXfPdg37VFRU5LoRy74daqdzrly5khNOOCEj2zrUaV+GycxK3L2ocnu9n6QtU2H3QosHMrIdgI63L8vYtkQkPBrqEREJjIJfRCQwCn4RkcAo+EVEAqPgFxEJTL0/q0fCUdtncKVzWuo999zD9OnTycnJ4bDDDmPChAn06tVrv7Yzc+ZMOnXqROfOnQHo27cvDz74IEVFe51lV6WysjLmz5/PJZdcAkSTx02bNo1x48btVx0i5RT8ItU4mKmbU82cOZOzzjqrIvj3V1lZGdOnT68I/qKiorT/aIhURUM9ItWoaurmlStXMnjw4IplXn75Zc477zwgmvjt1ltvpaCggN69e7N27Vrmz5/P7NmzGT16NIWFhXz00UcAPPvss/Ts2ZNOnToxb948IJrSYfTo0fTo0YP8/HwmTJgAwC233MK8efMoLCxk7NixzJ07l7POOguIrkC+4oor6Nq1K/n5+fs1zYSES8EvUo2qpm7u168fK1euZP369QA88cQTXHHFFQBs2bKF3r17s2TJEk499VQmTZrESSedxDnnnMMDDzxAaWkpxx57LBDdzGXhwoX8+te/5le/+hUAkydP5ogjjmDRokUsWrSISZMm8fHHH3PffffRp08fSktL+fnPf75HjXfddRdHHHEEy5YtY+nSpfTr1y+De0jqKwW/SDXKp26eOHEiubm5DBkyhKlTp3LZZZfx1FNP8cUXX7BgwQLOPPNMABo1alTRE+/evTtlZWXVvnf5t4TU5V566SWmTZtGYWEhvXr1YuPGjXtMJFeVV155hZEjR1Y8b9my5UF8YgmFxvhFalDV1M0TJkzg7LPPpkmTJlx44YU0aBD9N2rYsCHRbSii9Xbs2FHt+5YPH6Uu5+488sgjFXP5l9vXFM/l2xRJl3r8ItWoburmdu3a0a5dO+6++26GDx++z/dp0aIFmzZt2udyp59+OuPHj2f79u0AfPjhh2zZsqXG9QcOHMijjz5a8fzzz/e6fbXIXtTjl3oj0zd537x5M9deey1ffPEFDRo04Dvf+Q4TJ04E4NJLL2X9+vVpnalz8cUXc/XVVzNu3DhmzJhR7XJXXXUVZWVlnHjiibg7ubm5zJw5k/z8fBo0aEBBQQHDhw+nW7duFevcdtttjBw5ku9973vk5ORwxx13VAwjiVSn3k/LrNk5d9O0zJkzatQounXrxpVXXpntUtJSl/elJOeQnZZZJNO6d+9Os2bNeOihh7JdisgBUfCL7KfyWzOK1Fc6uCsiEhgFv4hIYBT8IiKBUfCLiARGB3el3vjkzq61+n7pnDZrZvziF7+oOIPnwQcfZPPmzYwZM6badebOnUujRo046aSTaqtU8vLyKC4upk2bNrX2nhIu9fhFatC4cWOef/55NmzYkPY6c+fOZf78+fu1nZ07d+5vaSIHTMEvUoMGDRowYsQIxo4du9drL774Ir169aJbt2788Ic/ZO3atZSVlfHYY48xduxYCgsLmTdvHsOHD9/jit3mzZsD0R+I0047jUsuuYSuXaNvM+eeey7du3enS5cuFVcJi9Q2DfWI7MPIkSPJz8/n5ptv3qP9lFNO4e2338bM+N3vfsf999/PQw89xE9/+lOaN2/OTTfdBETTLVdn4cKFLF++nGOOOQaAxx9/nFatWrF161Z69OjB+eefT+vWrZP7cBIkBb/IPhx++OFcfvnljBs3jqZNm1a0r169miFDhrBmzRq++eabivDeHz179txjvXHjxvHCCy8A8Omnn7Jq1SoFv9Q6DfWIpOGGG25g8uTJbNmypaLt2muvZdSoUSxbtowJEyawbdu2Ktdt0KABu3btAqJplFNv39isWbOKx3PnzuWVV15hwYIFLFmyhG7dulX7niIHQ8EvkoZWrVpx0UUX7TFs8+WXX9K+fXsApk6dWtFeeRrlvLy8imkeZs2aVTHtcmVffvklLVu25Fvf+hbvv/8+b7/9dhIfRURDPVJ/HOispbXlxhtv3GPu+zFjxnDhhRfSvn17evfuzccffwzA2WefzQUXXMCsWbN45JFHuPrqqxk0aBA9e/akf//+e/TyU51xxhk89thj5Ofnc9xxx9G7d++MfC4Jj4JfpAabN2+ueHzUUUfx9ddfVzwfNGgQgwYN2mudTp06sXTp0j3aUnvv9957L0DFnb3KNW7cmD/96U9V1lHTbRxF9ldiQz1m9m0zm2NmK83sPTO7Pm5vZWYvm9mq+F/dJFREJIOSHOPfAdzo7icAvYGRZtYZuAV41d2/C7waPxcRkQxJLPjdfY27L44fbwJWAu2BQUD5kbCpwLlJ1SD1X324Q1xdp30olWXkrB4zywO6Ae8AR7n7Goj+OABHVrPOCDMrNrPi9evXZ6JMqWOaNGnCxo0bFVwHwd3ZuHEjTZo0yXYpUockfnDXzJoDzwE3uPtXZpbWeu4+EZgI0T13k6tQ6qoOHTqwevVq9If/4DRp0oQOHTpkuwypQxINfjNrSBT6T7v783HzWjNr6+5rzKwtsC7JGqT+atiw4QFdDSsiNUvyrB4DJgMr3f0/U16aDQyLHw8DZiVVg4iI7C3JHv/JwGXAMjMrjdt+CdwH/H8zuxL4BLgwwRpERKSSxILf3d8EqhvQ75/UdkVEpGaaq0dEJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDAKfhGRwCj4RUQCk+TN1uUQ9cmdXTO2rY63L8vYtkRCoR6/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISmMSC38weN7N1ZrY8pW2Mmf3dzErjnx8ltX0REalakj3+KcAZVbSPdffC+OePCW5fRESqkFjwu/sbwGdJvb+IiByYbIzxjzKzpfFQUMssbF9EJGiZDv7xwLFAIbAGeKi6Bc1shJkVm1nx+vXrM1WfiMghL6PB7+5r3X2nu+8CJgE9a1h2orsXuXtRbm5u5ooUETnEZTT4zaxtytPBwPLqlhURkWQkds9dM/s90BdoY2argTuAvmZWCDhQBlyT1PZFRKRqaQW/mb3q7v331ZbK3YdW0Tx5P+sTEZFaVmPwm1kT4FtEvfaWgMUvHQ60S7g2ERFJwL56/NcANxCFfAm7g/8r4DcJ1iUiIgmpMfjd/WHgYTO71t0fyVBNIiKSoLTG+N39ETM7CchLXcfdpyVUl4iIJCTdg7tPEl14VQrsjJsdUPCLiNQz6Z7OWQR0dndPshgREUleuhdwLQf+LclCREQkM9Lt8bcBVpjZQuBf5Y3ufk4iVYmISGLSDf4xSRYhIiKZk+5ZPa8nXYiIiGRGumf1bCI6iwegEdAQ2OLuhydVmIiIJCPdHn+L1Odmdi41TKksIiJ11wFNy+zuM4F+tVyLiIhkQLpDPeelPD2M6Lx+ndMvIlIPpXtWz9kpj3cQzaU/qNarERGRxKU7xn9F0oWIiEhmpDXGb2YdzOwFM1tnZmvN7Dkz65B0cSIiUvvSPbj7BDCbaF7+9sCLcZuIiNQz6QZ/rrs/4e474p8pQG6CdYmISELSDf4NZvZjM8uJf34MbEyyMBERSUa6wf8T4CLgn8Aa4AJAB3xFROqhdE/nvAsY5u6fA5hZK+BBoj8IIiJSj6Tb488vD30Ad/8M6JZMSSIikqR0g/8wM2tZ/iTu8af7bUFEROqQdMP7IWC+mc0gmqrhIuCexKoSEZHEpHvl7jQzKyaamM2A89x9RaKViYhIItIeromDXmEvIlLPHdC0zCIiUn8p+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEApNY8JvZ4/GNW5antLUys5fNbFX8b8ua3kNERGpfkj3+KcAZldpuAV519+8Cr8bPRUQkgxILfnd/A/isUvMgYGr8eCpwblLbFxGRqmV6jP8od18DEP97ZHULmtkIMys2s+L169dnrEARkUNdnT246+4T3b3I3Ytyc3WXRxGR2pLp4F9rZm0B4n/XZXj7IiLBy3TwzwaGxY+HAbMyvH0RkeAleTrn74EFwHFmttrMrgTuAwaY2SpgQPxcREQyKLG7aLn70Gpe6p/UNkVEZN/q7MFdERFJhoJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcA0yHYBIpKs7qOnZWQ7JQ9cnpHtyMFTj19EJDAKfhGRwCj4RUQCo+AXEQlMVg7umlkZsAnYCexw96Js1CEiEqJsntVzmrtvyOL2RUSCpKEeEZHAZKvH78BLZubABHefWHkBMxsBjADo2LFjhssTSc8nd3bN2LY63r4sY9uSQ1u2evwnu/uJwJnASDM7tfIC7j7R3YvcvSg3NzfzFYqIHKKyEvzu/o/433XAC0DPbNQhIhKijAe/mTUzsxblj4GBwPJM1yEiEqpsjPEfBbxgZuXbn+7uf85CHSIiQcp48Lv7X4GCTG9XREQiOp1TRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfRCQwCn4RkcAo+EVEApONm62LJK776GkZ2c4LLTKyGZFapR6/iEhgFPwiIoFR8IuIBEbBLyISGAW/iEhgFPwiIoFR8IuIBEbBLyISGF3AJSK14pM7u2ZsWx1vX5axbR2Iur4v1OMXEQmMgl9EJDAKfhGRwCj4RUQCk5XgN7MzzOwDM/sfM7slGzWIiIQq48FvZjnAb4Azgc7AUDPrnOk6RERClY0ef0/gf9z9r+7+DfBfwKAs1CEiEiRz98xu0OwC4Ax3vyp+fhnQy91HVVpuBDAifnoc8EFGC91bG2BDlmuoK7QvdtO+2E37Yre6si+Odvfcyo3ZuIDLqmjb66+Pu08EJiZfTnrMrNjdi7JdR12gfbGb9sVu2he71fV9kY2hntXAt1OedwD+kYU6RESClI3gXwR818yOMbNGwMXA7CzUISISpIwP9bj7DjMbBfwFyAEed/f3Ml3HAagzw051gPbFbtoXu2lf7Fan90XGD+6KiEh26cpdEZHAKPhFRAITdPCb2eaUxz8ys1Vm1jGbNWWDmQ02Mzez49NYtszM2lTRvrmq5eszM9tpZqUpP3kH8B6FZvaj2q8uM8ysdcrn/6eZ/T3leaNs15dplX/PzWy4mT16gO/V18z+UDuV7R/diAUws/7AI8BAd/8k2/VkwVDgTaIzrMZkt5Q6Zau7Fx7kexQCRcAfa6GejHP3jUSfATMbA2x29wezWpQctKB7/ABm1geYBPxfd/8obptiZuPMbL6Z/TW+2rh8+dFmtsjMlprZr1Laf2xmC+Oe0IR4TqI6z8yaAycDVxIFf3lPZK6ZzTCz983saTOzSus1NbM/m9nVVbxnlfvoUGBmTczsCTNbZmbvmtlp1bXHPeI7gSHx78WQ7FZfu8zsZjNbHv9cm9I+LOX/wm/N7DAza2BmT8b7Z7mZXZfN2pNgZrlm9lz8u7/IzE6O25uZ2eNx27tmlvUpakLv8TcGZgF93f39Sq+1BU4Bjie6zmCGmQ0Evks035ABs83sVGA9MAQ42d23m9lvgUuBaZn5GAflXODP7v6hmX1mZifG7d2ALkQX171F9Mfhzfi15kRzLE1z9z0+Y3X7yN3fSP6j1LqmZlYaP/7Y3QcDIwHcvWs8NPaSmXWqqh3oBNwOFFWekqS+M7OeRL/jPYlOy15oZq8Du4DBwEnxqdsTiToUHwFt3L1rvP7/yU7lBy31dwKgFbuvQ3oYGOvub8ZDxn8BTgBuBV5z95/En3uhmb2S0aorCT34twPziXq711d6baa77wJWmNlRcdvA+Ofd+HlzopDLB7oDi+KOcVNgXbKl15qhwK/jx/8VP/9vYKG7rwaIf9Hz2B38s4D73f3pKt6vun1UH4O/qqGeU4iGBXH3983sb0QBX137oaoP8Jy7fw1gZjOJ9kEjoAdQnPJ/4VOiEDzOzB4mGvZ6KRtF14I9fifMbDjRUB7AD4HOKV+ODzezFkT/H84xs5vi9iZAVo8lhh78u4CLgFfM7Jfu/v9SXvtXymNL+fded5+Q+ibx19yp7v7viVZby8ysNdAP+J6ZOVHPzYn+Y6Z+/p3s+bvyFnCmmU33vS8EqXIfHUKqmmuqpvZDVU374XF3/4+9XjDLJ5qO/TrgfHZPwnioOAz4vrtvTW2Mh0nPd/cPKrUfRZYEP8Yf91jOAi41syv3sfhfgJ/E4+KYWXszOxJ4FbggfoyZtTKzo5Osu5ZcQDRcc7S757n7t4GPiXpuNbkd2Aj8torXqttHh4o3iIY4iId4OhLNHFtd+yagRVYqTdYbwOD4WE9zoqnV5wGvABdZfOaXRWcFdTSzXKILRp8F7gBOrO6N67GXgIohPTMr/2bwF+Da8uNkZtYtC7XtIfjgB3D3z4AzgNtqOvDi7i8B04EFZrYMmAG0cPcVwG1E471LgZeJjhHUdUOBFyq1PQdcksa6NwBNzOz+1Mbq9lEt1FpX/BbIiT/bM8Bwd/9XDe1ziL7+H1IHd919IfB7orm33gbGu/syd18G/IroW/RSojA8imhixjfiYcNJwC+zU3mirgOK4pMaVgA/jdvvAhoCS81sefw8qzRlg4hIYNTjFxEJjIJfRCQwCn4RkcAo+EVEAqPgFxEJjIJfgmG7Z9tcYmaLzeyk/Vx/iqXM21TNMmXxfDRLzOwlM/u3/dzGcDNrtz/riOwvBb+EZKu7F7p7AfDvwL0Jbee0eBvF7Mf56hZN7DccUPBLohT8EqrDgc8huqTezB6IZ41cVn6hVdz+qJmtMLP/BsqvzO5vZhUXvpnZADN7voptvAF8J15mvJkVm9l7tuesrmVmdruZvUl0QV0R8HT8zaRpQp9dAhf6XD0SlvKZFZsQXVndL24/j2jO+QKgDdFke28A3weOA7oSXX26AngceA34jZnluvt64ArgiSq2dxawLH58q7t/FvfqXzWzfHdfGr+2zd1PATCzq4Cb3L24Nj+4SCr1+CUk5UM9xxNN0TEtnj/lFOD37r7T3dcCrxPNMHlqSvs/iAKfeGK6J4Efx9Psfh/4U8p25sR/YA5n93DSRWa2mGjW0i5A55Tln0no84pUST1+CZK7L4gnEsul5pk1q5vT5AngRWAb8Ky770h57TR331D+xMyOAW4Cerj752Y2hehbR7ktB/ARRA6YevwSpPhmKTlEs4y+QXSXrJx4FslTgYVx+8Vxe1vgtPL1428A/yCanG/KPjZ3OFG4fxlPxXtmDcseqrN5Sh2iHr+EJPXuSQYMc/ed8YHa7wNLiHr4N7v7P+P2fkTj9B8SDQGlehrIjWdnrZa7LzGzd4H3gL8S3c+gOlOAx8xsK1XM7S5SGzQ7p8gBMrNHgXfdfXK2axHZHwp+kQNgZiVEwzcD4nn3ReoNBb+ISGB0cFdEJDAKfhGRwCj4RUQCo+AXEQmMgl9EJDD/C3+wINftG2w9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#quick look at the difference in number of injuries and surface\n", + "sns.countplot(x='BodyPart', hue='Surface', data = injury_record)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PlayKeytimeeventxydirdisos
026624-1-10.0huddle_start_offense87.4628.93288.240.01262.330.13
126624-1-10.1NaN87.4528.92283.910.01261.690.12
226624-1-10.2NaN87.4428.92280.400.01261.170.12
326624-1-10.3NaN87.4428.92278.790.01260.660.10
426624-1-10.4NaN87.4428.92275.440.01260.270.09
\n", + "
" + ], + "text/plain": [ + " PlayKey time event x y dir dis o \\\n", + "0 26624-1-1 0.0 huddle_start_offense 87.46 28.93 288.24 0.01 262.33 \n", + "1 26624-1-1 0.1 NaN 87.45 28.92 283.91 0.01 261.69 \n", + "2 26624-1-1 0.2 NaN 87.44 28.92 280.40 0.01 261.17 \n", + "3 26624-1-1 0.3 NaN 87.44 28.92 278.79 0.01 260.66 \n", + "4 26624-1-1 0.4 NaN 87.44 28.92 275.44 0.01 260.27 \n", + "\n", + " s \n", + "0 0.13 \n", + "1 0.12 \n", + "2 0.12 \n", + "3 0.10 \n", + "4 0.09 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "player_track_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "266960" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#number of different plays in the dataframe\n", + "player_track_data['PlayKey'].nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ball_snap 253271\n", + "line_set 247189\n", + "huddle_break_offense 175415\n", + "tackle 149626\n", + "first_contact 140437\n", + " ... \n", + "end_path 8\n", + "xp_fake 5\n", + "drop_kick 5\n", + "extra_point_fake 5\n", + "play_submit\\t 2\n", + "Name: event, Length: 80, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# number of different events(play types) in the data frame\n", + "player_track_data['event'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Below, join injury_recrod and player_track record to process data so each bodypart is grouped individually" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "22223" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record_track_merge = injury_record.join(player_track_data.set_index('PlayKey'),on='PlayKey')\n", + "record_track_merge.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PlayerKeyGameIDPlayKeyBodyPartSurfaceDM_M1DM_M7DM_M28DM_M42timeeventxydirdisos
03987339873-439873-4-32KneeSynthetic11110.0punt_play56.7317.30356.140.18249.951.83
03987339873-439873-4-32KneeSynthetic11110.1NaN56.7417.48355.730.18248.491.82
03987339873-439873-4-32KneeSynthetic11110.2NaN56.7217.66355.560.18246.951.81
03987339873-439873-4-32KneeSynthetic11110.3NaN56.6917.84355.840.18245.091.79
03987339873-439873-4-32KneeSynthetic11110.4NaN56.6818.02356.010.18243.021.76
\n", + "
" + ], + "text/plain": [ + " PlayerKey GameID PlayKey BodyPart Surface DM_M1 DM_M7 DM_M28 \\\n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "0 39873 39873-4 39873-4-32 Knee Synthetic 1 1 1 \n", + "\n", + " DM_M42 time event x y dir dis o s \n", + "0 1 0.0 punt_play 56.73 17.30 356.14 0.18 249.95 1.83 \n", + "0 1 0.1 NaN 56.74 17.48 355.73 0.18 248.49 1.82 \n", + "0 1 0.2 NaN 56.72 17.66 355.56 0.18 246.95 1.81 \n", + "0 1 0.3 NaN 56.69 17.84 355.84 0.18 245.09 1.79 \n", + "0 1 0.4 NaN 56.68 18.02 356.01 0.18 243.02 1.76 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#group plays by bodypart\n", + "record_track_merge.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Knee 10549\n", + "Ankle 9942\n", + "Foot 1724\n", + "Toes 7\n", + "Heel 1\n", + "Name: BodyPart, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record_track_merge['BodyPart'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#remove toes and heel body part as they dont contain data from player_track_data\n", + "knee_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Knee']\n", + "ankle_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Ankle']\n", + "foot_inj = record_track_merge.loc[record_track_merge['BodyPart'] == 'Foot']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "#concatenate dataframes\n", + "injury_track_data = pd.concat([knee_inj, ankle_inj], ignore_index=True)\n", + "injury_track_data = pd.concat([injury_track_data, foot_inj], ignore_index=True) " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Knee 10549\n", + "Ankle 9942\n", + "Foot 1724\n", + "Name: BodyPart, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "injury_track_data['BodyPart'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#join play file to injury track data\n", + "inj_data_full = injury_track_data.merge(play_list, on='PlayKey', how='left')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['PlayerKey_x',\n", + " 'GameID_x',\n", + " 'PlayKey',\n", + " 'BodyPart',\n", + " 'Surface',\n", + " 'DM_M1',\n", + " 'DM_M7',\n", + " 'DM_M28',\n", + " 'DM_M42',\n", + " 'time',\n", + " 'event',\n", + " 'x',\n", + " 'y',\n", + " 'dir',\n", + " 'dis',\n", + " 'o',\n", + " 's',\n", + " 'PlayerKey_y',\n", + " 'GameID_y',\n", + " 'RosterPosition',\n", + " 'PlayerDay',\n", + " 'PlayerGame',\n", + " 'StadiumType',\n", + " 'FieldType',\n", + " 'Temperature',\n", + " 'Weather',\n", + " 'PlayType',\n", + " 'PlayerGamePlay',\n", + " 'Position',\n", + " 'PositionGroup']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(inj_data_full.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Removing Columns that are repeated with different names" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "#remove playerkey_y and gameid_y\n", + "inj_data_full = inj_data_full.drop(['PlayerKey_y', 'GameID_y'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "inj_data_full = inj_data_full.drop(['event'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "inj_data_full = inj_data_full.drop(['FieldType'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "inj_data_full = inj_data_full.drop(['Position'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "inj_data_full = inj_data_full.drop(['PositionGroup'], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Binary Logistic Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn import metrics\n", + "import statsmodels.api as sm" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "XY = inj_data_full[['x', 'y', 'dir', 'dis', 'o', 's','DM_M42']].dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "y = XY['DM_M42']\n", + "X = XY[['x', 'y', 'dir', 'dis', 'o', 's']]" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimization terminated successfully.\n", + " Current function value: 0.602531\n", + " Iterations 5\n", + " Results: Logit\n", + "==================================================================\n", + "Model: Logit Pseudo R-squared: 0.039 \n", + "Dependent Variable: DM_M42 AIC: 26758.3622 \n", + "Date: 2019-12-22 22:40 BIC: 26806.4079 \n", + "No. Observations: 22195 Log-Likelihood: -13373. \n", + "Df Model: 5 LL-Null: -13913. \n", + "Df Residuals: 22189 LLR p-value: 3.2133e-231\n", + "Converged: 1.0000 Scale: 1.0000 \n", + "No. Iterations: 5.0000 \n", + "--------------------------------------------------------------------\n", + " Coef. Std.Err. z P>|z| [0.025 0.975]\n", + "--------------------------------------------------------------------\n", + "x -0.0154 0.0005 -30.0113 0.0000 -0.0164 -0.0144\n", + "y 0.0204 0.0011 18.4749 0.0000 0.0182 0.0225\n", + "dir -0.0012 0.0001 -9.1840 0.0000 -0.0015 -0.0010\n", + "dis 3.7890 0.4708 8.0474 0.0000 2.8662 4.7119\n", + "o -0.0008 0.0001 -6.3876 0.0000 -0.0011 -0.0006\n", + "s -0.4374 0.0486 -8.9946 0.0000 -0.5327 -0.3421\n", + "==================================================================\n", + "\n" + ] + } + ], + "source": [ + "logit_model =sm.Logit(y,X)\n", + "result = logit_model.fit()\n", + "print(result.summary2())" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\andre\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", + " multi_class='warn', n_jobs=None, penalty='l2',\n", + " random_state=None, solver='warn', tol=0.0001, verbose=0,\n", + " warm_start=False)" + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression()\n", + "model.fit(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}