diff --git a/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb index 7cef37e..c804982 100644 --- a/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb +++ b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 342, + "execution_count": 382, "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 343, + "execution_count": 383, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 344, + "execution_count": 384, "metadata": {}, "outputs": [ { @@ -1520,7 +1520,7 @@ "[75891 rows x 16 columns]" ] }, - "execution_count": 344, + "execution_count": 384, "metadata": {}, "output_type": "execute_result" } @@ -1545,7 +1545,7 @@ }, { "cell_type": "code", - "execution_count": 345, + "execution_count": 385, "metadata": {}, "outputs": [ { @@ -2884,7 +2884,7 @@ "[75891 rows x 14 columns]" ] }, - "execution_count": 345, + "execution_count": 385, "metadata": {}, "output_type": "execute_result" } @@ -2905,7 +2905,7 @@ }, { "cell_type": "code", - "execution_count": 346, + "execution_count": 386, "metadata": {}, "outputs": [ { @@ -4244,7 +4244,7 @@ "[75891 rows x 14 columns]" ] }, - "execution_count": 346, + "execution_count": 386, "metadata": {}, "output_type": "execute_result" } @@ -4265,7 +4265,7 @@ }, { "cell_type": "code", - "execution_count": 347, + "execution_count": 387, "metadata": {}, "outputs": [ { @@ -4301,7 +4301,7 @@ }, { "cell_type": "code", - "execution_count": 348, + "execution_count": 388, "metadata": {}, "outputs": [ { @@ -4371,7 +4371,7 @@ "Name: Purpose, Length: 75891, dtype: object" ] }, - "execution_count": 348, + "execution_count": 388, "metadata": {}, "output_type": "execute_result" } @@ -4391,7 +4391,7 @@ }, { "cell_type": "code", - "execution_count": 349, + "execution_count": 389, "metadata": {}, "outputs": [ { @@ -5417,7 +5417,7 @@ "[75891 rows x 12 columns]" ] }, - "execution_count": 349, + "execution_count": 389, "metadata": {}, "output_type": "execute_result" } @@ -5437,27 +5437,3577 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data = pd.get_dummies(dataframe, columns=columns)\n", - "\n", - "data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Our data is looking better, but to make things easier on our model, we can scale everything to between 0-1..." - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 390, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Importer reported quantityExporter reported quantityTaxon_Abronia gramineaTaxon_Acampe papillosaTaxon_Acampe praemorsaTaxon_Acampe rigidaTaxon_Acampe spp.Taxon_Acanthastrea amakusensisTaxon_Acanthastrea bowerbankiTaxon_Acanthastrea echinata...Source_ASource_CSource_DSource_FSource_ISource_OSource_RSource_USource_WSource_X
00.001.000000000...0100000000
10.001.000000000...0000010000
20.0043.000000000...0000000010
30.0043.000000000...0000000010
4700.000.000000000...0000000010
50.001.000000000...0000010000
60.0012.000000000...0100000000
70.004.000000000...0000000100
80.002.000000000...0000000010
90.004.000000000...0100000000
100.003.000000000...0010000000
11100.000.000000000...0000100000
120.001.000000000...0010000000
130.002.000000000...0100000000
141.001.000000000...0001000000
150.002.000000000...0100000000
161.000.000000000...0100000000
170.001.000000000...0100000000
180.001.000000000...0000000100
190.001.000000000...0100000000
200.001.000000000...0100000000
215.005.000000000...0100000000
220.001.000000000...0000000100
2310.000.000000000...0001000000
240.003.000000000...0000100000
252.000.000000000...0000100000
261.001.000000000...0100000000
271.002.000000000...0100000000
281.000.000000000...0100000000
292.000.000000000...0000100000
..................................................................
758610.0022.000000000...0000000010
758620.0016.000000000...0000000010
758631.000.000000000...0000000010
75864459.000.000000000...0000000010
758651.000.000000000...0100000000
758661.000.000000000...0000000010
758672.000.000000000...0000000010
758682.000.000000000...0000000010
758698.000.000000000...0000000010
758700.008.000000000...0000000010
758710.0021.000000000...0100000000
758720.001.000000000...0000000010
758730.001500.000000000...1000000000
758740.002.000000000...0000000010
758750.001500.000000000...1000000000
758760.0050.000000000...0000000010
758770.001.000000000...0000000010
758780.007.000000000...0000000010
758790.002.000000000...0001000000
7588019.550.000000000...0000000010
758810.005.000000000...0000000010
758820.0096.000000000...0000000010
758830.00452.000000000...0000000010
7588420.300.000000000...1000000000
7588517.660.000000000...1000000000
758868.670.000000000...1000000000
758870.0020.300000000...0000000010
75888200.000.000000000...0001000000
758890.00635.100000000...0000001000
758900.00480.000000000...0000001000
\n", + "

75891 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " Importer reported quantity Exporter reported quantity \\\n", + "0 0.00 1.0 \n", + "1 0.00 1.0 \n", + "2 0.00 43.0 \n", + "3 0.00 43.0 \n", + "4 700.00 0.0 \n", + "5 0.00 1.0 \n", + "6 0.00 12.0 \n", + "7 0.00 4.0 \n", + "8 0.00 2.0 \n", + "9 0.00 4.0 \n", + "10 0.00 3.0 \n", + "11 100.00 0.0 \n", + "12 0.00 1.0 \n", + "13 0.00 2.0 \n", + "14 1.00 1.0 \n", + "15 0.00 2.0 \n", + "16 1.00 0.0 \n", + "17 0.00 1.0 \n", + "18 0.00 1.0 \n", + "19 0.00 1.0 \n", + "20 0.00 1.0 \n", + "21 5.00 5.0 \n", + "22 0.00 1.0 \n", + "23 10.00 0.0 \n", + "24 0.00 3.0 \n", + "25 2.00 0.0 \n", + "26 1.00 1.0 \n", + "27 1.00 2.0 \n", + "28 1.00 0.0 \n", + "29 2.00 0.0 \n", + "... ... ... \n", + "75861 0.00 22.0 \n", + "75862 0.00 16.0 \n", + "75863 1.00 0.0 \n", + "75864 459.00 0.0 \n", + "75865 1.00 0.0 \n", + "75866 1.00 0.0 \n", + "75867 2.00 0.0 \n", + "75868 2.00 0.0 \n", + "75869 8.00 0.0 \n", + "75870 0.00 8.0 \n", + "75871 0.00 21.0 \n", + "75872 0.00 1.0 \n", + "75873 0.00 1500.0 \n", + "75874 0.00 2.0 \n", + "75875 0.00 1500.0 \n", + "75876 0.00 50.0 \n", + "75877 0.00 1.0 \n", + "75878 0.00 7.0 \n", + "75879 0.00 2.0 \n", + "75880 19.55 0.0 \n", + "75881 0.00 5.0 \n", + "75882 0.00 96.0 \n", + "75883 0.00 452.0 \n", + "75884 20.30 0.0 \n", + "75885 17.66 0.0 \n", + "75886 8.67 0.0 \n", + "75887 0.00 20.3 \n", + "75888 200.00 0.0 \n", + "75889 0.00 635.1 \n", + "75890 0.00 480.0 \n", + "\n", + " Taxon_Abronia graminea Taxon_Acampe papillosa Taxon_Acampe praemorsa \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 0 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... \n", + "75861 0 0 0 \n", + "75862 0 0 0 \n", + "75863 0 0 0 \n", + "75864 0 0 0 \n", + "75865 0 0 0 \n", + "75866 0 0 0 \n", + "75867 0 0 0 \n", + "75868 0 0 0 \n", + "75869 0 0 0 \n", + "75870 0 0 0 \n", + "75871 0 0 0 \n", + "75872 0 0 0 \n", + "75873 0 0 0 \n", + "75874 0 0 0 \n", + "75875 0 0 0 \n", + "75876 0 0 0 \n", + "75877 0 0 0 \n", + "75878 0 0 0 \n", + "75879 0 0 0 \n", + "75880 0 0 0 \n", + "75881 0 0 0 \n", + "75882 0 0 0 \n", + "75883 0 0 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 0 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + " Taxon_Acampe rigida Taxon_Acampe spp. Taxon_Acanthastrea amakusensis \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 0 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... \n", + "75861 0 0 0 \n", + "75862 0 0 0 \n", + "75863 0 0 0 \n", + "75864 0 0 0 \n", + "75865 0 0 0 \n", + "75866 0 0 0 \n", + "75867 0 0 0 \n", + "75868 0 0 0 \n", + "75869 0 0 0 \n", + "75870 0 0 0 \n", + "75871 0 0 0 \n", + "75872 0 0 0 \n", + "75873 0 0 0 \n", + "75874 0 0 0 \n", + "75875 0 0 0 \n", + "75876 0 0 0 \n", + "75877 0 0 0 \n", + "75878 0 0 0 \n", + "75879 0 0 0 \n", + "75880 0 0 0 \n", + "75881 0 0 0 \n", + "75882 0 0 0 \n", + "75883 0 0 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 0 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + " Taxon_Acanthastrea bowerbanki Taxon_Acanthastrea echinata ... \\\n", + "0 0 0 ... \n", + "1 0 0 ... \n", + "2 0 0 ... \n", + "3 0 0 ... \n", + "4 0 0 ... \n", + "5 0 0 ... \n", + "6 0 0 ... \n", + "7 0 0 ... \n", + "8 0 0 ... \n", + "9 0 0 ... \n", + "10 0 0 ... \n", + "11 0 0 ... \n", + "12 0 0 ... \n", + "13 0 0 ... \n", + "14 0 0 ... \n", + "15 0 0 ... \n", + "16 0 0 ... \n", + "17 0 0 ... \n", + "18 0 0 ... \n", + "19 0 0 ... \n", + "20 0 0 ... \n", + "21 0 0 ... \n", + "22 0 0 ... \n", + "23 0 0 ... \n", + "24 0 0 ... \n", + "25 0 0 ... \n", + "26 0 0 ... \n", + "27 0 0 ... \n", + "28 0 0 ... \n", + "29 0 0 ... \n", + "... ... ... ... \n", + "75861 0 0 ... \n", + "75862 0 0 ... \n", + "75863 0 0 ... \n", + "75864 0 0 ... \n", + "75865 0 0 ... \n", + "75866 0 0 ... \n", + "75867 0 0 ... \n", + "75868 0 0 ... \n", + "75869 0 0 ... \n", + "75870 0 0 ... \n", + "75871 0 0 ... \n", + "75872 0 0 ... \n", + "75873 0 0 ... \n", + "75874 0 0 ... \n", + "75875 0 0 ... \n", + "75876 0 0 ... \n", + "75877 0 0 ... \n", + "75878 0 0 ... \n", + "75879 0 0 ... \n", + "75880 0 0 ... \n", + "75881 0 0 ... \n", + "75882 0 0 ... \n", + "75883 0 0 ... \n", + "75884 0 0 ... \n", + "75885 0 0 ... \n", + "75886 0 0 ... \n", + "75887 0 0 ... \n", + "75888 0 0 ... \n", + "75889 0 0 ... \n", + "75890 0 0 ... \n", + "\n", + " Source_A Source_C Source_D Source_F Source_I Source_O Source_R \\\n", + "0 0 1 0 0 0 0 0 \n", + "1 0 0 0 0 0 1 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "5 0 0 0 0 0 1 0 \n", + "6 0 1 0 0 0 0 0 \n", + "7 0 0 0 0 0 0 0 \n", + "8 0 0 0 0 0 0 0 \n", + "9 0 1 0 0 0 0 0 \n", + "10 0 0 1 0 0 0 0 \n", + "11 0 0 0 0 1 0 0 \n", + "12 0 0 1 0 0 0 0 \n", + "13 0 1 0 0 0 0 0 \n", + "14 0 0 0 1 0 0 0 \n", + "15 0 1 0 0 0 0 0 \n", + "16 0 1 0 0 0 0 0 \n", + "17 0 1 0 0 0 0 0 \n", + "18 0 0 0 0 0 0 0 \n", + "19 0 1 0 0 0 0 0 \n", + "20 0 1 0 0 0 0 0 \n", + "21 0 1 0 0 0 0 0 \n", + "22 0 0 0 0 0 0 0 \n", + "23 0 0 0 1 0 0 0 \n", + "24 0 0 0 0 1 0 0 \n", + "25 0 0 0 0 1 0 0 \n", + "26 0 1 0 0 0 0 0 \n", + "27 0 1 0 0 0 0 0 \n", + "28 0 1 0 0 0 0 0 \n", + "29 0 0 0 0 1 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "75861 0 0 0 0 0 0 0 \n", + "75862 0 0 0 0 0 0 0 \n", + "75863 0 0 0 0 0 0 0 \n", + "75864 0 0 0 0 0 0 0 \n", + "75865 0 1 0 0 0 0 0 \n", + "75866 0 0 0 0 0 0 0 \n", + "75867 0 0 0 0 0 0 0 \n", + "75868 0 0 0 0 0 0 0 \n", + "75869 0 0 0 0 0 0 0 \n", + "75870 0 0 0 0 0 0 0 \n", + "75871 0 1 0 0 0 0 0 \n", + "75872 0 0 0 0 0 0 0 \n", + "75873 1 0 0 0 0 0 0 \n", + "75874 0 0 0 0 0 0 0 \n", + "75875 1 0 0 0 0 0 0 \n", + "75876 0 0 0 0 0 0 0 \n", + "75877 0 0 0 0 0 0 0 \n", + "75878 0 0 0 0 0 0 0 \n", + "75879 0 0 0 1 0 0 0 \n", + "75880 0 0 0 0 0 0 0 \n", + "75881 0 0 0 0 0 0 0 \n", + "75882 0 0 0 0 0 0 0 \n", + "75883 0 0 0 0 0 0 0 \n", + "75884 1 0 0 0 0 0 0 \n", + "75885 1 0 0 0 0 0 0 \n", + "75886 1 0 0 0 0 0 0 \n", + "75887 0 0 0 0 0 0 0 \n", + "75888 0 0 0 1 0 0 0 \n", + "75889 0 0 0 0 0 0 1 \n", + "75890 0 0 0 0 0 0 1 \n", + "\n", + " Source_U Source_W Source_X \n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 1 0 \n", + "3 0 1 0 \n", + "4 0 1 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 1 0 0 \n", + "8 0 1 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 1 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 1 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... \n", + "75861 0 1 0 \n", + "75862 0 1 0 \n", + "75863 0 1 0 \n", + "75864 0 1 0 \n", + "75865 0 0 0 \n", + "75866 0 1 0 \n", + "75867 0 1 0 \n", + "75868 0 1 0 \n", + "75869 0 1 0 \n", + "75870 0 1 0 \n", + "75871 0 0 0 \n", + "75872 0 1 0 \n", + "75873 0 0 0 \n", + "75874 0 1 0 \n", + "75875 0 0 0 \n", + "75876 0 1 0 \n", + "75877 0 1 0 \n", + "75878 0 1 0 \n", + "75879 0 0 0 \n", + "75880 0 1 0 \n", + "75881 0 1 0 \n", + "75882 0 1 0 \n", + "75883 0 1 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 1 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + "[75891 rows x 9400 columns]" + ] + }, + "execution_count": 390, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.get_dummies(dataframe, columns=columns)\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our data is looking better, but to make things easier on our model, we can scale everything to between 0-1..." + ] + }, + { + "cell_type": "code", + "execution_count": 391, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...9390939193929393939493959396939793989399
00.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
10.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
20.000000e+001.995951e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
30.000000e+001.995951e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
43.585151e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
50.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
60.000000e+005.570095e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
70.000000e+001.856698e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
80.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
90.000000e+001.856698e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
100.000000e+001.392524e-070.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.00.0
115.121645e-060.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
120.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.00.0
130.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
145.121645e-084.641746e-080.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
150.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
165.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
170.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
180.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
190.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
200.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
212.560822e-072.320873e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
220.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
235.121645e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
240.000000e+001.392524e-070.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
251.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
265.121645e-084.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
275.121645e-089.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
285.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
291.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
..................................................................
758610.000000e+001.021184e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758620.000000e+007.426793e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758635.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758642.350835e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758655.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
758665.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758671.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758681.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758694.097316e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758700.000000e+003.713397e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758710.000000e+009.747666e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
758720.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758730.000000e+006.962619e-050.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758740.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758750.000000e+006.962619e-050.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758760.000000e+002.320873e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758770.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758780.000000e+003.249222e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758790.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
758801.001282e-060.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758810.000000e+002.320873e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758820.000000e+004.456076e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758830.000000e+002.098069e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758841.039694e-060.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758859.044825e-070.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758864.440466e-070.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758870.000000e+009.422744e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758881.024329e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
758890.000000e+002.947973e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.01.00.00.00.0
758900.000000e+002.228038e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.01.00.00.00.0
\n", + "

75891 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.000000e+00 1.995951e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.000000e+00 1.995951e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 3.585151e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "6 0.000000e+00 5.570095e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7 0.000000e+00 1.856698e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "8 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "9 0.000000e+00 1.856698e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.000000e+00 1.392524e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 5.121645e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14 5.121645e-08 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "18 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "19 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "20 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 2.560822e-07 2.320873e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "22 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "23 5.121645e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "24 0.000000e+00 1.392524e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "25 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "26 5.121645e-08 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "27 5.121645e-08 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "28 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "29 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... ... ... ... ... ... \n", + "75861 0.000000e+00 1.021184e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75862 0.000000e+00 7.426793e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75863 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75864 2.350835e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75865 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75866 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75867 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75868 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75869 4.097316e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75870 0.000000e+00 3.713397e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75871 0.000000e+00 9.747666e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75872 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75873 0.000000e+00 6.962619e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75874 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75875 0.000000e+00 6.962619e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75876 0.000000e+00 2.320873e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75877 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75878 0.000000e+00 3.249222e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75879 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75880 1.001282e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75881 0.000000e+00 2.320873e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75882 0.000000e+00 4.456076e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75883 0.000000e+00 2.098069e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75884 1.039694e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75885 9.044825e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75886 4.440466e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75887 0.000000e+00 9.422744e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75888 1.024329e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75889 0.000000e+00 2.947973e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75890 0.000000e+00 2.228038e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " 9 ... 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 \n", + "0 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "5 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "6 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "8 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "9 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "18 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "19 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "20 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "22 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "23 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "24 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "25 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "26 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "27 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "28 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "29 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... ... ... ... ... ... ... ... ... \n", + "75861 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75862 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75863 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75864 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75865 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75866 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75867 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75868 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75869 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75870 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75871 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75872 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75873 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75874 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75875 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75876 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75877 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75878 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75879 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75880 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75881 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75882 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75883 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75884 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75885 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75886 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75887 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75888 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75889 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "75890 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "\n", + "[75891 rows x 9400 columns]" + ] + }, + "execution_count": 391, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "scaler = MinMaxScaler(feature_range=(0, 1))\n", "data_scaled = scaler.fit_transform(data)\n", @@ -5477,7 +9027,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 392, "metadata": {}, "outputs": [], "source": [ @@ -5486,9 +9036,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 393, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_train shape: (60712, 9400)\n", + "X_test shape: (15179, 9400)\n", + "y_train shape: (60712, 12)\n", + "y_test shape: (15179, 12)\n" + ] + } + ], "source": [ "print(\"X_train shape: \", X_train.shape)\n", "print(\"X_test shape: \", X_test.shape)\n", @@ -5507,7 +9068,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 394, "metadata": {}, "outputs": [], "source": [ @@ -5530,7 +9091,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 395, "metadata": {}, "outputs": [], "source": [ @@ -5547,24 +9108,268 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 396, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "60712/60712 [==============================] - 148s 2ms/step - loss: 0.7000 - acc: 0.7340\n", + "Epoch 2/5\n", + "60712/60712 [==============================] - 146s 2ms/step - loss: 0.3237 - acc: 0.8088\n", + "Epoch 3/5\n", + "60712/60712 [==============================] - 152s 3ms/step - loss: 0.2564 - acc: 0.8270\n", + "Epoch 4/5\n", + "60712/60712 [==============================] - 145s 2ms/step - loss: 0.2190 - acc: 0.8382\n", + "Epoch 5/5\n", + "60712/60712 [==============================] - 143s 2ms/step - loss: 0.1948 - acc: 0.8446\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 396, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "model = build_model()\n", "model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 397, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15179/15179 [==============================] - 31s 2ms/step\n", + "acc: 81.85%\n" + ] + } + ], "source": [ "score = model.evaluate(X_test, y_test)\n", "\n", "print(\"%s: %.2f%%\" % (model.metrics_names[1], score[1]*100))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predicting a Permit\n", + "\n", + "Let's pick a random entry and see if our model can predict its Purpose successfully. You can play with the index to test our model on different permits..." + ] + }, + { + "cell_type": "code", + "execution_count": 543, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...9390939193929393939493959396939793989399
10.04.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
\n", + "

1 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... \\\n", + "1 0.0 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n", + "\n", + " 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 \n", + "1 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[1 rows x 9400 columns]" + ] + }, + "execution_count": 543, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = 1 # Play with this to see how it predicts each permit...\n", + "permit = X_train.loc[[index]]\n", + "\n", + "permit" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BEGHLMNPQSTZ
1000000001000
\n", + "
" + ], + "text/plain": [ + " B E G H L M N P Q S T Z\n", + "1 0 0 0 0 0 0 0 0 1 0 0 0" + ] + }, + "execution_count": 544, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actual = y_train.loc[[index]]\n", + "actual" + ] + }, + { + "cell_type": "code", + "execution_count": 545, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted: Q\n", + "Actual: Q\n" + ] + } + ], + "source": [ + "probabilities = model.predict(permit)\n", + "prediction = np.argmax(probabilities)\n", + "\n", + "print(\"Predicted: \", labels.columns[prediction])\n", + "print(\"Actual: \", max(actual.idxmax(axis=1)))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -5576,22 +9381,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 398, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyError", + "evalue": "'[ 9400 9401 9402 ... 75888 75889 75890] not in index'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkfold\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s: %.2f%%\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics_names\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2132\u001b[0m \u001b[0;31m# either boolean or fancy integer index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2133\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2134\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2135\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_array\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2175\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2176\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2177\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2178\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[0;34m(self, obj, axis, is_setter)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1268\u001b[0m raise KeyError('{mask} not in index'\n\u001b[0;32m-> 1269\u001b[0;31m .format(mask=objarr[mask]))\n\u001b[0m\u001b[1;32m 1270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1271\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_values_from_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: '[ 9400 9401 9402 ... 75888 75889 75890] not in index'" + ] + } + ], "source": [ - "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", + "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", "cv_scores = []\n", "\n", "for train, test in kfold.split(data, labels):\n", " model = build_model()\n", - " model.fit(data[train], labels[train], epochs=150, batch_size=10, verbose=0)\n", + " model.fit(data[train], labels[train], epochs=epochs, batch_size=10, verbose=0)\n", " scores = model.evaluate(data[test], labels[test], verbose=0)\n", " print(\"%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n", " \n", " cv_scores.append(scores[1] * 100)\n", " \n", - "print(\"%.2f%% (+/- %.2f%%)\" % (numpy.mean(cvscores), numpy.std(cvscores)))" + "print(\"%.2f%% (+/- %.2f%%)\" % (numpy.mean(cv_scores), numpy.std(cv_scores)))" ] }, { diff --git a/1. Classifying Partial Permits.ipynb b/1. Classifying Partial Permits.ipynb index 708ee07..c804982 100644 --- a/1. Classifying Partial Permits.ipynb +++ b/1. Classifying Partial Permits.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 342, + "execution_count": 382, "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 343, + "execution_count": 383, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 344, + "execution_count": 384, "metadata": {}, "outputs": [ { @@ -1520,7 +1520,7 @@ "[75891 rows x 16 columns]" ] }, - "execution_count": 344, + "execution_count": 384, "metadata": {}, "output_type": "execute_result" } @@ -1545,7 +1545,7 @@ }, { "cell_type": "code", - "execution_count": 345, + "execution_count": 385, "metadata": {}, "outputs": [ { @@ -2884,7 +2884,7 @@ "[75891 rows x 14 columns]" ] }, - "execution_count": 345, + "execution_count": 385, "metadata": {}, "output_type": "execute_result" } @@ -2905,7 +2905,7 @@ }, { "cell_type": "code", - "execution_count": 346, + "execution_count": 386, "metadata": {}, "outputs": [ { @@ -4244,7 +4244,7 @@ "[75891 rows x 14 columns]" ] }, - "execution_count": 346, + "execution_count": 386, "metadata": {}, "output_type": "execute_result" } @@ -4265,7 +4265,7 @@ }, { "cell_type": "code", - "execution_count": 347, + "execution_count": 387, "metadata": {}, "outputs": [ { @@ -4301,7 +4301,7 @@ }, { "cell_type": "code", - "execution_count": 348, + "execution_count": 388, "metadata": {}, "outputs": [ { @@ -4371,7 +4371,7 @@ "Name: Purpose, Length: 75891, dtype: object" ] }, - "execution_count": 348, + "execution_count": 388, "metadata": {}, "output_type": "execute_result" } @@ -4391,7 +4391,7 @@ }, { "cell_type": "code", - "execution_count": 349, + "execution_count": 389, "metadata": {}, "outputs": [ { @@ -5417,7 +5417,7 @@ "[75891 rows x 12 columns]" ] }, - "execution_count": 349, + "execution_count": 389, "metadata": {}, "output_type": "execute_result" } @@ -5437,7 +5437,7 @@ }, { "cell_type": "code", - "execution_count": 350, + "execution_count": 390, "metadata": {}, "outputs": [ { @@ -7336,7 +7336,7 @@ "[75891 rows x 9400 columns]" ] }, - "execution_count": 350, + "execution_count": 390, "metadata": {}, "output_type": "execute_result" } @@ -7356,7 +7356,7 @@ }, { "cell_type": "code", - "execution_count": 351, + "execution_count": 391, "metadata": {}, "outputs": [ { @@ -9003,7 +9003,7 @@ "[75891 rows x 9400 columns]" ] }, - "execution_count": 351, + "execution_count": 391, "metadata": {}, "output_type": "execute_result" } @@ -9027,7 +9027,7 @@ }, { "cell_type": "code", - "execution_count": 352, + "execution_count": 392, "metadata": {}, "outputs": [], "source": [ @@ -9036,7 +9036,7 @@ }, { "cell_type": "code", - "execution_count": 353, + "execution_count": 393, "metadata": {}, "outputs": [ { @@ -9068,7 +9068,7 @@ }, { "cell_type": "code", - "execution_count": 354, + "execution_count": 394, "metadata": {}, "outputs": [], "source": [ @@ -9091,7 +9091,7 @@ }, { "cell_type": "code", - "execution_count": 355, + "execution_count": 395, "metadata": {}, "outputs": [], "source": [ @@ -9108,7 +9108,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 396, "metadata": {}, "outputs": [ { @@ -9116,25 +9116,260 @@ "output_type": "stream", "text": [ "Epoch 1/5\n", - " 6000/60712 [=>............................] - ETA: 3:11 - loss: 0.1748 - acc: 0.8497" + "60712/60712 [==============================] - 148s 2ms/step - loss: 0.7000 - acc: 0.7340\n", + "Epoch 2/5\n", + "60712/60712 [==============================] - 146s 2ms/step - loss: 0.3237 - acc: 0.8088\n", + "Epoch 3/5\n", + "60712/60712 [==============================] - 152s 3ms/step - loss: 0.2564 - acc: 0.8270\n", + "Epoch 4/5\n", + "60712/60712 [==============================] - 145s 2ms/step - loss: 0.2190 - acc: 0.8382\n", + "Epoch 5/5\n", + "60712/60712 [==============================] - 143s 2ms/step - loss: 0.1948 - acc: 0.8446\n" ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 396, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ + "model = build_model()\n", "model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 397, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "15179/15179 [==============================] - 31s 2ms/step\n", + "acc: 81.85%\n" + ] + } + ], "source": [ "score = model.evaluate(X_test, y_test)\n", "\n", "print(\"%s: %.2f%%\" % (model.metrics_names[1], score[1]*100))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predicting a Permit\n", + "\n", + "Let's pick a random entry and see if our model can predict its Purpose successfully. You can play with the index to test our model on different permits..." + ] + }, + { + "cell_type": "code", + "execution_count": 543, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...9390939193929393939493959396939793989399
10.04.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
\n", + "

1 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... \\\n", + "1 0.0 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... \n", + "\n", + " 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 \n", + "1 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[1 rows x 9400 columns]" + ] + }, + "execution_count": 543, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = 1 # Play with this to see how it predicts each permit...\n", + "permit = X_train.loc[[index]]\n", + "\n", + "permit" + ] + }, + { + "cell_type": "code", + "execution_count": 544, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BEGHLMNPQSTZ
1000000001000
\n", + "
" + ], + "text/plain": [ + " B E G H L M N P Q S T Z\n", + "1 0 0 0 0 0 0 0 0 1 0 0 0" + ] + }, + "execution_count": 544, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "actual = y_train.loc[[index]]\n", + "actual" + ] + }, + { + "cell_type": "code", + "execution_count": 545, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Predicted: Q\n", + "Actual: Q\n" + ] + } + ], + "source": [ + "probabilities = model.predict(permit)\n", + "prediction = np.argmax(probabilities)\n", + "\n", + "print(\"Predicted: \", labels.columns[prediction])\n", + "print(\"Actual: \", max(actual.idxmax(axis=1)))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -9146,22 +9381,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 398, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyError", + "evalue": "'[ 9400 9401 9402 ... 75888 75889 75890] not in index'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mkfold\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbuild_model\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s: %.2f%%\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics_names\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2131\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2132\u001b[0m \u001b[0;31m# either boolean or fancy integer index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2133\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2134\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2135\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_getitem_array\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 2175\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2176\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2177\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2178\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_take\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconvert\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2179\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_convert_to_indexer\u001b[0;34m(self, obj, axis, is_setter)\u001b[0m\n\u001b[1;32m 1267\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1268\u001b[0m raise KeyError('{mask} not in index'\n\u001b[0;32m-> 1269\u001b[0;31m .format(mask=objarr[mask]))\n\u001b[0m\u001b[1;32m 1270\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1271\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_values_from_object\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: '[ 9400 9401 9402 ... 75888 75889 75890] not in index'" + ] + } + ], "source": [ - "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", + "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", "cv_scores = []\n", "\n", "for train, test in kfold.split(data, labels):\n", " model = build_model()\n", - " model.fit(data[train], labels[train], epochs=150, batch_size=10, verbose=0)\n", + " model.fit(data[train], labels[train], epochs=epochs, batch_size=10, verbose=0)\n", " scores = model.evaluate(data[test], labels[test], verbose=0)\n", " print(\"%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n", " \n", " cv_scores.append(scores[1] * 100)\n", " \n", - "print(\"%.2f%% (+/- %.2f%%)\" % (numpy.mean(cvscores), numpy.std(cvscores)))" + "print(\"%.2f%% (+/- %.2f%%)\" % (numpy.mean(cv_scores), numpy.std(cv_scores)))" ] }, {