From f5569d6197989b5b2a357efa99f2c58b77e2b531 Mon Sep 17 00:00:00 2001 From: Spencer Dixon Date: Fri, 9 Mar 2018 15:48:29 +0000 Subject: [PATCH] First implementation of model --- ...assifying Partial Permits-checkpoint.ipynb | 5624 +++++++++- 1. Classifying Partial Permits.ipynb | 9161 ++++++++++++++++- 2 files changed, 14755 insertions(+), 30 deletions(-) diff --git a/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb index 2fd6442..7cef37e 100644 --- a/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb +++ b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb @@ -1,6 +1,5626 @@ { - "cells": [], - "metadata": {}, + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Goal 1\n", + "## Given a partial permit, can we predict what classification it belongs to?\n", + "\n", + "Trade permits can be messy and incomplete. Can we use this partial data to successfully predict which Purpose code the permit should belong to?\n", + "\n", + "### Getting Started\n", + "- Open up the CITES trade database at https://trade.cites.org/\n", + "- Select a year range and click *Search*\n", + "- Download a Comparative Tabulation report and place it in `data/`\n", + "- Install requirements with `pip install -r requirements.txt`\n", + "- Run this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 342, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Dropout, Activation\n", + "from keras.wrappers.scikit_learn import KerasClassifier\n", + "from keras.utils import np_utils\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.preprocessing import LabelEncoder\n", + 
"from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.pipeline import Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [], + "source": [ + "seed = 1\n", + "np.random.seed(seed)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing our data\n", + "Let's import our data into a pandas dataframe and take a look at it." + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearApp.TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
02016IAquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZNaN1.0bodiesNaNTC
12016IAquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRSNaN1.0bodiesNaNQO
22016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0feathersNaNSW
32016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0specimensNaNSW
42016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.00NaNspecimensNaNSW
52016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRSNaN1.0bodiesNaNQO
62016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN12.0feathersNaNSC
72016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN4.0feathersNaNSU
82016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN2.0feathersNaNSW
92016IAcipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaNNaN4.0liveNaNTC
102016IAcipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaNNaN3.0eggs (live)kgTD
112016IAcipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.00NaNcaviargPI
122016IAgave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaNNaN1.0liveNaNTD
132016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaNNaN2.0liveNaNZC
142016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
152016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaNNaN2.0liveNaNZC
162016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.00NaNliveNaNBC
172016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaNNaN1.0liveNaNZC
182016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaNNaN1.0bodiesNaNEU
192016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaNNaN1.0specimensNaNEC
202016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaNNaN1.0liveNaNZC
212016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
222016IAlligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaNNaN1.0bodiesNaNEU
232016IMelanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.00NaNliveNaNZF
242016IAnas laysanensisAvesAnseriformesAnatidaeAnasCAUSXXNaN3.0feathersNaNSI
252016IAnas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.00NaNliveNaNZI
262016IAsarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
272016IAsarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
282016IBranta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.00NaNbodiesNaNQC
292016IBranta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.00NaNliveNaNZI
...................................................
758612017IIThalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaNNaN22.0specimensNaNSW
758622017IIThrenetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaNNaN16.0specimensNaNSW
758632017IIUrsus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.00NaNtrophiesNaNHW
758642017IIVaranus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.00NaNskin piecesNaNTW
758652017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.00NaNliveNaNTC
758662017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.00NaNleather products (small)NaNTW
758672017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.00NaNleather products (small)NaNTW
758682017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.00NaNleather products (small)NaNTW
758692017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.00NaNleather products (small)NaNTW
758702017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusUSCOIDNaN8.0leather products (small)NaNTW
758712017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaNNaN21.0liveNaNTC
758722017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusXXCOIDNaN1.0leather products (small)NaNTW
758732017IIDioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaNNaN1500.0liveNaNTA
758742017IIZamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaNNaN2.0specimensNaNSW
758752017IIZamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaNNaN1500.0liveNaNTA
758762017IIScleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaNNaN50.0raw coralskgSW
758772017IIICanis aureusMammaliaCarnivoraCanidaeCanisDEETNaNNaN1.0skinsNaNHW
758782017IIICanis aureusMammaliaCarnivoraCanidaeCanisUSETNaNNaN7.0trophiesNaNHW
758792017IIISarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaNNaN2.0liveNaNTF
758802017IIIDipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.55NaNsawn woodm3TW
758812017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN5.0bonesNaNSW
758822017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN96.0hairNaNSW
758832017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN452.0specimensNaNSW
758842017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.30NaNtimberm3TA
758852017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.66NaNtimberm3TA
758862017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.67NaNsawn woodm3TA
758872017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaNNaN20.3timberm3TW
758882017IIIDaboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.00NaNspecimensNaNTF
758892017IIICivettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaNNaN635.1muskkgTR
758902017IIICivettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaNNaN480.0muskkgTR
\n", + "

75891 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " Year App. Taxon Class Order \\\n", + "0 2016 I Aquila heliaca Aves Falconiformes \n", + "1 2016 I Aquila heliaca Aves Falconiformes \n", + "2 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "3 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "4 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "5 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "6 2016 I Harpia harpyja Aves Falconiformes \n", + "7 2016 I Harpia harpyja Aves Falconiformes \n", + "8 2016 I Harpia harpyja Aves Falconiformes \n", + "9 2016 I Acipenser brevirostrum Actinopteri Acipenseriformes \n", + "10 2016 I Acipenser brevirostrum Actinopteri Acipenseriformes \n", + "11 2016 I Acipenser sturio Actinopteri Acipenseriformes \n", + "12 2016 I Agave parviflora NaN Liliales \n", + "13 2016 I Ailurus fulgens Mammalia Carnivora \n", + "14 2016 I Ailurus fulgens Mammalia Carnivora \n", + "15 2016 I Ailurus fulgens Mammalia Carnivora \n", + "16 2016 I Ailurus fulgens Mammalia Carnivora \n", + "17 2016 I Ailurus fulgens Mammalia Carnivora \n", + "18 2016 I Ailurus fulgens Mammalia Carnivora \n", + "19 2016 I Ailurus fulgens Mammalia Carnivora \n", + "20 2016 I Ailurus fulgens Mammalia Carnivora \n", + "21 2016 I Ailurus fulgens Mammalia Carnivora \n", + "22 2016 I Alligator sinensis Reptilia Crocodylia \n", + "23 2016 I Melanosuchus niger Reptilia Crocodylia \n", + "24 2016 I Anas laysanensis Aves Anseriformes \n", + "25 2016 I Anas laysanensis Aves Anseriformes \n", + "26 2016 I Asarcornis scutulata Aves Anseriformes \n", + "27 2016 I Asarcornis scutulata Aves Anseriformes \n", + "28 2016 I Branta sandvicensis Aves Anseriformes \n", + "29 2016 I Branta sandvicensis Aves Anseriformes \n", + "... ... ... ... ... ... 
\n", + "75861 2017 II Thalurania furcata Aves Apodiformes \n", + "75862 2017 II Threnetes niger Aves Apodiformes \n", + "75863 2017 II Ursus arctos Mammalia Carnivora \n", + "75864 2017 II Varanus niloticus Reptilia Sauria \n", + "75865 2017 II Varanus salvator Reptilia Sauria \n", + "75866 2017 II Varanus salvator Reptilia Sauria \n", + "75867 2017 II Varanus salvator Reptilia Sauria \n", + "75868 2017 II Varanus salvator Reptilia Sauria \n", + "75869 2017 II Varanus salvator Reptilia Sauria \n", + "75870 2017 II Varanus salvator Reptilia Sauria \n", + "75871 2017 II Varanus salvator Reptilia Sauria \n", + "75872 2017 II Varanus salvator Reptilia Sauria \n", + "75873 2017 II Dioon spinulosum NaN Cycadales \n", + "75874 2017 II Zamiaceae spp. NaN Cycadales \n", + "75875 2017 II Zamia integrifolia NaN Cycadales \n", + "75876 2017 II Scleractinia spp. Anthozoa Scleractinia \n", + "75877 2017 III Canis aureus Mammalia Carnivora \n", + "75878 2017 III Canis aureus Mammalia Carnivora \n", + "75879 2017 III Sarcoramphus papa Aves Falconiformes \n", + "75880 2017 III Dipteryx panamensis NaN Fabales \n", + "75881 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75882 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75883 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75884 2017 III Cedrela odorata NaN Sapindales \n", + "75885 2017 III Cedrela odorata NaN Sapindales \n", + "75886 2017 III Cedrela odorata NaN Sapindales \n", + "75887 2017 III Cedrela odorata NaN Sapindales \n", + "75888 2017 III Daboia russelii Reptilia Serpentes \n", + "75889 2017 III Civettictis civetta Mammalia Carnivora \n", + "75890 2017 III Civettictis civetta Mammalia Carnivora \n", + "\n", + " Family Genus Importer Exporter Origin \\\n", + "0 Accipitridae Aquila TR NL CZ \n", + "1 Accipitridae Aquila XV RS RS \n", + "2 Accipitridae Haliaeetus BE NO NaN \n", + "3 Accipitridae Haliaeetus BE NO NaN \n", + "4 Accipitridae Haliaeetus DK IS NaN \n", + "5 Accipitridae Haliaeetus XV RS RS \n", 
+ "6 Accipitridae Harpia BR FR NaN \n", + "7 Accipitridae Harpia BR FR NaN \n", + "8 Accipitridae Harpia BR FR NaN \n", + "9 Acipenseridae Acipenser CH DE NaN \n", + "10 Acipenseridae Acipenser TW CA NaN \n", + "11 Acipenseridae Acipenser US IR NaN \n", + "12 Agavaceae Agave US TH NaN \n", + "13 Ailuridae Ailurus AU NZ NaN \n", + "14 Ailuridae Ailurus CA US NaN \n", + "15 Ailuridae Ailurus IL DE NaN \n", + "16 Ailuridae Ailurus JP US NaN \n", + "17 Ailuridae Ailurus JP US NaN \n", + "18 Ailuridae Ailurus KP CN NaN \n", + "19 Ailuridae Ailurus KR CN NaN \n", + "20 Ailuridae Ailurus KR JP NaN \n", + "21 Ailuridae Ailurus US CA NaN \n", + "22 Alligatoridae Alligator KP CN NaN \n", + "23 Alligatoridae Melanosuchus US DK NaN \n", + "24 Anatidae Anas CA US XX \n", + "25 Anatidae Anas MC FR XX \n", + "26 Anatidae Asarcornis GB JE GB \n", + "27 Anatidae Asarcornis GB JE NaN \n", + "28 Anatidae Branta CN GB NaN \n", + "29 Anatidae Branta MC FR XX \n", + "... ... ... ... ... ... \n", + "75861 Trochilidae Thalurania US PE NaN \n", + "75862 Trochilidae Threnetes US PE NaN \n", + "75863 Ursidae Ursus BG RU NaN \n", + "75864 Varanidae Varanus MG FR TD \n", + "75865 Varanidae Varanus CR US NaN \n", + "75866 Varanidae Varanus MO FR ID \n", + "75867 Varanidae Varanus MO FR MY \n", + "75868 Varanidae Varanus MO GB ID \n", + "75869 Varanidae Varanus MO IT ID \n", + "75870 Varanidae Varanus US CO ID \n", + "75871 Varanidae Varanus US CR NaN \n", + "75872 Varanidae Varanus XX CO ID \n", + "75873 Zamiaceae Dioon NL CR NaN \n", + "75874 Zamiaceae NaN DE AO NaN \n", + "75875 Zamiaceae Zamia NL CR NaN \n", + "75876 NaN NaN DE AO NaN \n", + "75877 Canidae Canis DE ET NaN \n", + "75878 Canidae Canis US ET NaN \n", + "75879 Cathartidae Sarcoramphus JP PE NaN \n", + "75880 Leguminosae Dipteryx CR NI NaN \n", + "75881 Megalonychidae Choloepus US CR NaN \n", + "75882 Megalonychidae Choloepus US CR NaN \n", + "75883 Megalonychidae Choloepus US CR NaN \n", + "75884 Meliaceae Cedrela CR IN NaN \n", 
+ "75885 Meliaceae Cedrela CR PR NaN \n", + "75886 Meliaceae Cedrela CR US NaN \n", + "75887 Meliaceae Cedrela IN CR NaN \n", + "75888 Viperidae Daboia RS IT US \n", + "75889 Viverridae Civettictis FR ET NaN \n", + "75890 Viverridae Civettictis KR ET NaN \n", + "\n", + " Importer reported quantity Exporter reported quantity \\\n", + "0 NaN 1.0 \n", + "1 NaN 1.0 \n", + "2 NaN 43.0 \n", + "3 NaN 43.0 \n", + "4 700.00 NaN \n", + "5 NaN 1.0 \n", + "6 NaN 12.0 \n", + "7 NaN 4.0 \n", + "8 NaN 2.0 \n", + "9 NaN 4.0 \n", + "10 NaN 3.0 \n", + "11 100.00 NaN \n", + "12 NaN 1.0 \n", + "13 NaN 2.0 \n", + "14 1.00 1.0 \n", + "15 NaN 2.0 \n", + "16 1.00 NaN \n", + "17 NaN 1.0 \n", + "18 NaN 1.0 \n", + "19 NaN 1.0 \n", + "20 NaN 1.0 \n", + "21 5.00 5.0 \n", + "22 NaN 1.0 \n", + "23 10.00 NaN \n", + "24 NaN 3.0 \n", + "25 2.00 NaN \n", + "26 1.00 1.0 \n", + "27 1.00 2.0 \n", + "28 1.00 NaN \n", + "29 2.00 NaN \n", + "... ... ... \n", + "75861 NaN 22.0 \n", + "75862 NaN 16.0 \n", + "75863 1.00 NaN \n", + "75864 459.00 NaN \n", + "75865 1.00 NaN \n", + "75866 1.00 NaN \n", + "75867 2.00 NaN \n", + "75868 2.00 NaN \n", + "75869 8.00 NaN \n", + "75870 NaN 8.0 \n", + "75871 NaN 21.0 \n", + "75872 NaN 1.0 \n", + "75873 NaN 1500.0 \n", + "75874 NaN 2.0 \n", + "75875 NaN 1500.0 \n", + "75876 NaN 50.0 \n", + "75877 NaN 1.0 \n", + "75878 NaN 7.0 \n", + "75879 NaN 2.0 \n", + "75880 19.55 NaN \n", + "75881 NaN 5.0 \n", + "75882 NaN 96.0 \n", + "75883 NaN 452.0 \n", + "75884 20.30 NaN \n", + "75885 17.66 NaN \n", + "75886 8.67 NaN \n", + "75887 NaN 20.3 \n", + "75888 200.00 NaN \n", + "75889 NaN 635.1 \n", + "75890 NaN 480.0 \n", + "\n", + " Term Unit Purpose Source \n", + "0 bodies NaN T C \n", + "1 bodies NaN Q O \n", + "2 feathers NaN S W \n", + "3 specimens NaN S W \n", + "4 specimens NaN S W \n", + "5 bodies NaN Q O \n", + "6 feathers NaN S C \n", + "7 feathers NaN S U \n", + "8 feathers NaN S W \n", + "9 live NaN T C \n", + "10 eggs (live) kg T D \n", + "11 caviar g P I \n", + "12 live 
NaN T D \n", + "13 live NaN Z C \n", + "14 live NaN Z F \n", + "15 live NaN Z C \n", + "16 live NaN B C \n", + "17 live NaN Z C \n", + "18 bodies NaN E U \n", + "19 specimens NaN E C \n", + "20 live NaN Z C \n", + "21 live NaN Z C \n", + "22 bodies NaN E U \n", + "23 live NaN Z F \n", + "24 feathers NaN S I \n", + "25 live NaN Z I \n", + "26 bodies NaN S C \n", + "27 bodies NaN S C \n", + "28 bodies NaN Q C \n", + "29 live NaN Z I \n", + "... ... ... ... ... \n", + "75861 specimens NaN S W \n", + "75862 specimens NaN S W \n", + "75863 trophies NaN H W \n", + "75864 skin pieces NaN T W \n", + "75865 live NaN T C \n", + "75866 leather products (small) NaN T W \n", + "75867 leather products (small) NaN T W \n", + "75868 leather products (small) NaN T W \n", + "75869 leather products (small) NaN T W \n", + "75870 leather products (small) NaN T W \n", + "75871 live NaN T C \n", + "75872 leather products (small) NaN T W \n", + "75873 live NaN T A \n", + "75874 specimens NaN S W \n", + "75875 live NaN T A \n", + "75876 raw corals kg S W \n", + "75877 skins NaN H W \n", + "75878 trophies NaN H W \n", + "75879 live NaN T F \n", + "75880 sawn wood m3 T W \n", + "75881 bones NaN S W \n", + "75882 hair NaN S W \n", + "75883 specimens NaN S W \n", + "75884 timber m3 T A \n", + "75885 timber m3 T A \n", + "75886 sawn wood m3 T A \n", + "75887 timber m3 T W \n", + "75888 specimens NaN T F \n", + "75889 musk kg T R \n", + "75890 musk kg T R \n", + "\n", + "[75891 rows x 16 columns]" + ] + }, + "execution_count": 344, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe = pd.read_csv(\"data/data.csv\", skipinitialspace=True, dtype={\n", + " 'Importer reported quantity': float,\n", + " 'Exporter reported quantity': float\n", + "})\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Formatting the data\n", + "The Year and App. 
columns probably aren't going to matter to us in how we classify these records, so let's drop those first...\n", + "We'll also remove any additional whitespace from the column names to make things easier to deal with later on..." + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
0Aquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZNaN1.0bodiesNaNTC
1Aquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRSNaN1.0bodiesNaNQO
2Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0feathersNaNSW
3Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0specimensNaNSW
4Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.00NaNspecimensNaNSW
5Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRSNaN1.0bodiesNaNQO
6Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN12.0feathersNaNSC
7Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN4.0feathersNaNSU
8Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN2.0feathersNaNSW
9Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaNNaN4.0liveNaNTC
10Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaNNaN3.0eggs (live)kgTD
11Acipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.00NaNcaviargPI
12Agave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaNNaN1.0liveNaNTD
13Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaNNaN2.0liveNaNZC
14Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
15Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaNNaN2.0liveNaNZC
16Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.00NaNliveNaNBC
17Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaNNaN1.0liveNaNZC
18Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaNNaN1.0bodiesNaNEU
19Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaNNaN1.0specimensNaNEC
20Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaNNaN1.0liveNaNZC
21Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
22Alligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaNNaN1.0bodiesNaNEU
23Melanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.00NaNliveNaNZF
24Anas laysanensisAvesAnseriformesAnatidaeAnasCAUSXXNaN3.0feathersNaNSI
25Anas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.00NaNliveNaNZI
26Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
27Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
28Branta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.00NaNbodiesNaNQC
29Branta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.00NaNliveNaNZI
.............................................
75861Thalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaNNaN22.0specimensNaNSW
75862Threnetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaNNaN16.0specimensNaNSW
75863Ursus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.00NaNtrophiesNaNHW
75864Varanus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.00NaNskin piecesNaNTW
75865Varanus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.00NaNliveNaNTC
75866Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.00NaNleather products (small)NaNTW
75867Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.00NaNleather products (small)NaNTW
75868Varanus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.00NaNleather products (small)NaNTW
75869Varanus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.00NaNleather products (small)NaNTW
75870Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCOIDNaN8.0leather products (small)NaNTW
75871Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaNNaN21.0liveNaNTC
75872Varanus salvatorReptiliaSauriaVaranidaeVaranusXXCOIDNaN1.0leather products (small)NaNTW
75873Dioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaNNaN1500.0liveNaNTA
75874Zamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaNNaN2.0specimensNaNSW
75875Zamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaNNaN1500.0liveNaNTA
75876Scleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaNNaN50.0raw coralskgSW
75877Canis aureusMammaliaCarnivoraCanidaeCanisDEETNaNNaN1.0skinsNaNHW
75878Canis aureusMammaliaCarnivoraCanidaeCanisUSETNaNNaN7.0trophiesNaNHW
75879Sarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaNNaN2.0liveNaNTF
75880Dipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.55NaNsawn woodm3TW
75881Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN5.0bonesNaNSW
75882Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN96.0hairNaNSW
75883Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN452.0specimensNaNSW
75884Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.30NaNtimberm3TA
75885Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.66NaNtimberm3TA
75886Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.67NaNsawn woodm3TA
75887Cedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaNNaN20.3timberm3TW
75888Daboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.00NaNspecimensNaNTF
75889Civettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaNNaN635.1muskkgTR
75890Civettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaNNaN480.0muskkgTR
\n", + "

75891 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Taxon Class Order Family \\\n", + "0 Aquila heliaca Aves Falconiformes Accipitridae \n", + "1 Aquila heliaca Aves Falconiformes Accipitridae \n", + "2 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "3 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "4 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "5 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "6 Harpia harpyja Aves Falconiformes Accipitridae \n", + "7 Harpia harpyja Aves Falconiformes Accipitridae \n", + "8 Harpia harpyja Aves Falconiformes Accipitridae \n", + "9 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "10 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "11 Acipenser sturio Actinopteri Acipenseriformes Acipenseridae \n", + "12 Agave parviflora NaN Liliales Agavaceae \n", + "13 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "14 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "15 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "16 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "17 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "18 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "19 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "20 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "21 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "22 Alligator sinensis Reptilia Crocodylia Alligatoridae \n", + "23 Melanosuchus niger Reptilia Crocodylia Alligatoridae \n", + "24 Anas laysanensis Aves Anseriformes Anatidae \n", + "25 Anas laysanensis Aves Anseriformes Anatidae \n", + "26 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "27 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "28 Branta sandvicensis Aves Anseriformes Anatidae \n", + "29 Branta sandvicensis Aves Anseriformes Anatidae \n", + "... ... ... ... ... 
\n", + "75861 Thalurania furcata Aves Apodiformes Trochilidae \n", + "75862 Threnetes niger Aves Apodiformes Trochilidae \n", + "75863 Ursus arctos Mammalia Carnivora Ursidae \n", + "75864 Varanus niloticus Reptilia Sauria Varanidae \n", + "75865 Varanus salvator Reptilia Sauria Varanidae \n", + "75866 Varanus salvator Reptilia Sauria Varanidae \n", + "75867 Varanus salvator Reptilia Sauria Varanidae \n", + "75868 Varanus salvator Reptilia Sauria Varanidae \n", + "75869 Varanus salvator Reptilia Sauria Varanidae \n", + "75870 Varanus salvator Reptilia Sauria Varanidae \n", + "75871 Varanus salvator Reptilia Sauria Varanidae \n", + "75872 Varanus salvator Reptilia Sauria Varanidae \n", + "75873 Dioon spinulosum NaN Cycadales Zamiaceae \n", + "75874 Zamiaceae spp. NaN Cycadales Zamiaceae \n", + "75875 Zamia integrifolia NaN Cycadales Zamiaceae \n", + "75876 Scleractinia spp. Anthozoa Scleractinia NaN \n", + "75877 Canis aureus Mammalia Carnivora Canidae \n", + "75878 Canis aureus Mammalia Carnivora Canidae \n", + "75879 Sarcoramphus papa Aves Falconiformes Cathartidae \n", + "75880 Dipteryx panamensis NaN Fabales Leguminosae \n", + "75881 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75882 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75883 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75884 Cedrela odorata NaN Sapindales Meliaceae \n", + "75885 Cedrela odorata NaN Sapindales Meliaceae \n", + "75886 Cedrela odorata NaN Sapindales Meliaceae \n", + "75887 Cedrela odorata NaN Sapindales Meliaceae \n", + "75888 Daboia russelii Reptilia Serpentes Viperidae \n", + "75889 Civettictis civetta Mammalia Carnivora Viverridae \n", + "75890 Civettictis civetta Mammalia Carnivora Viverridae \n", + "\n", + " Genus Importer Exporter Origin Importer reported quantity \\\n", + "0 Aquila TR NL CZ NaN \n", + "1 Aquila XV RS RS NaN \n", + "2 Haliaeetus BE NO NaN NaN \n", + "3 Haliaeetus BE NO NaN NaN \n", + "4 Haliaeetus DK IS NaN 700.00 \n", + "5 
Haliaeetus XV RS RS NaN \n", + "6 Harpia BR FR NaN NaN \n", + "7 Harpia BR FR NaN NaN \n", + "8 Harpia BR FR NaN NaN \n", + "9 Acipenser CH DE NaN NaN \n", + "10 Acipenser TW CA NaN NaN \n", + "11 Acipenser US IR NaN 100.00 \n", + "12 Agave US TH NaN NaN \n", + "13 Ailurus AU NZ NaN NaN \n", + "14 Ailurus CA US NaN 1.00 \n", + "15 Ailurus IL DE NaN NaN \n", + "16 Ailurus JP US NaN 1.00 \n", + "17 Ailurus JP US NaN NaN \n", + "18 Ailurus KP CN NaN NaN \n", + "19 Ailurus KR CN NaN NaN \n", + "20 Ailurus KR JP NaN NaN \n", + "21 Ailurus US CA NaN 5.00 \n", + "22 Alligator KP CN NaN NaN \n", + "23 Melanosuchus US DK NaN 10.00 \n", + "24 Anas CA US XX NaN \n", + "25 Anas MC FR XX 2.00 \n", + "26 Asarcornis GB JE GB 1.00 \n", + "27 Asarcornis GB JE NaN 1.00 \n", + "28 Branta CN GB NaN 1.00 \n", + "29 Branta MC FR XX 2.00 \n", + "... ... ... ... ... ... \n", + "75861 Thalurania US PE NaN NaN \n", + "75862 Threnetes US PE NaN NaN \n", + "75863 Ursus BG RU NaN 1.00 \n", + "75864 Varanus MG FR TD 459.00 \n", + "75865 Varanus CR US NaN 1.00 \n", + "75866 Varanus MO FR ID 1.00 \n", + "75867 Varanus MO FR MY 2.00 \n", + "75868 Varanus MO GB ID 2.00 \n", + "75869 Varanus MO IT ID 8.00 \n", + "75870 Varanus US CO ID NaN \n", + "75871 Varanus US CR NaN NaN \n", + "75872 Varanus XX CO ID NaN \n", + "75873 Dioon NL CR NaN NaN \n", + "75874 NaN DE AO NaN NaN \n", + "75875 Zamia NL CR NaN NaN \n", + "75876 NaN DE AO NaN NaN \n", + "75877 Canis DE ET NaN NaN \n", + "75878 Canis US ET NaN NaN \n", + "75879 Sarcoramphus JP PE NaN NaN \n", + "75880 Dipteryx CR NI NaN 19.55 \n", + "75881 Choloepus US CR NaN NaN \n", + "75882 Choloepus US CR NaN NaN \n", + "75883 Choloepus US CR NaN NaN \n", + "75884 Cedrela CR IN NaN 20.30 \n", + "75885 Cedrela CR PR NaN 17.66 \n", + "75886 Cedrela CR US NaN 8.67 \n", + "75887 Cedrela IN CR NaN NaN \n", + "75888 Daboia RS IT US 200.00 \n", + "75889 Civettictis FR ET NaN NaN \n", + "75890 Civettictis KR ET NaN NaN \n", + "\n", + " Exporter reported quantity 
Term Unit Purpose \\\n", + "0 1.0 bodies NaN T \n", + "1 1.0 bodies NaN Q \n", + "2 43.0 feathers NaN S \n", + "3 43.0 specimens NaN S \n", + "4 NaN specimens NaN S \n", + "5 1.0 bodies NaN Q \n", + "6 12.0 feathers NaN S \n", + "7 4.0 feathers NaN S \n", + "8 2.0 feathers NaN S \n", + "9 4.0 live NaN T \n", + "10 3.0 eggs (live) kg T \n", + "11 NaN caviar g P \n", + "12 1.0 live NaN T \n", + "13 2.0 live NaN Z \n", + "14 1.0 live NaN Z \n", + "15 2.0 live NaN Z \n", + "16 NaN live NaN B \n", + "17 1.0 live NaN Z \n", + "18 1.0 bodies NaN E \n", + "19 1.0 specimens NaN E \n", + "20 1.0 live NaN Z \n", + "21 5.0 live NaN Z \n", + "22 1.0 bodies NaN E \n", + "23 NaN live NaN Z \n", + "24 3.0 feathers NaN S \n", + "25 NaN live NaN Z \n", + "26 1.0 bodies NaN S \n", + "27 2.0 bodies NaN S \n", + "28 NaN bodies NaN Q \n", + "29 NaN live NaN Z \n", + "... ... ... ... ... \n", + "75861 22.0 specimens NaN S \n", + "75862 16.0 specimens NaN S \n", + "75863 NaN trophies NaN H \n", + "75864 NaN skin pieces NaN T \n", + "75865 NaN live NaN T \n", + "75866 NaN leather products (small) NaN T \n", + "75867 NaN leather products (small) NaN T \n", + "75868 NaN leather products (small) NaN T \n", + "75869 NaN leather products (small) NaN T \n", + "75870 8.0 leather products (small) NaN T \n", + "75871 21.0 live NaN T \n", + "75872 1.0 leather products (small) NaN T \n", + "75873 1500.0 live NaN T \n", + "75874 2.0 specimens NaN S \n", + "75875 1500.0 live NaN T \n", + "75876 50.0 raw corals kg S \n", + "75877 1.0 skins NaN H \n", + "75878 7.0 trophies NaN H \n", + "75879 2.0 live NaN T \n", + "75880 NaN sawn wood m3 T \n", + "75881 5.0 bones NaN S \n", + "75882 96.0 hair NaN S \n", + "75883 452.0 specimens NaN S \n", + "75884 NaN timber m3 T \n", + "75885 NaN timber m3 T \n", + "75886 NaN sawn wood m3 T \n", + "75887 20.3 timber m3 T \n", + "75888 NaN specimens NaN T \n", + "75889 635.1 musk kg T \n", + "75890 480.0 musk kg T \n", + "\n", + " Source \n", + "0 C \n", + "1 O \n", + "2 
W \n", + "3 W \n", + "4 W \n", + "5 O \n", + "6 C \n", + "7 U \n", + "8 W \n", + "9 C \n", + "10 D \n", + "11 I \n", + "12 D \n", + "13 C \n", + "14 F \n", + "15 C \n", + "16 C \n", + "17 C \n", + "18 U \n", + "19 C \n", + "20 C \n", + "21 C \n", + "22 U \n", + "23 F \n", + "24 I \n", + "25 I \n", + "26 C \n", + "27 C \n", + "28 C \n", + "29 I \n", + "... ... \n", + "75861 W \n", + "75862 W \n", + "75863 W \n", + "75864 W \n", + "75865 C \n", + "75866 W \n", + "75867 W \n", + "75868 W \n", + "75869 W \n", + "75870 W \n", + "75871 C \n", + "75872 W \n", + "75873 A \n", + "75874 W \n", + "75875 A \n", + "75876 W \n", + "75877 W \n", + "75878 W \n", + "75879 F \n", + "75880 W \n", + "75881 W \n", + "75882 W \n", + "75883 W \n", + "75884 A \n", + "75885 A \n", + "75886 A \n", + "75887 W \n", + "75888 F \n", + "75889 R \n", + "75890 R \n", + "\n", + "[75891 rows x 14 columns]" + ] + }, + "execution_count": 345, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.columns = dataframe.columns.str.strip()\n", + "dataframe = dataframe.drop(columns=['Year', 'App.'])\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're going to have to replace those NaN values in the reported quantities columns..." + ] + }, + { + "cell_type": "code", + "execution_count": 346, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
0Aquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZ0.001.0bodiesNaNTC
1Aquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRS0.001.0bodiesNaNQO
2Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaN0.0043.0feathersNaNSW
3Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaN0.0043.0specimensNaNSW
4Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.000.0specimensNaNSW
5Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRS0.001.0bodiesNaNQO
6Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.0012.0feathersNaNSC
7Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.004.0feathersNaNSU
8Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.002.0feathersNaNSW
9Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaN0.004.0liveNaNTC
10Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaN0.003.0eggs (live)kgTD
11Acipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.000.0caviargPI
12Agave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaN0.001.0liveNaNTD
13Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaN0.002.0liveNaNZC
14Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
15Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaN0.002.0liveNaNZC
16Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.000.0liveNaNBC
17Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN0.001.0liveNaNZC
18Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaN0.001.0bodiesNaNEU
19Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaN0.001.0specimensNaNEC
20Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaN0.001.0liveNaNZC
21Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
22Alligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaN0.001.0bodiesNaNEU
23Melanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.000.0liveNaNZF
24Anas laysanensisAvesAnseriformesAnatidaeAnasCAUSXX0.003.0feathersNaNSI
25Anas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.000.0liveNaNZI
26Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
27Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
28Branta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.000.0bodiesNaNQC
29Branta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.000.0liveNaNZI
.............................................
75861Thalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaN0.0022.0specimensNaNSW
75862Threnetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaN0.0016.0specimensNaNSW
75863Ursus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.000.0trophiesNaNHW
75864Varanus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.000.0skin piecesNaNTW
75865Varanus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.000.0liveNaNTC
75866Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.000.0leather products (small)NaNTW
75867Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.000.0leather products (small)NaNTW
75868Varanus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.000.0leather products (small)NaNTW
75869Varanus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.000.0leather products (small)NaNTW
75870Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCOID0.008.0leather products (small)NaNTW
75871Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaN0.0021.0liveNaNTC
75872Varanus salvatorReptiliaSauriaVaranidaeVaranusXXCOID0.001.0leather products (small)NaNTW
75873Dioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaN0.001500.0liveNaNTA
75874Zamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaN0.002.0specimensNaNSW
75875Zamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaN0.001500.0liveNaNTA
75876Scleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaN0.0050.0raw coralskgSW
75877Canis aureusMammaliaCarnivoraCanidaeCanisDEETNaN0.001.0skinsNaNHW
75878Canis aureusMammaliaCarnivoraCanidaeCanisUSETNaN0.007.0trophiesNaNHW
75879Sarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaN0.002.0liveNaNTF
75880Dipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.550.0sawn woodm3TW
75881Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.005.0bonesNaNSW
75882Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.0096.0hairNaNSW
75883Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.00452.0specimensNaNSW
75884Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.300.0timberm3TA
75885Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.660.0timberm3TA
75886Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.670.0sawn woodm3TA
75887Cedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaN0.0020.3timberm3TW
75888Daboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.000.0specimensNaNTF
75889Civettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaN0.00635.1muskkgTR
75890Civettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaN0.00480.0muskkgTR
\n", + "

75891 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Taxon Class Order Family \\\n", + "0 Aquila heliaca Aves Falconiformes Accipitridae \n", + "1 Aquila heliaca Aves Falconiformes Accipitridae \n", + "2 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "3 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "4 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "5 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "6 Harpia harpyja Aves Falconiformes Accipitridae \n", + "7 Harpia harpyja Aves Falconiformes Accipitridae \n", + "8 Harpia harpyja Aves Falconiformes Accipitridae \n", + "9 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "10 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "11 Acipenser sturio Actinopteri Acipenseriformes Acipenseridae \n", + "12 Agave parviflora NaN Liliales Agavaceae \n", + "13 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "14 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "15 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "16 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "17 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "18 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "19 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "20 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "21 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "22 Alligator sinensis Reptilia Crocodylia Alligatoridae \n", + "23 Melanosuchus niger Reptilia Crocodylia Alligatoridae \n", + "24 Anas laysanensis Aves Anseriformes Anatidae \n", + "25 Anas laysanensis Aves Anseriformes Anatidae \n", + "26 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "27 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "28 Branta sandvicensis Aves Anseriformes Anatidae \n", + "29 Branta sandvicensis Aves Anseriformes Anatidae \n", + "... ... ... ... ... 
\n", + "75861 Thalurania furcata Aves Apodiformes Trochilidae \n", + "75862 Threnetes niger Aves Apodiformes Trochilidae \n", + "75863 Ursus arctos Mammalia Carnivora Ursidae \n", + "75864 Varanus niloticus Reptilia Sauria Varanidae \n", + "75865 Varanus salvator Reptilia Sauria Varanidae \n", + "75866 Varanus salvator Reptilia Sauria Varanidae \n", + "75867 Varanus salvator Reptilia Sauria Varanidae \n", + "75868 Varanus salvator Reptilia Sauria Varanidae \n", + "75869 Varanus salvator Reptilia Sauria Varanidae \n", + "75870 Varanus salvator Reptilia Sauria Varanidae \n", + "75871 Varanus salvator Reptilia Sauria Varanidae \n", + "75872 Varanus salvator Reptilia Sauria Varanidae \n", + "75873 Dioon spinulosum NaN Cycadales Zamiaceae \n", + "75874 Zamiaceae spp. NaN Cycadales Zamiaceae \n", + "75875 Zamia integrifolia NaN Cycadales Zamiaceae \n", + "75876 Scleractinia spp. Anthozoa Scleractinia NaN \n", + "75877 Canis aureus Mammalia Carnivora Canidae \n", + "75878 Canis aureus Mammalia Carnivora Canidae \n", + "75879 Sarcoramphus papa Aves Falconiformes Cathartidae \n", + "75880 Dipteryx panamensis NaN Fabales Leguminosae \n", + "75881 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75882 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75883 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75884 Cedrela odorata NaN Sapindales Meliaceae \n", + "75885 Cedrela odorata NaN Sapindales Meliaceae \n", + "75886 Cedrela odorata NaN Sapindales Meliaceae \n", + "75887 Cedrela odorata NaN Sapindales Meliaceae \n", + "75888 Daboia russelii Reptilia Serpentes Viperidae \n", + "75889 Civettictis civetta Mammalia Carnivora Viverridae \n", + "75890 Civettictis civetta Mammalia Carnivora Viverridae \n", + "\n", + " Genus Importer Exporter Origin Importer reported quantity \\\n", + "0 Aquila TR NL CZ 0.00 \n", + "1 Aquila XV RS RS 0.00 \n", + "2 Haliaeetus BE NO NaN 0.00 \n", + "3 Haliaeetus BE NO NaN 0.00 \n", + "4 Haliaeetus DK IS NaN 700.00 \n", + 
"5 Haliaeetus XV RS RS 0.00 \n", + "6 Harpia BR FR NaN 0.00 \n", + "7 Harpia BR FR NaN 0.00 \n", + "8 Harpia BR FR NaN 0.00 \n", + "9 Acipenser CH DE NaN 0.00 \n", + "10 Acipenser TW CA NaN 0.00 \n", + "11 Acipenser US IR NaN 100.00 \n", + "12 Agave US TH NaN 0.00 \n", + "13 Ailurus AU NZ NaN 0.00 \n", + "14 Ailurus CA US NaN 1.00 \n", + "15 Ailurus IL DE NaN 0.00 \n", + "16 Ailurus JP US NaN 1.00 \n", + "17 Ailurus JP US NaN 0.00 \n", + "18 Ailurus KP CN NaN 0.00 \n", + "19 Ailurus KR CN NaN 0.00 \n", + "20 Ailurus KR JP NaN 0.00 \n", + "21 Ailurus US CA NaN 5.00 \n", + "22 Alligator KP CN NaN 0.00 \n", + "23 Melanosuchus US DK NaN 10.00 \n", + "24 Anas CA US XX 0.00 \n", + "25 Anas MC FR XX 2.00 \n", + "26 Asarcornis GB JE GB 1.00 \n", + "27 Asarcornis GB JE NaN 1.00 \n", + "28 Branta CN GB NaN 1.00 \n", + "29 Branta MC FR XX 2.00 \n", + "... ... ... ... ... ... \n", + "75861 Thalurania US PE NaN 0.00 \n", + "75862 Threnetes US PE NaN 0.00 \n", + "75863 Ursus BG RU NaN 1.00 \n", + "75864 Varanus MG FR TD 459.00 \n", + "75865 Varanus CR US NaN 1.00 \n", + "75866 Varanus MO FR ID 1.00 \n", + "75867 Varanus MO FR MY 2.00 \n", + "75868 Varanus MO GB ID 2.00 \n", + "75869 Varanus MO IT ID 8.00 \n", + "75870 Varanus US CO ID 0.00 \n", + "75871 Varanus US CR NaN 0.00 \n", + "75872 Varanus XX CO ID 0.00 \n", + "75873 Dioon NL CR NaN 0.00 \n", + "75874 NaN DE AO NaN 0.00 \n", + "75875 Zamia NL CR NaN 0.00 \n", + "75876 NaN DE AO NaN 0.00 \n", + "75877 Canis DE ET NaN 0.00 \n", + "75878 Canis US ET NaN 0.00 \n", + "75879 Sarcoramphus JP PE NaN 0.00 \n", + "75880 Dipteryx CR NI NaN 19.55 \n", + "75881 Choloepus US CR NaN 0.00 \n", + "75882 Choloepus US CR NaN 0.00 \n", + "75883 Choloepus US CR NaN 0.00 \n", + "75884 Cedrela CR IN NaN 20.30 \n", + "75885 Cedrela CR PR NaN 17.66 \n", + "75886 Cedrela CR US NaN 8.67 \n", + "75887 Cedrela IN CR NaN 0.00 \n", + "75888 Daboia RS IT US 200.00 \n", + "75889 Civettictis FR ET NaN 0.00 \n", + "75890 Civettictis KR ET NaN 0.00 \n", + 
"\n", + " Exporter reported quantity Term Unit Purpose \\\n", + "0 1.0 bodies NaN T \n", + "1 1.0 bodies NaN Q \n", + "2 43.0 feathers NaN S \n", + "3 43.0 specimens NaN S \n", + "4 0.0 specimens NaN S \n", + "5 1.0 bodies NaN Q \n", + "6 12.0 feathers NaN S \n", + "7 4.0 feathers NaN S \n", + "8 2.0 feathers NaN S \n", + "9 4.0 live NaN T \n", + "10 3.0 eggs (live) kg T \n", + "11 0.0 caviar g P \n", + "12 1.0 live NaN T \n", + "13 2.0 live NaN Z \n", + "14 1.0 live NaN Z \n", + "15 2.0 live NaN Z \n", + "16 0.0 live NaN B \n", + "17 1.0 live NaN Z \n", + "18 1.0 bodies NaN E \n", + "19 1.0 specimens NaN E \n", + "20 1.0 live NaN Z \n", + "21 5.0 live NaN Z \n", + "22 1.0 bodies NaN E \n", + "23 0.0 live NaN Z \n", + "24 3.0 feathers NaN S \n", + "25 0.0 live NaN Z \n", + "26 1.0 bodies NaN S \n", + "27 2.0 bodies NaN S \n", + "28 0.0 bodies NaN Q \n", + "29 0.0 live NaN Z \n", + "... ... ... ... ... \n", + "75861 22.0 specimens NaN S \n", + "75862 16.0 specimens NaN S \n", + "75863 0.0 trophies NaN H \n", + "75864 0.0 skin pieces NaN T \n", + "75865 0.0 live NaN T \n", + "75866 0.0 leather products (small) NaN T \n", + "75867 0.0 leather products (small) NaN T \n", + "75868 0.0 leather products (small) NaN T \n", + "75869 0.0 leather products (small) NaN T \n", + "75870 8.0 leather products (small) NaN T \n", + "75871 21.0 live NaN T \n", + "75872 1.0 leather products (small) NaN T \n", + "75873 1500.0 live NaN T \n", + "75874 2.0 specimens NaN S \n", + "75875 1500.0 live NaN T \n", + "75876 50.0 raw corals kg S \n", + "75877 1.0 skins NaN H \n", + "75878 7.0 trophies NaN H \n", + "75879 2.0 live NaN T \n", + "75880 0.0 sawn wood m3 T \n", + "75881 5.0 bones NaN S \n", + "75882 96.0 hair NaN S \n", + "75883 452.0 specimens NaN S \n", + "75884 0.0 timber m3 T \n", + "75885 0.0 timber m3 T \n", + "75886 0.0 sawn wood m3 T \n", + "75887 20.3 timber m3 T \n", + "75888 0.0 specimens NaN T \n", + "75889 635.1 musk kg T \n", + "75890 480.0 musk kg T \n", + "\n", + " 
Source \n", + "0 C \n", + "1 O \n", + "2 W \n", + "3 W \n", + "4 W \n", + "5 O \n", + "6 C \n", + "7 U \n", + "8 W \n", + "9 C \n", + "10 D \n", + "11 I \n", + "12 D \n", + "13 C \n", + "14 F \n", + "15 C \n", + "16 C \n", + "17 C \n", + "18 U \n", + "19 C \n", + "20 C \n", + "21 C \n", + "22 U \n", + "23 F \n", + "24 I \n", + "25 I \n", + "26 C \n", + "27 C \n", + "28 C \n", + "29 I \n", + "... ... \n", + "75861 W \n", + "75862 W \n", + "75863 W \n", + "75864 W \n", + "75865 C \n", + "75866 W \n", + "75867 W \n", + "75868 W \n", + "75869 W \n", + "75870 W \n", + "75871 C \n", + "75872 W \n", + "75873 A \n", + "75874 W \n", + "75875 A \n", + "75876 W \n", + "75877 W \n", + "75878 W \n", + "75879 F \n", + "75880 W \n", + "75881 W \n", + "75882 W \n", + "75883 W \n", + "75884 A \n", + "75885 A \n", + "75886 A \n", + "75887 W \n", + "75888 F \n", + "75889 R \n", + "75890 R \n", + "\n", + "[75891 rows x 14 columns]" + ] + }, + "execution_count": 346, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe['Importer reported quantity'].fillna(0, inplace=True)\n", + "dataframe['Exporter reported quantity'].fillna(0, inplace=True)\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have a lot of text data in the form of ISO country codes, and specialist categories. We'll need to encode these as one hot vectors in the next step so that our neural net can understand them. 
We'll grab a list of all the columns we'll need to encode first and remove the ones we don't want to encode (the numeric columns)." + ] + }, + { + "cell_type": "code", + "execution_count": 347, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of Columns: 14\n", + "['Taxon', 'Class', 'Order', 'Family', 'Genus', 'Importer', 'Exporter', 'Origin', 'Term', 'Unit', 'Source']\n" + ] + } + ], + "source": [ + "print(\"Number of Columns: \", len(dataframe.columns))\n", + "columns = list(dataframe.columns)\n", + "columns.remove('Importer reported quantity')\n", + "columns.remove('Exporter reported quantity')\n", + "columns.remove('Purpose')\n", + "print(columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Encoding our labels and data\n", + "\n", + "In order to test the performance of our neural net, we'll need to split our dataset into the input features and their corresponding classifications (the labels). \n", + "\n", + "The `Purpose` column will be what we are going to attempt to predict (notice we already removed it from the list of columns we'd like to one hot encode).\n", + "\n", + "Let's pop off our labels from our dataframe, and keep them separate..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 348, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T\n", + "1 Q\n", + "2 S\n", + "3 S\n", + "4 S\n", + "5 Q\n", + "6 S\n", + "7 S\n", + "8 S\n", + "9 T\n", + "10 T\n", + "11 P\n", + "12 T\n", + "13 Z\n", + "14 Z\n", + "15 Z\n", + "16 B\n", + "17 Z\n", + "18 E\n", + "19 E\n", + "20 Z\n", + "21 Z\n", + "22 E\n", + "23 Z\n", + "24 S\n", + "25 Z\n", + "26 S\n", + "27 S\n", + "28 Q\n", + "29 Z\n", + " ..\n", + "75861 S\n", + "75862 S\n", + "75863 H\n", + "75864 T\n", + "75865 T\n", + "75866 T\n", + "75867 T\n", + "75868 T\n", + "75869 T\n", + "75870 T\n", + "75871 T\n", + "75872 T\n", + "75873 T\n", + "75874 S\n", + "75875 T\n", + "75876 S\n", + "75877 H\n", + "75878 H\n", + "75879 T\n", + "75880 T\n", + "75881 S\n", + "75882 S\n", + "75883 S\n", + "75884 T\n", + "75885 T\n", + "75886 T\n", + "75887 T\n", + "75888 T\n", + "75889 T\n", + "75890 T\n", + "Name: Purpose, Length: 75891, dtype: object" + ] + }, + "execution_count": 348, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels = dataframe.pop('Purpose')\n", + "\n", + "labels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll need to convert our classifications into one hot vectors..." + ] + }, + { + "cell_type": "code", + "execution_count": 349, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BEGHLMNPQSTZ
0000000000010
1000000001000
2000000000100
3000000000100
4000000000100
5000000001000
6000000000100
7000000000100
8000000000100
9000000000010
10000000000010
11000000010000
12000000000010
13000000000001
14000000000001
15000000000001
16100000000000
17000000000001
18010000000000
19010000000000
20000000000001
21000000000001
22010000000000
23000000000001
24000000000100
25000000000001
26000000000100
27000000000100
28000000001000
29000000000001
.......................................
75861000000000100
75862000000000100
75863000100000000
75864000000000010
75865000000000010
75866000000000010
75867000000000010
75868000000000010
75869000000000010
75870000000000010
75871000000000010
75872000000000010
75873000000000010
75874000000000100
75875000000000010
75876000000000100
75877000100000000
75878000100000000
75879000000000010
75880000000000010
75881000000000100
75882000000000100
75883000000000100
75884000000000010
75885000000000010
75886000000000010
75887000000000010
75888000000000010
75889000000000010
75890000000000010
\n", + "

75891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " B E G H L M N P Q S T Z\n", + "0 0 0 0 0 0 0 0 0 0 0 1 0\n", + "1 0 0 0 0 0 0 0 0 1 0 0 0\n", + "2 0 0 0 0 0 0 0 0 0 1 0 0\n", + "3 0 0 0 0 0 0 0 0 0 1 0 0\n", + "4 0 0 0 0 0 0 0 0 0 1 0 0\n", + "5 0 0 0 0 0 0 0 0 1 0 0 0\n", + "6 0 0 0 0 0 0 0 0 0 1 0 0\n", + "7 0 0 0 0 0 0 0 0 0 1 0 0\n", + "8 0 0 0 0 0 0 0 0 0 1 0 0\n", + "9 0 0 0 0 0 0 0 0 0 0 1 0\n", + "10 0 0 0 0 0 0 0 0 0 0 1 0\n", + "11 0 0 0 0 0 0 0 1 0 0 0 0\n", + "12 0 0 0 0 0 0 0 0 0 0 1 0\n", + "13 0 0 0 0 0 0 0 0 0 0 0 1\n", + "14 0 0 0 0 0 0 0 0 0 0 0 1\n", + "15 0 0 0 0 0 0 0 0 0 0 0 1\n", + "16 1 0 0 0 0 0 0 0 0 0 0 0\n", + "17 0 0 0 0 0 0 0 0 0 0 0 1\n", + "18 0 1 0 0 0 0 0 0 0 0 0 0\n", + "19 0 1 0 0 0 0 0 0 0 0 0 0\n", + "20 0 0 0 0 0 0 0 0 0 0 0 1\n", + "21 0 0 0 0 0 0 0 0 0 0 0 1\n", + "22 0 1 0 0 0 0 0 0 0 0 0 0\n", + "23 0 0 0 0 0 0 0 0 0 0 0 1\n", + "24 0 0 0 0 0 0 0 0 0 1 0 0\n", + "25 0 0 0 0 0 0 0 0 0 0 0 1\n", + "26 0 0 0 0 0 0 0 0 0 1 0 0\n", + "27 0 0 0 0 0 0 0 0 0 1 0 0\n", + "28 0 0 0 0 0 0 0 0 1 0 0 0\n", + "29 0 0 0 0 0 0 0 0 0 0 0 1\n", + "... .. .. .. .. .. .. .. .. .. .. .. 
..\n", + "75861 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75862 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75863 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75864 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75865 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75866 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75867 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75868 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75869 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75870 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75871 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75872 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75873 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75874 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75875 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75876 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75877 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75878 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75879 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75880 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75881 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75882 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75883 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75884 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75885 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75886 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75887 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75888 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75889 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75890 0 0 0 0 0 0 0 0 0 0 1 0\n", + "\n", + "[75891 rows x 12 columns]" + ] + }, + "execution_count": 349, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels = pd.get_dummies(labels)\n", + "\n", + "labels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we'll create one hot vectors for the rest of our datatable and call this *data*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.get_dummies(dataframe, columns=columns)\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our data is looking better, but to make things easier on our model, we can scale everything to between 0-1..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scaler = MinMaxScaler(feature_range=(0, 1))\n", + "data_scaled = scaler.fit_transform(data)\n", + "data = pd.DataFrame(data_scaled)\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a train / test split\n", + "\n", + "In order to evaluate our model, we'll split our data into two groups: a group for training, which the neural net will learn on, and a group for validation, which the neural net will not see during training but will be evaluated against once trained." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"X_train shape: \", X_train.shape)\n", + "print(\"X_test shape: \", X_test.shape)\n", + "print(\"y_train shape: \", y_train.shape)\n", + "print(\"y_test shape: \", y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Building a simple model\n", + "\n", + "We'll build a simple neural network which accepts our input of 9400 features, and passes it to 6279 neurons in a hidden layer (two thirds of the size of the input layer plus the size of the output layer is a good rule of thumb for how many neurons a hidden layer should have). Finally, our hidden layer is passed to an output layer representing our categories (so 12 neurons in this case) and uses a softmax activation function to turn our predictions into probabilities of the permit belonging to each class..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def build_model():\n", + " model = Sequential()\n", + " model.add(Dense(6279, input_dim=X_train.shape[1], activation='relu'))\n", + " model.add(Dropout(0.1))\n", + " model.add(Dense(y_train.shape[1], activation='softmax'))\n", + " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", + " \n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hyperparameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "epochs = 5\n", + "batch_size = 2000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's train our simple model..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "score = model.evaluate(X_test, y_test)\n", + "\n", + "print(\"%s: %.2f%%\" % (model.metrics_names[1], score[1]*100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluating our model with K-Fold Cross Validation\n", + "\n", + "We'll use k-fold validation to get a better representation of how our model did..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", + "cv_scores = []\n", + "\n", + "for train, test in kfold.split(data, labels):\n", + " model = build_model()\n", + " model.fit(data[train], labels[train], epochs=150, batch_size=10, verbose=0)\n", + " scores = model.evaluate(data[test], labels[test], verbose=0)\n", + " print(\"%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n", + " \n", + " cv_scores.append(scores[1] * 100)\n", + " \n", + "print(\"%.2f%% (+/- %.2f%%)\" % (numpy.mean(cvscores), numpy.std(cvscores)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/1. Classifying Partial Permits.ipynb b/1. Classifying Partial Permits.ipynb index 69059b9..708ee07 100644 --- a/1. Classifying Partial Permits.ipynb +++ b/1. Classifying Partial Permits.ipynb @@ -7,6 +7,8 @@ "# Goal 1\n", "## Given a partial permit, can we predict what classification it belongs to?\n", "\n", + "Trade permits can be messy and incomplete. 
Can we use this partial data to successfully predict which Purpose code the permit should belong to?\n", + "\n", "### Getting Started\n", "- Open up the CITES trade database at https://trade.cites.org/\n", "- Select a year range and click *Search*\n", @@ -17,44 +19,9109 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 342, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense, Dropout, Activation\n", + "from keras.wrappers.scikit_learn import KerasClassifier\n", + "from keras.utils import np_utils\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import StratifiedKFold\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.pipeline import Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 343, + "metadata": {}, + "outputs": [], + "source": [ + "seed = 1\n", + "np.random.seed(seed)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing our data\n", + "Let's import our data into a pandas dataframe and take a look at it." + ] + }, + { + "cell_type": "code", + "execution_count": 344, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearApp.TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
02016IAquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZNaN1.0bodiesNaNTC
12016IAquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRSNaN1.0bodiesNaNQO
22016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0feathersNaNSW
32016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0specimensNaNSW
42016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.00NaNspecimensNaNSW
52016IHaliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRSNaN1.0bodiesNaNQO
62016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN12.0feathersNaNSC
72016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN4.0feathersNaNSU
82016IHarpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN2.0feathersNaNSW
92016IAcipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaNNaN4.0liveNaNTC
102016IAcipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaNNaN3.0eggs (live)kgTD
112016IAcipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.00NaNcaviargPI
122016IAgave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaNNaN1.0liveNaNTD
132016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaNNaN2.0liveNaNZC
142016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
152016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaNNaN2.0liveNaNZC
162016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.00NaNliveNaNBC
172016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaNNaN1.0liveNaNZC
182016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaNNaN1.0bodiesNaNEU
192016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaNNaN1.0specimensNaNEC
202016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaNNaN1.0liveNaNZC
212016IAilurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
222016IAlligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaNNaN1.0bodiesNaNEU
232016IMelanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.00NaNliveNaNZF
242016IAnas laysanensisAvesAnseriformesAnatidaeAnasCAUSXXNaN3.0feathersNaNSI
252016IAnas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.00NaNliveNaNZI
262016IAsarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
272016IAsarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
282016IBranta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.00NaNbodiesNaNQC
292016IBranta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.00NaNliveNaNZI
...................................................
758612017IIThalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaNNaN22.0specimensNaNSW
758622017IIThrenetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaNNaN16.0specimensNaNSW
758632017IIUrsus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.00NaNtrophiesNaNHW
758642017IIVaranus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.00NaNskin piecesNaNTW
758652017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.00NaNliveNaNTC
758662017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.00NaNleather products (small)NaNTW
758672017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.00NaNleather products (small)NaNTW
758682017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.00NaNleather products (small)NaNTW
758692017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.00NaNleather products (small)NaNTW
758702017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusUSCOIDNaN8.0leather products (small)NaNTW
758712017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaNNaN21.0liveNaNTC
758722017IIVaranus salvatorReptiliaSauriaVaranidaeVaranusXXCOIDNaN1.0leather products (small)NaNTW
758732017IIDioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaNNaN1500.0liveNaNTA
758742017IIZamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaNNaN2.0specimensNaNSW
758752017IIZamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaNNaN1500.0liveNaNTA
758762017IIScleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaNNaN50.0raw coralskgSW
758772017IIICanis aureusMammaliaCarnivoraCanidaeCanisDEETNaNNaN1.0skinsNaNHW
758782017IIICanis aureusMammaliaCarnivoraCanidaeCanisUSETNaNNaN7.0trophiesNaNHW
758792017IIISarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaNNaN2.0liveNaNTF
758802017IIIDipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.55NaNsawn woodm3TW
758812017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN5.0bonesNaNSW
758822017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN96.0hairNaNSW
758832017IIICholoepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN452.0specimensNaNSW
758842017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.30NaNtimberm3TA
758852017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.66NaNtimberm3TA
758862017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.67NaNsawn woodm3TA
758872017IIICedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaNNaN20.3timberm3TW
758882017IIIDaboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.00NaNspecimensNaNTF
758892017IIICivettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaNNaN635.1muskkgTR
758902017IIICivettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaNNaN480.0muskkgTR
\n", + "

75891 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " Year App. Taxon Class Order \\\n", + "0 2016 I Aquila heliaca Aves Falconiformes \n", + "1 2016 I Aquila heliaca Aves Falconiformes \n", + "2 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "3 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "4 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "5 2016 I Haliaeetus albicilla Aves Falconiformes \n", + "6 2016 I Harpia harpyja Aves Falconiformes \n", + "7 2016 I Harpia harpyja Aves Falconiformes \n", + "8 2016 I Harpia harpyja Aves Falconiformes \n", + "9 2016 I Acipenser brevirostrum Actinopteri Acipenseriformes \n", + "10 2016 I Acipenser brevirostrum Actinopteri Acipenseriformes \n", + "11 2016 I Acipenser sturio Actinopteri Acipenseriformes \n", + "12 2016 I Agave parviflora NaN Liliales \n", + "13 2016 I Ailurus fulgens Mammalia Carnivora \n", + "14 2016 I Ailurus fulgens Mammalia Carnivora \n", + "15 2016 I Ailurus fulgens Mammalia Carnivora \n", + "16 2016 I Ailurus fulgens Mammalia Carnivora \n", + "17 2016 I Ailurus fulgens Mammalia Carnivora \n", + "18 2016 I Ailurus fulgens Mammalia Carnivora \n", + "19 2016 I Ailurus fulgens Mammalia Carnivora \n", + "20 2016 I Ailurus fulgens Mammalia Carnivora \n", + "21 2016 I Ailurus fulgens Mammalia Carnivora \n", + "22 2016 I Alligator sinensis Reptilia Crocodylia \n", + "23 2016 I Melanosuchus niger Reptilia Crocodylia \n", + "24 2016 I Anas laysanensis Aves Anseriformes \n", + "25 2016 I Anas laysanensis Aves Anseriformes \n", + "26 2016 I Asarcornis scutulata Aves Anseriformes \n", + "27 2016 I Asarcornis scutulata Aves Anseriformes \n", + "28 2016 I Branta sandvicensis Aves Anseriformes \n", + "29 2016 I Branta sandvicensis Aves Anseriformes \n", + "... ... ... ... ... ... 
\n", + "75861 2017 II Thalurania furcata Aves Apodiformes \n", + "75862 2017 II Threnetes niger Aves Apodiformes \n", + "75863 2017 II Ursus arctos Mammalia Carnivora \n", + "75864 2017 II Varanus niloticus Reptilia Sauria \n", + "75865 2017 II Varanus salvator Reptilia Sauria \n", + "75866 2017 II Varanus salvator Reptilia Sauria \n", + "75867 2017 II Varanus salvator Reptilia Sauria \n", + "75868 2017 II Varanus salvator Reptilia Sauria \n", + "75869 2017 II Varanus salvator Reptilia Sauria \n", + "75870 2017 II Varanus salvator Reptilia Sauria \n", + "75871 2017 II Varanus salvator Reptilia Sauria \n", + "75872 2017 II Varanus salvator Reptilia Sauria \n", + "75873 2017 II Dioon spinulosum NaN Cycadales \n", + "75874 2017 II Zamiaceae spp. NaN Cycadales \n", + "75875 2017 II Zamia integrifolia NaN Cycadales \n", + "75876 2017 II Scleractinia spp. Anthozoa Scleractinia \n", + "75877 2017 III Canis aureus Mammalia Carnivora \n", + "75878 2017 III Canis aureus Mammalia Carnivora \n", + "75879 2017 III Sarcoramphus papa Aves Falconiformes \n", + "75880 2017 III Dipteryx panamensis NaN Fabales \n", + "75881 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75882 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75883 2017 III Choloepus hoffmanni Mammalia Pilosa \n", + "75884 2017 III Cedrela odorata NaN Sapindales \n", + "75885 2017 III Cedrela odorata NaN Sapindales \n", + "75886 2017 III Cedrela odorata NaN Sapindales \n", + "75887 2017 III Cedrela odorata NaN Sapindales \n", + "75888 2017 III Daboia russelii Reptilia Serpentes \n", + "75889 2017 III Civettictis civetta Mammalia Carnivora \n", + "75890 2017 III Civettictis civetta Mammalia Carnivora \n", + "\n", + " Family Genus Importer Exporter Origin \\\n", + "0 Accipitridae Aquila TR NL CZ \n", + "1 Accipitridae Aquila XV RS RS \n", + "2 Accipitridae Haliaeetus BE NO NaN \n", + "3 Accipitridae Haliaeetus BE NO NaN \n", + "4 Accipitridae Haliaeetus DK IS NaN \n", + "5 Accipitridae Haliaeetus XV RS RS \n", 
+ "6 Accipitridae Harpia BR FR NaN \n", + "7 Accipitridae Harpia BR FR NaN \n", + "8 Accipitridae Harpia BR FR NaN \n", + "9 Acipenseridae Acipenser CH DE NaN \n", + "10 Acipenseridae Acipenser TW CA NaN \n", + "11 Acipenseridae Acipenser US IR NaN \n", + "12 Agavaceae Agave US TH NaN \n", + "13 Ailuridae Ailurus AU NZ NaN \n", + "14 Ailuridae Ailurus CA US NaN \n", + "15 Ailuridae Ailurus IL DE NaN \n", + "16 Ailuridae Ailurus JP US NaN \n", + "17 Ailuridae Ailurus JP US NaN \n", + "18 Ailuridae Ailurus KP CN NaN \n", + "19 Ailuridae Ailurus KR CN NaN \n", + "20 Ailuridae Ailurus KR JP NaN \n", + "21 Ailuridae Ailurus US CA NaN \n", + "22 Alligatoridae Alligator KP CN NaN \n", + "23 Alligatoridae Melanosuchus US DK NaN \n", + "24 Anatidae Anas CA US XX \n", + "25 Anatidae Anas MC FR XX \n", + "26 Anatidae Asarcornis GB JE GB \n", + "27 Anatidae Asarcornis GB JE NaN \n", + "28 Anatidae Branta CN GB NaN \n", + "29 Anatidae Branta MC FR XX \n", + "... ... ... ... ... ... \n", + "75861 Trochilidae Thalurania US PE NaN \n", + "75862 Trochilidae Threnetes US PE NaN \n", + "75863 Ursidae Ursus BG RU NaN \n", + "75864 Varanidae Varanus MG FR TD \n", + "75865 Varanidae Varanus CR US NaN \n", + "75866 Varanidae Varanus MO FR ID \n", + "75867 Varanidae Varanus MO FR MY \n", + "75868 Varanidae Varanus MO GB ID \n", + "75869 Varanidae Varanus MO IT ID \n", + "75870 Varanidae Varanus US CO ID \n", + "75871 Varanidae Varanus US CR NaN \n", + "75872 Varanidae Varanus XX CO ID \n", + "75873 Zamiaceae Dioon NL CR NaN \n", + "75874 Zamiaceae NaN DE AO NaN \n", + "75875 Zamiaceae Zamia NL CR NaN \n", + "75876 NaN NaN DE AO NaN \n", + "75877 Canidae Canis DE ET NaN \n", + "75878 Canidae Canis US ET NaN \n", + "75879 Cathartidae Sarcoramphus JP PE NaN \n", + "75880 Leguminosae Dipteryx CR NI NaN \n", + "75881 Megalonychidae Choloepus US CR NaN \n", + "75882 Megalonychidae Choloepus US CR NaN \n", + "75883 Megalonychidae Choloepus US CR NaN \n", + "75884 Meliaceae Cedrela CR IN NaN \n", 
+ "75885 Meliaceae Cedrela CR PR NaN \n", + "75886 Meliaceae Cedrela CR US NaN \n", + "75887 Meliaceae Cedrela IN CR NaN \n", + "75888 Viperidae Daboia RS IT US \n", + "75889 Viverridae Civettictis FR ET NaN \n", + "75890 Viverridae Civettictis KR ET NaN \n", + "\n", + " Importer reported quantity Exporter reported quantity \\\n", + "0 NaN 1.0 \n", + "1 NaN 1.0 \n", + "2 NaN 43.0 \n", + "3 NaN 43.0 \n", + "4 700.00 NaN \n", + "5 NaN 1.0 \n", + "6 NaN 12.0 \n", + "7 NaN 4.0 \n", + "8 NaN 2.0 \n", + "9 NaN 4.0 \n", + "10 NaN 3.0 \n", + "11 100.00 NaN \n", + "12 NaN 1.0 \n", + "13 NaN 2.0 \n", + "14 1.00 1.0 \n", + "15 NaN 2.0 \n", + "16 1.00 NaN \n", + "17 NaN 1.0 \n", + "18 NaN 1.0 \n", + "19 NaN 1.0 \n", + "20 NaN 1.0 \n", + "21 5.00 5.0 \n", + "22 NaN 1.0 \n", + "23 10.00 NaN \n", + "24 NaN 3.0 \n", + "25 2.00 NaN \n", + "26 1.00 1.0 \n", + "27 1.00 2.0 \n", + "28 1.00 NaN \n", + "29 2.00 NaN \n", + "... ... ... \n", + "75861 NaN 22.0 \n", + "75862 NaN 16.0 \n", + "75863 1.00 NaN \n", + "75864 459.00 NaN \n", + "75865 1.00 NaN \n", + "75866 1.00 NaN \n", + "75867 2.00 NaN \n", + "75868 2.00 NaN \n", + "75869 8.00 NaN \n", + "75870 NaN 8.0 \n", + "75871 NaN 21.0 \n", + "75872 NaN 1.0 \n", + "75873 NaN 1500.0 \n", + "75874 NaN 2.0 \n", + "75875 NaN 1500.0 \n", + "75876 NaN 50.0 \n", + "75877 NaN 1.0 \n", + "75878 NaN 7.0 \n", + "75879 NaN 2.0 \n", + "75880 19.55 NaN \n", + "75881 NaN 5.0 \n", + "75882 NaN 96.0 \n", + "75883 NaN 452.0 \n", + "75884 20.30 NaN \n", + "75885 17.66 NaN \n", + "75886 8.67 NaN \n", + "75887 NaN 20.3 \n", + "75888 200.00 NaN \n", + "75889 NaN 635.1 \n", + "75890 NaN 480.0 \n", + "\n", + " Term Unit Purpose Source \n", + "0 bodies NaN T C \n", + "1 bodies NaN Q O \n", + "2 feathers NaN S W \n", + "3 specimens NaN S W \n", + "4 specimens NaN S W \n", + "5 bodies NaN Q O \n", + "6 feathers NaN S C \n", + "7 feathers NaN S U \n", + "8 feathers NaN S W \n", + "9 live NaN T C \n", + "10 eggs (live) kg T D \n", + "11 caviar g P I \n", + "12 live 
NaN T D \n", + "13 live NaN Z C \n", + "14 live NaN Z F \n", + "15 live NaN Z C \n", + "16 live NaN B C \n", + "17 live NaN Z C \n", + "18 bodies NaN E U \n", + "19 specimens NaN E C \n", + "20 live NaN Z C \n", + "21 live NaN Z C \n", + "22 bodies NaN E U \n", + "23 live NaN Z F \n", + "24 feathers NaN S I \n", + "25 live NaN Z I \n", + "26 bodies NaN S C \n", + "27 bodies NaN S C \n", + "28 bodies NaN Q C \n", + "29 live NaN Z I \n", + "... ... ... ... ... \n", + "75861 specimens NaN S W \n", + "75862 specimens NaN S W \n", + "75863 trophies NaN H W \n", + "75864 skin pieces NaN T W \n", + "75865 live NaN T C \n", + "75866 leather products (small) NaN T W \n", + "75867 leather products (small) NaN T W \n", + "75868 leather products (small) NaN T W \n", + "75869 leather products (small) NaN T W \n", + "75870 leather products (small) NaN T W \n", + "75871 live NaN T C \n", + "75872 leather products (small) NaN T W \n", + "75873 live NaN T A \n", + "75874 specimens NaN S W \n", + "75875 live NaN T A \n", + "75876 raw corals kg S W \n", + "75877 skins NaN H W \n", + "75878 trophies NaN H W \n", + "75879 live NaN T F \n", + "75880 sawn wood m3 T W \n", + "75881 bones NaN S W \n", + "75882 hair NaN S W \n", + "75883 specimens NaN S W \n", + "75884 timber m3 T A \n", + "75885 timber m3 T A \n", + "75886 sawn wood m3 T A \n", + "75887 timber m3 T W \n", + "75888 specimens NaN T F \n", + "75889 musk kg T R \n", + "75890 musk kg T R \n", + "\n", + "[75891 rows x 16 columns]" + ] + }, + "execution_count": 344, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe = pd.read_csv(\"data/data.csv\", skipinitialspace=True, dtype={\n", + " 'Importer reported quantity': float,\n", + " 'Exporter reported quantity': float\n", + "})\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Formatting the data\n", + "The Year and App. 
columns probably aren't going to matter to us in how we classify these records, so let's drop those first...\n", + "We'll also remove any additional whitespace from the column names to make things easier to deal with later on..." + ] + }, + { + "cell_type": "code", + "execution_count": 345, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
0Aquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZNaN1.0bodiesNaNTC
1Aquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRSNaN1.0bodiesNaNQO
2Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0feathersNaNSW
3Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaNNaN43.0specimensNaNSW
4Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.00NaNspecimensNaNSW
5Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRSNaN1.0bodiesNaNQO
6Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN12.0feathersNaNSC
7Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN4.0feathersNaNSU
8Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaNNaN2.0feathersNaNSW
9Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaNNaN4.0liveNaNTC
10Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaNNaN3.0eggs (live)kgTD
11Acipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.00NaNcaviargPI
12Agave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaNNaN1.0liveNaNTD
13Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaNNaN2.0liveNaNZC
14Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
15Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaNNaN2.0liveNaNZC
16Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.00NaNliveNaNBC
17Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaNNaN1.0liveNaNZC
18Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaNNaN1.0bodiesNaNEU
19Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaNNaN1.0specimensNaNEC
20Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaNNaN1.0liveNaNZC
21Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
22Alligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaNNaN1.0bodiesNaNEU
23Melanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.00NaNliveNaNZF
24Anas laysanensisAvesAnseriformesAnatidaeAnasCAUSXXNaN3.0feathersNaNSI
25Anas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.00NaNliveNaNZI
26Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
27Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
28Branta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.00NaNbodiesNaNQC
29Branta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.00NaNliveNaNZI
.............................................
75861Thalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaNNaN22.0specimensNaNSW
75862Threnetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaNNaN16.0specimensNaNSW
75863Ursus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.00NaNtrophiesNaNHW
75864Varanus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.00NaNskin piecesNaNTW
75865Varanus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.00NaNliveNaNTC
75866Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.00NaNleather products (small)NaNTW
75867Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.00NaNleather products (small)NaNTW
75868Varanus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.00NaNleather products (small)NaNTW
75869Varanus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.00NaNleather products (small)NaNTW
75870Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCOIDNaN8.0leather products (small)NaNTW
75871Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaNNaN21.0liveNaNTC
75872Varanus salvatorReptiliaSauriaVaranidaeVaranusXXCOIDNaN1.0leather products (small)NaNTW
75873Dioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaNNaN1500.0liveNaNTA
75874Zamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaNNaN2.0specimensNaNSW
75875Zamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaNNaN1500.0liveNaNTA
75876Scleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaNNaN50.0raw coralskgSW
75877Canis aureusMammaliaCarnivoraCanidaeCanisDEETNaNNaN1.0skinsNaNHW
75878Canis aureusMammaliaCarnivoraCanidaeCanisUSETNaNNaN7.0trophiesNaNHW
75879Sarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaNNaN2.0liveNaNTF
75880Dipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.55NaNsawn woodm3TW
75881Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN5.0bonesNaNSW
75882Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN96.0hairNaNSW
75883Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaNNaN452.0specimensNaNSW
75884Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.30NaNtimberm3TA
75885Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.66NaNtimberm3TA
75886Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.67NaNsawn woodm3TA
75887Cedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaNNaN20.3timberm3TW
75888Daboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.00NaNspecimensNaNTF
75889Civettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaNNaN635.1muskkgTR
75890Civettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaNNaN480.0muskkgTR
\n", + "

75891 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Taxon Class Order Family \\\n", + "0 Aquila heliaca Aves Falconiformes Accipitridae \n", + "1 Aquila heliaca Aves Falconiformes Accipitridae \n", + "2 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "3 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "4 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "5 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "6 Harpia harpyja Aves Falconiformes Accipitridae \n", + "7 Harpia harpyja Aves Falconiformes Accipitridae \n", + "8 Harpia harpyja Aves Falconiformes Accipitridae \n", + "9 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "10 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "11 Acipenser sturio Actinopteri Acipenseriformes Acipenseridae \n", + "12 Agave parviflora NaN Liliales Agavaceae \n", + "13 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "14 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "15 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "16 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "17 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "18 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "19 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "20 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "21 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "22 Alligator sinensis Reptilia Crocodylia Alligatoridae \n", + "23 Melanosuchus niger Reptilia Crocodylia Alligatoridae \n", + "24 Anas laysanensis Aves Anseriformes Anatidae \n", + "25 Anas laysanensis Aves Anseriformes Anatidae \n", + "26 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "27 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "28 Branta sandvicensis Aves Anseriformes Anatidae \n", + "29 Branta sandvicensis Aves Anseriformes Anatidae \n", + "... ... ... ... ... 
\n", + "75861 Thalurania furcata Aves Apodiformes Trochilidae \n", + "75862 Threnetes niger Aves Apodiformes Trochilidae \n", + "75863 Ursus arctos Mammalia Carnivora Ursidae \n", + "75864 Varanus niloticus Reptilia Sauria Varanidae \n", + "75865 Varanus salvator Reptilia Sauria Varanidae \n", + "75866 Varanus salvator Reptilia Sauria Varanidae \n", + "75867 Varanus salvator Reptilia Sauria Varanidae \n", + "75868 Varanus salvator Reptilia Sauria Varanidae \n", + "75869 Varanus salvator Reptilia Sauria Varanidae \n", + "75870 Varanus salvator Reptilia Sauria Varanidae \n", + "75871 Varanus salvator Reptilia Sauria Varanidae \n", + "75872 Varanus salvator Reptilia Sauria Varanidae \n", + "75873 Dioon spinulosum NaN Cycadales Zamiaceae \n", + "75874 Zamiaceae spp. NaN Cycadales Zamiaceae \n", + "75875 Zamia integrifolia NaN Cycadales Zamiaceae \n", + "75876 Scleractinia spp. Anthozoa Scleractinia NaN \n", + "75877 Canis aureus Mammalia Carnivora Canidae \n", + "75878 Canis aureus Mammalia Carnivora Canidae \n", + "75879 Sarcoramphus papa Aves Falconiformes Cathartidae \n", + "75880 Dipteryx panamensis NaN Fabales Leguminosae \n", + "75881 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75882 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75883 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75884 Cedrela odorata NaN Sapindales Meliaceae \n", + "75885 Cedrela odorata NaN Sapindales Meliaceae \n", + "75886 Cedrela odorata NaN Sapindales Meliaceae \n", + "75887 Cedrela odorata NaN Sapindales Meliaceae \n", + "75888 Daboia russelii Reptilia Serpentes Viperidae \n", + "75889 Civettictis civetta Mammalia Carnivora Viverridae \n", + "75890 Civettictis civetta Mammalia Carnivora Viverridae \n", + "\n", + " Genus Importer Exporter Origin Importer reported quantity \\\n", + "0 Aquila TR NL CZ NaN \n", + "1 Aquila XV RS RS NaN \n", + "2 Haliaeetus BE NO NaN NaN \n", + "3 Haliaeetus BE NO NaN NaN \n", + "4 Haliaeetus DK IS NaN 700.00 \n", + "5 
Haliaeetus XV RS RS NaN \n", + "6 Harpia BR FR NaN NaN \n", + "7 Harpia BR FR NaN NaN \n", + "8 Harpia BR FR NaN NaN \n", + "9 Acipenser CH DE NaN NaN \n", + "10 Acipenser TW CA NaN NaN \n", + "11 Acipenser US IR NaN 100.00 \n", + "12 Agave US TH NaN NaN \n", + "13 Ailurus AU NZ NaN NaN \n", + "14 Ailurus CA US NaN 1.00 \n", + "15 Ailurus IL DE NaN NaN \n", + "16 Ailurus JP US NaN 1.00 \n", + "17 Ailurus JP US NaN NaN \n", + "18 Ailurus KP CN NaN NaN \n", + "19 Ailurus KR CN NaN NaN \n", + "20 Ailurus KR JP NaN NaN \n", + "21 Ailurus US CA NaN 5.00 \n", + "22 Alligator KP CN NaN NaN \n", + "23 Melanosuchus US DK NaN 10.00 \n", + "24 Anas CA US XX NaN \n", + "25 Anas MC FR XX 2.00 \n", + "26 Asarcornis GB JE GB 1.00 \n", + "27 Asarcornis GB JE NaN 1.00 \n", + "28 Branta CN GB NaN 1.00 \n", + "29 Branta MC FR XX 2.00 \n", + "... ... ... ... ... ... \n", + "75861 Thalurania US PE NaN NaN \n", + "75862 Threnetes US PE NaN NaN \n", + "75863 Ursus BG RU NaN 1.00 \n", + "75864 Varanus MG FR TD 459.00 \n", + "75865 Varanus CR US NaN 1.00 \n", + "75866 Varanus MO FR ID 1.00 \n", + "75867 Varanus MO FR MY 2.00 \n", + "75868 Varanus MO GB ID 2.00 \n", + "75869 Varanus MO IT ID 8.00 \n", + "75870 Varanus US CO ID NaN \n", + "75871 Varanus US CR NaN NaN \n", + "75872 Varanus XX CO ID NaN \n", + "75873 Dioon NL CR NaN NaN \n", + "75874 NaN DE AO NaN NaN \n", + "75875 Zamia NL CR NaN NaN \n", + "75876 NaN DE AO NaN NaN \n", + "75877 Canis DE ET NaN NaN \n", + "75878 Canis US ET NaN NaN \n", + "75879 Sarcoramphus JP PE NaN NaN \n", + "75880 Dipteryx CR NI NaN 19.55 \n", + "75881 Choloepus US CR NaN NaN \n", + "75882 Choloepus US CR NaN NaN \n", + "75883 Choloepus US CR NaN NaN \n", + "75884 Cedrela CR IN NaN 20.30 \n", + "75885 Cedrela CR PR NaN 17.66 \n", + "75886 Cedrela CR US NaN 8.67 \n", + "75887 Cedrela IN CR NaN NaN \n", + "75888 Daboia RS IT US 200.00 \n", + "75889 Civettictis FR ET NaN NaN \n", + "75890 Civettictis KR ET NaN NaN \n", + "\n", + " Exporter reported quantity 
Term Unit Purpose \\\n", + "0 1.0 bodies NaN T \n", + "1 1.0 bodies NaN Q \n", + "2 43.0 feathers NaN S \n", + "3 43.0 specimens NaN S \n", + "4 NaN specimens NaN S \n", + "5 1.0 bodies NaN Q \n", + "6 12.0 feathers NaN S \n", + "7 4.0 feathers NaN S \n", + "8 2.0 feathers NaN S \n", + "9 4.0 live NaN T \n", + "10 3.0 eggs (live) kg T \n", + "11 NaN caviar g P \n", + "12 1.0 live NaN T \n", + "13 2.0 live NaN Z \n", + "14 1.0 live NaN Z \n", + "15 2.0 live NaN Z \n", + "16 NaN live NaN B \n", + "17 1.0 live NaN Z \n", + "18 1.0 bodies NaN E \n", + "19 1.0 specimens NaN E \n", + "20 1.0 live NaN Z \n", + "21 5.0 live NaN Z \n", + "22 1.0 bodies NaN E \n", + "23 NaN live NaN Z \n", + "24 3.0 feathers NaN S \n", + "25 NaN live NaN Z \n", + "26 1.0 bodies NaN S \n", + "27 2.0 bodies NaN S \n", + "28 NaN bodies NaN Q \n", + "29 NaN live NaN Z \n", + "... ... ... ... ... \n", + "75861 22.0 specimens NaN S \n", + "75862 16.0 specimens NaN S \n", + "75863 NaN trophies NaN H \n", + "75864 NaN skin pieces NaN T \n", + "75865 NaN live NaN T \n", + "75866 NaN leather products (small) NaN T \n", + "75867 NaN leather products (small) NaN T \n", + "75868 NaN leather products (small) NaN T \n", + "75869 NaN leather products (small) NaN T \n", + "75870 8.0 leather products (small) NaN T \n", + "75871 21.0 live NaN T \n", + "75872 1.0 leather products (small) NaN T \n", + "75873 1500.0 live NaN T \n", + "75874 2.0 specimens NaN S \n", + "75875 1500.0 live NaN T \n", + "75876 50.0 raw corals kg S \n", + "75877 1.0 skins NaN H \n", + "75878 7.0 trophies NaN H \n", + "75879 2.0 live NaN T \n", + "75880 NaN sawn wood m3 T \n", + "75881 5.0 bones NaN S \n", + "75882 96.0 hair NaN S \n", + "75883 452.0 specimens NaN S \n", + "75884 NaN timber m3 T \n", + "75885 NaN timber m3 T \n", + "75886 NaN sawn wood m3 T \n", + "75887 20.3 timber m3 T \n", + "75888 NaN specimens NaN T \n", + "75889 635.1 musk kg T \n", + "75890 480.0 musk kg T \n", + "\n", + " Source \n", + "0 C \n", + "1 O \n", + "2 
W \n", + "3 W \n", + "4 W \n", + "5 O \n", + "6 C \n", + "7 U \n", + "8 W \n", + "9 C \n", + "10 D \n", + "11 I \n", + "12 D \n", + "13 C \n", + "14 F \n", + "15 C \n", + "16 C \n", + "17 C \n", + "18 U \n", + "19 C \n", + "20 C \n", + "21 C \n", + "22 U \n", + "23 F \n", + "24 I \n", + "25 I \n", + "26 C \n", + "27 C \n", + "28 C \n", + "29 I \n", + "... ... \n", + "75861 W \n", + "75862 W \n", + "75863 W \n", + "75864 W \n", + "75865 C \n", + "75866 W \n", + "75867 W \n", + "75868 W \n", + "75869 W \n", + "75870 W \n", + "75871 C \n", + "75872 W \n", + "75873 A \n", + "75874 W \n", + "75875 A \n", + "75876 W \n", + "75877 W \n", + "75878 W \n", + "75879 F \n", + "75880 W \n", + "75881 W \n", + "75882 W \n", + "75883 W \n", + "75884 A \n", + "75885 A \n", + "75886 A \n", + "75887 W \n", + "75888 F \n", + "75889 R \n", + "75890 R \n", + "\n", + "[75891 rows x 14 columns]" + ] + }, + "execution_count": 345, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.columns = dataframe.columns.str.strip()\n", + "dataframe = dataframe.drop(columns=['Year', 'App.'])\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're going to have to replace those NaN values in the reported quantities columns..." + ] + }, + { + "cell_type": "code", + "execution_count": 346, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TaxonClassOrderFamilyGenusImporterExporterOriginImporter reported quantityExporter reported quantityTermUnitPurposeSource
0Aquila heliacaAvesFalconiformesAccipitridaeAquilaTRNLCZ0.001.0bodiesNaNTC
1Aquila heliacaAvesFalconiformesAccipitridaeAquilaXVRSRS0.001.0bodiesNaNQO
2Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaN0.0043.0feathersNaNSW
3Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusBENONaN0.0043.0specimensNaNSW
4Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusDKISNaN700.000.0specimensNaNSW
5Haliaeetus albicillaAvesFalconiformesAccipitridaeHaliaeetusXVRSRS0.001.0bodiesNaNQO
6Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.0012.0feathersNaNSC
7Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.004.0feathersNaNSU
8Harpia harpyjaAvesFalconiformesAccipitridaeHarpiaBRFRNaN0.002.0feathersNaNSW
9Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserCHDENaN0.004.0liveNaNTC
10Acipenser brevirostrumActinopteriAcipenseriformesAcipenseridaeAcipenserTWCANaN0.003.0eggs (live)kgTD
11Acipenser sturioActinopteriAcipenseriformesAcipenseridaeAcipenserUSIRNaN100.000.0caviargPI
12Agave parvifloraNaNLilialesAgavaceaeAgaveUSTHNaN0.001.0liveNaNTD
13Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusAUNZNaN0.002.0liveNaNZC
14Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusCAUSNaN1.001.0liveNaNZF
15Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusILDENaN0.002.0liveNaNZC
16Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN1.000.0liveNaNBC
17Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusJPUSNaN0.001.0liveNaNZC
18Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKPCNNaN0.001.0bodiesNaNEU
19Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRCNNaN0.001.0specimensNaNEC
20Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusKRJPNaN0.001.0liveNaNZC
21Ailurus fulgensMammaliaCarnivoraAiluridaeAilurusUSCANaN5.005.0liveNaNZC
22Alligator sinensisReptiliaCrocodyliaAlligatoridaeAlligatorKPCNNaN0.001.0bodiesNaNEU
23Melanosuchus nigerReptiliaCrocodyliaAlligatoridaeMelanosuchusUSDKNaN10.000.0liveNaNZF
24Anas laysanensisAvesAnseriformesAnatidaeAnasCAUSXX0.003.0feathersNaNSI
25Anas laysanensisAvesAnseriformesAnatidaeAnasMCFRXX2.000.0liveNaNZI
26Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJEGB1.001.0bodiesNaNSC
27Asarcornis scutulataAvesAnseriformesAnatidaeAsarcornisGBJENaN1.002.0bodiesNaNSC
28Branta sandvicensisAvesAnseriformesAnatidaeBrantaCNGBNaN1.000.0bodiesNaNQC
29Branta sandvicensisAvesAnseriformesAnatidaeBrantaMCFRXX2.000.0liveNaNZI
.............................................
75861Thalurania furcataAvesApodiformesTrochilidaeThaluraniaUSPENaN0.0022.0specimensNaNSW
75862Threnetes nigerAvesApodiformesTrochilidaeThrenetesUSPENaN0.0016.0specimensNaNSW
75863Ursus arctosMammaliaCarnivoraUrsidaeUrsusBGRUNaN1.000.0trophiesNaNHW
75864Varanus niloticusReptiliaSauriaVaranidaeVaranusMGFRTD459.000.0skin piecesNaNTW
75865Varanus salvatorReptiliaSauriaVaranidaeVaranusCRUSNaN1.000.0liveNaNTC
75866Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRID1.000.0leather products (small)NaNTW
75867Varanus salvatorReptiliaSauriaVaranidaeVaranusMOFRMY2.000.0leather products (small)NaNTW
75868Varanus salvatorReptiliaSauriaVaranidaeVaranusMOGBID2.000.0leather products (small)NaNTW
75869Varanus salvatorReptiliaSauriaVaranidaeVaranusMOITID8.000.0leather products (small)NaNTW
75870Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCOID0.008.0leather products (small)NaNTW
75871Varanus salvatorReptiliaSauriaVaranidaeVaranusUSCRNaN0.0021.0liveNaNTC
75872Varanus salvatorReptiliaSauriaVaranidaeVaranusXXCOID0.001.0leather products (small)NaNTW
75873Dioon spinulosumNaNCycadalesZamiaceaeDioonNLCRNaN0.001500.0liveNaNTA
75874Zamiaceae spp.NaNCycadalesZamiaceaeNaNDEAONaN0.002.0specimensNaNSW
75875Zamia integrifoliaNaNCycadalesZamiaceaeZamiaNLCRNaN0.001500.0liveNaNTA
75876Scleractinia spp.AnthozoaScleractiniaNaNNaNDEAONaN0.0050.0raw coralskgSW
75877Canis aureusMammaliaCarnivoraCanidaeCanisDEETNaN0.001.0skinsNaNHW
75878Canis aureusMammaliaCarnivoraCanidaeCanisUSETNaN0.007.0trophiesNaNHW
75879Sarcoramphus papaAvesFalconiformesCathartidaeSarcoramphusJPPENaN0.002.0liveNaNTF
75880Dipteryx panamensisNaNFabalesLeguminosaeDipteryxCRNINaN19.550.0sawn woodm3TW
75881Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.005.0bonesNaNSW
75882Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.0096.0hairNaNSW
75883Choloepus hoffmanniMammaliaPilosaMegalonychidaeCholoepusUSCRNaN0.00452.0specimensNaNSW
75884Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRINNaN20.300.0timberm3TA
75885Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRPRNaN17.660.0timberm3TA
75886Cedrela odorataNaNSapindalesMeliaceaeCedrelaCRUSNaN8.670.0sawn woodm3TA
75887Cedrela odorataNaNSapindalesMeliaceaeCedrelaINCRNaN0.0020.3timberm3TW
75888Daboia russeliiReptiliaSerpentesViperidaeDaboiaRSITUS200.000.0specimensNaNTF
75889Civettictis civettaMammaliaCarnivoraViverridaeCivettictisFRETNaN0.00635.1muskkgTR
75890Civettictis civettaMammaliaCarnivoraViverridaeCivettictisKRETNaN0.00480.0muskkgTR
\n", + "

75891 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Taxon Class Order Family \\\n", + "0 Aquila heliaca Aves Falconiformes Accipitridae \n", + "1 Aquila heliaca Aves Falconiformes Accipitridae \n", + "2 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "3 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "4 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "5 Haliaeetus albicilla Aves Falconiformes Accipitridae \n", + "6 Harpia harpyja Aves Falconiformes Accipitridae \n", + "7 Harpia harpyja Aves Falconiformes Accipitridae \n", + "8 Harpia harpyja Aves Falconiformes Accipitridae \n", + "9 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "10 Acipenser brevirostrum Actinopteri Acipenseriformes Acipenseridae \n", + "11 Acipenser sturio Actinopteri Acipenseriformes Acipenseridae \n", + "12 Agave parviflora NaN Liliales Agavaceae \n", + "13 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "14 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "15 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "16 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "17 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "18 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "19 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "20 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "21 Ailurus fulgens Mammalia Carnivora Ailuridae \n", + "22 Alligator sinensis Reptilia Crocodylia Alligatoridae \n", + "23 Melanosuchus niger Reptilia Crocodylia Alligatoridae \n", + "24 Anas laysanensis Aves Anseriformes Anatidae \n", + "25 Anas laysanensis Aves Anseriformes Anatidae \n", + "26 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "27 Asarcornis scutulata Aves Anseriformes Anatidae \n", + "28 Branta sandvicensis Aves Anseriformes Anatidae \n", + "29 Branta sandvicensis Aves Anseriformes Anatidae \n", + "... ... ... ... ... 
\n", + "75861 Thalurania furcata Aves Apodiformes Trochilidae \n", + "75862 Threnetes niger Aves Apodiformes Trochilidae \n", + "75863 Ursus arctos Mammalia Carnivora Ursidae \n", + "75864 Varanus niloticus Reptilia Sauria Varanidae \n", + "75865 Varanus salvator Reptilia Sauria Varanidae \n", + "75866 Varanus salvator Reptilia Sauria Varanidae \n", + "75867 Varanus salvator Reptilia Sauria Varanidae \n", + "75868 Varanus salvator Reptilia Sauria Varanidae \n", + "75869 Varanus salvator Reptilia Sauria Varanidae \n", + "75870 Varanus salvator Reptilia Sauria Varanidae \n", + "75871 Varanus salvator Reptilia Sauria Varanidae \n", + "75872 Varanus salvator Reptilia Sauria Varanidae \n", + "75873 Dioon spinulosum NaN Cycadales Zamiaceae \n", + "75874 Zamiaceae spp. NaN Cycadales Zamiaceae \n", + "75875 Zamia integrifolia NaN Cycadales Zamiaceae \n", + "75876 Scleractinia spp. Anthozoa Scleractinia NaN \n", + "75877 Canis aureus Mammalia Carnivora Canidae \n", + "75878 Canis aureus Mammalia Carnivora Canidae \n", + "75879 Sarcoramphus papa Aves Falconiformes Cathartidae \n", + "75880 Dipteryx panamensis NaN Fabales Leguminosae \n", + "75881 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75882 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75883 Choloepus hoffmanni Mammalia Pilosa Megalonychidae \n", + "75884 Cedrela odorata NaN Sapindales Meliaceae \n", + "75885 Cedrela odorata NaN Sapindales Meliaceae \n", + "75886 Cedrela odorata NaN Sapindales Meliaceae \n", + "75887 Cedrela odorata NaN Sapindales Meliaceae \n", + "75888 Daboia russelii Reptilia Serpentes Viperidae \n", + "75889 Civettictis civetta Mammalia Carnivora Viverridae \n", + "75890 Civettictis civetta Mammalia Carnivora Viverridae \n", + "\n", + " Genus Importer Exporter Origin Importer reported quantity \\\n", + "0 Aquila TR NL CZ 0.00 \n", + "1 Aquila XV RS RS 0.00 \n", + "2 Haliaeetus BE NO NaN 0.00 \n", + "3 Haliaeetus BE NO NaN 0.00 \n", + "4 Haliaeetus DK IS NaN 700.00 \n", + 
"5 Haliaeetus XV RS RS 0.00 \n", + "6 Harpia BR FR NaN 0.00 \n", + "7 Harpia BR FR NaN 0.00 \n", + "8 Harpia BR FR NaN 0.00 \n", + "9 Acipenser CH DE NaN 0.00 \n", + "10 Acipenser TW CA NaN 0.00 \n", + "11 Acipenser US IR NaN 100.00 \n", + "12 Agave US TH NaN 0.00 \n", + "13 Ailurus AU NZ NaN 0.00 \n", + "14 Ailurus CA US NaN 1.00 \n", + "15 Ailurus IL DE NaN 0.00 \n", + "16 Ailurus JP US NaN 1.00 \n", + "17 Ailurus JP US NaN 0.00 \n", + "18 Ailurus KP CN NaN 0.00 \n", + "19 Ailurus KR CN NaN 0.00 \n", + "20 Ailurus KR JP NaN 0.00 \n", + "21 Ailurus US CA NaN 5.00 \n", + "22 Alligator KP CN NaN 0.00 \n", + "23 Melanosuchus US DK NaN 10.00 \n", + "24 Anas CA US XX 0.00 \n", + "25 Anas MC FR XX 2.00 \n", + "26 Asarcornis GB JE GB 1.00 \n", + "27 Asarcornis GB JE NaN 1.00 \n", + "28 Branta CN GB NaN 1.00 \n", + "29 Branta MC FR XX 2.00 \n", + "... ... ... ... ... ... \n", + "75861 Thalurania US PE NaN 0.00 \n", + "75862 Threnetes US PE NaN 0.00 \n", + "75863 Ursus BG RU NaN 1.00 \n", + "75864 Varanus MG FR TD 459.00 \n", + "75865 Varanus CR US NaN 1.00 \n", + "75866 Varanus MO FR ID 1.00 \n", + "75867 Varanus MO FR MY 2.00 \n", + "75868 Varanus MO GB ID 2.00 \n", + "75869 Varanus MO IT ID 8.00 \n", + "75870 Varanus US CO ID 0.00 \n", + "75871 Varanus US CR NaN 0.00 \n", + "75872 Varanus XX CO ID 0.00 \n", + "75873 Dioon NL CR NaN 0.00 \n", + "75874 NaN DE AO NaN 0.00 \n", + "75875 Zamia NL CR NaN 0.00 \n", + "75876 NaN DE AO NaN 0.00 \n", + "75877 Canis DE ET NaN 0.00 \n", + "75878 Canis US ET NaN 0.00 \n", + "75879 Sarcoramphus JP PE NaN 0.00 \n", + "75880 Dipteryx CR NI NaN 19.55 \n", + "75881 Choloepus US CR NaN 0.00 \n", + "75882 Choloepus US CR NaN 0.00 \n", + "75883 Choloepus US CR NaN 0.00 \n", + "75884 Cedrela CR IN NaN 20.30 \n", + "75885 Cedrela CR PR NaN 17.66 \n", + "75886 Cedrela CR US NaN 8.67 \n", + "75887 Cedrela IN CR NaN 0.00 \n", + "75888 Daboia RS IT US 200.00 \n", + "75889 Civettictis FR ET NaN 0.00 \n", + "75890 Civettictis KR ET NaN 0.00 \n", + 
"\n", + " Exporter reported quantity Term Unit Purpose \\\n", + "0 1.0 bodies NaN T \n", + "1 1.0 bodies NaN Q \n", + "2 43.0 feathers NaN S \n", + "3 43.0 specimens NaN S \n", + "4 0.0 specimens NaN S \n", + "5 1.0 bodies NaN Q \n", + "6 12.0 feathers NaN S \n", + "7 4.0 feathers NaN S \n", + "8 2.0 feathers NaN S \n", + "9 4.0 live NaN T \n", + "10 3.0 eggs (live) kg T \n", + "11 0.0 caviar g P \n", + "12 1.0 live NaN T \n", + "13 2.0 live NaN Z \n", + "14 1.0 live NaN Z \n", + "15 2.0 live NaN Z \n", + "16 0.0 live NaN B \n", + "17 1.0 live NaN Z \n", + "18 1.0 bodies NaN E \n", + "19 1.0 specimens NaN E \n", + "20 1.0 live NaN Z \n", + "21 5.0 live NaN Z \n", + "22 1.0 bodies NaN E \n", + "23 0.0 live NaN Z \n", + "24 3.0 feathers NaN S \n", + "25 0.0 live NaN Z \n", + "26 1.0 bodies NaN S \n", + "27 2.0 bodies NaN S \n", + "28 0.0 bodies NaN Q \n", + "29 0.0 live NaN Z \n", + "... ... ... ... ... \n", + "75861 22.0 specimens NaN S \n", + "75862 16.0 specimens NaN S \n", + "75863 0.0 trophies NaN H \n", + "75864 0.0 skin pieces NaN T \n", + "75865 0.0 live NaN T \n", + "75866 0.0 leather products (small) NaN T \n", + "75867 0.0 leather products (small) NaN T \n", + "75868 0.0 leather products (small) NaN T \n", + "75869 0.0 leather products (small) NaN T \n", + "75870 8.0 leather products (small) NaN T \n", + "75871 21.0 live NaN T \n", + "75872 1.0 leather products (small) NaN T \n", + "75873 1500.0 live NaN T \n", + "75874 2.0 specimens NaN S \n", + "75875 1500.0 live NaN T \n", + "75876 50.0 raw corals kg S \n", + "75877 1.0 skins NaN H \n", + "75878 7.0 trophies NaN H \n", + "75879 2.0 live NaN T \n", + "75880 0.0 sawn wood m3 T \n", + "75881 5.0 bones NaN S \n", + "75882 96.0 hair NaN S \n", + "75883 452.0 specimens NaN S \n", + "75884 0.0 timber m3 T \n", + "75885 0.0 timber m3 T \n", + "75886 0.0 sawn wood m3 T \n", + "75887 20.3 timber m3 T \n", + "75888 0.0 specimens NaN T \n", + "75889 635.1 musk kg T \n", + "75890 480.0 musk kg T \n", + "\n", + " 
Source \n", + "0 C \n", + "1 O \n", + "2 W \n", + "3 W \n", + "4 W \n", + "5 O \n", + "6 C \n", + "7 U \n", + "8 W \n", + "9 C \n", + "10 D \n", + "11 I \n", + "12 D \n", + "13 C \n", + "14 F \n", + "15 C \n", + "16 C \n", + "17 C \n", + "18 U \n", + "19 C \n", + "20 C \n", + "21 C \n", + "22 U \n", + "23 F \n", + "24 I \n", + "25 I \n", + "26 C \n", + "27 C \n", + "28 C \n", + "29 I \n", + "... ... \n", + "75861 W \n", + "75862 W \n", + "75863 W \n", + "75864 W \n", + "75865 C \n", + "75866 W \n", + "75867 W \n", + "75868 W \n", + "75869 W \n", + "75870 W \n", + "75871 C \n", + "75872 W \n", + "75873 A \n", + "75874 W \n", + "75875 A \n", + "75876 W \n", + "75877 W \n", + "75878 W \n", + "75879 F \n", + "75880 W \n", + "75881 W \n", + "75882 W \n", + "75883 W \n", + "75884 A \n", + "75885 A \n", + "75886 A \n", + "75887 W \n", + "75888 F \n", + "75889 R \n", + "75890 R \n", + "\n", + "[75891 rows x 14 columns]" + ] + }, + "execution_count": 346, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe['Importer reported quantity'].fillna(0, inplace=True)\n", + "dataframe['Exporter reported quantity'].fillna(0, inplace=True)\n", + "\n", + "dataframe" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have a lot of text data in the form of ISO country codes, and specialist categories. We'll need to encode these as one hot vectors in the next step so that our neural net can understand them. 
We'll grab a list of all the columns we'll need to encode first and remove the ones we don't want to encode (the numeric columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 347, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of Columns: 14\n", + "['Taxon', 'Class', 'Order', 'Family', 'Genus', 'Importer', 'Exporter', 'Origin', 'Term', 'Unit', 'Source']\n" + ] + } + ], + "source": [ + "print(\"Number of Columns: \", len(dataframe.columns))\n", + "columns = list(dataframe.columns)\n", + "columns.remove('Importer reported quantity')\n", + "columns.remove('Exporter reported quantity')\n", + "columns.remove('Purpose')\n", + "print(columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Encoding our labels and data\n", + "\n", + "In order to test the performance of our neural net, we'll need to split up our data into the data, and their corresponding classifications. \n", + "\n", + "The purpose column will be what we are going to attempt to predict (notice we removed it from the list of columns we'd like to one hot encode already).\n", + "\n", + "Let's pop off our labels from our dataframe, and keep them separate..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 348, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T\n", + "1 Q\n", + "2 S\n", + "3 S\n", + "4 S\n", + "5 Q\n", + "6 S\n", + "7 S\n", + "8 S\n", + "9 T\n", + "10 T\n", + "11 P\n", + "12 T\n", + "13 Z\n", + "14 Z\n", + "15 Z\n", + "16 B\n", + "17 Z\n", + "18 E\n", + "19 E\n", + "20 Z\n", + "21 Z\n", + "22 E\n", + "23 Z\n", + "24 S\n", + "25 Z\n", + "26 S\n", + "27 S\n", + "28 Q\n", + "29 Z\n", + " ..\n", + "75861 S\n", + "75862 S\n", + "75863 H\n", + "75864 T\n", + "75865 T\n", + "75866 T\n", + "75867 T\n", + "75868 T\n", + "75869 T\n", + "75870 T\n", + "75871 T\n", + "75872 T\n", + "75873 T\n", + "75874 S\n", + "75875 T\n", + "75876 S\n", + "75877 H\n", + "75878 H\n", + "75879 T\n", + "75880 T\n", + "75881 S\n", + "75882 S\n", + "75883 S\n", + "75884 T\n", + "75885 T\n", + "75886 T\n", + "75887 T\n", + "75888 T\n", + "75889 T\n", + "75890 T\n", + "Name: Purpose, Length: 75891, dtype: object" + ] + }, + "execution_count": 348, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels = dataframe.pop('Purpose')\n", + "\n", + "labels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll need to convert our classifications into one hot vectors..." + ] + }, + { + "cell_type": "code", + "execution_count": 349, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BEGHLMNPQSTZ
0000000000010
1000000001000
2000000000100
3000000000100
4000000000100
5000000001000
6000000000100
7000000000100
8000000000100
9000000000010
10000000000010
11000000010000
12000000000010
13000000000001
14000000000001
15000000000001
16100000000000
17000000000001
18010000000000
19010000000000
20000000000001
21000000000001
22010000000000
23000000000001
24000000000100
25000000000001
26000000000100
27000000000100
28000000001000
29000000000001
.......................................
75861000000000100
75862000000000100
75863000100000000
75864000000000010
75865000000000010
75866000000000010
75867000000000010
75868000000000010
75869000000000010
75870000000000010
75871000000000010
75872000000000010
75873000000000010
75874000000000100
75875000000000010
75876000000000100
75877000100000000
75878000100000000
75879000000000010
75880000000000010
75881000000000100
75882000000000100
75883000000000100
75884000000000010
75885000000000010
75886000000000010
75887000000000010
75888000000000010
75889000000000010
75890000000000010
\n", + "

75891 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " B E G H L M N P Q S T Z\n", + "0 0 0 0 0 0 0 0 0 0 0 1 0\n", + "1 0 0 0 0 0 0 0 0 1 0 0 0\n", + "2 0 0 0 0 0 0 0 0 0 1 0 0\n", + "3 0 0 0 0 0 0 0 0 0 1 0 0\n", + "4 0 0 0 0 0 0 0 0 0 1 0 0\n", + "5 0 0 0 0 0 0 0 0 1 0 0 0\n", + "6 0 0 0 0 0 0 0 0 0 1 0 0\n", + "7 0 0 0 0 0 0 0 0 0 1 0 0\n", + "8 0 0 0 0 0 0 0 0 0 1 0 0\n", + "9 0 0 0 0 0 0 0 0 0 0 1 0\n", + "10 0 0 0 0 0 0 0 0 0 0 1 0\n", + "11 0 0 0 0 0 0 0 1 0 0 0 0\n", + "12 0 0 0 0 0 0 0 0 0 0 1 0\n", + "13 0 0 0 0 0 0 0 0 0 0 0 1\n", + "14 0 0 0 0 0 0 0 0 0 0 0 1\n", + "15 0 0 0 0 0 0 0 0 0 0 0 1\n", + "16 1 0 0 0 0 0 0 0 0 0 0 0\n", + "17 0 0 0 0 0 0 0 0 0 0 0 1\n", + "18 0 1 0 0 0 0 0 0 0 0 0 0\n", + "19 0 1 0 0 0 0 0 0 0 0 0 0\n", + "20 0 0 0 0 0 0 0 0 0 0 0 1\n", + "21 0 0 0 0 0 0 0 0 0 0 0 1\n", + "22 0 1 0 0 0 0 0 0 0 0 0 0\n", + "23 0 0 0 0 0 0 0 0 0 0 0 1\n", + "24 0 0 0 0 0 0 0 0 0 1 0 0\n", + "25 0 0 0 0 0 0 0 0 0 0 0 1\n", + "26 0 0 0 0 0 0 0 0 0 1 0 0\n", + "27 0 0 0 0 0 0 0 0 0 1 0 0\n", + "28 0 0 0 0 0 0 0 0 1 0 0 0\n", + "29 0 0 0 0 0 0 0 0 0 0 0 1\n", + "... .. .. .. .. .. .. .. .. .. .. .. 
..\n", + "75861 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75862 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75863 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75864 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75865 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75866 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75867 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75868 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75869 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75870 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75871 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75872 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75873 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75874 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75875 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75876 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75877 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75878 0 0 0 1 0 0 0 0 0 0 0 0\n", + "75879 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75880 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75881 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75882 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75883 0 0 0 0 0 0 0 0 0 1 0 0\n", + "75884 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75885 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75886 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75887 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75888 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75889 0 0 0 0 0 0 0 0 0 0 1 0\n", + "75890 0 0 0 0 0 0 0 0 0 0 1 0\n", + "\n", + "[75891 rows x 12 columns]" + ] + }, + "execution_count": 349, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels = pd.get_dummies(labels)\n", + "\n", + "labels" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we'll create one hot vectors for the rest of our datatable and call this *data*" + ] + }, + { + "cell_type": "code", + "execution_count": 350, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Importer reported quantityExporter reported quantityTaxon_Abronia gramineaTaxon_Acampe papillosaTaxon_Acampe praemorsaTaxon_Acampe rigidaTaxon_Acampe spp.Taxon_Acanthastrea amakusensisTaxon_Acanthastrea bowerbankiTaxon_Acanthastrea echinata...Source_ASource_CSource_DSource_FSource_ISource_OSource_RSource_USource_WSource_X
00.001.000000000...0100000000
10.001.000000000...0000010000
20.0043.000000000...0000000010
30.0043.000000000...0000000010
4700.000.000000000...0000000010
50.001.000000000...0000010000
60.0012.000000000...0100000000
70.004.000000000...0000000100
80.002.000000000...0000000010
90.004.000000000...0100000000
100.003.000000000...0010000000
11100.000.000000000...0000100000
120.001.000000000...0010000000
130.002.000000000...0100000000
141.001.000000000...0001000000
150.002.000000000...0100000000
161.000.000000000...0100000000
170.001.000000000...0100000000
180.001.000000000...0000000100
190.001.000000000...0100000000
200.001.000000000...0100000000
215.005.000000000...0100000000
220.001.000000000...0000000100
2310.000.000000000...0001000000
240.003.000000000...0000100000
252.000.000000000...0000100000
261.001.000000000...0100000000
271.002.000000000...0100000000
281.000.000000000...0100000000
292.000.000000000...0000100000
..................................................................
758610.0022.000000000...0000000010
758620.0016.000000000...0000000010
758631.000.000000000...0000000010
75864459.000.000000000...0000000010
758651.000.000000000...0100000000
758661.000.000000000...0000000010
758672.000.000000000...0000000010
758682.000.000000000...0000000010
758698.000.000000000...0000000010
758700.008.000000000...0000000010
758710.0021.000000000...0100000000
758720.001.000000000...0000000010
758730.001500.000000000...1000000000
758740.002.000000000...0000000010
758750.001500.000000000...1000000000
758760.0050.000000000...0000000010
758770.001.000000000...0000000010
758780.007.000000000...0000000010
758790.002.000000000...0001000000
7588019.550.000000000...0000000010
758810.005.000000000...0000000010
758820.0096.000000000...0000000010
758830.00452.000000000...0000000010
7588420.300.000000000...1000000000
7588517.660.000000000...1000000000
758868.670.000000000...1000000000
758870.0020.300000000...0000000010
75888200.000.000000000...0001000000
758890.00635.100000000...0000001000
758900.00480.000000000...0000001000
\n", + "

75891 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " Importer reported quantity Exporter reported quantity \\\n", + "0 0.00 1.0 \n", + "1 0.00 1.0 \n", + "2 0.00 43.0 \n", + "3 0.00 43.0 \n", + "4 700.00 0.0 \n", + "5 0.00 1.0 \n", + "6 0.00 12.0 \n", + "7 0.00 4.0 \n", + "8 0.00 2.0 \n", + "9 0.00 4.0 \n", + "10 0.00 3.0 \n", + "11 100.00 0.0 \n", + "12 0.00 1.0 \n", + "13 0.00 2.0 \n", + "14 1.00 1.0 \n", + "15 0.00 2.0 \n", + "16 1.00 0.0 \n", + "17 0.00 1.0 \n", + "18 0.00 1.0 \n", + "19 0.00 1.0 \n", + "20 0.00 1.0 \n", + "21 5.00 5.0 \n", + "22 0.00 1.0 \n", + "23 10.00 0.0 \n", + "24 0.00 3.0 \n", + "25 2.00 0.0 \n", + "26 1.00 1.0 \n", + "27 1.00 2.0 \n", + "28 1.00 0.0 \n", + "29 2.00 0.0 \n", + "... ... ... \n", + "75861 0.00 22.0 \n", + "75862 0.00 16.0 \n", + "75863 1.00 0.0 \n", + "75864 459.00 0.0 \n", + "75865 1.00 0.0 \n", + "75866 1.00 0.0 \n", + "75867 2.00 0.0 \n", + "75868 2.00 0.0 \n", + "75869 8.00 0.0 \n", + "75870 0.00 8.0 \n", + "75871 0.00 21.0 \n", + "75872 0.00 1.0 \n", + "75873 0.00 1500.0 \n", + "75874 0.00 2.0 \n", + "75875 0.00 1500.0 \n", + "75876 0.00 50.0 \n", + "75877 0.00 1.0 \n", + "75878 0.00 7.0 \n", + "75879 0.00 2.0 \n", + "75880 19.55 0.0 \n", + "75881 0.00 5.0 \n", + "75882 0.00 96.0 \n", + "75883 0.00 452.0 \n", + "75884 20.30 0.0 \n", + "75885 17.66 0.0 \n", + "75886 8.67 0.0 \n", + "75887 0.00 20.3 \n", + "75888 200.00 0.0 \n", + "75889 0.00 635.1 \n", + "75890 0.00 480.0 \n", + "\n", + " Taxon_Abronia graminea Taxon_Acampe papillosa Taxon_Acampe praemorsa \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 0 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 
0 0 0 \n", + "... ... ... ... \n", + "75861 0 0 0 \n", + "75862 0 0 0 \n", + "75863 0 0 0 \n", + "75864 0 0 0 \n", + "75865 0 0 0 \n", + "75866 0 0 0 \n", + "75867 0 0 0 \n", + "75868 0 0 0 \n", + "75869 0 0 0 \n", + "75870 0 0 0 \n", + "75871 0 0 0 \n", + "75872 0 0 0 \n", + "75873 0 0 0 \n", + "75874 0 0 0 \n", + "75875 0 0 0 \n", + "75876 0 0 0 \n", + "75877 0 0 0 \n", + "75878 0 0 0 \n", + "75879 0 0 0 \n", + "75880 0 0 0 \n", + "75881 0 0 0 \n", + "75882 0 0 0 \n", + "75883 0 0 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 0 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + " Taxon_Acampe rigida Taxon_Acampe spp. Taxon_Acanthastrea amakusensis \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 0 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... \n", + "75861 0 0 0 \n", + "75862 0 0 0 \n", + "75863 0 0 0 \n", + "75864 0 0 0 \n", + "75865 0 0 0 \n", + "75866 0 0 0 \n", + "75867 0 0 0 \n", + "75868 0 0 0 \n", + "75869 0 0 0 \n", + "75870 0 0 0 \n", + "75871 0 0 0 \n", + "75872 0 0 0 \n", + "75873 0 0 0 \n", + "75874 0 0 0 \n", + "75875 0 0 0 \n", + "75876 0 0 0 \n", + "75877 0 0 0 \n", + "75878 0 0 0 \n", + "75879 0 0 0 \n", + "75880 0 0 0 \n", + "75881 0 0 0 \n", + "75882 0 0 0 \n", + "75883 0 0 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 0 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + " Taxon_Acanthastrea bowerbanki Taxon_Acanthastrea echinata ... \\\n", + "0 0 0 ... \n", + "1 0 0 ... \n", + "2 0 0 ... \n", + "3 0 0 ... 
\n", + "4 0 0 ... \n", + "5 0 0 ... \n", + "6 0 0 ... \n", + "7 0 0 ... \n", + "8 0 0 ... \n", + "9 0 0 ... \n", + "10 0 0 ... \n", + "11 0 0 ... \n", + "12 0 0 ... \n", + "13 0 0 ... \n", + "14 0 0 ... \n", + "15 0 0 ... \n", + "16 0 0 ... \n", + "17 0 0 ... \n", + "18 0 0 ... \n", + "19 0 0 ... \n", + "20 0 0 ... \n", + "21 0 0 ... \n", + "22 0 0 ... \n", + "23 0 0 ... \n", + "24 0 0 ... \n", + "25 0 0 ... \n", + "26 0 0 ... \n", + "27 0 0 ... \n", + "28 0 0 ... \n", + "29 0 0 ... \n", + "... ... ... ... \n", + "75861 0 0 ... \n", + "75862 0 0 ... \n", + "75863 0 0 ... \n", + "75864 0 0 ... \n", + "75865 0 0 ... \n", + "75866 0 0 ... \n", + "75867 0 0 ... \n", + "75868 0 0 ... \n", + "75869 0 0 ... \n", + "75870 0 0 ... \n", + "75871 0 0 ... \n", + "75872 0 0 ... \n", + "75873 0 0 ... \n", + "75874 0 0 ... \n", + "75875 0 0 ... \n", + "75876 0 0 ... \n", + "75877 0 0 ... \n", + "75878 0 0 ... \n", + "75879 0 0 ... \n", + "75880 0 0 ... \n", + "75881 0 0 ... \n", + "75882 0 0 ... \n", + "75883 0 0 ... \n", + "75884 0 0 ... \n", + "75885 0 0 ... \n", + "75886 0 0 ... \n", + "75887 0 0 ... \n", + "75888 0 0 ... \n", + "75889 0 0 ... \n", + "75890 0 0 ... 
\n", + "\n", + " Source_A Source_C Source_D Source_F Source_I Source_O Source_R \\\n", + "0 0 1 0 0 0 0 0 \n", + "1 0 0 0 0 0 1 0 \n", + "2 0 0 0 0 0 0 0 \n", + "3 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "5 0 0 0 0 0 1 0 \n", + "6 0 1 0 0 0 0 0 \n", + "7 0 0 0 0 0 0 0 \n", + "8 0 0 0 0 0 0 0 \n", + "9 0 1 0 0 0 0 0 \n", + "10 0 0 1 0 0 0 0 \n", + "11 0 0 0 0 1 0 0 \n", + "12 0 0 1 0 0 0 0 \n", + "13 0 1 0 0 0 0 0 \n", + "14 0 0 0 1 0 0 0 \n", + "15 0 1 0 0 0 0 0 \n", + "16 0 1 0 0 0 0 0 \n", + "17 0 1 0 0 0 0 0 \n", + "18 0 0 0 0 0 0 0 \n", + "19 0 1 0 0 0 0 0 \n", + "20 0 1 0 0 0 0 0 \n", + "21 0 1 0 0 0 0 0 \n", + "22 0 0 0 0 0 0 0 \n", + "23 0 0 0 1 0 0 0 \n", + "24 0 0 0 0 1 0 0 \n", + "25 0 0 0 0 1 0 0 \n", + "26 0 1 0 0 0 0 0 \n", + "27 0 1 0 0 0 0 0 \n", + "28 0 1 0 0 0 0 0 \n", + "29 0 0 0 0 1 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "75861 0 0 0 0 0 0 0 \n", + "75862 0 0 0 0 0 0 0 \n", + "75863 0 0 0 0 0 0 0 \n", + "75864 0 0 0 0 0 0 0 \n", + "75865 0 1 0 0 0 0 0 \n", + "75866 0 0 0 0 0 0 0 \n", + "75867 0 0 0 0 0 0 0 \n", + "75868 0 0 0 0 0 0 0 \n", + "75869 0 0 0 0 0 0 0 \n", + "75870 0 0 0 0 0 0 0 \n", + "75871 0 1 0 0 0 0 0 \n", + "75872 0 0 0 0 0 0 0 \n", + "75873 1 0 0 0 0 0 0 \n", + "75874 0 0 0 0 0 0 0 \n", + "75875 1 0 0 0 0 0 0 \n", + "75876 0 0 0 0 0 0 0 \n", + "75877 0 0 0 0 0 0 0 \n", + "75878 0 0 0 0 0 0 0 \n", + "75879 0 0 0 1 0 0 0 \n", + "75880 0 0 0 0 0 0 0 \n", + "75881 0 0 0 0 0 0 0 \n", + "75882 0 0 0 0 0 0 0 \n", + "75883 0 0 0 0 0 0 0 \n", + "75884 1 0 0 0 0 0 0 \n", + "75885 1 0 0 0 0 0 0 \n", + "75886 1 0 0 0 0 0 0 \n", + "75887 0 0 0 0 0 0 0 \n", + "75888 0 0 0 1 0 0 0 \n", + "75889 0 0 0 0 0 0 1 \n", + "75890 0 0 0 0 0 0 1 \n", + "\n", + " Source_U Source_W Source_X \n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 1 0 \n", + "3 0 1 0 \n", + "4 0 1 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 1 0 0 \n", + "8 0 1 0 \n", + "9 0 0 0 \n", + "10 0 0 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + 
"15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 1 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 1 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... \n", + "75861 0 1 0 \n", + "75862 0 1 0 \n", + "75863 0 1 0 \n", + "75864 0 1 0 \n", + "75865 0 0 0 \n", + "75866 0 1 0 \n", + "75867 0 1 0 \n", + "75868 0 1 0 \n", + "75869 0 1 0 \n", + "75870 0 1 0 \n", + "75871 0 0 0 \n", + "75872 0 1 0 \n", + "75873 0 0 0 \n", + "75874 0 1 0 \n", + "75875 0 0 0 \n", + "75876 0 1 0 \n", + "75877 0 1 0 \n", + "75878 0 1 0 \n", + "75879 0 0 0 \n", + "75880 0 1 0 \n", + "75881 0 1 0 \n", + "75882 0 1 0 \n", + "75883 0 1 0 \n", + "75884 0 0 0 \n", + "75885 0 0 0 \n", + "75886 0 0 0 \n", + "75887 0 1 0 \n", + "75888 0 0 0 \n", + "75889 0 0 0 \n", + "75890 0 0 0 \n", + "\n", + "[75891 rows x 9400 columns]" + ] + }, + "execution_count": 350, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.get_dummies(dataframe, columns=columns)\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our data is looking better, but to make things easier on our model, we can scale everything to between 0-1..." + ] + }, + { + "cell_type": "code", + "execution_count": 351, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...9390939193929393939493959396939793989399
00.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
10.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
20.000000e+001.995951e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
30.000000e+001.995951e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
43.585151e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
50.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.01.00.00.00.00.0
60.000000e+005.570095e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
70.000000e+001.856698e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
80.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
90.000000e+001.856698e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
100.000000e+001.392524e-070.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.00.0
115.121645e-060.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
120.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.00.0
130.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
145.121645e-084.641746e-080.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
150.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
165.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
170.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
180.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
190.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
200.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
212.560822e-072.320873e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
220.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.01.00.00.0
235.121645e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
240.000000e+001.392524e-070.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
251.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
265.121645e-084.641746e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
275.121645e-089.283492e-080.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
285.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
291.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.01.00.00.00.00.00.0
..................................................................
758610.000000e+001.021184e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758620.000000e+007.426793e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758635.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758642.350835e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758655.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
758665.121645e-080.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758671.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758681.024329e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758694.097316e-070.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758700.000000e+003.713397e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758710.000000e+009.747666e-070.00.00.00.00.00.00.00.0...0.01.00.00.00.00.00.00.00.00.0
758720.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758730.000000e+006.962619e-050.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758740.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758750.000000e+006.962619e-050.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758760.000000e+002.320873e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758770.000000e+004.641746e-080.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758780.000000e+003.249222e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758790.000000e+009.283492e-080.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
758801.001282e-060.000000e+000.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758810.000000e+002.320873e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758820.000000e+004.456076e-060.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758830.000000e+002.098069e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758841.039694e-060.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758859.044825e-070.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758864.440466e-070.000000e+000.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.00.0
758870.000000e+009.422744e-070.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.01.00.0
758881.024329e-050.000000e+000.00.00.00.00.00.00.00.0...0.00.00.01.00.00.00.00.00.00.0
758890.000000e+002.947973e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.01.00.00.00.0
758900.000000e+002.228038e-050.00.00.00.00.00.00.00.0...0.00.00.00.00.00.01.00.00.00.0
\n", + "

75891 rows × 9400 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.000000e+00 1.995951e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.000000e+00 1.995951e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 3.585151e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "5 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "6 0.000000e+00 5.570095e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7 0.000000e+00 1.856698e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "8 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "9 0.000000e+00 1.856698e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.000000e+00 1.392524e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 5.121645e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14 5.121645e-08 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "18 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "19 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "20 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 2.560822e-07 2.320873e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "22 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "23 5.121645e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "24 0.000000e+00 1.392524e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "25 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "26 5.121645e-08 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "27 5.121645e-08 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "28 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "29 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + 
"... ... ... ... ... ... ... ... ... ... \n", + "75861 0.000000e+00 1.021184e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75862 0.000000e+00 7.426793e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75863 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75864 2.350835e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75865 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75866 5.121645e-08 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75867 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75868 1.024329e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75869 4.097316e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75870 0.000000e+00 3.713397e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75871 0.000000e+00 9.747666e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75872 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75873 0.000000e+00 6.962619e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75874 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75875 0.000000e+00 6.962619e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75876 0.000000e+00 2.320873e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75877 0.000000e+00 4.641746e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75878 0.000000e+00 3.249222e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75879 0.000000e+00 9.283492e-08 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75880 1.001282e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75881 0.000000e+00 2.320873e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75882 0.000000e+00 4.456076e-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75883 0.000000e+00 2.098069e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75884 1.039694e-06 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75885 9.044825e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75886 4.440466e-07 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75887 0.000000e+00 9.422744e-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75888 1.024329e-05 0.000000e+00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75889 0.000000e+00 2.947973e-05 0.0 0.0 0.0 
0.0 0.0 0.0 0.0 \n", + "75890 0.000000e+00 2.228038e-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " 9 ... 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 \n", + "0 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "5 0.0 ... 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 \n", + "6 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "7 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "8 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "9 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "10 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "11 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "12 0.0 ... 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "13 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "14 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "15 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "16 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "17 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "18 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "19 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "20 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "21 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "22 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", + "23 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "24 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "25 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "26 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "27 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "28 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "29 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", + "... ... ... ... ... ... ... ... ... ... ... ... ... \n", + "75861 0.0 ... 
0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75862 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75863 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75864 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75865 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75866 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75867 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75868 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75869 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75870 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75871 0.0 ... 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75872 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75873 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75874 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75875 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75876 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75877 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75878 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75879 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75880 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75881 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75882 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75883 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75884 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75885 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75886 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75887 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 \n", + "75888 0.0 ... 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "75889 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "75890 0.0 ... 
0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "\n", + "[75891 rows x 9400 columns]" + ] + }, + "execution_count": 351, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaler = MinMaxScaler(feature_range=(0, 1))\n", + "data_scaled = scaler.fit_transform(data)\n", + "data = pd.DataFrame(data_scaled)\n", + "\n", + "data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a train / test split\n", + "\n", + "In order to evaluate our model, we'll split our data into two groups: a training set, which the neural net will learn from, and a test set, which the neural net will not see during training but will be evaluated against once trained." + ] + }, + { + "cell_type": "code", + "execution_count": 352, + "metadata": {}, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 353, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "Using TensorFlow backend.\n" + "X_train shape: (60712, 9400)\n", + "X_test shape: (15179, 9400)\n", + "y_train shape: (60712, 12)\n", + "y_test shape: (15179, 12)\n" ] - }, + } + ], + "source": [ + "print(\"X_train shape: \", X_train.shape)\n", + "print(\"X_test shape: \", X_test.shape)\n", + "print(\"y_train shape: \", y_train.shape)\n", + "print(\"y_test shape: \", y_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Building a simple model\n", + "\n", + "We'll build a simple neural network which accepts our 9400 input features and passes them to 6279 neurons in a hidden layer (two thirds of the input layer size plus the output layer size is a common rule of thumb for how many neurons a hidden layer should have). 
Finally, our hidden layer is passed to an output layer representing our categories (so 12 neurons in this case) and uses a softmax activation function to turn our predictions into a probability for each class." + ] + }, + { + "cell_type": "code", + "execution_count": 354, + "metadata": {}, + "outputs": [], + "source": [ + "def build_model():\n", + " model = Sequential()\n", + " model.add(Dense(6279, input_dim=X_train.shape[1], activation='relu'))\n", + " model.add(Dropout(0.1))\n", + " model.add(Dense(y_train.shape[1], activation='softmax'))\n", + " model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n", + " \n", + " return model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hyperparameters" + ] + }, + { + "cell_type": "code", + "execution_count": 355, + "metadata": {}, + "outputs": [], + "source": [ + "epochs = 5\n", + "batch_size = 2000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's train our simple model..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'tensorflow'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDense\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrappers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscikit_learn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mKerasClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mabsolute_import\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mutils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mactivations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mapplications\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/utils/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdata_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mio_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconv_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m# Globally-importable utils.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/utils/conv_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmoves\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbackend\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mK\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/backend/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0m_BACKEND\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'tensorflow'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Using TensorFlow backend.\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mtensorflow_backend\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unknown backend: '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_BACKEND\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mmoving_averages\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mops\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensor_array_ops\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'" + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + " 6000/60712 [=>............................] - ETA: 3:11 - loss: 0.1748 - acc: 0.8497" ] } ], "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from keras.models import Sequential\n", - "from keras.layers import Dense\n", - "from keras.wrappers.scikit_learn import KerasClassifier\n", - "from keras.utils import np_utils\n", - "from sklearn.model_selection import cross_val_score\n", - "from sklearn.model_selection import KFold\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from sklearn.pipeline import Pipeline" + "model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)" ] }, { @@ -63,8 +9130,46 @@ "metadata": {}, "outputs": [], "source": [ - "np.random.seed(1)" + "score = model.evaluate(X_test, y_test)\n", + "\n", + "print(\"%s: %.2f%%\" % (model.metrics_names[1], score[1]*100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Evaluating our model with K-Fold Cross Validation\n", + "\n", + "We'll use k-fold validation to get a better representation of how our model did..." 
] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)\n", + "cv_scores = []\n", + "\n", + "for train, test in k_fold.split(data, labels):\n", + " model = build_model()\n", + " model.fit(data[train], labels[train], epochs=150, batch_size=10, verbose=0)\n", + " scores = model.evaluate(data[test], labels[test], verbose=0)\n", + " print(\"%s: %.2f%%\" % (model.metrics_names[1], scores[1]*100))\n", + " \n", + " cv_scores.append(scores[1] * 100)\n", + " \n", + "print(\"%.2f%% (+/- %.2f%%)\" % (np.mean(cv_scores), np.std(cv_scores)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {