diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60baa9c --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +data/* diff --git a/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb new file mode 100644 index 0000000..2fd6442 --- /dev/null +++ b/.ipynb_checkpoints/1. Classifying Partial Permits-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/1. Classifying Partial Permits.ipynb b/1. Classifying Partial Permits.ipynb new file mode 100644 index 0000000..69059b9 --- /dev/null +++ b/1. Classifying Partial Permits.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Goal 1\n", + "## Given a partial permit, can we predict what classification it belongs to?\n", + "\n", + "### Getting Started\n", + "- Open up the CITES trade database at https://trade.cites.org/\n", + "- Select a year range and click *Search*\n", + "- Download a Comparative Tabulation report and place it in `data/`\n", + "- Install requirements with `pip install -r requirements.txt`\n", + "- Run this notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + }, + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'tensorflow'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSequential\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlayers\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDense\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mkeras\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrappers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscikit_learn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mKerasClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mabsolute_import\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mactivations\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mapplications\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/utils/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdata_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mio_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mconv_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;31m# Globally-importable utils.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/utils/conv_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmoves\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbackend\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mK\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/backend/__init__.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 82\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0m_BACKEND\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'tensorflow'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstderr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Using TensorFlow backend.\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 84\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mtensorflow_backend\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 85\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Unknown backend: '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_BACKEND\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0m__future__\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mprint_function\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtraining\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmoving_averages\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mops\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtensor_array_ops\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'tensorflow'" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense\n", + "from keras.wrappers.scikit_learn import KerasClassifier\n", + "from keras.utils import np_utils\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.pipeline import Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..286f8d8 --- /dev/null +++ b/README.md @@ -0,0 +1,14 @@ +# CITES ML +## Machine Learning with the CITES Trade Database + +### Installation + +Install Python 3.6, jupyter notebook, and run `pip install -r requirements.txt` + +### Aims + +This repo is an exploratory effort to see how Machine Learning can be applied to the CITES Trade Database, with three main objectives and experiments, each represented in a jupyter notebook: + +1. Given a partial permit, can we figure out what category it belongs to? +2. Given some trade data over time, can we predict future trade numbers, per country and per species? +3. Given valid and invalid permits (raw data before validation steps), can we learn the sanity checks and validation processes to spot invalid permits automatically? diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..671482e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,66 @@ +absl-py==0.1.11 +appnope==0.1.0 +astor==0.6.2 +bleach==1.5.0 +certifi==2018.1.18 +chardet==3.0.4 +decorator==4.2.1 +docopt==0.6.2 +entrypoints==0.2.3 +gast==0.2.0 +grpcio==1.10.0 +html5lib==0.9999999 +idna==2.6 +ipykernel==4.8.2 +ipython==6.2.1 +ipython-genutils==0.2.0 +ipywidgets==7.1.2 +jedi==0.11.1 +Jinja2==2.10 +jsonschema==2.6.0 +jupyter==1.0.0 +jupyter-client==5.2.2 +jupyter-console==5.2.0 +jupyter-core==4.4.0 +Keras==2.1.5 +Markdown==2.6.11 +MarkupSafe==1.0 +mistune==0.8.3 +nbconvert==5.3.1 +nbformat==4.4.0 +notebook==5.4.0 +numpy==1.14.1 +pandas==0.22.0 +pandocfilters==1.4.2 +parso==0.1.1 +pexpect==4.4.0 +pickleshare==0.7.4 +prompt-toolkit==1.0.15 +protobuf==3.5.2 +ptyprocess==0.5.2 +Pygments==2.2.0 +python-dateutil==2.6.1 +pytz==2018.3 +PyYAML==3.12 +pyzmq==17.0.0 +qtconsole==4.3.1 +requests==2.18.4 +scikit-learn==0.19.1 +scipy==1.0.0 +Send2Trash==1.5.0 +simplegeneric==0.8.1 +six==1.11.0 +sklearn==0.0 +tensorboard==1.6.0 +tensorflow==1.6.0 +termcolor==1.1.0 +terminado==0.8.1 +testpath==0.3.1 +tornado==5.0 +traitlets==4.3.2 +urllib3==1.22 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.14.1 +widgetsnbextension==3.1.4 +yarg==0.1.9