more exploration on yb features

DistrictDataLabs · Aug 13, 2016 · 21b5919 · 21b5919
1 parent c449293
commit 21b5919
Show file tree

Hide file tree

Showing 4 changed files with 451 additions and 1 deletion.
diff --git a/examples/examples.ipynb b/examples/examples.ipynb
@@ -6,7 +6,7 @@
    "source": [
     "# Yellowbrick Examples \n",
     "\n",
-    "Ths notebook is a sample of the examples that yellowbrick provids."
+    "This notebook is a sample of the examples that yellowbrick provides."
    ]
   },
   {

diff --git a/examples/figures/pipeline_prototype.png b/examples/figures/pipeline_prototype.png
diff --git a/examples/pipeline.ipynb b/examples/pipeline.ipynb
@@ -0,0 +1,179 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Visual Pipelines \n",
+    "\n",
+    "This notebook demonstrates a proof of concept for a visual pipeline for analytics. \n",
+    "\n",
+    "![Yellowbrick Prototype Pipeline Objects](figures/pipeline_prototype.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "%matplotlib inline\n",
+    "\n",
+    "import os\n",
+    "import sys \n",
+    "\n",
+    "# Modify the path \n",
+    "sys.path.append(\"..\")\n",
+    "\n",
+    "import pandas as pd\n",
+    "import yellowbrick as yb \n",
+    "import matplotlib as mpl \n",
+    "import matplotlib.pyplot as plt "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Datasets \n",
+    "\n",
+    "Note that if datasets do not exist, please see the `download.py` located in this directory. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Resolve every dataset against the same fixtures directory so the\n",
+    "# three loads cannot drift apart if the data location changes.\n",
+    "FIXTURES  = os.path.join(os.getcwd(), \"data\")\n",
+    "\n",
+    "# header=1: take the column labels from the second row of the sheet.\n",
+    "credit    = pd.read_excel(os.path.join(FIXTURES, \"credit.xls\"), header=1)\n",
+    "concrete  = pd.read_excel(os.path.join(FIXTURES, \"concrete.xls\"))\n",
+    "occupancy = pd.read_csv(os.path.join(FIXTURES, \"occupancy\", \"datatraining.txt\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Give each dataset short, lowercase column names.\n",
+    "# NOTE(review): names are assigned positionally -- confirm the order\n",
+    "# matches the column order of each source file.\n",
+    "credit.columns = [\n",
+    "    'id', 'limit', 'sex', 'edu', 'married', 'age',\n",
+    "    'apr_delay', 'may_delay', 'jun_delay', 'jul_delay', 'aug_delay', 'sep_delay',\n",
+    "    'apr_bill', 'may_bill', 'jun_bill', 'jul_bill', 'aug_bill', 'sep_bill',\n",
+    "    'apr_pay', 'may_pay', 'jun_pay', 'jul_pay', 'aug_pay', 'sep_pay',\n",
+    "    'default',\n",
+    "]\n",
+    "\n",
+    "concrete.columns = [\n",
+    "    'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age', 'strength',\n",
+    "]\n",
+    "\n",
+    "occupancy.columns = [\n",
+    "    'date', 'temp', 'humid', 'light', 'co2', 'hratio', 'occupied',\n",
+    "]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)),\n",
+       " ('model', LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,\n",
+       "       intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n",
+       "       multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n",
+       "       verbose=0))]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Pipeline is imported in this cell (not only in a later one) so that the\n",
+    "# notebook survives Restart Kernel -> Run All without a NameError.\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import StandardScaler \n",
+    "from sklearn.svm import LinearSVC\n",
+    "\n",
+    "# Standardize the features, then fit a linear support vector classifier.\n",
+    "model = Pipeline([\n",
+    "    ('scale', StandardScaler()), \n",
+    "    ('model', LinearSVC())\n",
+    "])\n",
+    "\n",
+    "# Display the (name, estimator) pairs that make up the pipeline.\n",
+    "model.steps"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluation Visualization Prototype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.base import BaseEstimator, TransformerMixin\n",
+    "\n",
+    "\n",
+    "class VisualPipeline(Pipeline):\n",
+    "    \"\"\"\n",
+    "    A Pipeline that can additionally ask each of its steps to draw itself.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def draw(self):\n",
+    "        \"\"\"\n",
+    "        Calls the draw method on every visual transformer/estimator.\n",
+    "\n",
+    "        Steps that do not expose a callable ``draw`` attribute are skipped.\n",
+    "        Exceptions raised inside a step's own ``draw`` are propagated to the\n",
+    "        caller instead of being silently swallowed.\n",
+    "        \"\"\"\n",
+    "        for _, estimator in self.steps:\n",
+    "            # Look the method up first rather than wrapping the call in\n",
+    "            # try/except AttributeError, which would also hide an\n",
+    "            # AttributeError raised from within a step's draw() body.\n",
+    "            draw = getattr(estimator, 'draw', None)\n",
+    "            if callable(draw):\n",
+    "                draw()\n",
+    "\n",
+    "                \n",
+    "class ClassifierEvaluation(object):\n",
+    "    \n",
+    "    def draw(self):\n",
+    "        yb.crplot()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/examples/rank2d.ipynb b/examples/rank2d.ipynb