-
-
Notifications
You must be signed in to change notification settings - Fork 560
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
460 changed files
with
12,554 additions
and
3,178 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Creates the anscombe visualization. | ||
|
||
import yellowbrick as yb | ||
import matplotlib.pyplot as plt | ||
|
||
g = yb.anscombe() | ||
plt.savefig("images/anscombe.png") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Anscombe's Quartet | ||
================== | ||
|
||
Yellowbrick has learned Anscombe's lesson - which is why we believe that | ||
visual diagnostics are vital to machine learning. | ||
|
||
.. code:: python | ||
import yellowbrick as yb | ||
import matplotlib.pyplot as plt | ||
g = yb.anscombe() | ||
plt.show() | ||
.. image:: images/anscombe.png | ||
|
||
API Reference | ||
------------- | ||
|
||
.. automodule:: yellowbrick.anscombe | ||
:members: | ||
:undoc-members: | ||
:show-inheritance: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
|
||
from sklearn.ensemble import RandomForestClassifier | ||
from sklearn.model_selection import train_test_split | ||
|
||
from yellowbrick.classifier import ClassBalance | ||
|
||
|
||
if __name__ == '__main__': | ||
# Load the classification data set | ||
data = pd.read_csv("../../../examples/data/occupancy/occupancy.csv") | ||
|
||
features = ["temperature", "relative humidity", "light", "C02", "humidity"] | ||
classes = ['unoccupied', 'occupied'] | ||
|
||
# Extract the numpy arrays from the data frame | ||
X = data[features].as_matrix() | ||
y = data.occupancy.as_matrix() | ||
|
||
# Create the train and test data | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
|
||
# Instantiate the classification model and visualizer | ||
forest = RandomForestClassifier() | ||
visualizer = ClassBalance(forest, classes=classes) | ||
|
||
visualizer.fit(X_train, y_train) # Fit the training data to the visualizer | ||
visualizer.score(X_test, y_test) # Evaluate the model on the test data | ||
g = visualizer.poof(outpath="images/class_balance.png") # Draw/show/poof the data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
Class Balance | ||
============= | ||
|
||
Oftentimes classifiers perform badly because of a class imbalance. A class balance chart can help prepare the user for such a case by showing the support for each class in the fitted | ||
classification model. | ||
|
||
.. code:: python | ||
# Load the classification data set | ||
data = load_data('occupancy') | ||
# Specify the features of interest and the classes of the target | ||
features = ["temperature", "relative humidity", "light", "C02", "humidity"] | ||
classes = ['unoccupied', 'occupied'] | ||
# Extract the numpy arrays from the data frame | ||
X = data[features].as_matrix() | ||
y = data.occupancy.as_matrix() | ||
# Create the train and test data | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
.. code:: python | ||
# Instantiate the classification model and visualizer | ||
forest = RandomForestClassifier() | ||
visualizer = ClassBalance(forest, classes=classes) | ||
visualizer.fit(X_train, y_train) # Fit the training data to the visualizer | ||
visualizer.score(X_test, y_test) # Evaluate the model on the test data | ||
g = visualizer.poof() # Draw/show/poof the data | ||
.. image:: images/class_balance.png | ||
|
||
|
||
API Reference | ||
------------- | ||
|
||
.. automodule:: yellowbrick.classifier.class_balance | ||
:members: ClassBalance | ||
:undoc-members: | ||
:show-inheritance: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
|
||
from sklearn.naive_bayes import GaussianNB | ||
from sklearn.model_selection import train_test_split | ||
|
||
from yellowbrick.classifier import ClassificationReport | ||
|
||
|
||
if __name__ == '__main__': | ||
# Load the classification data set | ||
data = pd.read_csv("../../../examples/data/occupancy/occupancy.csv") | ||
|
||
features = ["temperature", "relative humidity", "light", "C02", "humidity"] | ||
classes = ['unoccupied', 'occupied'] | ||
|
||
# Extract the numpy arrays from the data frame | ||
X = data[features].as_matrix() | ||
y = data.occupancy.as_matrix() | ||
|
||
# Create the train and test data | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
|
||
# Instantiate the classification model and visualizer | ||
bayes = GaussianNB() | ||
visualizer = ClassificationReport(bayes, classes=classes) | ||
|
||
visualizer.fit(X_train, y_train) # Fit the training data to the visualizer | ||
visualizer.score(X_test, y_test) # Evaluate the model on the test data | ||
g = visualizer.poof(outpath="images/classification_report.png") # Draw/show/poof the data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
Classification Report | ||
~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
The classification report visualizer displays the precision, recall, and | ||
F1 scores for the model. In order to support easier interpretation and problem detection, the report integrates numerical scores with a color-coded | ||
heatmap. | ||
|
||
.. code:: python | ||
# Load the classification data set | ||
data = load_data('occupancy') | ||
# Specify the features of interest and the classes of the target | ||
features = ["temperature", "relative humidity", "light", "C02", "humidity"] | ||
classes = ['unoccupied', 'occupied'] | ||
# Extract the numpy arrays from the data frame | ||
X = data[features].as_matrix() | ||
y = data.occupancy.as_matrix() | ||
# Create the train and test data | ||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) | ||
.. code:: python | ||
# Instantiate the classification model and visualizer | ||
bayes = GaussianNB() | ||
visualizer = ClassificationReport(bayes, classes=classes) | ||
visualizer.fit(X_train, y_train) # Fit the training data to the visualizer | ||
visualizer.score(X_test, y_test) # Evaluate the model on the test data | ||
g = visualizer.poof() # Draw/show/poof the data | ||
.. image:: images/classification_report.png | ||
|
||
|
||
API Reference | ||
------------- | ||
|
||
.. automodule:: yellowbrick.classifier.classification_report | ||
:members: ClassificationReport | ||
:undoc-members: | ||
:show-inheritance: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
|
||
from sklearn.datasets import load_digits | ||
from sklearn.linear_model import LogisticRegression | ||
from sklearn.model_selection import train_test_split | ||
|
||
from yellowbrick.classifier import ConfusionMatrix | ||
|
||
|
||
if __name__ == '__main__': | ||
# Load the classification data set | ||
digits = load_digits() | ||
X = digits.data | ||
y = digits.target | ||
|
||
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size =0.2, random_state=11) | ||
|
||
model = LogisticRegression() | ||
|
||
#The ConfusionMatrix visualizer takes a model | ||
cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9]) | ||
|
||
cm.fit(X_train, y_train) # Fit the training data to the visualizer | ||
cm.score(X_test, y_test) # Evaluate the model on the test data | ||
g = cm.poof(outpath="images/confusion_matrix.png") # Draw/show/poof the data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
Confusion Matrix | ||
================ | ||
|
||
The ``ConfusionMatrix`` visualizer is a ScoreVisualizer that takes a | ||
fitted Scikit-Learn classifier and a set of test X and y values and | ||
returns a report showing how each test value's predicted class | ||
compares to its actual class. Data scientists use confusion matrices | ||
to understand which classes are most easily confused. These provide | ||
similar information as what is available in a ClassificationReport, but | ||
rather than top-level scores they provide deeper insight into the | ||
classification of individual data points. | ||
|
||
Below are a few examples of using the ConfusionMatrix visualizer; more | ||
information can be found by looking at the | ||
Scikit-Learn documentation on `confusion matrices <http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html>`_. | ||
|
||
.. code:: python | ||
#First do our imports | ||
import yellowbrick | ||
from sklearn.datasets import load_digits | ||
from sklearn.model_selection import train_test_split | ||
from sklearn.linear_model import LogisticRegression | ||
from yellowbrick.classifier import ConfusionMatrix | ||
.. code:: python | ||
# We'll use the handwritten digits data set from scikit-learn. | ||
# Each sample of this dataset is an 8x8 pixel image of a handwritten number. | ||
# Digits.data converts these 64 pixels into a single array of features | ||
digits = load_digits() | ||
X = digits.data | ||
y = digits.target | ||
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size =0.2, random_state=11) | ||
model = LogisticRegression() | ||
#The ConfusionMatrix visualizer takes a model | ||
cm = ConfusionMatrix(model, classes=[0,1,2,3,4,5,6,7,8,9]) | ||
#Fit fits the passed model. This is unnecessary if you pass the visualizer a pre-fitted model | ||
cm.fit(X_train, y_train) | ||
#To create the ConfusionMatrix, we need some test data. Score runs predict() on the data | ||
#and then creates the confusion_matrix from scikit learn. | ||
cm.score(X_test, y_test) | ||
#How did we do? | ||
cm.poof() | ||
.. image:: images/confusion_matrix.png | ||
|
||
|
||
API Reference | ||
------------- | ||
|
||
.. automodule:: yellowbrick.classifier.confusion_matrix | ||
:members: ConfusionMatrix | ||
:undoc-members: | ||
:show-inheritance: |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.