diff --git a/README.md b/README.md index a8816b6..e9466fc 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,32 @@ pip install scikit-psl ``` # Usage + ```python -from skpsl import ProbabilisticScoringList from sklearn.datasets import make_classification -from sklearn.model_selection import cross_val_score +from sklearn.model_selection import ShuffleSplit + +from skpsl import ProbabilisticScoringList # Generating synthetic data with continuous features and a binary target variable X, y = make_classification(random_state=42) X = (X > .5).astype(int) clf = ProbabilisticScoringList([-1, 1, 2]) -print(cross_val_score(clf, X, y, cv=5)) + +for train, test in ShuffleSplit(1, test_size=.2, random_state=42).split(X): + psl = ProbabilisticScoringList([-1, 1, 2]) + psl.fit(X[train], y[train]) + print(f"Brier score: {psl.score(X[test], y[test]):.4f}") + #> Brier score: 0.1924 (lower is better) + + df = psl.inspect(5) + print(df.to_string(index=False, na_rep="-", justify="center", float_format=lambda x: f"{x:.2f}")) + #> Stage Score T = -3 T = -2 T = -1 T = 0 T = 1 T = 2 T = 3 + #> 0 - - - - 0.54 - - - + #> 1 2.00 - - - 0.18 - 0.97 - + #> 2 -1.00 - - 0.00 0.28 0.91 1.00 - + #> 3 -1.00 - 0.00 0.07 0.86 0.91 1.00 - + #> 4 1.00 - 0.00 0.00 0.29 0.92 1.00 1.00 + #> 5 -1.00 0.00 0.00 0.00 0.40 1.00 1.00 1.00 ``` diff --git a/scratch/psl_describe.ipynb b/scratch/psl_describe.ipynb new file mode 100644 index 0000000..2e98e3a --- /dev/null +++ b/scratch/psl_describe.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "adc6c663-b073-4451-9821-216c578bbd69", + "metadata": {}, + "outputs": [], + "source": [ + "from skpsl import ProbabilisticScoringList\n", + "from sklearn.datasets import make_classification\n", + "from sklearn.model_selection import cross_val_score, ShuffleSplit\n", + "from functools import reduce\n", + "from operator import or_\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "764414e5-261c-4a77-b14b-2235472b9baf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Brier score: 0.1924\n" + ] + } + ], + "source": [ + "# Generating synthetic data with continuous features and a binary target variable\n", + "X, y = make_classification(random_state=42)\n", + "X = (X > .5).astype(int)\n", + "\n", + "for train, test in ShuffleSplit(1, test_size=.2, random_state=42).split(X):\n", + " psl = ProbabilisticScoringList([-1, 1, 2])\n", + " psl.fit(X[train], y[train])\n", + " print(f\"Brier score: {psl.score(X[test], y[test]):.4f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "26a044d2-d3a1-43eb-bbef-b8e7050ce568", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Stage | \n", + "Score | \n", + "T = -3 | \n", + "T = -2 | \n", + "T = -1 | \n", + "T = 0 | \n", + "T = 1 | \n", + "T = 2 | \n", + "T = 3 | \n", + "
---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "- | \n", + "- | \n", + "- | \n", + "- | \n", + "0.5375 | \n", + "- | \n", + "- | \n", + "- | \n", + "
1 | \n", + "1 | \n", + "2.0 | \n", + "- | \n", + "- | \n", + "- | \n", + "0.1818 | \n", + "- | \n", + "0.9722 | \n", + "- | \n", + "
2 | \n", + "2 | \n", + "-1.0 | \n", + "- | \n", + "- | \n", + "0.0 | \n", + "0.2759 | \n", + "0.9091 | \n", + "1.0 | \n", + "- | \n", + "
3 | \n", + "3 | \n", + "-1.0 | \n", + "- | \n", + "0.0 | \n", + "0.069 | \n", + "0.8571 | \n", + "0.9091 | \n", + "1.0 | \n", + "- | \n", + "
4 | \n", + "4 | \n", + "1.0 | \n", + "- | \n", + "0.0 | \n", + "0.0 | \n", + "0.2857 | \n", + "0.9167 | \n", + "1.0 | \n", + "1.0 | \n", + "
5 | \n", + "5 | \n", + "-1.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.0 | \n", + "0.4000 | \n", + "1.0 | \n", + "1.0 | \n", + "1.0 | \n", + "