From d220d274cafe3174fe28e1a164a6c8eb9ce0fa05 Mon Sep 17 00:00:00 2001 From: KUSHAGRA SRIVASTAVA <kushagra.2024cse1172@kiet.edu> Date: Thu, 26 May 2022 23:38:06 +0530 Subject: [PATCH] Add files via upload --- Breast_cancer.ipynb | 2395 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2395 insertions(+) create mode 100644 Breast_cancer.ipynb diff --git a/Breast_cancer.ipynb b/Breast_cancer.ipynb new file mode 100644 index 0000000..65fc952 --- /dev/null +++ b/Breast_cancer.ipynb @@ -0,0 +1,2395 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Breast cancer.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "G03i8TxURuPK" + }, + "source": [ + "import pandas as pd" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6BGBRFPLtNiq", + "outputId": "5c885cfe-9297-4100-d6f4-c90a4995cafa" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Mounted at /content/drive\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "foeYIXhxT7oQ" + }, + "source": [ + "dataframe = pd.read_csv(\"/content/data.csv\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 505 + }, + "id": "THvmGLDXUWW1", + "outputId": "602577b7-ecde-4ee2-a5ec-c657ca918802" + }, + "source": [ + "dataframe" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id diagnosis radius_mean texture_mean perimeter_mean area_mean 
\\\n", + "0 842302 M 17.99 10.38 122.80 1001.0 \n", + "1 842517 M 20.57 17.77 132.90 1326.0 \n", + "2 84300903 M 19.69 21.25 130.00 1203.0 \n", + "3 84348301 M 11.42 20.38 77.58 386.1 \n", + "4 84358402 M 20.29 14.34 135.10 1297.0 \n", + ".. ... ... ... ... ... ... \n", + "564 926424 M 21.56 22.39 142.00 1479.0 \n", + "565 926682 M 20.13 28.25 131.20 1261.0 \n", + "566 926954 M 16.60 28.08 108.30 858.1 \n", + "567 927241 M 20.60 29.33 140.10 1265.0 \n", + "568 92751 B 7.76 24.54 47.92 181.0 \n", + "\n", + " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", + "0 0.11840 0.27760 0.30010 0.14710 \n", + "1 0.08474 0.07864 0.08690 0.07017 \n", + "2 0.10960 0.15990 0.19740 0.12790 \n", + "3 0.14250 0.28390 0.24140 0.10520 \n", + "4 0.10030 0.13280 0.19800 0.10430 \n", + ".. ... ... ... ... \n", + "564 0.11100 0.11590 0.24390 0.13890 \n", + "565 0.09780 0.10340 0.14400 0.09791 \n", + "566 0.08455 0.10230 0.09251 0.05302 \n", + "567 0.11780 0.27700 0.35140 0.15200 \n", + "568 0.05263 0.04362 0.00000 0.00000 \n", + "\n", + " ... texture_worst perimeter_worst area_worst smoothness_worst \\\n", + "0 ... 17.33 184.60 2019.0 0.16220 \n", + "1 ... 23.41 158.80 1956.0 0.12380 \n", + "2 ... 25.53 152.50 1709.0 0.14440 \n", + "3 ... 26.50 98.87 567.7 0.20980 \n", + "4 ... 16.67 152.20 1575.0 0.13740 \n", + ".. ... ... ... ... ... \n", + "564 ... 26.40 166.10 2027.0 0.14100 \n", + "565 ... 38.25 155.00 1731.0 0.11660 \n", + "566 ... 34.12 126.70 1124.0 0.11390 \n", + "567 ... 39.42 184.60 1821.0 0.16500 \n", + "568 ... 30.37 59.16 268.6 0.08996 \n", + "\n", + " compactness_worst concavity_worst concave points_worst symmetry_worst \\\n", + "0 0.66560 0.7119 0.2654 0.4601 \n", + "1 0.18660 0.2416 0.1860 0.2750 \n", + "2 0.42450 0.4504 0.2430 0.3613 \n", + "3 0.86630 0.6869 0.2575 0.6638 \n", + "4 0.20500 0.4000 0.1625 0.2364 \n", + ".. ... ... ... ... 
\n", + "564 0.21130 0.4107 0.2216 0.2060 \n", + "565 0.19220 0.3215 0.1628 0.2572 \n", + "566 0.30940 0.3403 0.1418 0.2218 \n", + "567 0.86810 0.9387 0.2650 0.4087 \n", + "568 0.06444 0.0000 0.0000 0.2871 \n", + "\n", + " fractal_dimension_worst Unnamed: 32 \n", + "0 0.11890 NaN \n", + "1 0.08902 NaN \n", + "2 0.08758 NaN \n", + "3 0.17300 NaN \n", + "4 0.07678 NaN \n", + ".. ... ... \n", + "564 0.07115 NaN \n", + "565 0.06637 NaN \n", + "566 0.07820 NaN \n", + "567 0.12400 NaN \n", + "568 0.07039 NaN \n", + "\n", + "[569 rows x 33 columns]" + ], + "text/html": [ + "\n", + " <div id=\"df-8f364658-3db9-470e-996d-116ea1062db3\">\n", + " <div class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>id</th>\n", + " <th>diagnosis</th>\n", + " <th>radius_mean</th>\n", + " <th>texture_mean</th>\n", + " <th>perimeter_mean</th>\n", + " <th>area_mean</th>\n", + " <th>smoothness_mean</th>\n", + " <th>compactness_mean</th>\n", + " <th>concavity_mean</th>\n", + " <th>concave points_mean</th>\n", + " <th>...</th>\n", + " <th>texture_worst</th>\n", + " <th>perimeter_worst</th>\n", + " <th>area_worst</th>\n", + " <th>smoothness_worst</th>\n", + " <th>compactness_worst</th>\n", + " <th>concavity_worst</th>\n", + " <th>concave points_worst</th>\n", + " <th>symmetry_worst</th>\n", + " <th>fractal_dimension_worst</th>\n", + " <th>Unnamed: 32</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>842302</td>\n", + " <td>M</td>\n", + " <td>17.99</td>\n", + " <td>10.38</td>\n", + " <td>122.80</td>\n", + " <td>1001.0</td>\n", + " <td>0.11840</td>\n", + 
" <td>0.27760</td>\n", + " <td>0.30010</td>\n", + " <td>0.14710</td>\n", + " <td>...</td>\n", + " <td>17.33</td>\n", + " <td>184.60</td>\n", + " <td>2019.0</td>\n", + " <td>0.16220</td>\n", + " <td>0.66560</td>\n", + " <td>0.7119</td>\n", + " <td>0.2654</td>\n", + " <td>0.4601</td>\n", + " <td>0.11890</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>842517</td>\n", + " <td>M</td>\n", + " <td>20.57</td>\n", + " <td>17.77</td>\n", + " <td>132.90</td>\n", + " <td>1326.0</td>\n", + " <td>0.08474</td>\n", + " <td>0.07864</td>\n", + " <td>0.08690</td>\n", + " <td>0.07017</td>\n", + " <td>...</td>\n", + " <td>23.41</td>\n", + " <td>158.80</td>\n", + " <td>1956.0</td>\n", + " <td>0.12380</td>\n", + " <td>0.18660</td>\n", + " <td>0.2416</td>\n", + " <td>0.1860</td>\n", + " <td>0.2750</td>\n", + " <td>0.08902</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>84300903</td>\n", + " <td>M</td>\n", + " <td>19.69</td>\n", + " <td>21.25</td>\n", + " <td>130.00</td>\n", + " <td>1203.0</td>\n", + " <td>0.10960</td>\n", + " <td>0.15990</td>\n", + " <td>0.19740</td>\n", + " <td>0.12790</td>\n", + " <td>...</td>\n", + " <td>25.53</td>\n", + " <td>152.50</td>\n", + " <td>1709.0</td>\n", + " <td>0.14440</td>\n", + " <td>0.42450</td>\n", + " <td>0.4504</td>\n", + " <td>0.2430</td>\n", + " <td>0.3613</td>\n", + " <td>0.08758</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>84348301</td>\n", + " <td>M</td>\n", + " <td>11.42</td>\n", + " <td>20.38</td>\n", + " <td>77.58</td>\n", + " <td>386.1</td>\n", + " <td>0.14250</td>\n", + " <td>0.28390</td>\n", + " <td>0.24140</td>\n", + " <td>0.10520</td>\n", + " <td>...</td>\n", + " <td>26.50</td>\n", + " <td>98.87</td>\n", + " <td>567.7</td>\n", + " <td>0.20980</td>\n", + " <td>0.86630</td>\n", + " <td>0.6869</td>\n", + " <td>0.2575</td>\n", + " <td>0.6638</td>\n", + " <td>0.17300</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>4</th>\n", + " <td>84358402</td>\n", + " <td>M</td>\n", + " <td>20.29</td>\n", + " <td>14.34</td>\n", + " <td>135.10</td>\n", + " <td>1297.0</td>\n", + " <td>0.10030</td>\n", + " <td>0.13280</td>\n", + " <td>0.19800</td>\n", + " <td>0.10430</td>\n", + " <td>...</td>\n", + " <td>16.67</td>\n", + " <td>152.20</td>\n", + " <td>1575.0</td>\n", + " <td>0.13740</td>\n", + " <td>0.20500</td>\n", + " <td>0.4000</td>\n", + " <td>0.1625</td>\n", + " <td>0.2364</td>\n", + " <td>0.07678</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>564</th>\n", + " <td>926424</td>\n", + " <td>M</td>\n", + " <td>21.56</td>\n", + " <td>22.39</td>\n", + " <td>142.00</td>\n", + " <td>1479.0</td>\n", + " <td>0.11100</td>\n", + " <td>0.11590</td>\n", + " <td>0.24390</td>\n", + " <td>0.13890</td>\n", + " <td>...</td>\n", + " <td>26.40</td>\n", + " <td>166.10</td>\n", + " <td>2027.0</td>\n", + " <td>0.14100</td>\n", + " <td>0.21130</td>\n", + " <td>0.4107</td>\n", + " <td>0.2216</td>\n", + " <td>0.2060</td>\n", + " <td>0.07115</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>565</th>\n", + " <td>926682</td>\n", + " <td>M</td>\n", + " <td>20.13</td>\n", + " <td>28.25</td>\n", + " <td>131.20</td>\n", + " <td>1261.0</td>\n", + " <td>0.09780</td>\n", + " <td>0.10340</td>\n", + " <td>0.14400</td>\n", + " <td>0.09791</td>\n", + " <td>...</td>\n", + " <td>38.25</td>\n", + " <td>155.00</td>\n", + " <td>1731.0</td>\n", + " <td>0.11660</td>\n", + " <td>0.19220</td>\n", + " <td>0.3215</td>\n", 
+ " <td>0.1628</td>\n", + " <td>0.2572</td>\n", + " <td>0.06637</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>566</th>\n", + " <td>926954</td>\n", + " <td>M</td>\n", + " <td>16.60</td>\n", + " <td>28.08</td>\n", + " <td>108.30</td>\n", + " <td>858.1</td>\n", + " <td>0.08455</td>\n", + " <td>0.10230</td>\n", + " <td>0.09251</td>\n", + " <td>0.05302</td>\n", + " <td>...</td>\n", + " <td>34.12</td>\n", + " <td>126.70</td>\n", + " <td>1124.0</td>\n", + " <td>0.11390</td>\n", + " <td>0.30940</td>\n", + " <td>0.3403</td>\n", + " <td>0.1418</td>\n", + " <td>0.2218</td>\n", + " <td>0.07820</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>567</th>\n", + " <td>927241</td>\n", + " <td>M</td>\n", + " <td>20.60</td>\n", + " <td>29.33</td>\n", + " <td>140.10</td>\n", + " <td>1265.0</td>\n", + " <td>0.11780</td>\n", + " <td>0.27700</td>\n", + " <td>0.35140</td>\n", + " <td>0.15200</td>\n", + " <td>...</td>\n", + " <td>39.42</td>\n", + " <td>184.60</td>\n", + " <td>1821.0</td>\n", + " <td>0.16500</td>\n", + " <td>0.86810</td>\n", + " <td>0.9387</td>\n", + " <td>0.2650</td>\n", + " <td>0.4087</td>\n", + " <td>0.12400</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>568</th>\n", + " <td>92751</td>\n", + " <td>B</td>\n", + " <td>7.76</td>\n", + " <td>24.54</td>\n", + " <td>47.92</td>\n", + " <td>181.0</td>\n", + " <td>0.05263</td>\n", + " <td>0.04362</td>\n", + " <td>0.00000</td>\n", + " <td>0.00000</td>\n", + " <td>...</td>\n", + " <td>30.37</td>\n", + " <td>59.16</td>\n", + " <td>268.6</td>\n", + " <td>0.08996</td>\n", + " <td>0.06444</td>\n", + " <td>0.0000</td>\n", + " <td>0.0000</td>\n", + " <td>0.2871</td>\n", + " <td>0.07039</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>569 rows × 33 columns</p>\n", + "</div>\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8f364658-3db9-470e-996d-116ea1062db3')\"\n", + " title=\"Convert this dataframe to an interactive 
table.\"\n", + " style=\"display:none;\">\n", + " \n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", + " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", + " </svg>\n", + " </button>\n", + " \n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " flex-wrap:wrap;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-8f364658-3db9-470e-996d-116ea1062db3 button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 
'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-8f364658-3db9-470e-996d-116ea1062db3');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + " </div>\n", + " " + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "CwwVWCjQUZY5" + }, + "source": [ + "y=dataframe.iloc[:,1].values" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ieklOnmvU51x", + "outputId": "33223f33-031e-4ca8-9bfe-15bb0c626e7b" + }, + "source": [ + "y" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',\n", + " 'M', 'M', 'M', 'M', 'M', 'M', 'B', 'B', 'B', 'M', 'M', 'M', 'M',\n", + " 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'B', 'M',\n", + " 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B',\n", + " 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'M',\n", + " 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'M', 'B', 'M', 'B', 'M',\n", + " 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'M', 'M', 'B', 'B', 'B',\n", + " 'M', 
'B', 'B', 'M', 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B',\n", + " 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'M',\n", + " 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'B', 'M', 'B',\n", + " 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'M', 'M',\n", + " 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M',\n", + " 'M', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M',\n", + " 'B', 'M', 'M', 'M', 'M', 'B', 'M', 'M', 'M', 'B', 'M', 'B', 'M',\n", + " 'B', 'B', 'M', 'B', 'M', 'M', 'M', 'M', 'B', 'B', 'M', 'M', 'B',\n", + " 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'M',\n", + " 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B',\n", + " 'B', 'B', 'B', 'M', 'B', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',\n", + " 'M', 'M', 'M', 'M', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M',\n", + " 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'M', 'M', 'B', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B',\n", + " 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'M', 'B',\n", + " 'B', 'B', 'B', 'M', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'M',\n", + " 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'M', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'B', 'M', 'M', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B',\n", + " 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'M',\n", + " 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B',\n", + " 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'B', 'M', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M',\n", + " 'B', 'B', 'M', 'B', 'M', 'B', 
'B', 'M', 'B', 'M', 'B', 'B', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'M', 'B',\n", + " 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'M', 'B', 'B', 'B',\n", + " 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'M', 'M', 'B', 'B',\n", + " 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',\n", + " 'B', 'B', 'B', 'M', 'M', 'M', 'M', 'M', 'M', 'B'], dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BNuU_gjHU-Bt", + "outputId": "f4d70885-7a36-431f-f0e7-45e6bebf98fa" + }, + "source": [ + "y.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(569,)" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qte2sVrCVB8r" + }, + "source": [ + "x=dataframe.iloc[:,2:].values" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ebU7UaAWVMCY", + "outputId": "9e18aa72-abc2-43cb-9003-100d61cb9997" + }, + "source": [ + "x.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(569, 31)" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ROqOCf_brvBI", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b6b0aebb-5bed-4e6e-8cae-30ca2d8f32f3" + }, + "source": [ + "import numpy as np\n", + "from sklearn.impute import SimpleImputer\n", 
+ "imputer=SimpleImputer(missing_values=np.nan,strategy='mean')\n", + "imputer.fit(x)\n", + "x=imputer.transform(x)\n", + "print(x)\n", + "#replace missing (NaN) values in each feature column with that column's mean" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]\n", + " [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]\n", + " [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]\n", + " ...\n", + " [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]\n", + " [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]\n", + " [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8Y6u4MLbbZ8y", + "outputId": "c1441ed2-64bd-4fa7-8248-ed1cc14b1068" + }, + "source": [ + "from sklearn.preprocessing import LabelEncoder\n", + "le = LabelEncoder()\n", + "#LabelEncoder maps the diagnosis labels to integers in sorted order: 'B' (benign) -> 0 and 'M' (malignant) -> 1\n", + "y=le.fit_transform(y)\n", + "print(y)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", + " 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 0 1 1 0 0 0 0 1 0 1 1\n", + " 0 1 0 1 1 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 0\n", + " 0 0 0 0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 0 1 1 0 1 1 0 0 1 0 0 1 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 1\n", + " 0 1 0 0 0 1 0 0 1 1 0 1 1 1 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 1 1 0 0\n", + " 0 1 0 0 0 0 0 1 1 0 0 1 0 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 1 1 1 1\n", + " 1 1 1 1 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 1 0 0 1 0 1 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 1 0 0\n", + " 0 0 1 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1\n", + " 1 0 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0\n", + " 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 1 0 0\n", + " 1 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0\n", + " 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1\n", + " 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " 0 0 0 0 0 0 0 1 1 1 1 1 1 0]\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "laUz8Qr-cbJT" + }, + "source": [ + "#now we have to divide data into 2 parts : one for training and one for testing\n", + "from sklearn.model_selection import train_test_split #****\n", + "x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=1)#**\n", + "#test_size = 0.2 means 20% data will be used for testing" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "25ZqM-I-eNgN" + }, + "source": [ + "#on the test set always use transform only, never fit_transform: the scaler is fitted on the training data\n", + "from sklearn.preprocessing import StandardScaler\n", + "sc = StandardScaler()\n", + "x_train = sc.fit_transform(x_train)\n", + "x_test = sc.transform(x_test)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F0MxORDbffqY", + "outputId": "937e4d50-38f6-4fac-b6fb-7efa65393ddc" + }, + "source": [ + "from sklearn.svm import SVC\n", + "classifier = SVC(kernel='linear',random_state=0)\n", + "classifier.fit(x_train,y_train)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVC(kernel='linear', random_state=0)" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "code", + "metadata": { + 
"id": "kw-QAwE4gx2A" + }, + "source": [ + "y_pred = classifier.predict(x_test)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sO6ztX1Ag8co", + "outputId": "0230dee0-93c3-45ac-a885-c47848263392" + }, + "source": [ + "from sklearn.metrics import confusion_matrix,accuracy_score\n", + "cm = confusion_matrix(y_test,y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for Linear Regression: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[71 1]\n", + " [ 3 39]]\n", + "\n", + "Accuracy for Linear Regression: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9649122807017544" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "23WYACaOi4BA", + "outputId": "379578a5-866b-4849-ad89-a49dcaf76b41" + }, + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "classifier2 = LogisticRegression(random_state = 0)\n", + "classifier2.fit(x_train, y_train)\n", + "y_pred = classifier2.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for Logistic Regression: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[71 1]\n", + " [ 2 40]]\n", + "\n", + "Accuracy for Logistic Regression: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9736842105263158" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "classifier3 = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', 
random_state = 0)\n", + "classifier3.fit(x_train, y_train)\n", + "y_pred = classifier3.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for Random Forest: \")\n", + "accuracy_score(y_test,y_pred)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "o1QOzVD9wyf2", + "outputId": "4302d5a7-373f-457c-f8a3-ab30a7c35d05" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[71 1]\n", + " [ 5 37]]\n", + "\n", + "Accuracy for Random Forest: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9473684210526315" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "classifier4 = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)\n", + "classifier4.fit(x_train, y_train)\n", + "y_pred = classifier4.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for Decision Tree: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "en-CZhPGxam5", + "outputId": "c4384355-485c-4f23-813c-0da9c6558996" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[72 0]\n", + " [ 6 36]]\n", + "\n", + "Accuracy for Decision Tree: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9473684210526315" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "classifier5 = GaussianNB()\n", + "classifier5.fit(x_train, y_train)\n", + "y_pred = classifier5.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for Naïve Bayes 
Algorithm: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fUiNRY8txyXm", + "outputId": "b65b2218-0c6f-4ce8-b292-924a32432b59" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[70 2]\n", + " [ 4 38]]\n", + "\n", + "Accuracy for Naïve Bayes Algorithm: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9473684210526315" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.svm import SVC\n", + "classifier6 = SVC(kernel = 'rbf', random_state = 0)\n", + "classifier6.fit(x_train, y_train)\n", + "y_pred = classifier6.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy Using Kernel SVM Algorithm: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yWc_DAMs2l8f", + "outputId": "d98004c7-04d8-4c77-8b2b-e3d87e1ba85b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[71 1]\n", + " [ 2 40]]\n", + "\n", + "Accuracy Using Kernel SVM Algorithm: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9736842105263158" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.svm import SVC\n", + "classifier7 = SVC(kernel = 'linear', random_state = 0)\n", + "classifier7.fit(x_train, y_train)\n", + "y_pred = classifier7.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy Using Support Vector Machine Algorithm: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pEjg9nww4H7e", + "outputId": 
"456d388a-b49d-476b-ac96-2ee6477bfd83" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[71 1]\n", + " [ 3 39]]\n", + "\n", + "Accuracy Using Support Vector Machine Algorithm: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9649122807017544" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "classifier8 = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)\n", + "classifier8.fit(x_train, y_train)\n", + "y_pred = classifier8.predict(x_test)\n", + "cm = confusion_matrix(y_test, y_pred)\n", + "print(cm)\n", + "print(\"\\nAccuracy for KNN: \")\n", + "accuracy_score(y_test,y_pred)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WAzcXMxs4S3Z", + "outputId": "d61023d5-4f79-4422-cf0d-985839d678cd" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[[72 0]\n", + " [ 5 37]]\n", + "\n", + "Accuracy for KNN: \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.956140350877193" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "modelxyz = SVC(kernel=\"rbf\",C=30,gamma='auto')\n", + "modelxyz.fit(x_train,y_train)\n", + "modelxyz.score(x_test,y_test)" + ], + "metadata": { + "id": "A2mtxgjL4qXG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "aea9ce9f-b74e-428d-d358-d01ca224c9ef" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9649122807017544" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "clf = GridSearchCV(SVC(gamma='auto'),{\n", + " 'C': 
[1,10,20,30,40,50,60,70,80],\n", + " 'kernel': ['rbf','linear']\n", + "}, cv=5, return_train_score=False)\n", + "\n", + "clf.fit(x,y)\n", + "clf.cv_results_" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rQZw3TEhaqJx", + "outputId": "dfc58c4a-649e-4faf-b4fe-24a0772bc1a9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'mean_fit_time': array([0.02990212, 1.5451304 , 0.0216116 , 2.67975316, 0.01789918,\n", + " 4.38982725, 0.01766524, 4.13524318, 0.01783051, 5.60923376,\n", + " 0.01791935, 5.72623005, 0.01807461, 7.97815323, 0.01971617,\n", + " 7.93726101, 0.01765423, 7.63122673]),\n", + " 'mean_score_time': array([0.01029086, 0.00098829, 0.00586171, 0.00078259, 0.0051774 ,\n", + " 0.00076194, 0.0051949 , 0.0007658 , 0.00510898, 0.00078254,\n", + " 0.00536723, 0.00080252, 0.00508952, 0.00076923, 0.00572472,\n", + " 0.0007926 , 0.00502954, 0.00088406]),\n", + " 'mean_test_score': array([0.6274181 , 0.94553641, 0.6274181 , 0.95081509, 0.6274181 ,\n", + " 0.95081509, 0.6274181 , 0.952585 , 0.6274181 , 0.95784816,\n", + " 0.6274181 , 0.95256948, 0.6274181 , 0.95432386, 0.6274181 ,\n", + " 0.95432386, 0.6274181 , 0.95255395]),\n", + " 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60,\n", + " 70, 70, 80, 80],\n", + " mask=[False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False,\n", + " False, False],\n", + " fill_value='?',\n", + " dtype=object),\n", + " 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear',\n", + " 'rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear',\n", + " 'rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],\n", + " mask=[False, False, False, False, False, False, False, False,\n", + " False, False, False, False, False, False, False, False,\n", + " False, False],\n", + " fill_value='?',\n", + " dtype=object),\n", + " 
'params': [{'C': 1, 'kernel': 'rbf'},\n", + " {'C': 1, 'kernel': 'linear'},\n", + " {'C': 10, 'kernel': 'rbf'},\n", + " {'C': 10, 'kernel': 'linear'},\n", + " {'C': 20, 'kernel': 'rbf'},\n", + " {'C': 20, 'kernel': 'linear'},\n", + " {'C': 30, 'kernel': 'rbf'},\n", + " {'C': 30, 'kernel': 'linear'},\n", + " {'C': 40, 'kernel': 'rbf'},\n", + " {'C': 40, 'kernel': 'linear'},\n", + " {'C': 50, 'kernel': 'rbf'},\n", + " {'C': 50, 'kernel': 'linear'},\n", + " {'C': 60, 'kernel': 'rbf'},\n", + " {'C': 60, 'kernel': 'linear'},\n", + " {'C': 70, 'kernel': 'rbf'},\n", + " {'C': 70, 'kernel': 'linear'},\n", + " {'C': 80, 'kernel': 'rbf'},\n", + " {'C': 80, 'kernel': 'linear'}],\n", + " 'rank_test_score': array([10, 9, 10, 7, 10, 7, 10, 4, 10, 1, 10, 5, 10, 2, 10, 2, 10,\n", + " 6], dtype=int32),\n", + " 'split0_test_score': array([0.62280702, 0.94736842, 0.62280702, 0.93859649, 0.62280702,\n", + " 0.93859649, 0.62280702, 0.92982456, 0.62280702, 0.93859649,\n", + " 0.62280702, 0.92982456, 0.62280702, 0.92982456, 0.62280702,\n", + " 0.92982456, 0.62280702, 0.92982456]),\n", + " 'split1_test_score': array([0.62280702, 0.92982456, 0.62280702, 0.93859649, 0.62280702,\n", + " 0.93859649, 0.62280702, 0.93859649, 0.62280702, 0.95614035,\n", + " 0.62280702, 0.94736842, 0.62280702, 0.94736842, 0.62280702,\n", + " 0.94736842, 0.62280702, 0.94736842]),\n", + " 'split2_test_score': array([0.63157895, 0.97368421, 0.63157895, 0.97368421, 0.63157895,\n", + " 0.97368421, 0.63157895, 0.97368421, 0.63157895, 0.97368421,\n", + " 0.63157895, 0.97368421, 0.63157895, 0.97368421, 0.63157895,\n", + " 0.97368421, 0.63157895, 0.97368421]),\n", + " 'split3_test_score': array([0.63157895, 0.92105263, 0.63157895, 0.93859649, 0.63157895,\n", + " 0.93859649, 0.63157895, 0.94736842, 0.63157895, 0.94736842,\n", + " 0.63157895, 0.94736842, 0.63157895, 0.95614035, 0.63157895,\n", + " 0.95614035, 0.63157895, 0.95614035]),\n", + " 'split4_test_score': array([0.62831858, 0.95575221, 0.62831858, 0.96460177, 
0.62831858,\n", + " 0.96460177, 0.62831858, 0.97345133, 0.62831858, 0.97345133,\n", + " 0.62831858, 0.96460177, 0.62831858, 0.96460177, 0.62831858,\n", + " 0.96460177, 0.62831858, 0.95575221]),\n", + " 'std_fit_time': array([1.43591318e-03, 3.63438763e-01, 3.71925596e-03, 3.38731073e-01,\n", + " 2.48843473e-04, 9.59890076e-01, 2.15279731e-04, 6.15422987e-01,\n", + " 4.13181987e-04, 1.65625532e+00, 4.22119765e-04, 1.29921533e+00,\n", + " 7.47276195e-04, 4.14420744e+00, 9.65537489e-04, 2.64174946e+00,\n", + " 2.96906377e-04, 4.89833929e+00]),\n", + " 'std_score_time': array([2.75344883e-03, 1.10914763e-04, 1.18816508e-03, 2.14473912e-05,\n", + " 1.28012137e-04, 2.27038591e-05, 1.44955986e-04, 1.75246576e-05,\n", + " 8.62766442e-05, 1.56946805e-05, 4.56816030e-04, 9.76288420e-05,\n", + " 9.81799487e-05, 2.87314074e-05, 3.56127912e-04, 6.51490448e-05,\n", + " 3.04853746e-05, 1.99582014e-04]),\n", + " 'std_test_score': array([0.00394868, 0.01868869, 0.00394868, 0.01523779, 0.00394868,\n", + " 0.01523779, 0.00394868, 0.01800838, 0.00394868, 0.0139829 ,\n", + " 0.00394868, 0.01524494, 0.00394868, 0.01504893, 0.00394868,\n", + " 0.01504893, 0.00394868, 0.01423442])}" + ] + }, + "metadata": {}, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df=pd.DataFrame(clf.cv_results_)\n", + "df" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "sS22-FJ-bfM6", + "outputId": "bcb8946f-92b4-4349-e8ef-55925273f8ef" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " mean_fit_time std_fit_time mean_score_time std_score_time param_C \\\n", + "0 0.029902 0.001436 0.010291 0.002753 1 \n", + "1 1.545130 0.363439 0.000988 0.000111 1 \n", + "2 0.021612 0.003719 0.005862 0.001188 10 \n", + "3 2.679753 0.338731 0.000783 0.000021 10 \n", + "4 0.017899 0.000249 0.005177 0.000128 20 \n", + "5 4.389827 0.959890 0.000762 0.000023 20 \n", + "6 
0.017665 0.000215 0.005195 0.000145 30 \n", + "7 4.135243 0.615423 0.000766 0.000018 30 \n", + "8 0.017831 0.000413 0.005109 0.000086 40 \n", + "9 5.609234 1.656255 0.000783 0.000016 40 \n", + "10 0.017919 0.000422 0.005367 0.000457 50 \n", + "11 5.726230 1.299215 0.000803 0.000098 50 \n", + "12 0.018075 0.000747 0.005090 0.000098 60 \n", + "13 7.978153 4.144207 0.000769 0.000029 60 \n", + "14 0.019716 0.000966 0.005725 0.000356 70 \n", + "15 7.937261 2.641749 0.000793 0.000065 70 \n", + "16 0.017654 0.000297 0.005030 0.000030 80 \n", + "17 7.631227 4.898339 0.000884 0.000200 80 \n", + "\n", + " param_kernel params split0_test_score \\\n", + "0 rbf {'C': 1, 'kernel': 'rbf'} 0.622807 \n", + "1 linear {'C': 1, 'kernel': 'linear'} 0.947368 \n", + "2 rbf {'C': 10, 'kernel': 'rbf'} 0.622807 \n", + "3 linear {'C': 10, 'kernel': 'linear'} 0.938596 \n", + "4 rbf {'C': 20, 'kernel': 'rbf'} 0.622807 \n", + "5 linear {'C': 20, 'kernel': 'linear'} 0.938596 \n", + "6 rbf {'C': 30, 'kernel': 'rbf'} 0.622807 \n", + "7 linear {'C': 30, 'kernel': 'linear'} 0.929825 \n", + "8 rbf {'C': 40, 'kernel': 'rbf'} 0.622807 \n", + "9 linear {'C': 40, 'kernel': 'linear'} 0.938596 \n", + "10 rbf {'C': 50, 'kernel': 'rbf'} 0.622807 \n", + "11 linear {'C': 50, 'kernel': 'linear'} 0.929825 \n", + "12 rbf {'C': 60, 'kernel': 'rbf'} 0.622807 \n", + "13 linear {'C': 60, 'kernel': 'linear'} 0.929825 \n", + "14 rbf {'C': 70, 'kernel': 'rbf'} 0.622807 \n", + "15 linear {'C': 70, 'kernel': 'linear'} 0.929825 \n", + "16 rbf {'C': 80, 'kernel': 'rbf'} 0.622807 \n", + "17 linear {'C': 80, 'kernel': 'linear'} 0.929825 \n", + "\n", + " split1_test_score split2_test_score split3_test_score \\\n", + "0 0.622807 0.631579 0.631579 \n", + "1 0.929825 0.973684 0.921053 \n", + "2 0.622807 0.631579 0.631579 \n", + "3 0.938596 0.973684 0.938596 \n", + "4 0.622807 0.631579 0.631579 \n", + "5 0.938596 0.973684 0.938596 \n", + "6 0.622807 0.631579 0.631579 \n", + "7 0.938596 0.973684 0.947368 \n", + "8 0.622807 0.631579 
0.631579 \n", + "9 0.956140 0.973684 0.947368 \n", + "10 0.622807 0.631579 0.631579 \n", + "11 0.947368 0.973684 0.947368 \n", + "12 0.622807 0.631579 0.631579 \n", + "13 0.947368 0.973684 0.956140 \n", + "14 0.622807 0.631579 0.631579 \n", + "15 0.947368 0.973684 0.956140 \n", + "16 0.622807 0.631579 0.631579 \n", + "17 0.947368 0.973684 0.956140 \n", + "\n", + " split4_test_score mean_test_score std_test_score rank_test_score \n", + "0 0.628319 0.627418 0.003949 10 \n", + "1 0.955752 0.945536 0.018689 9 \n", + "2 0.628319 0.627418 0.003949 10 \n", + "3 0.964602 0.950815 0.015238 7 \n", + "4 0.628319 0.627418 0.003949 10 \n", + "5 0.964602 0.950815 0.015238 7 \n", + "6 0.628319 0.627418 0.003949 10 \n", + "7 0.973451 0.952585 0.018008 4 \n", + "8 0.628319 0.627418 0.003949 10 \n", + "9 0.973451 0.957848 0.013983 1 \n", + "10 0.628319 0.627418 0.003949 10 \n", + "11 0.964602 0.952569 0.015245 5 \n", + "12 0.628319 0.627418 0.003949 10 \n", + "13 0.964602 0.954324 0.015049 2 \n", + "14 0.628319 0.627418 0.003949 10 \n", + "15 0.964602 0.954324 0.015049 2 \n", + "16 0.628319 0.627418 0.003949 10 \n", + "17 0.955752 0.952554 0.014234 6 " + ], + "text/html": [ + "\n", + " <div id=\"df-274de024-a912-4181-8729-6ddd37cac5fe\">\n", + " <div class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>mean_fit_time</th>\n", + " <th>std_fit_time</th>\n", + " <th>mean_score_time</th>\n", + " <th>std_score_time</th>\n", + " <th>param_C</th>\n", + " <th>param_kernel</th>\n", + " <th>params</th>\n", + " <th>split0_test_score</th>\n", + " <th>split1_test_score</th>\n", + " 
<th>split2_test_score</th>\n", + " <th>split3_test_score</th>\n", + " <th>split4_test_score</th>\n", + " <th>mean_test_score</th>\n", + " <th>std_test_score</th>\n", + " <th>rank_test_score</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.029902</td>\n", + " <td>0.001436</td>\n", + " <td>0.010291</td>\n", + " <td>0.002753</td>\n", + " <td>1</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 1, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1.545130</td>\n", + " <td>0.363439</td>\n", + " <td>0.000988</td>\n", + " <td>0.000111</td>\n", + " <td>1</td>\n", + " <td>linear</td>\n", + " <td>{'C': 1, 'kernel': 'linear'}</td>\n", + " <td>0.947368</td>\n", + " <td>0.929825</td>\n", + " <td>0.973684</td>\n", + " <td>0.921053</td>\n", + " <td>0.955752</td>\n", + " <td>0.945536</td>\n", + " <td>0.018689</td>\n", + " <td>9</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.021612</td>\n", + " <td>0.003719</td>\n", + " <td>0.005862</td>\n", + " <td>0.001188</td>\n", + " <td>10</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 10, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>2.679753</td>\n", + " <td>0.338731</td>\n", + " <td>0.000783</td>\n", + " <td>0.000021</td>\n", + " <td>10</td>\n", + " <td>linear</td>\n", + " <td>{'C': 10, 'kernel': 'linear'}</td>\n", + " <td>0.938596</td>\n", + " <td>0.938596</td>\n", + " <td>0.973684</td>\n", + " <td>0.938596</td>\n", + " <td>0.964602</td>\n", + " <td>0.950815</td>\n", + " <td>0.015238</td>\n", + " <td>7</td>\n", + " </tr>\n", 
+ " <tr>\n", + " <th>4</th>\n", + " <td>0.017899</td>\n", + " <td>0.000249</td>\n", + " <td>0.005177</td>\n", + " <td>0.000128</td>\n", + " <td>20</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 20, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>4.389827</td>\n", + " <td>0.959890</td>\n", + " <td>0.000762</td>\n", + " <td>0.000023</td>\n", + " <td>20</td>\n", + " <td>linear</td>\n", + " <td>{'C': 20, 'kernel': 'linear'}</td>\n", + " <td>0.938596</td>\n", + " <td>0.938596</td>\n", + " <td>0.973684</td>\n", + " <td>0.938596</td>\n", + " <td>0.964602</td>\n", + " <td>0.950815</td>\n", + " <td>0.015238</td>\n", + " <td>7</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>0.017665</td>\n", + " <td>0.000215</td>\n", + " <td>0.005195</td>\n", + " <td>0.000145</td>\n", + " <td>30</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 30, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>4.135243</td>\n", + " <td>0.615423</td>\n", + " <td>0.000766</td>\n", + " <td>0.000018</td>\n", + " <td>30</td>\n", + " <td>linear</td>\n", + " <td>{'C': 30, 'kernel': 'linear'}</td>\n", + " <td>0.929825</td>\n", + " <td>0.938596</td>\n", + " <td>0.973684</td>\n", + " <td>0.947368</td>\n", + " <td>0.973451</td>\n", + " <td>0.952585</td>\n", + " <td>0.018008</td>\n", + " <td>4</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>0.017831</td>\n", + " <td>0.000413</td>\n", + " <td>0.005109</td>\n", + " <td>0.000086</td>\n", + " <td>40</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 40, 'kernel': 'rbf'}</td>\n", + " 
<td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>5.609234</td>\n", + " <td>1.656255</td>\n", + " <td>0.000783</td>\n", + " <td>0.000016</td>\n", + " <td>40</td>\n", + " <td>linear</td>\n", + " <td>{'C': 40, 'kernel': 'linear'}</td>\n", + " <td>0.938596</td>\n", + " <td>0.956140</td>\n", + " <td>0.973684</td>\n", + " <td>0.947368</td>\n", + " <td>0.973451</td>\n", + " <td>0.957848</td>\n", + " <td>0.013983</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>0.017919</td>\n", + " <td>0.000422</td>\n", + " <td>0.005367</td>\n", + " <td>0.000457</td>\n", + " <td>50</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 50, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>5.726230</td>\n", + " <td>1.299215</td>\n", + " <td>0.000803</td>\n", + " <td>0.000098</td>\n", + " <td>50</td>\n", + " <td>linear</td>\n", + " <td>{'C': 50, 'kernel': 'linear'}</td>\n", + " <td>0.929825</td>\n", + " <td>0.947368</td>\n", + " <td>0.973684</td>\n", + " <td>0.947368</td>\n", + " <td>0.964602</td>\n", + " <td>0.952569</td>\n", + " <td>0.015245</td>\n", + " <td>5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>0.018075</td>\n", + " <td>0.000747</td>\n", + " <td>0.005090</td>\n", + " <td>0.000098</td>\n", + " <td>60</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 60, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>13</th>\n", + " <td>7.978153</td>\n", + " <td>4.144207</td>\n", + " <td>0.000769</td>\n", + " <td>0.000029</td>\n", + " <td>60</td>\n", + " <td>linear</td>\n", + " <td>{'C': 60, 'kernel': 'linear'}</td>\n", + " <td>0.929825</td>\n", + " <td>0.947368</td>\n", + " <td>0.973684</td>\n", + " <td>0.956140</td>\n", + " <td>0.964602</td>\n", + " <td>0.954324</td>\n", + " <td>0.015049</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>0.019716</td>\n", + " <td>0.000966</td>\n", + " <td>0.005725</td>\n", + " <td>0.000356</td>\n", + " <td>70</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 70, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>7.937261</td>\n", + " <td>2.641749</td>\n", + " <td>0.000793</td>\n", + " <td>0.000065</td>\n", + " <td>70</td>\n", + " <td>linear</td>\n", + " <td>{'C': 70, 'kernel': 'linear'}</td>\n", + " <td>0.929825</td>\n", + " <td>0.947368</td>\n", + " <td>0.973684</td>\n", + " <td>0.956140</td>\n", + " <td>0.964602</td>\n", + " <td>0.954324</td>\n", + " <td>0.015049</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>0.017654</td>\n", + " <td>0.000297</td>\n", + " <td>0.005030</td>\n", + " <td>0.000030</td>\n", + " <td>80</td>\n", + " <td>rbf</td>\n", + " <td>{'C': 80, 'kernel': 'rbf'}</td>\n", + " <td>0.622807</td>\n", + " <td>0.622807</td>\n", + " <td>0.631579</td>\n", + " <td>0.631579</td>\n", + " <td>0.628319</td>\n", + " <td>0.627418</td>\n", + " <td>0.003949</td>\n", + " <td>10</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>7.631227</td>\n", + " <td>4.898339</td>\n", + " <td>0.000884</td>\n", + " <td>0.000200</td>\n", + " <td>80</td>\n", + " <td>linear</td>\n", + " <td>{'C': 80, 'kernel': 'linear'}</td>\n", + " <td>0.929825</td>\n", 
+ " <td>0.947368</td>\n", + " <td>0.973684</td>\n", + " <td>0.956140</td>\n", + " <td>0.955752</td>\n", + " <td>0.952554</td>\n", + " <td>0.014234</td>\n", + " <td>6</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-274de024-a912-4181-8729-6ddd37cac5fe')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + " \n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", + " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", + " </svg>\n", + " </button>\n", + " \n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " flex-wrap:wrap;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px 
rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-274de024-a912-4181-8729-6ddd37cac5fe button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-274de024-a912-4181-8729-6ddd37cac5fe');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + " </div>\n", + " " + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df[['param_C','param_kernel','mean_test_score']]\n", + "#df[['param_kernel','mean_test_score']]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 614 + }, + "id": "tTYcEIGPcvgQ", + "outputId": "332559ac-2c29-4c42-fd73-3d36d3b6f9b7" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " param_C param_kernel mean_test_score\n", + "0 1 rbf 0.627418\n", + "1 1 linear 0.945536\n", + "2 10 rbf 0.627418\n", + "3 10 linear 0.950815\n", + "4 20 rbf 0.627418\n", + "5 20 linear 0.950815\n", + "6 30 rbf 0.627418\n", + 
"7 30 linear 0.952585\n", + "8 40 rbf 0.627418\n", + "9 40 linear 0.957848\n", + "10 50 rbf 0.627418\n", + "11 50 linear 0.952569\n", + "12 60 rbf 0.627418\n", + "13 60 linear 0.954324\n", + "14 70 rbf 0.627418\n", + "15 70 linear 0.954324\n", + "16 80 rbf 0.627418\n", + "17 80 linear 0.952554" + ], + "text/html": [ + "\n", + " <div id=\"df-7fad378b-bb88-4f42-97d1-0dad8980cd2b\">\n", + " <div class=\"colab-df-container\">\n", + " <div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>param_C</th>\n", + " <th>param_kernel</th>\n", + " <th>mean_test_score</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>1</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>linear</td>\n", + " <td>0.945536</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>10</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>10</td>\n", + " <td>linear</td>\n", + " <td>0.950815</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>20</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>20</td>\n", + " <td>linear</td>\n", + " <td>0.950815</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>30</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>30</td>\n", + " <td>linear</td>\n", + " <td>0.952585</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>40</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + 
" </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>40</td>\n", + " <td>linear</td>\n", + " <td>0.957848</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>50</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>50</td>\n", + " <td>linear</td>\n", + " <td>0.952569</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>60</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>60</td>\n", + " <td>linear</td>\n", + " <td>0.954324</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>70</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>70</td>\n", + " <td>linear</td>\n", + " <td>0.954324</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>80</td>\n", + " <td>rbf</td>\n", + " <td>0.627418</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>80</td>\n", + " <td>linear</td>\n", + " <td>0.952554</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>\n", + " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7fad378b-bb88-4f42-97d1-0dad8980cd2b')\"\n", + " title=\"Convert this dataframe to an interactive table.\"\n", + " style=\"display:none;\">\n", + " \n", + " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", + " width=\"24px\">\n", + " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", + " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", + " </svg>\n", 
+ " </button>\n", + " \n", + " <style>\n", + " .colab-df-container {\n", + " display:flex;\n", + " flex-wrap:wrap;\n", + " gap: 12px;\n", + " }\n", + "\n", + " .colab-df-convert {\n", + " background-color: #E8F0FE;\n", + " border: none;\n", + " border-radius: 50%;\n", + " cursor: pointer;\n", + " display: none;\n", + " fill: #1967D2;\n", + " height: 32px;\n", + " padding: 0 0 0 0;\n", + " width: 32px;\n", + " }\n", + "\n", + " .colab-df-convert:hover {\n", + " background-color: #E2EBFA;\n", + " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", + " fill: #174EA6;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert {\n", + " background-color: #3B4455;\n", + " fill: #D2E3FC;\n", + " }\n", + "\n", + " [theme=dark] .colab-df-convert:hover {\n", + " background-color: #434B5C;\n", + " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", + " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", + " fill: #FFFFFF;\n", + " }\n", + " </style>\n", + "\n", + " <script>\n", + " const buttonEl =\n", + " document.querySelector('#df-7fad378b-bb88-4f42-97d1-0dad8980cd2b button.colab-df-convert');\n", + " buttonEl.style.display =\n", + " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", + "\n", + " async function convertToInteractive(key) {\n", + " const element = document.querySelector('#df-7fad378b-bb88-4f42-97d1-0dad8980cd2b');\n", + " const dataTable =\n", + " await google.colab.kernel.invokeFunction('convertToInteractive',\n", + " [key], {});\n", + " if (!dataTable) return;\n", + "\n", + " const docLinkHtml = 'Like what you see? 
Visit the ' +\n", + " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", + " + ' to learn more about interactive tables.';\n", + " element.innerHTML = '';\n", + " dataTable['output_type'] = 'display_data';\n", + " await google.colab.output.renderOutput(dataTable, element);\n", + " const docLink = document.createElement('div');\n", + " docLink.innerHTML = docLinkHtml;\n", + " element.appendChild(docLink);\n", + " }\n", + " </script>\n", + " </div>\n", + " </div>\n", + " " + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "clf.best_score_" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lI8Dy0Lyc8ue", + "outputId": "81c3bfdc-c00a-4b06-9243-4881b6ac5db9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.9578481602235678" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ] + }, + { + "cell_type": "code", + "source": [ + "###LINEAR REGRESSION HYPERPARAMETER TUNING" + ], + "metadata": { + "id": "Y5yg8EHwdIeB" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "param_grid = [ \n", + " {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],\n", + " 'C' : np.logspace(-4, 4, 20),\n", + " 'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],\n", + " 'max_iter' : [100, 1000,2500, 5000]\n", + " }\n", + "]" + ], + "metadata": { + "id": "VXf9FwGemhIa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "modellr=LogisticRegression()\n", + "clf2 = GridSearchCV(modellr, param_grid = param_grid, cv = 3, verbose=True, n_jobs=-1)\n", + "best_clf = clf2.fit(x,y)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xjW4CcV2miHP", + "outputId": "1d23868e-8ba3-447b-8abf-07be55da73c1" + }, + "execution_count": null, + "outputs": [ + 
{ + "output_type": "stream", + "name": "stdout", + "text": [ + "Fitting 3 folds for each of 1600 candidates, totalling 4800 fits\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py:372: FitFailedWarning: \n", + "2160 fits failed out of a total of 4800.\n", + "The score on these train-test partitions for these parameters will be set to nan.\n", + "If these failures are not expected, you can try to debug them by setting error_score='raise'.\n", + "\n", + "Below are more details about the failures:\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File 
\"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver sag supports only 'l2' or 'none' penalties, got l1 penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got elasticnet penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File 
\"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got elasticnet penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 459, in _check_solver\n", + " solver\n", + "ValueError: Only 'saga' solver supports elasticnet penalty, got solver=liblinear.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 449, 
in _check_solver\n", + " % (solver, penalty)\n", + "ValueError: Solver sag supports only 'l2' or 'none' penalties, got elasticnet penalty.\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1473, in fit\n", + " % self.l1_ratio\n", + "ValueError: l1_ratio must be between 0 and 1; got (l1_ratio=None)\n", + "\n", + "--------------------------------------------------------------------------------\n", + "240 fits failed with the following error:\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py\", line 680, in _fit_and_score\n", + " estimator.fit(X_train, y_train, **fit_params)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 1461, in fit\n", + " solver = _check_solver(self.solver, self.penalty, self.dual)\n", + " File \"/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py\", line 464, in _check_solver\n", + " raise ValueError(\"penalty='none' is not supported for the liblinear solver\")\n", + "ValueError: penalty='none' is not supported for the liblinear solver\n", + "\n", + " warnings.warn(some_fits_failed_message, FitFailedWarning)\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_search.py:972: UserWarning: One or more of the test scores are non-finite: [ nan nan 0.3725796 ... 
nan 0.92091339 0.92267706]\n", + " category=UserWarning,\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:1484: UserWarning: Setting penalty='none' will ignore the C and l1_ratio parameters\n", + " \"Setting penalty='none' will ignore the C and l1_ratio parameters\"\n", + "/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(best_clf.best_estimator_)\n", + "print (f'Accuracy: {best_clf.score(x,y):.3f}')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cMWT6T5xmnR9", + "outputId": "3300d07e-73ff-42c1-836d-8446ecbfb9a2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "LogisticRegression(C=0.0001, max_iter=5000, penalty='none')\n", + "Accuracy: 0.986\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "best_clf.best_estimator_" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4ONmIWVsnGby", + "outputId": "20c6d67f-46c5-4e30-b76c-0b7d48e84308" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LogisticRegression(C=0.0001, max_iter=5000, penalty='none')" + ] + }, + "metadata": {}, + "execution_count": 57 + } + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "UY0huq3D097C" + }, + "execution_count": null, + 
"outputs": [] + } + ] +} \ No newline at end of file