diff --git a/BreastCancerDetectionKNN.ipynb b/BreastCancerDetectionKNN.ipynb new file mode 100644 index 0000000..8715b29 --- /dev/null +++ b/BreastCancerDetectionKNN.ipynb @@ -0,0 +1,1926 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3WqKX1WmV6T2" + }, + "source": [ + "**Breast Cancer Detection with Logistic Regression**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "qYA5sD53VKHl" + }, + "outputs": [], + "source": [ + "#importing libraries\n", + "import numpy as np\n", + "import sklearn.datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "JDudCNzrWLO5", + "outputId": "cadc6e48-ce5d-4ad6-cc1c-d2c32c7ba832" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n", + " 1.189e-01],\n", + " [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n", + " 8.902e-02],\n", + " [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n", + " 8.758e-02],\n", + " ...,\n", + " [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n", + " 7.820e-02],\n", + " [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n", + " 1.240e-01],\n", + " [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n", + " 7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n", + " 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 0, 
0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n", + " 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n", + " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n", + " 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n", + " 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n", + " 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n", + " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n", + " 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n", + " 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n", + " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", + " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]), 'frame': None, 'target_names': array(['malignant', 'benign'], dtype='\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionclass
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...17.33184.602019.00.16220.66560.71190.26540.46010.118900
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...23.41158.801956.00.12380.18660.24160.18600.27500.089020
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...25.53152.501709.00.14440.42450.45040.24300.36130.087580
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...26.5098.87567.70.20980.86630.68690.25750.66380.173000
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...16.67152.201575.00.13740.20500.40000.16250.23640.076780
\n", + "

5 rows × 31 columns

\n", + "" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "0 17.99 10.38 122.80 1001.0 0.11840 \n", + "1 20.57 17.77 132.90 1326.0 0.08474 \n", + "2 19.69 21.25 130.00 1203.0 0.10960 \n", + "3 11.42 20.38 77.58 386.1 0.14250 \n", + "4 20.29 14.34 135.10 1297.0 0.10030 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "0 0.27760 0.3001 0.14710 0.2419 \n", + "1 0.07864 0.0869 0.07017 0.1812 \n", + "2 0.15990 0.1974 0.12790 0.2069 \n", + "3 0.28390 0.2414 0.10520 0.2597 \n", + "4 0.13280 0.1980 0.10430 0.1809 \n", + "\n", + " mean fractal dimension ... worst texture worst perimeter worst area \\\n", + "0 0.07871 ... 17.33 184.60 2019.0 \n", + "1 0.05667 ... 23.41 158.80 1956.0 \n", + "2 0.05999 ... 25.53 152.50 1709.0 \n", + "3 0.09744 ... 26.50 98.87 567.7 \n", + "4 0.05883 ... 16.67 152.20 1575.0 \n", + "\n", + " worst smoothness worst compactness worst concavity worst concave points \\\n", + "0 0.1622 0.6656 0.7119 0.2654 \n", + "1 0.1238 0.1866 0.2416 0.1860 \n", + "2 0.1444 0.4245 0.4504 0.2430 \n", + "3 0.2098 0.8663 0.6869 0.2575 \n", + "4 0.1374 0.2050 0.4000 0.1625 \n", + "\n", + " worst symmetry worst fractal dimension class \n", + "0 0.4601 0.11890 0 \n", + "1 0.2750 0.08902 0 \n", + "2 0.3613 0.08758 0 \n", + "3 0.6638 0.17300 0 \n", + "4 0.2364 0.07678 0 \n", + "\n", + "[5 rows x 31 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 351 + }, + "id": "QTUBDupdXPZH", + "outputId": "54baafb5-0203-44ec-ff8a-48ab9e93f719" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionclass
count569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000...569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000
mean14.12729219.28964991.969033654.8891040.0963600.1043410.0887990.0489190.1811620.062798...25.677223107.261213880.5831280.1323690.2542650.2721880.1146060.2900760.0839460.627417
std3.5240494.30103624.298981351.9141290.0140640.0528130.0797200.0388030.0274140.007060...6.14625833.602542569.3569930.0228320.1573360.2086240.0657320.0618670.0180610.483918
min6.9810009.71000043.790000143.5000000.0526300.0193800.0000000.0000000.1060000.049960...12.02000050.410000185.2000000.0711700.0272900.0000000.0000000.1565000.0550400.000000
25%11.70000016.17000075.170000420.3000000.0863700.0649200.0295600.0203100.1619000.057700...21.08000084.110000515.3000000.1166000.1472000.1145000.0649300.2504000.0714600.000000
50%13.37000018.84000086.240000551.1000000.0958700.0926300.0615400.0335000.1792000.061540...25.41000097.660000686.5000000.1313000.2119000.2267000.0999300.2822000.0800401.000000
75%15.78000021.800000104.100000782.7000000.1053000.1304000.1307000.0740000.1957000.066120...29.720000125.4000001084.0000000.1460000.3391000.3829000.1614000.3179000.0920801.000000
max28.11000039.280000188.5000002501.0000000.1634000.3454000.4268000.2012000.3040000.097440...49.540000251.2000004254.0000000.2226001.0580001.2520000.2910000.6638000.2075001.000000
\n", + "

8 rows × 31 columns

\n", + "
" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 14.127292 19.289649 91.969033 654.889104 \n", + "std 3.524049 4.301036 24.298981 351.914129 \n", + "min 6.981000 9.710000 43.790000 143.500000 \n", + "25% 11.700000 16.170000 75.170000 420.300000 \n", + "50% 13.370000 18.840000 86.240000 551.100000 \n", + "75% 15.780000 21.800000 104.100000 782.700000 \n", + "max 28.110000 39.280000 188.500000 2501.000000 \n", + "\n", + " mean smoothness mean compactness mean concavity mean concave points \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 0.096360 0.104341 0.088799 0.048919 \n", + "std 0.014064 0.052813 0.079720 0.038803 \n", + "min 0.052630 0.019380 0.000000 0.000000 \n", + "25% 0.086370 0.064920 0.029560 0.020310 \n", + "50% 0.095870 0.092630 0.061540 0.033500 \n", + "75% 0.105300 0.130400 0.130700 0.074000 \n", + "max 0.163400 0.345400 0.426800 0.201200 \n", + "\n", + " mean symmetry mean fractal dimension ... worst texture \\\n", + "count 569.000000 569.000000 ... 569.000000 \n", + "mean 0.181162 0.062798 ... 25.677223 \n", + "std 0.027414 0.007060 ... 6.146258 \n", + "min 0.106000 0.049960 ... 12.020000 \n", + "25% 0.161900 0.057700 ... 21.080000 \n", + "50% 0.179200 0.061540 ... 25.410000 \n", + "75% 0.195700 0.066120 ... 29.720000 \n", + "max 0.304000 0.097440 ... 
49.540000 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 107.261213 880.583128 0.132369 0.254265 \n", + "std 33.602542 569.356993 0.022832 0.157336 \n", + "min 50.410000 185.200000 0.071170 0.027290 \n", + "25% 84.110000 515.300000 0.116600 0.147200 \n", + "50% 97.660000 686.500000 0.131300 0.211900 \n", + "75% 125.400000 1084.000000 0.146000 0.339100 \n", + "max 251.200000 4254.000000 0.222600 1.058000 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "count 569.000000 569.000000 569.000000 \n", + "mean 0.272188 0.114606 0.290076 \n", + "std 0.208624 0.065732 0.061867 \n", + "min 0.000000 0.000000 0.156500 \n", + "25% 0.114500 0.064930 0.250400 \n", + "50% 0.226700 0.099930 0.282200 \n", + "75% 0.382900 0.161400 0.317900 \n", + "max 1.252000 0.291000 0.663800 \n", + "\n", + " worst fractal dimension class \n", + "count 569.000000 569.000000 \n", + "mean 0.083946 0.627417 \n", + "std 0.018061 0.483918 \n", + "min 0.055040 0.000000 \n", + "25% 0.071460 0.000000 \n", + "50% 0.080040 1.000000 \n", + "75% 0.092080 1.000000 \n", + "max 0.207500 1.000000 \n", + "\n", + "[8 rows x 31 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "ard7qMIwXVgO", + "outputId": "3a78a525-e7f3-48a8-a268-d91357c847b8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "class\n", + "1 357\n", + "0 212\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "print(data['class'].value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "d14AxiQDXdIU", + 
"outputId": "999e81b8-c231-4cbd-a51f-e0b56e8b7050" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['malignant' 'benign']\n" + ] + } + ], + "source": [ + "print(breast_cancer.target_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 196 + }, + "id": "9dQOD_nVXhVM", + "outputId": "260044dd-2012-44a0-9c44-f1609783994b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
class
017.46283021.604906115.365377978.3764150.1028980.1451880.1607750.0879900.1929090.062680...21.13481129.318208141.3703301422.2863210.1448450.3748240.4506060.1822370.3234680.091530
112.14652417.91476278.075406462.7901960.0924780.0800850.0460580.0257170.1741860.062867...13.37980123.51507087.005938558.8994400.1249590.1826730.1662380.0744440.2702460.079442
\n", + "

2 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "class \n", + "0 17.462830 21.604906 115.365377 978.376415 0.102898 \n", + "1 12.146524 17.914762 78.075406 462.790196 0.092478 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "class \n", + "0 0.145188 0.160775 0.087990 0.192909 \n", + "1 0.080085 0.046058 0.025717 0.174186 \n", + "\n", + " mean fractal dimension ... worst radius worst texture \\\n", + "class ... \n", + "0 0.062680 ... 21.134811 29.318208 \n", + "1 0.062867 ... 13.379801 23.515070 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "class \n", + "0 141.370330 1422.286321 0.144845 0.374824 \n", + "1 87.005938 558.899440 0.124959 0.182673 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "class \n", + "0 0.450606 0.182237 0.323468 \n", + "1 0.166238 0.074444 0.270246 \n", + "\n", + " worst fractal dimension \n", + "class \n", + "0 0.091530 \n", + "1 0.079442 \n", + "\n", + "[2 rows x 30 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby('class').mean()\n", + "#0 = malignant\n", + "#1 = benign" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ZLnktrYXuZ4" + }, + "source": [ + "Train and Test Data Split" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "RDCD-3t2XmNG" + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, Y_train, Y_test = train_test_split(X,Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "JgoRzmx4Xs0j", + "outputId": "1b4a6567-90ed-4500-d7ff-c38db180b6cd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(569,) (426,) (143,)\n" + ] + } 
+ ], + "source": [ + "print(Y.shape, Y_train.shape, Y_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "TfXjgKecX2aR" + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)\n", + "#test_size --> to specify the percentage of test data needed" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "Vayg1nTMX5GA", + "outputId": "3b91257c-711d-4a9c-d8a8-d352b3d6416b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(569,) (512,) (57,)\n" + ] + } + ], + "source": [ + "print(Y.shape, Y_train.shape, Y_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "s3TkXPZ8X72R", + "outputId": "86dcf406-c372-41e5-aab0-e79c7c84e9b7" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6274165202108963 0.62109375 0.6842105263157895\n" + ] + } + ], + "source": [ + "print(Y.mean(), Y_train.mean(), Y_test.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "_jFZwc9OX-Av" + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y)\n", + "#stratify --> for correct distribution of data as of the original data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "GZY9NLkNYBez", + "outputId": "be216507-a170-4797-942d-025b8629dcd2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.6274165202108963 0.626953125 0.631578947368421\n" + ] + } + ], + "source": [ + "print(Y.mean(), Y_train.mean(), Y_test.mean())" + ] + }, + { + "cell_type": "code", + 
"execution_count": 21, + "metadata": { + "id": "6hnHZaOWYNb1" + }, + "outputs": [], + "source": [ + "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=1)\n", + "#random_state --> specific split of data, each value of random_state splits the data differently" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "P9t5iXyIYP95", + "outputId": "e3d22056-9b9b-44f2-a5cb-434341a6c8b2" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "61.31637960106119 67.04963097269005 61.890712339519624\n" + ] + } + ], + "source": [ + "print(X_train.mean(), X_test.mean(), X.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "v-fNBVlrYR4I", + "outputId": "44bea766-7649-4732-94a7-f24acb0337a8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1.490e+01 2.253e+01 1.021e+02 ... 2.475e-01 2.866e-01 1.155e-01]\n", + " [1.205e+01 1.463e+01 7.804e+01 ... 6.548e-02 2.747e-01 8.301e-02]\n", + " [1.311e+01 1.556e+01 8.721e+01 ... 1.986e-01 3.147e-01 1.405e-01]\n", + " ...\n", + " [1.258e+01 1.840e+01 7.983e+01 ... 8.772e-03 2.505e-01 6.431e-02]\n", + " [1.349e+01 2.230e+01 8.691e+01 ... 1.282e-01 2.871e-01 6.917e-02]\n", + " [1.919e+01 1.594e+01 1.263e+02 ... 
1.777e-01 2.443e-01 6.251e-02]]\n" + ] + } + ], + "source": [ + "print(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dlt2G3FqYd4_" + }, + "source": [ + "**K-Nearest Neighbors (KNN) Classifier**" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "A63VGGb9YVRp" + }, + "outputs": [], + "source": [ + "#import KNeighborsClassifier from sklearn\n", + "from sklearn.neighbors import KNeighborsClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "dNLK1uKmYZEM" + }, + "outputs": [], + "source": [ + "classifier = KNeighborsClassifier(n_neighbors=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "XAMvZvsrYjhw", + "outputId": "c849137f-c146-4b53-d813-6239d4b25a68" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsClassifier(n_neighbors=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(n_neighbors=3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#training the model on training data\n", + "classifier.fit(X_train, Y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ev9Eq9QMYxhA" + }, + "source": [ + "**Evaluation of the Model**" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "x4Cw-CwoYlzE" + }, + "outputs": [], + "source": [ + "#import accuracy_score\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "J7iCwz2QYrN_" + }, + "outputs": [], + "source": [ + "prediction_on_training_data = classifier.predict(X_train)\n", + "accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "SYIS_NDgZaxk", + "outputId": "3e0ff651-9f72-4791-8b47-839caa45a6cd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on training data : 0.955078125\n" + ] + } + ], + "source": [ + "print('Accuracy on training data :', accuracy_on_training_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "OzBEFHyyZc0X" + }, + "outputs": [], + "source": [ + "#prediction on test_data\n", + "prediction_on_test_data = classifier.predict(X_test)\n", + "accuracy_on_test_data = accuracy_score(Y_test, prediction_on_test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "DO7tebMAZhgC", + "outputId": "a47d433c-4503-4f93-f4b9-83e1231d112c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy on test data : 0.9298245614035088\n" + 
] + } + ], + "source": [ + "print('Accuracy on test data :', accuracy_on_test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hgTziDZmZpX8" + }, + "source": [ + "Detecting whether the Patient has Breast Cancer in Benign or Malignant Stage" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "NgZj6gRwZmOe", + "outputId": "59299578-eff6-42cb-8fb1-19ff6125a9cf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(17.99, 10.38, 122.8, 1001, 0.1184, 0.2776, 0.3001, 0.1471, 0.2419, 0.07871, 1.095, 0.9053, 8.589, 153.4, 0.006399, 0.04904, 0.05373, 0.01587, 0.03003, 0.006193, 25.38, 17.33, 184.6, 2019, 0.1622, 0.6656, 0.7119, 0.2654, 0.4601, 0.1189)\n" + ] + } + ], + "source": [ + "input_data = (17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189)\n", + "#change the input_data to numpy_array to make prediction\n", + "input_data_as_numpy_array = np.array(input_data)\n", + "print(input_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "_0QS56eSZ6xR" + }, + "outputs": [], + "source": [ + "#reshape the array we are predicting the output for one instance\n", + "input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "F-HjwKa_Z-pT", + "outputId": "b7f617b4-db60-4a21-9b5f-9e0a46919e1a" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0]\n" + ] + } + ], + "source": [ + "#prediction\n", + "prediction = classifier.predict(input_data_reshaped)\n", + "print(prediction) #returns a list with elements 0 [if Malignant] and 1 [if 
Benign]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "65lx_nTzaDHn", + "outputId": "503b3b16-d731-4111-ec93-4bbd3f2eef93" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Breast Cancer is at Malignant Stage.\n" + ] + } + ], + "source": [ + "if (prediction[0]==0):\n", + " print(\"The Breast Cancer is at Malignant Stage.\")\n", + "else:\n", + " print(\"The Breast Cancer is at Benign Stage.\")" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "BreastCancerDetection.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3.10.3 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.3" + }, + "vscode": { + "interpreter": { + "hash": "ecf692a57d25ce2e9c36ee4bfd0c74b739bffc9771b8483c93bb91a271af2e18" + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/BreastCancerDetection.ipynb b/BreastCancerDetectionLogisticRegression.ipynb similarity index 54% rename from BreastCancerDetection.ipynb rename to BreastCancerDetectionLogisticRegression.ipynb index f9e0878..c5dec74 100644 --- a/BreastCancerDetection.ipynb +++ b/BreastCancerDetectionLogisticRegression.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "id": "qYA5sD53VKHl" }, @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -75,7 +75,7 @@ " 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n", " 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 
1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", - " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]), 'target_names': array(['malignant', 'benign'], dtype='mean concave points\n", " mean symmetry\n", " mean fractal dimension\n", - " radius error\n", - " texture error\n", - " perimeter error\n", - " area error\n", - " smoothness error\n", - " compactness error\n", - " concavity error\n", - " concave points error\n", - " symmetry error\n", - " fractal dimension error\n", - " worst radius\n", + " ...\n", " worst texture\n", " worst perimeter\n", " worst area\n", @@ -299,17 +304,7 @@ " 0.14710\n", " 0.2419\n", " 0.07871\n", - " 1.0950\n", - " 0.9053\n", - " 8.589\n", - " 153.40\n", - " 0.006399\n", - " 0.04904\n", - " 0.05373\n", - " 0.01587\n", - " 0.03003\n", - " 0.006193\n", - " 25.38\n", + " ...\n", " 17.33\n", " 184.60\n", " 2019.0\n", @@ -333,17 +328,7 @@ " 0.07017\n", " 0.1812\n", " 0.05667\n", - " 0.5435\n", - " 0.7339\n", - " 3.398\n", - " 74.08\n", - " 0.005225\n", - " 0.01308\n", - " 0.01860\n", - " 0.01340\n", - " 0.01389\n", - " 0.003532\n", - " 24.99\n", + " ...\n", " 23.41\n", " 158.80\n", " 1956.0\n", @@ -367,17 +352,7 @@ " 0.12790\n", " 0.2069\n", " 0.05999\n", - " 0.7456\n", - " 0.7869\n", - " 4.585\n", - " 94.03\n", - " 0.006150\n", - " 0.04006\n", - " 0.03832\n", - " 0.02058\n", - " 0.02250\n", - " 0.004571\n", - " 23.57\n", + " ...\n", " 25.53\n", " 152.50\n", " 1709.0\n", @@ -401,17 +376,7 @@ " 0.10520\n", " 0.2597\n", " 0.09744\n", - " 0.4956\n", - " 1.1560\n", - " 3.445\n", - " 27.23\n", - " 0.009110\n", - " 0.07458\n", - " 0.05661\n", - " 0.01867\n", - " 0.05963\n", - " 0.009208\n", - " 14.91\n", + " ...\n", " 26.50\n", " 98.87\n", " 567.7\n", @@ -435,17 +400,7 @@ " 0.10430\n", " 0.1809\n", " 0.05883\n", - " 0.7572\n", - " 0.7813\n", - " 5.438\n", - " 94.44\n", - " 0.011490\n", - " 0.02461\n", - " 0.05688\n", - " 0.01885\n", - " 0.01756\n", - " 0.005115\n", - " 22.54\n", + " ...\n", " 16.67\n", " 152.20\n", " 1575.0\n", @@ 
-459,20 +414,49 @@ " \n", " \n", "\n", + "

5 rows × 31 columns

\n", "" ], "text/plain": [ - " mean radius mean texture ... worst fractal dimension class\n", - "0 17.99 10.38 ... 0.11890 0\n", - "1 20.57 17.77 ... 0.08902 0\n", - "2 19.69 21.25 ... 0.08758 0\n", - "3 11.42 20.38 ... 0.17300 0\n", - "4 20.29 14.34 ... 0.07678 0\n", + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "0 17.99 10.38 122.80 1001.0 0.11840 \n", + "1 20.57 17.77 132.90 1326.0 0.08474 \n", + "2 19.69 21.25 130.00 1203.0 0.10960 \n", + "3 11.42 20.38 77.58 386.1 0.14250 \n", + "4 20.29 14.34 135.10 1297.0 0.10030 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "0 0.27760 0.3001 0.14710 0.2419 \n", + "1 0.07864 0.0869 0.07017 0.1812 \n", + "2 0.15990 0.1974 0.12790 0.2069 \n", + "3 0.28390 0.2414 0.10520 0.2597 \n", + "4 0.13280 0.1980 0.10430 0.1809 \n", + "\n", + " mean fractal dimension ... worst texture worst perimeter worst area \\\n", + "0 0.07871 ... 17.33 184.60 2019.0 \n", + "1 0.05667 ... 23.41 158.80 1956.0 \n", + "2 0.05999 ... 25.53 152.50 1709.0 \n", + "3 0.09744 ... 26.50 98.87 567.7 \n", + "4 0.05883 ... 
16.67 152.20 1575.0 \n", + "\n", + " worst smoothness worst compactness worst concavity worst concave points \\\n", + "0 0.1622 0.6656 0.7119 0.2654 \n", + "1 0.1238 0.1866 0.2416 0.1860 \n", + "2 0.1444 0.4245 0.4504 0.2430 \n", + "3 0.2098 0.8663 0.6869 0.2575 \n", + "4 0.1374 0.2050 0.4000 0.1625 \n", + "\n", + " worst symmetry worst fractal dimension class \n", + "0 0.4601 0.11890 0 \n", + "1 0.2750 0.08902 0 \n", + "2 0.3613 0.08758 0 \n", + "3 0.6638 0.17300 0 \n", + "4 0.2364 0.07678 0 \n", "\n", "[5 rows x 31 columns]" ] }, - "execution_count": 11, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -483,7 +467,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -524,17 +508,7 @@ " mean concave points\n", " mean symmetry\n", " mean fractal dimension\n", - " radius error\n", - " texture error\n", - " perimeter error\n", - " area error\n", - " smoothness error\n", - " compactness error\n", - " concavity error\n", - " concave points error\n", - " symmetry error\n", - " fractal dimension error\n", - " worst radius\n", + " ...\n", " worst texture\n", " worst perimeter\n", " worst area\n", @@ -560,17 +534,7 @@ " 569.000000\n", " 569.000000\n", " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", - " 569.000000\n", + " ...\n", " 569.000000\n", " 569.000000\n", " 569.000000\n", @@ -594,17 +558,7 @@ " 0.048919\n", " 0.181162\n", " 0.062798\n", - " 0.405172\n", - " 1.216853\n", - " 2.866059\n", - " 40.337079\n", - " 0.007041\n", - " 0.025478\n", - " 0.031894\n", - " 0.011796\n", - " 0.020542\n", - " 0.003795\n", - " 16.269190\n", + " ...\n", " 25.677223\n", " 107.261213\n", " 880.583128\n", @@ -628,17 +582,7 @@ " 0.038803\n", " 0.027414\n", " 0.007060\n", - " 0.277313\n", - " 0.551648\n", - " 
2.021855\n", - " 45.491006\n", - " 0.003003\n", - " 0.017908\n", - " 0.030186\n", - " 0.006170\n", - " 0.008266\n", - " 0.002646\n", - " 4.833242\n", + " ...\n", " 6.146258\n", " 33.602542\n", " 569.356993\n", @@ -662,17 +606,7 @@ " 0.000000\n", " 0.106000\n", " 0.049960\n", - " 0.111500\n", - " 0.360200\n", - " 0.757000\n", - " 6.802000\n", - " 0.001713\n", - " 0.002252\n", - " 0.000000\n", - " 0.000000\n", - " 0.007882\n", - " 0.000895\n", - " 7.930000\n", + " ...\n", " 12.020000\n", " 50.410000\n", " 185.200000\n", @@ -696,17 +630,7 @@ " 0.020310\n", " 0.161900\n", " 0.057700\n", - " 0.232400\n", - " 0.833900\n", - " 1.606000\n", - " 17.850000\n", - " 0.005169\n", - " 0.013080\n", - " 0.015090\n", - " 0.007638\n", - " 0.015160\n", - " 0.002248\n", - " 13.010000\n", + " ...\n", " 21.080000\n", " 84.110000\n", " 515.300000\n", @@ -730,17 +654,7 @@ " 0.033500\n", " 0.179200\n", " 0.061540\n", - " 0.324200\n", - " 1.108000\n", - " 2.287000\n", - " 24.530000\n", - " 0.006380\n", - " 0.020450\n", - " 0.025890\n", - " 0.010930\n", - " 0.018730\n", - " 0.003187\n", - " 14.970000\n", + " ...\n", " 25.410000\n", " 97.660000\n", " 686.500000\n", @@ -764,17 +678,7 @@ " 0.074000\n", " 0.195700\n", " 0.066120\n", - " 0.478900\n", - " 1.474000\n", - " 3.357000\n", - " 45.190000\n", - " 0.008146\n", - " 0.032450\n", - " 0.042050\n", - " 0.014710\n", - " 0.023480\n", - " 0.004558\n", - " 18.790000\n", + " ...\n", " 29.720000\n", " 125.400000\n", " 1084.000000\n", @@ -798,17 +702,7 @@ " 0.201200\n", " 0.304000\n", " 0.097440\n", - " 2.873000\n", - " 4.885000\n", - " 21.980000\n", - " 542.200000\n", - " 0.031130\n", - " 0.135400\n", - " 0.396000\n", - " 0.052790\n", - " 0.078950\n", - " 0.029840\n", - " 36.040000\n", + " ...\n", " 49.540000\n", " 251.200000\n", " 4254.000000\n", @@ -822,23 +716,74 @@ " \n", " \n", "\n", + "

8 rows × 31 columns

\n", "" ], "text/plain": [ - " mean radius mean texture ... worst fractal dimension class\n", - "count 569.000000 569.000000 ... 569.000000 569.000000\n", - "mean 14.127292 19.289649 ... 0.083946 0.627417\n", - "std 3.524049 4.301036 ... 0.018061 0.483918\n", - "min 6.981000 9.710000 ... 0.055040 0.000000\n", - "25% 11.700000 16.170000 ... 0.071460 0.000000\n", - "50% 13.370000 18.840000 ... 0.080040 1.000000\n", - "75% 15.780000 21.800000 ... 0.092080 1.000000\n", - "max 28.110000 39.280000 ... 0.207500 1.000000\n", + " mean radius mean texture mean perimeter mean area \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 14.127292 19.289649 91.969033 654.889104 \n", + "std 3.524049 4.301036 24.298981 351.914129 \n", + "min 6.981000 9.710000 43.790000 143.500000 \n", + "25% 11.700000 16.170000 75.170000 420.300000 \n", + "50% 13.370000 18.840000 86.240000 551.100000 \n", + "75% 15.780000 21.800000 104.100000 782.700000 \n", + "max 28.110000 39.280000 188.500000 2501.000000 \n", + "\n", + " mean smoothness mean compactness mean concavity mean concave points \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 0.096360 0.104341 0.088799 0.048919 \n", + "std 0.014064 0.052813 0.079720 0.038803 \n", + "min 0.052630 0.019380 0.000000 0.000000 \n", + "25% 0.086370 0.064920 0.029560 0.020310 \n", + "50% 0.095870 0.092630 0.061540 0.033500 \n", + "75% 0.105300 0.130400 0.130700 0.074000 \n", + "max 0.163400 0.345400 0.426800 0.201200 \n", + "\n", + " mean symmetry mean fractal dimension ... worst texture \\\n", + "count 569.000000 569.000000 ... 569.000000 \n", + "mean 0.181162 0.062798 ... 25.677223 \n", + "std 0.027414 0.007060 ... 6.146258 \n", + "min 0.106000 0.049960 ... 12.020000 \n", + "25% 0.161900 0.057700 ... 21.080000 \n", + "50% 0.179200 0.061540 ... 25.410000 \n", + "75% 0.195700 0.066120 ... 29.720000 \n", + "max 0.304000 0.097440 ... 
49.540000 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "count 569.000000 569.000000 569.000000 569.000000 \n", + "mean 107.261213 880.583128 0.132369 0.254265 \n", + "std 33.602542 569.356993 0.022832 0.157336 \n", + "min 50.410000 185.200000 0.071170 0.027290 \n", + "25% 84.110000 515.300000 0.116600 0.147200 \n", + "50% 97.660000 686.500000 0.131300 0.211900 \n", + "75% 125.400000 1084.000000 0.146000 0.339100 \n", + "max 251.200000 4254.000000 0.222600 1.058000 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "count 569.000000 569.000000 569.000000 \n", + "mean 0.272188 0.114606 0.290076 \n", + "std 0.208624 0.065732 0.061867 \n", + "min 0.000000 0.000000 0.156500 \n", + "25% 0.114500 0.064930 0.250400 \n", + "50% 0.226700 0.099930 0.282200 \n", + "75% 0.382900 0.161400 0.317900 \n", + "max 1.252000 0.291000 0.663800 \n", + "\n", + " worst fractal dimension class \n", + "count 569.000000 569.000000 \n", + "mean 0.083946 0.627417 \n", + "std 0.018061 0.483918 \n", + "min 0.055040 0.000000 \n", + "25% 0.071460 0.000000 \n", + "50% 0.080040 1.000000 \n", + "75% 0.092080 1.000000 \n", + "max 0.207500 1.000000 \n", "\n", "[8 rows x 31 columns]" ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -849,7 +794,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -863,9 +808,10 @@ "name": "stdout", "output_type": "stream", "text": [ + "class\n", "1 357\n", "0 212\n", - "Name: class, dtype: int64\n" + "Name: count, dtype: int64\n" ] } ], @@ -875,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -899,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", 
@@ -940,16 +886,7 @@ " mean concave points\n", " mean symmetry\n", " mean fractal dimension\n", - " radius error\n", - " texture error\n", - " perimeter error\n", - " area error\n", - " smoothness error\n", - " compactness error\n", - " concavity error\n", - " concave points error\n", - " symmetry error\n", - " fractal dimension error\n", + " ...\n", " worst radius\n", " worst texture\n", " worst perimeter\n", @@ -984,15 +921,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -1008,16 +936,7 @@ " 0.087990\n", " 0.192909\n", " 0.062680\n", - " 0.609083\n", - " 1.210915\n", - " 4.323929\n", - " 72.672406\n", - " 0.006780\n", - " 0.032281\n", - " 0.041824\n", - " 0.015060\n", - " 0.020472\n", - " 0.004062\n", + " ...\n", " 21.134811\n", " 29.318208\n", " 141.370330\n", @@ -1041,16 +960,7 @@ " 0.025717\n", " 0.174186\n", " 0.062867\n", - " 0.284082\n", - " 1.220380\n", - " 2.000321\n", - " 21.135148\n", - " 0.007196\n", - " 0.021438\n", - " 0.025997\n", - " 0.009858\n", - " 0.020584\n", - " 0.003636\n", + " ...\n", " 13.379801\n", " 23.515070\n", " 87.005938\n", @@ -1064,18 +974,44 @@ " \n", " \n", "\n", + "

2 rows × 30 columns

\n", "" ], "text/plain": [ - " mean radius mean texture ... worst symmetry worst fractal dimension\n", - "class ... \n", - "0 17.462830 21.604906 ... 0.323468 0.091530\n", - "1 12.146524 17.914762 ... 0.270246 0.079442\n", + " mean radius mean texture mean perimeter mean area mean smoothness \\\n", + "class \n", + "0 17.462830 21.604906 115.365377 978.376415 0.102898 \n", + "1 12.146524 17.914762 78.075406 462.790196 0.092478 \n", + "\n", + " mean compactness mean concavity mean concave points mean symmetry \\\n", + "class \n", + "0 0.145188 0.160775 0.087990 0.192909 \n", + "1 0.080085 0.046058 0.025717 0.174186 \n", + "\n", + " mean fractal dimension ... worst radius worst texture \\\n", + "class ... \n", + "0 0.062680 ... 21.134811 29.318208 \n", + "1 0.062867 ... 13.379801 23.515070 \n", + "\n", + " worst perimeter worst area worst smoothness worst compactness \\\n", + "class \n", + "0 141.370330 1422.286321 0.144845 0.374824 \n", + "1 87.005938 558.899440 0.124959 0.182673 \n", + "\n", + " worst concavity worst concave points worst symmetry \\\n", + "class \n", + "0 0.450606 0.182237 0.323468 \n", + "1 0.166238 0.074444 0.270246 \n", + "\n", + " worst fractal dimension \n", + "class \n", + "0 0.091530 \n", + "1 0.079442 \n", "\n", "[2 rows x 30 columns]" ] }, - "execution_count": 15, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1097,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 14, "metadata": { "id": "RDCD-3t2XmNG" }, @@ -1109,7 +1045,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1133,7 +1069,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": { "id": "TfXjgKecX2aR" }, @@ -1145,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1169,7 +1105,7 
@@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1183,7 +1119,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.6274165202108963 0.630859375 0.5964912280701754\n" + "0.6274165202108963 0.623046875 0.6666666666666666\n" ] } ], @@ -1193,7 +1129,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 19, "metadata": { "id": "_jFZwc9OX-Av" }, @@ -1205,7 +1141,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1229,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": { "id": "6hnHZaOWYNb1" }, @@ -1241,7 +1177,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1265,7 +1201,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1304,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 24, "metadata": { "id": "A63VGGb9YVRp" }, @@ -1316,7 +1252,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 25, "metadata": { "id": "dNLK1uKmYZEM" }, @@ -1327,7 +1263,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1341,27 +1277,430 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "d:\\Users\\PC\\AppData\\Local\\Programs\\Python\\Python310\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. 
of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", - " extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n" + " n_iter_i = _check_optimize_result(\n" ] }, { "data": { + "text/html": [ + "
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], "text/plain": [ - "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", - " intercept_scaling=1, l1_ratio=None, max_iter=100,\n", - " multi_class='auto', n_jobs=None, penalty='l2',\n", - " random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n", - " warm_start=False)" + "LogisticRegression()" ] }, - "execution_count": 28, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1382,7 +1721,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "metadata": { "id": "x4Cw-CwoYlzE" }, @@ -1394,7 +1733,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 28, "metadata": { "id": "J7iCwz2QYrN_" }, @@ -1406,7 +1745,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1430,7 +1769,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 30, "metadata": { "id": "OzBEFHyyZc0X" }, @@ -1443,7 +1782,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1476,7 +1815,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1503,7 +1842,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 33, "metadata": { "id": "_0QS56eSZ6xR" }, @@ -1515,7 +1854,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1541,7 +1880,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 35, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -1579,7 +1918,15 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", "version": "3.10.3" }, "vscode": { diff --git a/breastcancerdetection.py b/breastcancerdetection.py deleted file mode 100644 index a92950c..0000000 --- a/breastcancerdetection.py +++ /dev/null @@ -1,104 +0,0 @@ -# Importing libraries -from sklearn import datasets -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import accuracy_score -import numpy as np - -# Fetching the dataset -breast_cancer = datasets.load_breast_cancer() -# print(breast_cancer) # printing the dataset to check whether is has been loaded or not. - -X = breast_cancer.data -Y = breast_cancer.target - -# Printing the Data and Target of the Dataset -# print(X) -# print(Y) - -# printing the shape of X and Y or the instances of X and Y -# print(X.shape, Y.shape) - -"""Import data to the Pandas Data Frame""" -data = pd.DataFrame(breast_cancer.data, columns = breast_cancer.feature_names) - -data['class'] = breast_cancer.target - -# print(data.head()) # print some data samples - -# print(data.describe()) # prints statistical data - -# print(data['class'].value_counts()) # prints the number of cases for malignant and benign - -# print(breast_cancer.target_names) # Displays the target names, i.e., Malignant and Benign - -# data.groupby('class').mean() # printing the mean feature values for each of the targets -#0 = malignant -#1 = benign - -"""Train and Test Data Split""" - -# X_train, X_test, Y_train, Y_test = train_test_split(X,Y) - -# print(Y.shape, Y_train.shape, Y_test.shape) -# After running this data we will see that we have 426 training data and 143 testing data(we don't need so many) - -# X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1) -#test_size --> to specify the percentage of test data needed - -# print(Y.shape, Y_train.shape, Y_test.shape) -# Now there are 512 training data and 57 testing data. 
- -# print(Y.mean(), Y_train.mean(), Y_test.mean()) -# This output lets us know the distribution of the data of the mean and we found out that the distribution of the test data is a bit lo. - -X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y) -#stratify --> for correct distribution of data as of the original data - -# print(Y.mean(), Y_train.mean(), Y_test.mean()) -#Now there is equal distribution between the training data of Malignant and Benign - -X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=1) -#random_state --> specific split of data, each value of random_state splits the data differently - -# print(X_train.mean(), X_test.mean(), X.mean()) - -# print(X_train) - -"""**Logistic Regression**""" -classifier = LogisticRegression() #loading the logistic regression model to the variable "classifier" - -# training the model on training data -classifier.fit(X_train, Y_train) - -"""**Evaluation of the Model**""" -prediction_on_training_data = classifier.predict(X_train) -accuracy_on_training_data = accuracy_score(Y_train, prediction_on_training_data) - -print('Accuracy on training data :', accuracy_on_training_data) - -#prediction on test_data -prediction_on_test_data = classifier.predict(X_test) -accuracy_on_test_data = accuracy_score(Y_test, prediction_on_test_data) - -print('Accuracy on test data :', accuracy_on_test_data) - -"""Detecting whether the Patient has Breast Cancer in Benign or Malignant Stage""" - -input_data = (17.99,10.38,122.8,1001,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,0.095,1.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189) -#change the input_data to numpy_array to make prediction -input_data_as_numpy_array = np.array(input_data) -print(input_data) - -#reshape the array we are predicting the output for one instance -input_data_reshaped = input_data_as_numpy_array.reshape(1, -1) 
- -#prediction -prediction = classifier.predict(input_data_reshaped) -print(prediction) #returns a list with elements 0 [if Malignant] and 1 [if Benign] - -if (prediction[0]==0): - print("The Breast Cancer is at Malignant Stage.") -else: - print("The Breast Cancer is at Benign Stage.") \ No newline at end of file