From 13a8a7ddcd1c8129370559c0418d1d36276d7d71 Mon Sep 17 00:00:00 2001 From: Arindam Sahoo <88739246+arindam-sahoo@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:06:38 +0530 Subject: [PATCH] Add files via upload --- breast-cancer-detection-using-various-algorithms.ipynb | 1 + 1 file changed, 1 insertion(+) create mode 100644 breast-cancer-detection-using-various-algorithms.ipynb diff --git a/breast-cancer-detection-using-various-algorithms.ipynb b/breast-cancer-detection-using-various-algorithms.ipynb new file mode 100644 index 0000000..a0b0db9 --- /dev/null +++ b/breast-cancer-detection-using-various-algorithms.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":2984728,"sourceType":"datasetVersion","datasetId":1829286}],"dockerImageVersionId":30732,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\nimport sklearn.datasets","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-06-24T08:32:29.769529Z","iopub.execute_input":"2024-06-24T08:32:29.770856Z","iopub.status.idle":"2024-06-24T08:32:29.777355Z","shell.execute_reply.started":"2024-06-24T08:32:29.770763Z","shell.execute_reply":"2024-06-24T08:32:29.776088Z"},"trusted":true},"execution_count":84,"outputs":[]},{"cell_type":"markdown","source":"**Loading the dataset in a Pandas Dataframe**","metadata":{}},{"cell_type":"code","source":"breast_cancer = sklearn.datasets.load_breast_cancer()","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.779501Z","iopub.execute_input":"2024-06-24T08:32:29.779951Z","iopub.status.idle":"2024-06-24T08:32:29.801645Z","shell.execute_reply.started":"2024-06-24T08:32:29.779911Z","shell.execute_reply":"2024-06-24T08:32:29.800338Z"},"trusted":true},"execution_count":85,"outputs":[]},{"cell_type":"code","source":"breast_cancer","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.803388Z","iopub.execute_input":"2024-06-24T08:32:29.804481Z","iopub.status.idle":"2024-06-24T08:32:29.815681Z","shell.execute_reply.started":"2024-06-24T08:32:29.804443Z","shell.execute_reply":"2024-06-24T08:32:29.814392Z"},"trusted":true},"execution_count":86,"outputs":[{"execution_count":86,"output_type":"execute_result","data":{"text/plain":"{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,\n 1.189e-01],\n [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,\n 8.902e-02],\n [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,\n 8.758e-02],\n ...,\n [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,\n 7.820e-02],\n [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,\n 1.240e-01],\n [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,\n 7.039e-02]]),\n 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,\n 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,\n 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,\n 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,\n 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,\n 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,\n 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,\n 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,\n 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,\n 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,\n 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,\n 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,\n 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,\n 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,\n 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,\n 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,\n 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1,\n 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,\n 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,\n 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1]),\n 'frame': None,\n 'target_names': array(['malignant', 'benign'], dtype='\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensionclass
017.9910.38122.801001.00.118400.277600.300100.147100.24190.07871...17.33184.602019.00.162200.665600.71190.26540.46010.118900
120.5717.77132.901326.00.084740.078640.086900.070170.18120.05667...23.41158.801956.00.123800.186600.24160.18600.27500.089020
219.6921.25130.001203.00.109600.159900.197400.127900.20690.05999...25.53152.501709.00.144400.424500.45040.24300.36130.087580
311.4220.3877.58386.10.142500.283900.241400.105200.25970.09744...26.5098.87567.70.209800.866300.68690.25750.66380.173000
420.2914.34135.101297.00.100300.132800.198000.104300.18090.05883...16.67152.201575.00.137400.205000.40000.16250.23640.076780
..................................................................
56421.5622.39142.001479.00.111000.115900.243900.138900.17260.05623...26.40166.102027.00.141000.211300.41070.22160.20600.071150
56520.1328.25131.201261.00.097800.103400.144000.097910.17520.05533...38.25155.001731.00.116600.192200.32150.16280.25720.066370
56616.6028.08108.30858.10.084550.102300.092510.053020.15900.05648...34.12126.701124.00.113900.309400.34030.14180.22180.078200
56720.6029.33140.101265.00.117800.277000.351400.152000.23970.07016...39.42184.601821.00.165000.868100.93870.26500.40870.124000
5687.7624.5447.92181.00.052630.043620.000000.000000.15870.05884...30.3759.16268.60.089960.064440.00000.00000.28710.070391
\n

569 rows × 31 columns

\n"},"metadata":{}}]},{"cell_type":"code","source":"print(breast_cancer.target_names)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.934362Z","iopub.execute_input":"2024-06-24T08:32:29.934862Z","iopub.status.idle":"2024-06-24T08:32:29.943888Z","shell.execute_reply.started":"2024-06-24T08:32:29.934819Z","shell.execute_reply":"2024-06-24T08:32:29.942488Z"},"trusted":true},"execution_count":94,"outputs":[{"name":"stdout","text":"['malignant' 'benign']\n","output_type":"stream"}]},{"cell_type":"code","source":"df.groupby('class').mean()\n#0 = malignant\n#1 = benign","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.945523Z","iopub.execute_input":"2024-06-24T08:32:29.946053Z","iopub.status.idle":"2024-06-24T08:32:29.977139Z","shell.execute_reply.started":"2024-06-24T08:32:29.946009Z","shell.execute_reply":"2024-06-24T08:32:29.975901Z"},"trusted":true},"execution_count":95,"outputs":[{"execution_count":95,"output_type":"execute_result","data":{"text/plain":" mean radius mean texture mean perimeter mean area mean smoothness \\\nclass \n0 17.462830 21.604906 115.365377 978.376415 0.102898 \n1 12.146524 17.914762 78.075406 462.790196 0.092478 \n\n mean compactness mean concavity mean concave points mean symmetry \\\nclass \n0 0.145188 0.160775 0.087990 0.192909 \n1 0.080085 0.046058 0.025717 0.174186 \n\n mean fractal dimension ... worst radius worst texture \\\nclass ... \n0 0.062680 ... 21.134811 29.318208 \n1 0.062867 ... 13.379801 23.515070 \n\n worst perimeter worst area worst smoothness worst compactness \\\nclass \n0 141.370330 1422.286321 0.144845 0.374824 \n1 87.005938 558.899440 0.124959 0.182673 \n\n worst concavity worst concave points worst symmetry \\\nclass \n0 0.450606 0.182237 0.323468 \n1 0.166238 0.074444 0.270246 \n\n worst fractal dimension \nclass \n0 0.091530 \n1 0.079442 \n\n[2 rows x 30 columns]","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst radiusworst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimension
class
017.46283021.604906115.365377978.3764150.1028980.1451880.1607750.0879900.1929090.062680...21.13481129.318208141.3703301422.2863210.1448450.3748240.4506060.1822370.3234680.091530
112.14652417.91476278.075406462.7901960.0924780.0800850.0460580.0257170.1741860.062867...13.37980123.51507087.005938558.8994400.1249590.1826730.1662380.0744440.2702460.079442
\n

2 rows × 30 columns

\n
"},"metadata":{}}]},{"cell_type":"markdown","source":"**Train and Test Data Split**","metadata":{}},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\nX_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=42)\n#stratify --> for correct distribution of data as of the original data\n#random_state --> specific split of data, each value of random_state splits the data differently","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.978542Z","iopub.execute_input":"2024-06-24T08:32:29.978958Z","iopub.status.idle":"2024-06-24T08:32:29.989465Z","shell.execute_reply.started":"2024-06-24T08:32:29.978919Z","shell.execute_reply":"2024-06-24T08:32:29.988222Z"},"trusted":true},"execution_count":96,"outputs":[]},{"cell_type":"code","source":"print(Y.shape, Y_train.shape, Y_test.shape)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:29.995241Z","iopub.execute_input":"2024-06-24T08:32:29.996307Z","iopub.status.idle":"2024-06-24T08:32:30.002447Z","shell.execute_reply.started":"2024-06-24T08:32:29.996255Z","shell.execute_reply":"2024-06-24T08:32:30.001250Z"},"trusted":true},"execution_count":97,"outputs":[{"name":"stdout","text":"(569,) (455,) (114,)\n","output_type":"stream"}]},{"cell_type":"code","source":"print(X_train.mean(), X_test.mean(), X.mean())","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.003703Z","iopub.execute_input":"2024-06-24T08:32:30.004058Z","iopub.status.idle":"2024-06-24T08:32:30.014110Z","shell.execute_reply.started":"2024-06-24T08:32:30.004029Z","shell.execute_reply":"2024-06-24T08:32:30.012490Z"},"trusted":true},"execution_count":98,"outputs":[{"name":"stdout","text":"61.21359590991941 64.59323844011696 61.890712339519624\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **Logistic Regression**","metadata":{}},{"cell_type":"code","source":"#import logistic regression from sklearn\nfrom sklearn.linear_model import LogisticRegression","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.015373Z","iopub.execute_input":"2024-06-24T08:32:30.015776Z","iopub.status.idle":"2024-06-24T08:32:30.029406Z","shell.execute_reply.started":"2024-06-24T08:32:30.015747Z","shell.execute_reply":"2024-06-24T08:32:30.027816Z"},"trusted":true},"execution_count":99,"outputs":[]},{"cell_type":"code","source":"classifier = LogisticRegression()","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.030892Z","iopub.execute_input":"2024-06-24T08:32:30.033226Z","iopub.status.idle":"2024-06-24T08:32:30.041930Z","shell.execute_reply.started":"2024-06-24T08:32:30.033165Z","shell.execute_reply":"2024-06-24T08:32:30.040010Z"},"trusted":true},"execution_count":100,"outputs":[]},{"cell_type":"code","source":"#training the model on training data\nclassifier.fit(X_train, Y_train)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.044236Z","iopub.execute_input":"2024-06-24T08:32:30.044799Z","iopub.status.idle":"2024-06-24T08:32:30.121502Z","shell.execute_reply.started":"2024-06-24T08:32:30.044758Z","shell.execute_reply":"2024-06-24T08:32:30.120260Z"},"trusted":true},"execution_count":101,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\nSTOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n\nIncrease the number of iterations (max_iter) or scale the data as shown in:\n https://scikit-learn.org/stable/modules/preprocessing.html\nPlease also refer to the documentation for alternative solver options:\n https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n n_iter_i = _check_optimize_result(\n","output_type":"stream"},{"execution_count":101,"output_type":"execute_result","data":{"text/plain":"LogisticRegression()","text/html":"
LogisticRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"code","source":"#import accuracy_score\nfrom sklearn.metrics import accuracy_score, confusion_matrix, classification_report","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.123112Z","iopub.execute_input":"2024-06-24T08:32:30.124346Z","iopub.status.idle":"2024-06-24T08:32:30.130323Z","shell.execute_reply.started":"2024-06-24T08:32:30.124304Z","shell.execute_reply":"2024-06-24T08:32:30.128660Z"},"trusted":true},"execution_count":102,"outputs":[]},{"cell_type":"code","source":"#prediction on test_data\nprediction = classifier.predict(X_test)\naccuracy = accuracy_score(Y_test, prediction)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.131984Z","iopub.execute_input":"2024-06-24T08:32:30.133174Z","iopub.status.idle":"2024-06-24T08:32:30.143408Z","shell.execute_reply.started":"2024-06-24T08:32:30.133120Z","shell.execute_reply":"2024-06-24T08:32:30.141487Z"},"trusted":true},"execution_count":103,"outputs":[]},{"cell_type":"code","source":"# Creating accuracy dictionary\naccuracy_dict = {}\naccuracy_dict['Logistic Regression'] = round((accuracy * 100),2)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.145736Z","iopub.execute_input":"2024-06-24T08:32:30.147438Z","iopub.status.idle":"2024-06-24T08:32:30.153917Z","shell.execute_reply.started":"2024-06-24T08:32:30.147388Z","shell.execute_reply":"2024-06-24T08:32:30.152543Z"},"trusted":true},"execution_count":104,"outputs":[]},{"cell_type":"code","source":"print(f'Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.156317Z","iopub.execute_input":"2024-06-24T08:32:30.158114Z","iopub.status.idle":"2024-06-24T08:32:30.166258Z","shell.execute_reply.started":"2024-06-24T08:32:30.158065Z","shell.execute_reply":"2024-06-24T08:32:30.164929Z"},"trusted":true},"execution_count":105,"outputs":[{"name":"stdout","text":"Accuracy: 0.9473684210526315\n","output_type":"stream"}]},{"cell_type":"code","source":"accuracy_dict","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.168545Z","iopub.execute_input":"2024-06-24T08:32:30.171022Z","iopub.status.idle":"2024-06-24T08:32:30.179325Z","shell.execute_reply.started":"2024-06-24T08:32:30.170973Z","shell.execute_reply":"2024-06-24T08:32:30.177905Z"},"trusted":true},"execution_count":106,"outputs":[{"execution_count":106,"output_type":"execute_result","data":{"text/plain":"{'Logistic Regression': 94.74}"},"metadata":{}}]},{"cell_type":"code","source":"# Confusion Matrix\nconf_matrix = confusion_matrix(Y_test, prediction)\nprint(f'Confusion Matrix:\\n{conf_matrix}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.181508Z","iopub.execute_input":"2024-06-24T08:32:30.182785Z","iopub.status.idle":"2024-06-24T08:32:30.193209Z","shell.execute_reply.started":"2024-06-24T08:32:30.182730Z","shell.execute_reply":"2024-06-24T08:32:30.191665Z"},"trusted":true},"execution_count":107,"outputs":[{"name":"stdout","text":"Confusion Matrix:\n[[38 4]\n [ 2 70]]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Classification Report\nclass_report = classification_report(Y_test, prediction)\nprint(f'Classification Report:\\n{class_report}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.195311Z","iopub.execute_input":"2024-06-24T08:32:30.196812Z","iopub.status.idle":"2024-06-24T08:32:30.232652Z","shell.execute_reply.started":"2024-06-24T08:32:30.196764Z","shell.execute_reply":"2024-06-24T08:32:30.231131Z"},"trusted":true},"execution_count":108,"outputs":[{"name":"stdout","text":"Classification Report:\n precision recall f1-score support\n\n 0 0.95 0.90 0.93 42\n 1 0.95 0.97 0.96 72\n\n accuracy 0.95 114\n macro avg 0.95 0.94 0.94 114\nweighted avg 0.95 0.95 0.95 114\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **Support Vector Machine**","metadata":{}},{"cell_type":"code","source":"from sklearn.svm import SVC","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.234861Z","iopub.execute_input":"2024-06-24T08:32:30.236254Z","iopub.status.idle":"2024-06-24T08:32:30.242121Z","shell.execute_reply.started":"2024-06-24T08:32:30.236203Z","shell.execute_reply":"2024-06-24T08:32:30.240920Z"},"trusted":true},"execution_count":109,"outputs":[]},{"cell_type":"code","source":"# Initialize the model\nmodel = SVC(kernel='linear')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.244166Z","iopub.execute_input":"2024-06-24T08:32:30.245508Z","iopub.status.idle":"2024-06-24T08:32:30.253306Z","shell.execute_reply.started":"2024-06-24T08:32:30.245460Z","shell.execute_reply":"2024-06-24T08:32:30.252015Z"},"trusted":true},"execution_count":110,"outputs":[]},{"cell_type":"code","source":"# Train the model\nmodel.fit(X_train, Y_train)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:30.255111Z","iopub.execute_input":"2024-06-24T08:32:30.256144Z","iopub.status.idle":"2024-06-24T08:32:31.882133Z","shell.execute_reply.started":"2024-06-24T08:32:30.256095Z","shell.execute_reply":"2024-06-24T08:32:31.880904Z"},"trusted":true},"execution_count":111,"outputs":[{"execution_count":111,"output_type":"execute_result","data":{"text/plain":"SVC(kernel='linear')","text/html":"
SVC(kernel='linear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"code","source":"# Predictions\nprediction = model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:31.883960Z","iopub.execute_input":"2024-06-24T08:32:31.884311Z","iopub.status.idle":"2024-06-24T08:32:31.890641Z","shell.execute_reply.started":"2024-06-24T08:32:31.884284Z","shell.execute_reply":"2024-06-24T08:32:31.888687Z"},"trusted":true},"execution_count":112,"outputs":[]},{"cell_type":"code","source":"# Accuracy\naccuracy = accuracy_score(Y_test, prediction)\nprint(f'Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:31.893265Z","iopub.execute_input":"2024-06-24T08:32:31.893850Z","iopub.status.idle":"2024-06-24T08:32:31.905892Z","shell.execute_reply.started":"2024-06-24T08:32:31.893805Z","shell.execute_reply":"2024-06-24T08:32:31.904212Z"},"trusted":true},"execution_count":113,"outputs":[{"name":"stdout","text":"Accuracy: 0.956140350877193\n","output_type":"stream"}]},{"cell_type":"code","source":"accuracy_dict['SVM'] = round(accuracy * 100, 2)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:31.907944Z","iopub.execute_input":"2024-06-24T08:32:31.908346Z","iopub.status.idle":"2024-06-24T08:32:31.916944Z","shell.execute_reply.started":"2024-06-24T08:32:31.908314Z","shell.execute_reply":"2024-06-24T08:32:31.915821Z"},"trusted":true},"execution_count":114,"outputs":[]},{"cell_type":"code","source":"# Confusion Matrix\nconf_matrix = confusion_matrix(Y_test, prediction)\nprint(f'Confusion Matrix:\\n{conf_matrix}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:32:31.918278Z","iopub.execute_input":"2024-06-24T08:32:31.918675Z","iopub.status.idle":"2024-06-24T08:32:31.932889Z","shell.execute_reply.started":"2024-06-24T08:32:31.918633Z","shell.execute_reply":"2024-06-24T08:32:31.931595Z"},"trusted":true},"execution_count":115,"outputs":[{"name":"stdout","text":"Confusion Matrix:\n[[38 4]\n [ 1 71]]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Classification Report\nclass_report = classification_report(Y_test, prediction)\nprint(f'Classification Report:\\n{class_report}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:08.046815Z","iopub.execute_input":"2024-06-24T08:33:08.047230Z","iopub.status.idle":"2024-06-24T08:33:08.064365Z","shell.execute_reply.started":"2024-06-24T08:33:08.047198Z","shell.execute_reply":"2024-06-24T08:33:08.062874Z"},"trusted":true},"execution_count":116,"outputs":[{"name":"stdout","text":"Classification Report:\n precision recall f1-score support\n\n 0 0.97 0.90 0.94 42\n 1 0.95 0.99 0.97 72\n\n accuracy 0.96 114\n macro avg 0.96 0.95 0.95 114\nweighted avg 0.96 0.96 0.96 114\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **k-Nearest Neighbors**","metadata":{}},{"cell_type":"code","source":"from sklearn.neighbors import KNeighborsClassifier","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:09.873558Z","iopub.execute_input":"2024-06-24T08:33:09.874403Z","iopub.status.idle":"2024-06-24T08:33:09.880385Z","shell.execute_reply.started":"2024-06-24T08:33:09.874363Z","shell.execute_reply":"2024-06-24T08:33:09.878342Z"},"trusted":true},"execution_count":117,"outputs":[]},{"cell_type":"code","source":"# Initialize the model\nk = 99 # Choosing k=99 for this example\nmodel = KNeighborsClassifier(n_neighbors=k)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:12.611797Z","iopub.execute_input":"2024-06-24T08:33:12.612317Z","iopub.status.idle":"2024-06-24T08:33:12.618590Z","shell.execute_reply.started":"2024-06-24T08:33:12.612276Z","shell.execute_reply":"2024-06-24T08:33:12.617325Z"},"trusted":true},"execution_count":118,"outputs":[]},{"cell_type":"code","source":"# Train the model\nmodel.fit(X_train, Y_train)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:13.089043Z","iopub.execute_input":"2024-06-24T08:33:13.089446Z","iopub.status.idle":"2024-06-24T08:33:13.099429Z","shell.execute_reply.started":"2024-06-24T08:33:13.089415Z","shell.execute_reply":"2024-06-24T08:33:13.097972Z"},"trusted":true},"execution_count":119,"outputs":[{"execution_count":119,"output_type":"execute_result","data":{"text/plain":"KNeighborsClassifier(n_neighbors=99)","text/html":"
KNeighborsClassifier(n_neighbors=99)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"code","source":"# Predictions\nprediction = model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:13.482205Z","iopub.execute_input":"2024-06-24T08:33:13.482625Z","iopub.status.idle":"2024-06-24T08:33:13.551838Z","shell.execute_reply.started":"2024-06-24T08:33:13.482587Z","shell.execute_reply":"2024-06-24T08:33:13.550653Z"},"trusted":true},"execution_count":120,"outputs":[]},{"cell_type":"code","source":"# Accuracy\naccuracy = accuracy_score(Y_test, prediction)\nprint(f'Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:14.009733Z","iopub.execute_input":"2024-06-24T08:33:14.010151Z","iopub.status.idle":"2024-06-24T08:33:14.017984Z","shell.execute_reply.started":"2024-06-24T08:33:14.010119Z","shell.execute_reply":"2024-06-24T08:33:14.016846Z"},"trusted":true},"execution_count":121,"outputs":[{"name":"stdout","text":"Accuracy: 0.9210526315789473\n","output_type":"stream"}]},{"cell_type":"code","source":"accuracy_dict['kNN-99'] = round(accuracy * 100, 2)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:14.475762Z","iopub.execute_input":"2024-06-24T08:33:14.476193Z","iopub.status.idle":"2024-06-24T08:33:14.481485Z","shell.execute_reply.started":"2024-06-24T08:33:14.476161Z","shell.execute_reply":"2024-06-24T08:33:14.480361Z"},"trusted":true},"execution_count":122,"outputs":[]},{"cell_type":"code","source":"# Confusion Matrix\nconf_matrix = confusion_matrix(Y_test, prediction)\nprint(f'Confusion Matrix:\\n{conf_matrix}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:14.865244Z","iopub.execute_input":"2024-06-24T08:33:14.865647Z","iopub.status.idle":"2024-06-24T08:33:14.873803Z","shell.execute_reply.started":"2024-06-24T08:33:14.865617Z","shell.execute_reply":"2024-06-24T08:33:14.872454Z"},"trusted":true},"execution_count":123,"outputs":[{"name":"stdout","text":"Confusion Matrix:\n[[35 7]\n [ 2 70]]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Classification Report\nclass_report = classification_report(Y_test, prediction)\nprint(f'Classification Report:\\n{class_report}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:15.271034Z","iopub.execute_input":"2024-06-24T08:33:15.271897Z","iopub.status.idle":"2024-06-24T08:33:15.292649Z","shell.execute_reply.started":"2024-06-24T08:33:15.271860Z","shell.execute_reply":"2024-06-24T08:33:15.291165Z"},"trusted":true},"execution_count":124,"outputs":[{"name":"stdout","text":"Classification Report:\n precision recall f1-score support\n\n 0 0.95 0.83 0.89 42\n 1 0.91 0.97 0.94 72\n\n accuracy 0.92 114\n macro avg 0.93 0.90 0.91 114\nweighted avg 0.92 0.92 0.92 114\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **Decision Tree**","metadata":{}},{"cell_type":"code","source":"from sklearn.tree import DecisionTreeClassifier","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:16.148428Z","iopub.execute_input":"2024-06-24T08:33:16.149489Z","iopub.status.idle":"2024-06-24T08:33:16.163615Z","shell.execute_reply.started":"2024-06-24T08:33:16.149445Z","shell.execute_reply":"2024-06-24T08:33:16.161901Z"},"trusted":true},"execution_count":125,"outputs":[]},{"cell_type":"code","source":"# Initialize the model\nmodel = DecisionTreeClassifier(random_state=42)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:16.689011Z","iopub.execute_input":"2024-06-24T08:33:16.689430Z","iopub.status.idle":"2024-06-24T08:33:16.695022Z","shell.execute_reply.started":"2024-06-24T08:33:16.689402Z","shell.execute_reply":"2024-06-24T08:33:16.693604Z"},"trusted":true},"execution_count":126,"outputs":[]},{"cell_type":"code","source":"# Train the model\nmodel.fit(X_train, Y_train)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:17.129040Z","iopub.execute_input":"2024-06-24T08:33:17.129457Z","iopub.status.idle":"2024-06-24T08:33:17.148679Z","shell.execute_reply.started":"2024-06-24T08:33:17.129426Z","shell.execute_reply":"2024-06-24T08:33:17.147286Z"},"trusted":true},"execution_count":127,"outputs":[{"execution_count":127,"output_type":"execute_result","data":{"text/plain":"DecisionTreeClassifier(random_state=42)","text/html":"
DecisionTreeClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"code","source":"# Predictions\nprediction = model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:17.600465Z","iopub.execute_input":"2024-06-24T08:33:17.600874Z","iopub.status.idle":"2024-06-24T08:33:17.607374Z","shell.execute_reply.started":"2024-06-24T08:33:17.600846Z","shell.execute_reply":"2024-06-24T08:33:17.605501Z"},"trusted":true},"execution_count":128,"outputs":[]},{"cell_type":"code","source":"# Accuracy\naccuracy = accuracy_score(Y_test, prediction)\nprint(f'Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:18.071411Z","iopub.execute_input":"2024-06-24T08:33:18.071827Z","iopub.status.idle":"2024-06-24T08:33:18.079924Z","shell.execute_reply.started":"2024-06-24T08:33:18.071799Z","shell.execute_reply":"2024-06-24T08:33:18.078179Z"},"trusted":true},"execution_count":129,"outputs":[{"name":"stdout","text":"Accuracy: 0.9122807017543859\n","output_type":"stream"}]},{"cell_type":"code","source":"accuracy_dict['DT-42'] = round(accuracy * 100, 2)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:18.433722Z","iopub.execute_input":"2024-06-24T08:33:18.434130Z","iopub.status.idle":"2024-06-24T08:33:18.439981Z","shell.execute_reply.started":"2024-06-24T08:33:18.434099Z","shell.execute_reply":"2024-06-24T08:33:18.438055Z"},"trusted":true},"execution_count":130,"outputs":[]},{"cell_type":"code","source":"# Confusion Matrix\nconf_matrix = confusion_matrix(Y_test, prediction)\nprint(f'Confusion Matrix:\\n{conf_matrix}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:18.870802Z","iopub.execute_input":"2024-06-24T08:33:18.871247Z","iopub.status.idle":"2024-06-24T08:33:18.879073Z","shell.execute_reply.started":"2024-06-24T08:33:18.871215Z","shell.execute_reply":"2024-06-24T08:33:18.877958Z"},"trusted":true},"execution_count":131,"outputs":[{"name":"stdout","text":"Confusion Matrix:\n[[39 3]\n [ 7 65]]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Classification Report\nclass_report = classification_report(Y_test, prediction)\nprint(f'Classification Report:\\n{class_report}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:19.488733Z","iopub.execute_input":"2024-06-24T08:33:19.489127Z","iopub.status.idle":"2024-06-24T08:33:19.504035Z","shell.execute_reply.started":"2024-06-24T08:33:19.489097Z","shell.execute_reply":"2024-06-24T08:33:19.502981Z"},"trusted":true},"execution_count":132,"outputs":[{"name":"stdout","text":"Classification Report:\n precision recall f1-score support\n\n 0 0.85 0.93 0.89 42\n 1 0.96 0.90 0.93 72\n\n accuracy 0.91 114\n macro avg 0.90 0.92 0.91 114\nweighted avg 0.92 0.91 0.91 114\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **Random Forest**","metadata":{}},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestClassifier","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:20.547002Z","iopub.execute_input":"2024-06-24T08:33:20.547409Z","iopub.status.idle":"2024-06-24T08:33:20.624951Z","shell.execute_reply.started":"2024-06-24T08:33:20.547378Z","shell.execute_reply":"2024-06-24T08:33:20.623193Z"},"trusted":true},"execution_count":133,"outputs":[]},{"cell_type":"code","source":"# Initialize the model\nmodel = RandomForestClassifier(random_state=42)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:21.459257Z","iopub.execute_input":"2024-06-24T08:33:21.459698Z","iopub.status.idle":"2024-06-24T08:33:21.465460Z","shell.execute_reply.started":"2024-06-24T08:33:21.459668Z","shell.execute_reply":"2024-06-24T08:33:21.464186Z"},"trusted":true},"execution_count":134,"outputs":[]},{"cell_type":"code","source":"# Train the model\nmodel.fit(X_train, Y_train)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:21.887675Z","iopub.execute_input":"2024-06-24T08:33:21.889290Z","iopub.status.idle":"2024-06-24T08:33:22.166937Z","shell.execute_reply.started":"2024-06-24T08:33:21.889231Z","shell.execute_reply":"2024-06-24T08:33:22.165397Z"},"trusted":true},"execution_count":135,"outputs":[{"execution_count":135,"output_type":"execute_result","data":{"text/plain":"RandomForestClassifier(random_state=42)","text/html":"
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
"},"metadata":{}}]},{"cell_type":"code","source":"# Predictions\nprediction = model.predict(X_test)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:22.361001Z","iopub.execute_input":"2024-06-24T08:33:22.361390Z","iopub.status.idle":"2024-06-24T08:33:22.379099Z","shell.execute_reply.started":"2024-06-24T08:33:22.361360Z","shell.execute_reply":"2024-06-24T08:33:22.377896Z"},"trusted":true},"execution_count":136,"outputs":[]},{"cell_type":"code","source":"# Accuracy\naccuracy = accuracy_score(Y_test, prediction)\nprint(f'Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:22.570245Z","iopub.execute_input":"2024-06-24T08:33:22.570661Z","iopub.status.idle":"2024-06-24T08:33:22.577956Z","shell.execute_reply.started":"2024-06-24T08:33:22.570628Z","shell.execute_reply":"2024-06-24T08:33:22.576821Z"},"trusted":true},"execution_count":137,"outputs":[{"name":"stdout","text":"Accuracy: 0.956140350877193\n","output_type":"stream"}]},{"cell_type":"code","source":"accuracy_dict['RF'] = round(accuracy * 100, 2)","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:22.807702Z","iopub.execute_input":"2024-06-24T08:33:22.808408Z","iopub.status.idle":"2024-06-24T08:33:22.813491Z","shell.execute_reply.started":"2024-06-24T08:33:22.808377Z","shell.execute_reply":"2024-06-24T08:33:22.812098Z"},"trusted":true},"execution_count":138,"outputs":[]},{"cell_type":"code","source":"# Confusion Matrix\nconf_matrix = confusion_matrix(Y_test, prediction)\nprint(f'Confusion Matrix:\\n{conf_matrix}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:23.013857Z","iopub.execute_input":"2024-06-24T08:33:23.014272Z","iopub.status.idle":"2024-06-24T08:33:23.022495Z","shell.execute_reply.started":"2024-06-24T08:33:23.014240Z","shell.execute_reply":"2024-06-24T08:33:23.021298Z"},"trusted":true},"execution_count":139,"outputs":[{"name":"stdout","text":"Confusion Matrix:\n[[39 3]\n [ 2 70]]\n","output_type":"stream"}]},{"cell_type":"code","source":"# Classification Report\nclass_report = classification_report(Y_test, prediction)\nprint(f'Classification Report:\\n{class_report}')","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:23.844248Z","iopub.execute_input":"2024-06-24T08:33:23.844762Z","iopub.status.idle":"2024-06-24T08:33:23.864212Z","shell.execute_reply.started":"2024-06-24T08:33:23.844725Z","shell.execute_reply":"2024-06-24T08:33:23.862313Z"},"trusted":true},"execution_count":140,"outputs":[{"name":"stdout","text":"Classification Report:\n precision recall f1-score support\n\n 0 0.95 0.93 0.94 42\n 1 0.96 0.97 0.97 72\n\n accuracy 0.96 114\n macro avg 0.96 0.95 0.95 114\nweighted avg 0.96 0.96 0.96 114\n\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## **Accuracy Comparison**","metadata":{}},{"cell_type":"code","source":"accuracy_dict","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:28.827662Z","iopub.execute_input":"2024-06-24T08:33:28.828502Z","iopub.status.idle":"2024-06-24T08:33:28.836995Z","shell.execute_reply.started":"2024-06-24T08:33:28.828446Z","shell.execute_reply":"2024-06-24T08:33:28.835658Z"},"trusted":true},"execution_count":141,"outputs":[{"execution_count":141,"output_type":"execute_result","data":{"text/plain":"{'Logistic Regression': 94.74,\n 'SVM': 95.61,\n 'kNN-99': 92.11,\n 'DT-42': 91.23,\n 'RF': 95.61}"},"metadata":{}}]},{"cell_type":"code","source":"import plotly.graph_objects as go","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:29.604210Z","iopub.execute_input":"2024-06-24T08:33:29.604670Z","iopub.status.idle":"2024-06-24T08:33:29.622791Z","shell.execute_reply.started":"2024-06-24T08:33:29.604635Z","shell.execute_reply":"2024-06-24T08:33:29.621236Z"},"trusted":true},"execution_count":142,"outputs":[]},{"cell_type":"code","source":"# Define the data\nmodels = list(accuracy_dict.keys())\naccuracy_scores = list(accuracy_dict.values())\n\n# Create a bar graph\nfig = go.Figure(data=[go.Bar(\n x=models,\n y=accuracy_scores,\n text=accuracy_scores,\n textposition='auto',\n)])\n\n# Customize layout\nfig.update_layout(\n title='Accuracy Scores of Different Models',\n xaxis_title='Models',\n yaxis_title='Accuracy (%)',\n yaxis=dict(range=[0, 100]), # set the range of y-axis\n)\n\n# Show the plot\nfig.show()","metadata":{"execution":{"iopub.status.busy":"2024-06-24T08:33:30.560324Z","iopub.execute_input":"2024-06-24T08:33:30.560773Z","iopub.status.idle":"2024-06-24T08:33:31.112170Z","shell.execute_reply.started":"2024-06-24T08:33:30.560716Z","shell.execute_reply":"2024-06-24T08:33:31.110816Z"},"trusted":true},"execution_count":143,"outputs":[{"output_type":"display_data","data":{"text/html":" \n "},"metadata":{}},{"output_type":"display_data","data":{"text/html":"
"},"metadata":{}}]}]} \ No newline at end of file