ISTE-NITK · skanda99 · May 24, 2018 · May 28, 2018 · Jun 24, 2018
diff --git a/...on-to-Data-Science/Week-6/Assignment-5/Skanda/.ipynb_checkpoints/MLmodel-checkpoint.ipynb b/...on-to-Data-Science/Week-6/Assignment-5/Skanda/.ipynb_checkpoints/MLmodel-checkpoint.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' ML Assignment (SVM-linear) (Accuracy: 97.202  %)'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "''' ML Assignment (SVM-linear) (Accuracy: 97.202  %)'''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# importing libraries\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# getting the dataset\n",
+    "from sklearn import datasets\n",
+    "cancer=datasets.load_breast_cancer()\n",
+    "dataset=pd.DataFrame(cancer['data'],columns=cancer['feature_names'])\n",
+    "# target encoding: 0 = malignant, 1 = benign\n",
+    "dataset['Target']=cancer['target']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# features and target\n",
+    "X=dataset.iloc[:,:-1].values\n",
+    "y=dataset.iloc[:,30].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# splitting data into training set and test set\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# feature scaling (scaler is fit on the training set only, then applied to the test set)\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "sc_X=StandardScaler()\n",
+    "X_train=sc_X.fit_transform(X_train)\n",
+    "X_test=sc_X.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
+       "  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',\n",
+       "  max_iter=-1, probability=False, random_state=0, shrinking=True,\n",
+       "  tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# training an SVM classifier with a linear kernel\n",
+    "from sklearn.svm import SVC\n",
+    "classifier=SVC(kernel='linear',random_state=0)\n",
+    "classifier.fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predicting test set\n",
+    "y_pred=classifier.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# building the confusion matrix (rows: true labels, columns: predicted labels)\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "cm=confusion_matrix(y_test,y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[51,  2],\n",
+       "       [ 2, 88]], dtype=int64)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/MLmodel.ipynb b/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/MLmodel.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' ML Assignment (SVM-linear) (Accuracy: 97.202  %)'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "''' ML Assignment (SVM-linear) (Accuracy: 97.202  %)'''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# importing libraries\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# getting the dataset\n",
+    "from sklearn import datasets\n",
+    "cancer=datasets.load_breast_cancer()\n",
+    "dataset=pd.DataFrame(cancer['data'],columns=cancer['feature_names'])\n",
+    "# target encoding: 0 = malignant, 1 = benign\n",
+    "dataset['Target']=cancer['target']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# features and target\n",
+    "X=dataset.iloc[:,:-1].values\n",
+    "y=dataset.iloc[:,30].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# splitting data into training set and test set\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.25,random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# feature scaling (scaler is fit on the training set only, then applied to the test set)\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "sc_X=StandardScaler()\n",
+    "X_train=sc_X.fit_transform(X_train)\n",
+    "X_test=sc_X.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
+       "  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',\n",
+       "  max_iter=-1, probability=False, random_state=0, shrinking=True,\n",
+       "  tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# training an SVM classifier with a linear kernel\n",
+    "from sklearn.svm import SVC\n",
+    "classifier=SVC(kernel='linear',random_state=0)\n",
+    "classifier.fit(X_train,y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# predicting test set\n",
+    "y_pred=classifier.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# building the confusion matrix (rows: true labels, columns: predicted labels)\n",
+    "from sklearn.metrics import confusion_matrix\n",
+    "cm=confusion_matrix(y_test,y_pred)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[51,  2],\n",
+       "       [ 2, 88]], dtype=int64)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/README.md b/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/README.md
@@ -0,0 +1,24 @@
+These are the accuracies I got with different models:
+1. Logistic Regression: 95.804 %
+2. KNN: 95.104 %
+3. SVM (linear): 97.202 %
+4. SVM (rbf): 96.503 %
+5. Naive Bayes: 91.608 %
+6. Decision Tree: 95.804 %
+7. Random Forest: 95.804 %
+
+
+Model Used: SVM (linear) with 97.202 % accuracy (139 of 143 test samples classified correctly)
+
+
+Code flow:
+1. Importing libraries
+2. Loading the dataset
+3. Separating features and target
+4. Splitting the dataset into training and test sets
+5. Feature scaling (important for a linear SVM)
+6. Training an SVM classifier with a linear kernel
+7. Predicting the test set
+8. Building the confusion matrix
+
+
diff --git a/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/desktop.ini b/Introduction-to-Data-Science/Week-6/Assignment-5/Skanda/desktop.ini
@@ -0,0 +1,4 @@
+[ViewState]
+Mode=
+Vid=
+FolderType=Documents