From 6fb87056b5e11071716dacd6b8850a60dcac2ab0 Mon Sep 17 00:00:00 2001
From: berkedilekoglu <berkedilekoglu@gmail.com>
Date: Sat, 26 Aug 2023 19:05:36 +0300
Subject: [PATCH] Delete .ipynb_checkpoints directory

---
 .ipynb_checkpoints/Untitled-checkpoint.ipynb |   6 -
 .ipynb_checkpoints/tutorial-checkpoint.ipynb | 669 -------------------
 2 files changed, 675 deletions(-)
 delete mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb
 delete mode 100644 .ipynb_checkpoints/tutorial-checkpoint.ipynb

diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
deleted file mode 100644
index 363fcab..0000000
--- a/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/.ipynb_checkpoints/tutorial-checkpoint.ipynb b/.ipynb_checkpoints/tutorial-checkpoint.ipynb
deleted file mode 100644
index 77ba33c..0000000
--- a/.ipynb_checkpoints/tutorial-checkpoint.ipynb
+++ /dev/null
@@ -1,669 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": true
-   },
-   "source": [
-    "# Example Usage of sumonet"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Loading Data #"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "You can load data in 2 different ways:\n",
-    "\n",
-    "1) By using Encoding class -> Takes data path or data sequence and output encoded (one-hot, nlf, blosum62) vectors\n",
-    "\n",
-    "2) By using Data class -> It does not take any input, output our dbPTM data -> entire or sampled data can be taken"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Data Class ###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### You can use our data automatically  by using Data Class####"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- Data class gives X_train, X_test as samples so you need to encode them \n",
-    "- y_test, y_train are list so you need to convert them to a 2-d array"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from sumonet.utils.load_data import Data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "data = Data()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train, y_train, X_test, y_test = data.sample_data(ratio = 0.2) #ratio defined as 0.4 in class\n",
-    "# If you want to use entire data as we did, you can set ratio as 1."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "A sample from X_train: LLPPSATASVKMEPENKYLPE\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f'A sample from X_train: {X_train[0]}')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Encode samples and convert label list to 2-d vectors###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Encoding Class ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from sumonet.utils.encodings import Encoding"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Define Encoding class ###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Encoding class takes 2 parameters: encoderTypes and scaler.\n",
-    "\n",
-    "- encoderTypes is initially defined as blosum62 according to our experiments but you can use one-hot or nlf also\n",
-    "- scaler is initially defined as True according to our experiments. It means that data will be passed into min-max scaler. If you want you can cancel it.\n",
-    "- You can change encoder type with set_encoder_type(encoderType) function"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "encoder = Encoding(encoderType='one-hot') ## Encoding(encoderType = 'blosum62', scale = True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train, y_train = encoder.get_encoded_vectors_from_data(X_train, y_train)\n",
-    "X_test, y_test = encoder.get_encoded_vectors_from_data(X_test, y_test)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape of the train and test samples are: X_train = (1912, 21, 21) || X_test = (211, 21, 21)\n",
-      "Shape of the train and test labels are: y_train = (1912, 2) || y_test = (211, 2)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"Shape of the train and test samples are: X_train = {X_train.shape} || X_test = {X_test.shape}\")\n",
-    "print(f\"Shape of the train and test labels are: y_train = {y_train.shape} || y_test = {y_test.shape}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Or you can use data path (we use ours in that tutorial) to take encoded vectors ###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### You can give data path ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "trainDataPath = \"sumonet/data/train\"\n",
-    "testDataPath = \"sumonet/data/test\"\n",
-    "\n",
-    "dataPathPositiveTrain = trainDataPath+'/Sumoylation_pos_Train.fasta'\n",
-    "dataPathNegativeTrain = trainDataPath+'/Sumoylation_neg_Train.fasta'\n",
-    "\n",
-    "dataPathPositiveTest = testDataPath+'/Sumoylation_pos_Test.fasta'\n",
-    "dataPathNegativeTest = testDataPath+'/Sumoylation_neg_Test.fasta'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Lets first change encoding type\n",
-    "encoder.set_encoder_type('blosum62')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### !! The order of the paths is important !! Positive train path should come first ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train, y_train = encoder.get_encoded_vectors_from_path(dataPathPositiveTrain,dataPathNegativeTrain)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_test, y_test = encoder.get_encoded_vectors_from_path(dataPathPositiveTest,dataPathNegativeTest)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Shape of the train and test samples are: X_train = (19131, 21, 24) || X_test = (2126, 21, 24)\n",
-      "Shape of the train and test labels are: y_train = (19131, 2) || y_test = (2126, 2)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"Shape of the train and test samples are: X_train = {X_train.shape} || X_test = {X_test.shape}\")\n",
-    "print(f\"Shape of the train and test labels are: y_train = {y_train.shape} || y_test = {y_test.shape}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Now our data is ready ###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## SUMOnet Model ##"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- You can use our architecture with randomly initialized weights\n",
-    "\n",
-    "- You can also use our pre-trained model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Let's import SUMOnet ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from sumonet.model.architecture import SUMOnet\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### You can use our architecture with randomly initialized weights ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "model = SUMOnet()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": true
-   },
-   "source": [
-    "### If you want to see summary of the model you need to build it with input shape ###"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_shape = X_train.shape"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "##### Build function takes entire shape because it takes batch_size #####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model.build(input_shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "##### model.summary will not show output shape because it is a subclass #####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model: \"sum_onet\"\n",
-      "_________________________________________________________________\n",
-      "Layer (type)                 Output Shape              Param #   \n",
-      "=================================================================\n",
-      "conv1d (Conv1D)              multiple                  6272      \n",
-      "_________________________________________________________________\n",
-      "bidirectional (Bidirectional multiple                  14016     \n",
-      "_________________________________________________________________\n",
-      "global_average_pooling1d (Gl multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "dense (Dense)                multiple                  2112      \n",
-      "_________________________________________________________________\n",
-      "dropout (Dropout)            multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "activation (Activation)      multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "dense_1 (Dense)              multiple                  8320      \n",
-      "_________________________________________________________________\n",
-      "dropout_1 (Dropout)          multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "activation_1 (Activation)    multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "dense_2 (Dense)              multiple                  16512     \n",
-      "_________________________________________________________________\n",
-      "dropout_2 (Dropout)          multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "activation_2 (Activation)    multiple                  0         \n",
-      "_________________________________________________________________\n",
-      "dense_3 (Dense)              multiple                  258       \n",
-      "_________________________________________________________________\n",
-      "activation_3 (Activation)    multiple                  0         \n",
-      "=================================================================\n",
-      "Total params: 47,490\n",
-      "Trainable params: 47,490\n",
-      "Non-trainable params: 0\n",
-      "_________________________________________________________________\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.summary()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Let's compile and train our model ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 1/3\n",
-      "598/598 [==============================] - 11s 14ms/step - loss: 0.5580 - accuracy: 0.7499\n",
-      "Epoch 2/3\n",
-      "598/598 [==============================] - 9s 14ms/step - loss: 0.4869 - accuracy: 0.7769\n",
-      "Epoch 3/3\n",
-      "598/598 [==============================] - 9s 15ms/step - loss: 0.4598 - accuracy: 0.7958\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<tensorflow.python.keras.callbacks.History at 0x7feb3a3f1240>"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model.fit(X_train,y_train,epochs=3)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### You can use pre-trained model###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "- By using load_weights function SUMOnet creates our provided model SUMOnet-3\n",
-    "- Again you need to build model first with input shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "from sumonet.model.architecture import SUMOnet\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "SUMOnet3_model = SUMOnet()\n",
-    "SUMOnet3_model.build(input_shape)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Let's load weights of pre-trained model ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "SUMOnet3_model.load_weights()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Now we can predict ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y_preds = SUMOnet3_model.predict(X_test)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's evaluate results ###"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### import evaluate function, which organized according to our evaluation set-up ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sumonet.evaluation.metrics import evaluate"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "evaluate function takes 3 arguments:\n",
-    "- y_test -> Gold labels should be in 1-d so if yours is 2-d as ours, use argmax(-1)\n",
-    "- y_pred -> Predictions are already 2-d vector\n",
-    "- string or array that includes metrics\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### You can calculate results one-by-one ####"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "f1_score = evaluate(y_test.argmax(-1),y_preds,'f1')\n",
-    "mcc = evaluate(y_test.argmax(-1),y_preds,'mcc')\n",
-    "roc = evaluate(y_test.argmax(-1),y_preds,'roc')\n",
-    "aupr = evaluate(y_test.argmax(-1),y_preds,'aupr')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "F1 score:  {'f1': 0.6580921757770631}\n",
-      "MCC score:  {'mcc': 0.5694399870602478}\n",
-      "ROC score:  {'roc': 0.8713018549625735}\n",
-      "AUPR score:  {'aupr': 0.7598319565641193}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(f\"F1 score: \", f1_score)\n",
-    "print(f\"MCC score: \", mcc)\n",
-    "print(f\"ROC score: \", roc)\n",
-    "print(f\"AUPR score: \", aupr)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### You can calculate all results at once ####\n",
-    "\n",
-    "- This calculation outputs a dictionary"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'aupr': 0.7598319565641193,\n",
-       " 'f1': 0.6580921757770631,\n",
-       " 'mcc': 0.5694399870602478,\n",
-       " 'roc': 0.8713018549625735}"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "evaluate(y_test.argmax(-1),y_preds,['f1','mcc','roc','aupr'])"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}