diff --git a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb
index d411f7d..6e07d38 100644
--- a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb
+++ b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb
@@ -1559,6 +1559,8 @@
    },
    "outputs": [],
    "source": [
+    "# We generally want our input data to be between 0 and 1 or -1 and 1\n",
+    "# because most layers are built with the assumption of data that is distributed within this range\n",
     "x_train = landmassf3_train.data / 255.0\n",
     "y_train = landmassf3_train.targets\n",
     "x_test = landmassf3_test.data / 255.0\n",
@@ -1577,11 +1579,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 27,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-11-03T05:42:17.513911Z",
-     "start_time": "2020-11-03T05:42:17.507813Z"
+     "end_time": "2020-11-03T06:23:51.061099Z",
+     "start_time": "2020-11-03T06:23:51.054585Z"
     }
    },
    "outputs": [],
@@ -1620,11 +1622,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 46,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-11-03T05:42:17.580445Z",
-     "start_time": "2020-11-03T05:42:17.515248Z"
+     "end_time": "2020-11-03T06:32:23.444225Z",
+     "start_time": "2020-11-03T06:32:23.439700Z"
     }
    },
    "outputs": [],
@@ -1636,7 +1638,8 @@
     "    total = 0\n",
     "\n",
     "    for idx, image in enumerate(x):\n",
-    "        pred = model(image.unsqueeze(0).unsqueeze(0).cuda()).argmax()\n",
+    "        y_pred_outs = model(image.unsqueeze(0).unsqueeze(0).cuda())\n",
+    "        pred = F.log_softmax(y_pred_outs).argmax()\n",
     "        if int(pred) == int(y[idx]):\n",
     "            correct += 1\n",
     "        total += 1\n",
@@ -2016,7 +2019,8 @@
     "ExecuteTime": {
      "end_time": "2020-11-03T05:43:28.977303Z",
      "start_time": "2020-11-03T05:43:28.946269Z"
-    }
+    },
+    "scrolled": true
    },
    "outputs": [
     {
@@ -2095,7 +2099,7 @@
     "<div class=\"alert alert-success\" style=\"font-size:100%\">\n",
     "<b>Exercise 1</b>: <br>\n",
     "\n",
-    "Modify the previous code to train `BetterCNN` using `Adam` optimizer for a total of 10 `epochs`. Use 1e-3 `learning_rate`.\n",
+    "Modify the previous code to train `BetterCNN` using the `Adam` optimizer for a total of 3 `epochs`. Use a `learning_rate` of 1e-3.\n",
     "</div>"
    ]
   },
@@ -2110,31 +2114,18 @@
     "    <font size=\"4\" color=\"darkblue\"><b>See the solution for Exercise 1</b></font>\n",
     "</summary>\n",
     "    \n",
-    "If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's train now the model for 10 epochs more:\n",
+    "If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's now train the model for 3 epochs:\n",
     "    \n",
     "```python\n",
-    "learning_rate = 1e-3\n",
     "convnet2 = BetterCNN().to(device)\n",
-    "optimizer = torch.optim.Adam(convnet2.parameters(), lr=learning_rate)\n",
-    "model = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=10)\n",
-    "test(model, x_test, y_test)\n",
+    "optimizer = torch.optim.Adam(convnet2.parameters(), lr=1e-3)\n",
+    "convnet2 = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=3)\n",
+    "test(convnet2, x_test, y_test)\n",
     "```\n",
     "\n",
     "</details>"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-10-13T11:08:21.574558Z",
-     "start_time": "2020-10-13T11:03:52.622772Z"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -2144,27 +2135,68 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 49,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-11-03T05:43:29.004642Z",
-     "start_time": "2020-11-03T05:43:28.978505Z"
+     "end_time": "2020-11-03T06:33:25.318386Z",
+     "start_time": "2020-11-03T06:33:25.299896Z"
     }
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.\n",
+      "  after removing the cwd from sys.path.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "tensor([2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 3, 2, 0, 2, 2],\n",
+       "       device='cuda:0')"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Now that we finished the training let's save our best model\n",
-    "#PATH = \"./landmass_net.pth\"\n",
-    "#torch.save(model.state_dict(), PATH)"
+    "y_pred_outs = convnet2(\n",
+    "    # Pass in first 20 images\n",
+    "    x_test[:20].unsqueeze(1).float().to(device)\n",
+    ")\n",
+    "y_pred = F.log_softmax(y_pred_outs).argmax(-1)\n",
+    "y_pred"
    ]
   },
   {
-   "cell_type": "markdown",
-   "metadata": {},
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2020-11-03T06:31:50.339852Z",
+     "start_time": "2020-11-03T06:31:50.336207Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 3, 2, 0, 2, 2],\n",
+       "       dtype=torch.uint8)"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "Now let's load a new model to check that the performance of the saved model.\n",
-    "\n",
-    "Check more information about how to save models in Pytorch [here](https://pytorch.org/tutorials/beginner/saving_loading_models.html)"
+    "# compare to the real answer\n",
+    "y_test[:20]"
    ]
   },
   {
diff --git a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py
index c4fa0c8..bc752c0 100644
--- a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py
+++ b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py
@@ -281,6 +281,8 @@ def num_flat_features(self, x):
 torch.manual_seed(2020)
 # -
 
+# We generally want our input data to be between 0 and 1 or -1 and 1
+# because most layers are built with the assumption of data that is distributed within this range
 x_train = landmassf3_train.data / 255.0
 y_train = landmassf3_train.targets
 x_test = landmassf3_test.data / 255.0
@@ -331,7 +333,8 @@ def test(model, x, y):
     total = 0
 
     for idx, image in enumerate(x):
-        pred = model(image.unsqueeze(0).unsqueeze(0).cuda()).argmax()
+        y_pred_outs = model(image.unsqueeze(0).unsqueeze(0).cuda())
+        pred = F.log_softmax(y_pred_outs).argmax()
         if int(pred) == int(y[idx]):
             correct += 1
         total += 1
@@ -424,7 +427,7 @@ def forward(self, x):
 # <div class="alert alert-success" style="font-size:100%">
 # <b>Exercise 1</b>: <br>
 #
-# Modify the previous code to train `BetterCNN` using `Adam` optimizer for a total of 10 `epochs`. Use 1e-3 `learning_rate`.
+# Modify the previous code to train `BetterCNN` using the `Adam` optimizer for a total of 3 `epochs`. Use a `learning_rate` of 1e-3.
 # </div>
 
 # You can click in the button below the reveal the solution for exercise 1
@@ -434,31 +437,28 @@ def forward(self, x):
 #     <font size="4" color="darkblue"><b>See the solution for Exercise 1</b></font>
 # </summary>
 #     
-# If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's train now the model for 10 epochs more:
+# If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's now train the model for 3 epochs:
 #     
 # ```python
-# learning_rate = 1e-3
 # convnet2 = BetterCNN().to(device)
-# optimizer = torch.optim.Adam(convnet2.parameters(), lr=learning_rate)
-# model = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=10)
-# test(model, x_test, y_test)
+# optimizer = torch.optim.Adam(convnet2.parameters(), lr=1e-3)
+# convnet2 = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=3)
+# test(convnet2, x_test, y_test)
 # ```
 #
 # </details>
 
-
-
 # Finally ! After changing the optimizer, creating a better CNN architecture and train for a couple of epochs we got an accuracy of over 99% on unseen data.
 
-# +
-# Now that we finished the training let's save our best model
-#PATH = "./landmass_net.pth"
-#torch.save(model.state_dict(), PATH)
-# -
+y_pred_outs = convnet2(
+    # Pass in first 20 images
+    x_test[:20].unsqueeze(1).float().to(device)
+)
+y_pred = F.log_softmax(y_pred_outs).argmax(-1)
+y_pred
 
-# Now let's load a new model to check that the performance of the saved model.
-#
-# Check more information about how to save models in Pytorch [here](https://pytorch.org/tutorials/beginner/saving_loading_models.html)
+# compare to the real answer
+y_test[:20]
 
 # # References and further reading
 #