diff --git a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb index d411f7d..6e07d38 100644 --- a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb +++ b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.ipynb @@ -1559,6 +1559,8 @@ }, "outputs": [], "source": [ + "# We generally want our input data to be between 0 and 1 or -1 and 1\n", + "# Because most layers are built with the assumption of data that is distributed within this range\n", "x_train = landmassf3_train.data / 255.0\n", "y_train = landmassf3_train.targets\n", "x_test = landmassf3_test.data / 255.0\n", @@ -1577,11 +1579,11 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 27, "metadata": { "ExecuteTime": { - "end_time": "2020-11-03T05:42:17.513911Z", - "start_time": "2020-11-03T05:42:17.507813Z" + "end_time": "2020-11-03T06:23:51.061099Z", + "start_time": "2020-11-03T06:23:51.054585Z" } }, "outputs": [], @@ -1620,11 +1622,11 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 46, "metadata": { "ExecuteTime": { - "end_time": "2020-11-03T05:42:17.580445Z", - "start_time": "2020-11-03T05:42:17.515248Z" + "end_time": "2020-11-03T06:32:23.444225Z", + "start_time": "2020-11-03T06:32:23.439700Z" } }, "outputs": [], @@ -1636,7 +1638,8 @@ " total = 0\n", "\n", " for idx, image in enumerate(x):\n", - " pred = model(image.unsqueeze(0).unsqueeze(0).cuda()).argmax()\n", + " y_pred_outs = model(image.unsqueeze(0).unsqueeze(0).cuda())\n", + " pred = F.log_softmax(y_pred_outs).argmax()\n", " if int(pred) == int(y[idx]):\n", " correct += 1\n", " total += 1\n", @@ -2016,7 +2019,8 @@ "ExecuteTime": { "end_time": "2020-11-03T05:43:28.977303Z", "start_time": "2020-11-03T05:43:28.946269Z" - } + }, + "scrolled": true }, "outputs": [ { @@ -2095,7 +2099,7 @@ "
\n", "Exercise 1:
\n", "\n", - "Modify the previous code to train `BetterCNN` using `Adam` optimizer for a total of 10 `epochs`. Use 1e-3 `learning_rate`.\n", + "Modify the previous code to train `BetterCNN` using the `Adam` optimizer for a total of 3 `epochs`. Use a `learning_rate` of 1e-3.\n", "
" ] }, @@ -2110,31 +2114,18 @@ " See the solution for Exercise 1\n", "\n", " \n", - "If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's train now the model for 10 epochs more:\n", + "If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's now train the model for 3 epochs:\n", " \n", "```python\n", - "learning_rate = 1e-3\n", "convnet2 = BetterCNN().to(device)\n", - "optimizer = torch.optim.Adam(convnet2.parameters(), lr=learning_rate)\n", - "model = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=10)\n", - "test(model, x_test, y_test)\n", + "optimizer = torch.optim.Adam(convnet2.parameters(), lr=1e-3)\n", + "convnet2 = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=3)\n", + "test(convnet2, x_test, y_test)\n", "```\n", "\n", "" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2020-10-13T11:08:21.574558Z", - "start_time": "2020-10-13T11:03:52.622772Z" - } - }, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -2144,27 +2135,68 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 49, "metadata": { "ExecuteTime": { - "end_time": "2020-11-03T05:43:29.004642Z", - "start_time": "2020-11-03T05:43:28.978505Z" + "end_time": "2020-11-03T06:33:25.318386Z", + "start_time": "2020-11-03T06:33:25.299896Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda/envs/py37_pytorch/lib/python3.7/site-packages/ipykernel_launcher.py:4: UserWarning: Implicit dimension choice for log_softmax has been deprecated. 
Change the call to include dim=X as an argument.\n", + " after removing the cwd from sys.path.\n" + ] + }, + { + "data": { + "text/plain": [ + "tensor([2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 3, 2, 0, 2, 2],\n", + " device='cuda:0')" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Now that we finished the training let's save our best model\n", - "#PATH = \"./landmass_net.pth\"\n", - "#torch.save(model.state_dict(), PATH)" + "y_pred_outs = convnet2(\n", + " # Pass in first 20 images\n", + " x_test[:20].unsqueeze(1).float().to(device)\n", + ")\n", + "y_pred = F.log_softmax(y_pred_outs).argmax(-1)\n", + "y_pred" ] }, { - "cell_type": "markdown", - "metadata": {}, + "cell_type": "code", + "execution_count": 45, + "metadata": { + "ExecuteTime": { + "end_time": "2020-11-03T06:31:50.339852Z", + "start_time": "2020-11-03T06:31:50.336207Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 3, 2, 0, 2, 2],\n", + " dtype=torch.uint8)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Now let's load a new model to check that the performance of the saved model.\n", - "\n", - "Check more information about how to save models in Pytorch [here](https://pytorch.org/tutorials/beginner/saving_loading_models.html)" + "# compare to the real answer\n", + "y_test[:20]" ] }, { diff --git a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py index c4fa0c8..bc752c0 100644 --- a/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py +++ b/notebooks/c02_Intro_to_NN_Part_2/Intro_to_NN_Part_2.py @@ -281,6 +281,8 @@ def num_flat_features(self, x): torch.manual_seed(2020) # - +# We generally want our input data to be between 0 and 1 or -1 and 1 +# Because most layers are built with the assumption of data that is distributed within this range x_train 
= landmassf3_train.data / 255.0 y_train = landmassf3_train.targets x_test = landmassf3_test.data / 255.0 @@ -331,7 +333,8 @@ def test(model, x, y): total = 0 for idx, image in enumerate(x): - pred = model(image.unsqueeze(0).unsqueeze(0).cuda()).argmax() + y_pred_outs = model(image.unsqueeze(0).unsqueeze(0).cuda()) + pred = F.log_softmax(y_pred_outs).argmax() if int(pred) == int(y[idx]): correct += 1 total += 1 @@ -424,7 +427,7 @@ def forward(self, x): #
# Exercise 1:
# -# Modify the previous code to train `BetterCNN` using `Adam` optimizer for a total of 10 `epochs`. Use 1e-3 `learning_rate`. +# Modify the previous code to train `BetterCNN` using the `Adam` optimizer for a total of 3 `epochs`. Use a `learning_rate` of 1e-3. # 
# You can click in the button below the reveal the solution for exercise 1 @@ -434,31 +437,28 @@ def forward(self, x): # See the solution for Exercise 1 # # -# If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's train now the model for 10 epochs more: +# If we check the loss, we can notice that Adam is converging faster. However, the model is clearly underfitted. Let's now train the model for 3 epochs: # # ```python -# learning_rate = 1e-3 # convnet2 = BetterCNN().to(device) -# optimizer = torch.optim.Adam(convnet2.parameters(), lr=learning_rate) -# model = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=10) -# test(model, x_test, y_test) +# optimizer = torch.optim.Adam(convnet2.parameters(), lr=1e-3) +# convnet2 = train(convnet2, x_train, y_train, criterion, optimizer, n_epochs=3) +# test(convnet2, x_test, y_test) # ``` # # - - # Finally ! After changing the optimizer, creating a better CNN architecture and train for a couple of epochs we got an accuracy of over 99% on unseen data. -# + -# Now that we finished the training let's save our best model -#PATH = "./landmass_net.pth" -#torch.save(model.state_dict(), PATH) -# - +y_pred_outs = convnet2( + # Pass in first 20 images + x_test[:20].unsqueeze(1).float().to(device) +) +y_pred = F.log_softmax(y_pred_outs).argmax(-1) +y_pred -# Now let's load a new model to check that the performance of the saved model. -# -# Check more information about how to save models in Pytorch [here](https://pytorch.org/tutorials/beginner/saving_loading_models.html) +# compare to the real answer +y_test[:20] # # References and further reading #