diff --git a/.gitignore b/.gitignore index 4d42147..f9f5b3e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *wandb *demo *.DS_Store +temp.py \ No newline at end of file diff --git a/my_homework/.gitignore b/my_homework/.gitignore new file mode 100644 index 0000000..571a1f5 --- /dev/null +++ b/my_homework/.gitignore @@ -0,0 +1,6 @@ +.idea +DI-engine +ding_study +wandb +output +.data \ No newline at end of file diff --git a/my_homework/README.md b/my_homework/README.md new file mode 100644 index 0000000..1cd000f --- /dev/null +++ b/my_homework/README.md @@ -0,0 +1,15 @@ +# PPOxFamily +this repository is to learn family algorithms of PPO. + +more details in: [https://github.com/opendilab/PPOxFamily](https://github.com/opendilab/PPOxFamily) + +
+ +
+ +## TODO +- [x] finish all the code in class one to four +- [x] correct notes +- [x] theorical inference processes +- [ ] 🆕class five + diff --git a/my_homework/ch2/homework2.ipynb b/my_homework/ch2/homework2.ipynb new file mode 100644 index 0000000..1ddb8ea --- /dev/null +++ b/my_homework/ch2/homework2.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. 重参数化技巧" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "一开始写错了,以为是对`eps`求导,变成了\n", + "\n", + "![](demo/reparam_gradient.png)" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "naive grad variance: [5.74129343 1.00604956 0.17195452 0.08618732 0.01622461]\n", + "reparameterization grad variance: [0.39380306 0.03859908 0.00890374 0.00394094 0.00082136]\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhYAAAGhCAYAAAA9YP2DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA/RElEQVR4nO3deXhU9aH/8U9mshIm7IR932QLkEASVhFBqCBBIVTxd/X2aW9tra12U65tbXvvLW1ttT6Wtvfe3raiSA0IQfZFQZaEsCeRHWQNIawh+35+fyQZGRMgk8zkzPJ+Pc/3ITlzzsxnGIf5eOac8w2QZAgAAMAFLGYHAAAAvoNiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXMbpYtGlSxe9++67un79uoqKipSRkaHo6Gh3ZAMAAF4m0JmVW7durd27d2vbtm2aMWOGrl27pv79++vWrVvuygcAALxIgJyYhGzRokUaN26cJk6c2KQH7dKli/Lz85t0HwAAoHnZbDZdvnz5nus4VSyOHDmiTZs2qVu3bpo0aZKysrL0pz/9SX/961/vuk1wcLBCQkLsv3fu3FknTpxo6EMCAAAP0rVr13uWC6eKRXFxsSTpjTfe0PLlyzV69Gi99dZbeu6557RkyZJ6t3nttdf085//vN5g7LUAAMA72Gw2ZWVlKSIi4p6f304Vi9LSUu3fv1/jxo2zL3vrrbc0evRojR07tt5tvrzHoqHBAACA57DZbMrLy7vv57dTZ4VkZ2fr6NGjDsuOHTumHj163HWbsrIy5efnOwwAAOCbnCoWu3fv1sCBAx2WDRgwQOfPn3dpKAAA4J2cKhZvvvmm4uLitHDhQvXt21dPPvmk/u3f/k2LFy92Vz4AAOBFnDrGQpIeffRRLVq0SP3799fZs2f1xhtv3POskC9r6Hc0AADvFxAQoNatW8tmsykgIMDsOLgLwzCUn5+v3NxcGU
b9taChn99OF4umolgAgH/o0KGDvvGNb2jQoEFmR0EDHT9+XP/7v/+ra9eu1bmNYgEAME1gYKD+9Kc/qaCgQElJSbp69aoqKyvNjoW7sFqt6tixoxITE9WyZUt9+9vfVkVFhcM6Df38duqS3gAANETnzp0VGhqq3/3udzp58qTZcdAAn3/+uW7evKmf/OQn6tSpky5dutSo+2F2UwCAy1ks1R8vpaWlJieBM2pfL6vV2uj7oFgAAACXoVgAAOBCzzzzjF/P+k2xAADAhT744AMNGDDA7Bim4eBNAABcqKSkRCUlJWbHMI1P7LEIDA7WmDmz9Mwbv+ICLACAJtm2bZveeust/eY3v9GNGzeUnZ2t1157zX77Sy+9pIyMDBUUFOjChQtavHixwsPD7bff+VVI//79ZRhGnekwXnzxRZ0+fdr++5AhQ7R+/Xrl5+frypUrWrJkidq1a+fmZ+oePlEsLFaLHvvhCxo+dbL6x402Ow4A4C6Cw0JNGc565plnVFhYqNjYWP34xz/Wz372Mz388MOSpKqqKn33u9/VkCFD9Mwzz+ihhx7Sb3/723rv59SpU9q3b58WLFjgsHzBggV6//33JUmtWrXSJ598okOHDikmJkbTp09XZGSkkpKSnM7tCXziq5Cy4hLtX7NBExYkKj5xjk6m7jU7EgDgS4LDQrVo7zZTHnvhmMkqK2741xMZGRn65S9/KUk6ffq0vvOd72jKlCnaunWr3nrrLft658+f109+8hP95S9/0fPPP1/vfS1dulTf+c539LOf/UxS9V6MmJgYPf3005Kk73znOzp06JBeffVV+zZf+9rXdOnSJfXv31+nTp1y+vmaySf2WEhSatIqSdKQB8cromMHk9MAALxZRkaGw+/Z2dnq2LGjJNkLxqVLl5SXl6d3331X7du3V1hYWL339c9//lO9evVSbGyspOq9FQcOHNCJEyckSVFRUZo8ebLy8/Pt4/jx45Kkvn37uuspuo1P7LGQpJzPz+nM/kPqGzNScY/P0ua//M3sSACAO5QVl2jhmMmmPbYzysvLHX43DEMWi0U9e/bU2rVr9ec//1mvvvqqbt68qfHjx+tvf/ubgoODVVxcXOe+cnJy9Mknn+ipp55SWlqannrqKf35z3+2396yZUutWbNGL7/8cp1ts7OzncrtCXymWEjVey36xoxU7NzZ2vq/76iK69IDgEdx9gPe00RHR8tisegHP/iBfRbQxMTE+263dOlS/fa3v9WyZcvUp08f/fOf/7TfdvDgQT3xxBM6d+6cT8yn4jNfhUhSxtbtKrh5S60jO+qBiWPNjgMA8DGnT59WcHCwXnjhBfXu3VtPP/20nnvuuftut3LlStlsNv35z3/Wtm3bHPZELF68WG3bttWyZcsUExOjPn36aNq0afrb3/5mvzS6N/G+xPdQWV6uvclrJUnxiXNMTgMA8DUZGRl66aWX9PLLL+uzzz7TggULtHDhwvtuV1BQoDVr1mjEiBFaunSpw23Z2dkaN26crFarNm/erMzMTP3hD39Qbm6uqqqq3PVU3MpozmGz2QzDMAybzeaW+2/Xravx+8xU4/X03Ubbbl2a9bkxGAwGo3r07NnTWLJkidGzZ0/TszBc87o19PPbp/ZYSNKNS1k6sXuPLBaL4p6YbXYcAAD8is8VC0lKSUqWJI2ZM1PWQJ86PhUAAI/mk8Xi6Ke7dDvnmmzt2mrYlElmxwEAwG/4ZLGoqqzUng9XS+IgTgAAmpNPFgtJSlv5kaoqK9VvTLQ69u5pdhwAAPyCzxaL2znXdPTTXZKk+HnstQAAoDn4bLGQvjiIM2b2DAWFhpgbBgAAP+DTxeJkSppuXLqsFhERGvHIFLPjAADg83y6WBiGoT0rkiXxdQgAAM3Bp4uFJO1NXquK8nL1jBqqroMGmB0HAACf5vPFouDGLWVu3S5JipuXYGoWAAB8nc8XC0lKSVolSRr16DSFtGhhchoAgDcJCgoy7bGtVqtpj9
1YflEsPt9/SDmfn1NoeLhGzXzE7DgAAA+2bds2vf3223rzzTd17do1bdq0SUOGDNH69euVn5+vK1euaMmSJWrXrl2dbd5++23l5ubq2rVr+uUvf+lwv08//bT27dunvLw8ZWdna+nSperQoYP99kmTJskwDE2fPl379+9XaWmpxo8frz59+ig5OVlXrlxRfn6+9u7dqylTHE9IOHv2rF599VW98847ys/P17lz5zRr1iy1b99eycnJys/PV3p6uqKjo937lyc/KRaSlFqz12Ls/MdNTgIA/qtFixBThrOeeeYZlZWVady4cXrllVf0ySef6NChQ4qJidH06dMVGRmppKSkOttUVFRozJgx+t73vqfvf//7+vrXv26/PSgoSD/96U8VFRWlhIQE9erVS//4xz/qPPavf/1rvfLKK3rggQeUkZGhli1bav369ZoyZYpGjhypjRs3as2aNerevbvDdi+99JJ2796tkSNHat26dXr33Xe1ZMkSvffeexo1apTOnDmjJUuWOP130RjNOiWru6dNv9sIi7AZv9633fh9ZqrRM2qo6VPTMhgMhi+P+qbfbtEixKgy1pgyWrQIaXD2bdu2GQcOHLD//uqrrxobN250WKdr166GYRhG//797dscOXLEYZ1FixbVWXbniI6ONgzDMMLDww1JxqRJkwzDMIzHHnvsvhkzMzON559/3v772bNnjSVLlth/j4yMNAzDMH7xi1/Yl8XGxhqGYRiRkZFOvW61w2+nTb+b4rx8Hdq4RRKnngIA7u3AgQP2n6OiojR58mTl5+fbx/HjxyVJffv2ta+3Z88eh/tITU1V//79ZbFUf9SOGjVKH330kc6fP6+8vDx9+umnkqQePXo4bLd//36H38PDw/X666/r6NGjunXrlvLz8/XAAw/U2S4jI8P+c05OjiQpMzOzzrKOHTs68TfhPL+aUzw1aZXGJMzUiOlTtPq3b6k4L8/sSADgN4qKStUyfK5pj+2MwsJC+88tW7bUmjVr9PLLL9dZLzs7u0H316JFC23atEmbNm3SggULdO3aNfXo0UObN29WcHDwXR9bkn73u99p6tSp+uEPf6jTp0+ruLhYK1asqLNdeXl5nce9c5lhGJJkLzru4lfF4kLmUWUdO6muDwzQ6Nlf0Y53/2l2JADwK85+wHuCgwcP6oknntC5c+dUWVl51/ViY2Mdfo+Li9OpU6dUVVWlQYMGqX379nrllVd06dIlSVJMTEyDHn/cuHH6xz/+oeTkZEnVezB69erVqOfSHPzmq5BaKctXSZLiuaYFAKABFi9erLZt22rZsmWKiYlRnz59NG3aNP3tb39z+L//Hj166Pe//70GDBigr371q3rhhRf01ltvSZIuXLig0tJSvfDCC+rdu7dmzZqln/70pw16/FOnTunxxx9XVFSUhg8frvfff9/tex2awnOTucmhdZtVUlCojr17qu/oUWbHAQB4uOzsbI0bN05Wq1WbN29WZmam/vCHPyg3N1dVVVX29ZYsWaKwsDDt3btXixcv1ltvvaX/+Z//kSRdv35dzz77rObNm6ejR4/qlVde0Q9/+MMGPf73v/993bp1SykpKVqzZo02bdqkgwcPuuW5ukqzHils1lkhd47HX/2h8fvMVOP//e4/TcvAYDAYvjzudXaBL45t27YZb775puk53Pm6cVbIPaQuT5YkDXtokmzt2pobBgAAH+KXxSL75GmdO5wpa1CgxsyZZXYcAAB8hl8WC+mL+UPi5s5WgAcfBAMA8HyTJ0/WSy+9ZHYMj+C3n6jpmz9R0e08te3aWQPHxd5/AwAAcF9+WywqSku1b/U6SdJYrsQJAIBL+G2xkL44iPOBiWPVulOkuWEAwIfUXuUxMNCvrsPo9Wpfr9rXrzH8ulhcO3dBp/bsl8VqVewTj5kdBwB8xo0bNyRJgwYNMjkJnFH7el2/fr3R9+H3VTJl+Sr1j4tR7OOztOW//6aqirtfrhUA0DCFhYXavn
27EhMTJUnHjx9XRUWFyalwN4GBgRo0aJASExO1fft2FRUVNf6+XJjLKx35ZIfyrt9Qq44dNOTBCcrcut3sSADgE/7+979LkubPn29yEjTU9u3b7a9bYwWo+kpZzcZmsykvL08RERHKz89vzoe+qxkvfFMP/9uzOpm6V//9b98zOw4A+JQWLVqoffv2CggIMDsK7sIwDF2/fv2eeyoa+vnt93ssJGnPh6v10Nf/RQPix6h9j266fuGS2ZEAwGcUFRXpwoULZsdAM3Hq4M3XXntNhmE4jGPHjrkrW7O5dfmKju9KlSTFzU0wNwwAAF7M6bNCPvvsM3Xq1Mk+xo8f745czS41KVmSNCbhUQUGB5sbBgAAL+V0saioqFBOTo591J5S5O2O7UzRrewrCm/TWsOnPmh2HAAAvJLTxaJ///7KysrSmTNn9N5776l79+73XD84OFg2m81heCKjqkp7VqyWJMVzJU4AABrFqWKRlpamZ599VtOnT9e3vvUt9e7dWzt37lTLli3vus3ChQuVl5dnH1lZWU0O7S5pK9eosqJCfaJHqFP/vmbHAQDAKxmNHa1atTJyc3ONr33ta3ddJzg42LDZbPbRpUsXwzAMw2azNfpx3Tn+5ff/Zfw+M9WY8+8/MD0Lg8FgMBieMmw2W4M+v5t0Se/bt2/r5MmT6tev313XKSsrU35+vsPwZLXzh8TMmqHgsDBzwwAA4GWaVCzCw8PVt29fZWdnuyqP6U6n7de18xcV2jJcI2c8bHYcAAC8ilPF4vXXX9fEiRPVs2dPxcfHa9WqVaqsrNSyZcvcla/ZGYahPTV7LeITOYgTAABnOFUsunXrpmXLlunEiRNKSkrSjRs3FBcX16RZ0DzRvtXrVFFWpu5DHlC3wczMBwBAQzl1Se8nn3zSXTk8SmHubaVv/kTRM6drbOIcJf18kdmRAADwCk06xsKXpSatkiSNmDFVoba7n04LAAC+QLG4i7OHMpR96oxCWoQpZtZ0s+MAAOAVKBb3ULvXgitxAgDQMBSLeziwdqNKi4rVqV8f9R4VZXYcAAA8HsXiHkoKCnVo/WZJ0lhOPQUA4L4oFveRurz665DhUycrvE1rc8MAAODhKBb3cenoCV347KgCg4M1evajZscBAMCjUSwaIDUpWZIUPy9BAQEB5oYBAMCDUSwa4PDGLSrOy1f7Ht3UPy7G7DgAAHgsikUDlBWXaP+aDZI49RQAgHuhWDRQ7XTqQyZPUETHDuaGAQDAQ1EsGijnzFmdOXBI1sBAxT4+y+w4AAB4JIqFE2oP4ox74jFZrFZzwwAA4IEoFk7I2LJNBTdvqXWnSD0wId7sOAAAeByKhRMqy8u1L3mdJCmeK3ECAFAHxcJJqStWS5IGjotT266dTU4DAIBnoVg46cbFSzqRkiaLxaLYJ2abHQcAAI9CsWiElA+q5w+JfXyWrIGBJqcBAMBzUCwa4eiOXbqdc022dm01bMoks+MAAOAxKBaNUFVRqbSVH0niIE4AAO5EsWiktA8/UlVlpfqNiVbH3j3NjgMAgEegWDRSbs5VHd2xW5IUNy/B3DAAAHgIikUTpCZVH8Q5evZXFBgSYnIaAADMR7FoghMpe3Xj0mW1iIjQiEemmB0HAADTUSyawKiq0p6aC2bFJyaYGwYAAA9AsWiivclrVFFerl5Rw9RlYH+z4wAAYCqKRRMV3LilzK3bJXHqKQAAFAsXqD2Ic9Sj0xTSooXJaQAAMA/FwgXO7D+kq2fPKzQ8XKMefcTsOAAAmIZi4SIpNXstOIgTAODPKBYusv+jDSovKVXXQQPUY/gQs+MAAGAKioWLFOfl6fCmrZKksRzECQDwUxQLF6r9OmTEIw8rLCLC5DQAADQ/ioULXcg4oqxjJxUUGqKYx2aYHQcAgGZHsXCxlOXVey34OgQA4I8oFi52aN1mlRQWqmPvnuo7epTZcQAAaFYUCxcrLSrSwbWbJLHXAgDgfygWblB7EOewKQ+qZbs2Jq
cBAKD5UCzcIPvkaZ1Lz5Q1KFBjEmaZHQcAgGZDsXCT1KRkSVLc3NkKsPDXDADwD3ziucnhTR+r6Hae2nXrooFjx5gdBwCAZkGxcJOK0lLtW71OEtOpAwD8B8XCjVKXJ0uSBk8cp9adIs0NAwBAM6BYuNG1cxd0Km2/LFarYp94zOw4AAC4HcXCzWr3WsQ+PkuWQKu5YQAAcDOKhZt99vGnyr9xU606dtCQSePNjgMAgFtRLNyssqJCaSvXSOIgTgCA72tSsXj55ZdlGIbefPNNV+XxSWkfrlZVVZUGjo1Vu+7dzI4DAIDbNLpYxMTE6Jvf/KbS09Ndmccn3czK1ondeyRJ8XNnm5wGAAD3aVSxCA8P19KlS/WNb3xDt27dcnUmn5RaM3/ImDkzZQ0KMjkNAADu0ahisXjxYq1bt04ff/zxfdcNDg6WzWZzGP7o6I4U3cq+ovA2rRU1bbLZcQAAcAuni8X8+fM1atQoLVy4sEHrL1y4UHl5efaRlZXldEhfYFRVac+HH0mS4udxECcAwDc5VSy6deumt956SwsWLFBpaWmDtlm0aJEiIiLso2vXro0K6gv2rlyjyooK9YkeoU79+pgdBwAAl3OqWERHRysyMlIHDx5UeXm5ysvL9eCDD+q73/2uysvLZalnFs+ysjLl5+c7DH+Vd+26jmzbKUmKn5dgbhgAANzAqWLx8ccfa+jQoRoxYoR97Nu3T0uXLtWIESNUVVXlrpw+I3V59UGc0bNmKDgs1OQ0AAC4VqAzKxcUFOjIkSMOywoLC3Xjxo06y1G/U3v26/qFS2rfo5tGTJ+qvavWmB0JAACX4cqbzcwwDPv8IfGJCaZmAQDA1ZpcLCZPnqyXXnrJFVn8xr7V61RRVqYeQwer2+BBZscBAMBl2GNhgsJbuUrf/IkkDuIEAPgWioVJaq/EOfIr0xTaMtzkNAAAuAbFwiRnD2Uo+9QZhbQIU/SsGWbHAQDAJSgWJrIfxMnXIQAAH0GxMNGBNRtUWlSszv37qvfI4WbHAQCgySgWJiopKNThDVskSfGJzB8CAPB+FAuTpdQcxBk17SGFt25lchoAAJqGYmGyS0eP6+KRYwoMDtbo2Y+aHQcAgCahWHiAlA+q91rEzUtQQECAyWkAAGg8ioUHOLxxi4rzC9ShZ3f1j4sxOw4AAI1GsfAAZcUlOrBmgyQpfh4HcQIAvBfFwkPUXtNiyOQJiujQ3twwAAA0EsXCQ1w5/bk+P3BY1sBAjXl8ltlxAABoFIqFB0ldXn0QZ/zc2bJYrSanAQDAeRQLD5KxZbsKb+WqdadIDRofb3YcAACcRrHwIBVlZdqbvE6SFJ+YYG4YAAAagWLhYWoP4hw0Pl5tunQyNwwAAE6iWHiYGxcv6URKmiwWi+LmJpgdBwAAp1AsPFBqzfwhsY/PkjUw0OQ0AAA0HMXCAx35dJduX70mW7u2GjplktlxAABoMIqFB6qqqFTayjWSpPh5CeaGAQDACRQLD5X24UeqqqxU/9gYdezd0+w4AAA0CMXCQ+VeydGxHSmSpLi5s01OAwBAw1AsPFhKzZU4R89+VIEhISanAQDg/igWHuzE7jTduHRZLVpFKGraQ2bHAQDgvigWHsyoqtKeFaslSWPnM506AMDzUSw83N7kNaosr1CvqGHqMrC/2XEAALgnioWHK7hxS5kfb5fEqacAAM9HsfACKTVX4hw18xGFtGhhchoAAO6OYuEFzuw7qKtnzys0PFwjH51mdhwAAO6KYuElamc9HZvIQZwAAM9FsfAS+1avV3lpqboOGqAewwabHQcAgHpRLLxEcV6eDm/8WJIUz14LAICHolh4kZSklZKkkdOnKiwiwuQ0AADURbHwIhcyjijr+EkFhYYo5rEZZscBAKAOioWXSU1KlsRBnAAAz0Sx8DIH121SSWGhOvbuqb4xI82OAwCAA4qFlyktKtLBdZslcRAnAMDzUCy80J6aa1oMe/hBtWzXxtwwAADcgWLhhb
KOn9T59M8UGBSkMQkzzY4DAIAdxcJLpS6vnj8kbm6CAgICTE4DAEA1ioWXOrTxYxXl5aldty4aOC7W7DgAAEiiWHititJS7Vu9XhIHcQIAPAfFwovVHsQ5eOI4tY7saG4YAABEsfBqV8+e1+m9B2SxWhX7xGNmxwEAgGLh7VKTqg/ijH3iMVkCrSanAQD4O4qFl8v8+FPl37ipVh07aPDE8WbHAQD4OYqFl6usqNDeVWslSWMTE8wNAwDwe04Vi+eee07p6em6ffu2bt++rZSUFE2fPt1d2dBAe1Ykq6qqSgPHxaldt65mxwEA+DGnisWlS5f0yiuvKDo6WjExMfrkk0+0evVqDR482F350AA3s7J1YvceSVLcvNkmpwEA+DOnisXatWu1YcMGnT59WqdOndJPfvITFRQUKC4uzl350EC1B3GOSZgpa1CQyWkAAP6q0cdYWCwWzZ8/X+Hh4UpNTb3resHBwbLZbA4DrndsZ6pyr+SoZds2Gj51stlxAAB+yuliMXToUOXn56u0tFR/+ctfNGfOHB07duyu6y9cuFB5eXn2kZWV1aTAqF9VZaX2fPiRJCmegzgBACYJkGQ4s0FQUJB69OihVq1aae7cufr617+uSZMm3bVcBAcHKyQkxP67zWZTVlaWIiIilJ+f36TwcBTRsYN+smmlrIGB+m3CU8o5c9bsSAAAH2Gz2ZSXl3ffz2+n91iUl5frzJkzOnjwoP793/9d6enp+t73vnfX9cvKypSfn+8w4B55V6/pyPZdkqT4eQnmhgEA+KUmX8fCYrE47JGAuWoP4oyZNUPBYaEmpwEA+BunisWvfvUrTZgwQT179tTQoUP1q1/9Sg8++KCWLl3qrnxw0qk9+3T9wiWFRdg04pGHzY4DAPAzThWLjh07asmSJTpx4oQ+/vhjjR49Wo888oi2bt3qrnxwkmEYSq2Z9ZTp1AEAzc3pgzebqqEHf6Dxwtu01s+2rlZgcLDenP+sLh09YXYkAICXc9vBm/B8hbdylbFlmyQpfh57LQAAzYdi4aNSag7iHPmVaQptGW5yGgCAv6BY+KizB9N15fTnCmkRpuiZTBQHAGgeFAsflrq8eq8FB3ECAJoLxcKH7V+zUWXFJercv696jRhudhwAgB+gWPiwkvwCHdqwRRLzhwAAmgfFwselfLBSkhQ17SGFt25lchoAgK+jWPi4S0eP6+KRYwoKCdHo2Y+aHQcA4OMoFn6gdv6QuHkJCggIMDkNAMCXUSz8wKENW1WcX6AOPburX2yM2XEAAD6MYuEHyoqLdWDtRklMpw4AcC+KhZ+onZhs6EMTFdGhvblhAAA+i2LhJ66cOqOzB9NlDQzUmDkzzY4DAPBRFAs/klJzJc64ubMVYOGlBwC4Hp8ufiRj8zYV3spVm86dNGh8vNlxAAA+iGLhRyrKyrQ3eZ0kaex85g8BALgexcLP7FmRLEkaND5ebbp0MjcMAMDnUCz8zPULl3Qyda8sFovinphtdhwAgI+hWPihlJorcY55fJasgYEmpwEA+BKKhR86sn2n8q5dV0T7dhry0ESz4wAAfAjFwg9VVVQqbeUaSdLYeRzECQBwHYqFn9qzYrWqKivVPy5GHXr1MDsOAMBHUCz8VO6VHB3bmSqp+oJZAAC4AsXCj6UkrZQkjUmYqcCQEJPTAAB8AcXCj53YnaabWdlq0SpCUdMeMjsOAMAHUCz8mFFVpT0rVkuSxiZyECcAoOkoFn5u76o1qiyvUK8Rw9R5QD+z4wAAvBzFws/l37ipzE8+lSTFz0swNwwAwOtRLKDUmitxRs+arpAWLUxOAwDwZhQL6PTeA7p69rxCw8M18itTzY4DAPBiFAtIklJrZj2N50qcAIAmoFhAkrQveb3KS0vVbfBA9Rg22Ow4AAAvRbGAJKk4L0+HN34sSYrn1FMAQCNRLGCXurz6IM6R06cqLMJmchoAgDeiWMDufPpnunzilIJCQxQza4bZcQAAXohiAQcpNa
ee8nUIAKAxKBZwcHDdJpUWFSmyTy/1iRlpdhwAgJehWMBBaWGRDq7bLEkay5U4AQBOoligjtorcQ6bOlkt27YxOQ0AwJtQLFBH1vGTOp/+mQKDgjRmzkyz4wAAvAjFAvWqPfU0bm6CAgICTE4DAPAWFAvU6/Cmj1WUl6d23bpowNhYs+MAALwExQL1Ki8p1f7VGyRJYxMTzA0DAPAaFAvcVe3XIYMnjVeryA4mpwEAeAOKBe7q6tnzOr3voCxWq2Iff8zsOAAAL0CxwD3Vnnoa98RsWaxWk9MAADwdxQL3lPnxp8q/cVOtIjto8KRxZscBAHg4p4rFK6+8or179yovL085OTlatWqVBgwY4K5s8ACV5eXau2qtJCl+HvOHAADuzaliMWnSJC1evFhxcXGaOnWqgoKCtHnzZrVo0cJd+eAB9ny4WlVVVRo0Pk7tunU1Ow4AwIM5VSxmzJihd955R0ePHlVGRoaeffZZ9ezZU9HR0e7KBw9w89JlnUhJkyTFzZttchoAgCdr0jEWrVq1kiTdvHnzrusEBwfLZrM5DHif2oM4xyTMlDUoyOQ0AABP1ehiERAQoD/84Q/atWuXjhw5ctf1Fi5cqLy8PPvIyspq7EPCRMd2pCg356patm2j4Q8/aHYcAICHanSxWLx4sYYOHaqvfvWr91xv0aJFioiIsI+uXfmO3htVVVYqbcVqSVJ8IgdxAgDq16hi8fbbb2vmzJmaPHnyffdAlJWVKT8/32HAO+1ZuUaVFRXqGzNSkX16mR0HAOCBnC4Wb7/9tubMmaOHHnpI586dc0MkeKq8q9d09NPdkqS4eQnmhgEAeCSnisXixYv19NNP66mnnlJ+fr4iIyMVGRmp0NBQd+WDh0n5YKUkafRjX1FQaIjJaQAAnsapYvHtb39brVu31qeffqorV67Yx/z5892VDx7m1J59un7xksIibBo5farZcQAAHsapYhEQEFDveOedd9yVDx7GMAztWZ4siYM4AQB1MVcInLY3eZ0qysvVY9hgdRs80Ow4AAAPQrGA0wpv5SpjyzZJHMQJAHBEsUCj1F6Jc9RXpim0ZbjJaQAAnoJigUb5/MBhXTlzViEtWmjUo4+YHQcA4CEoFmi02r0WHMQJAKhFsUCj7V+zQWXFJeoyoJ96RQ0zOw4AwANQLNBoJfkFOrRhiyQpfj57LQAAFAs0Ue3XIVHTHlJ461YmpwEAmI1igSa5eOSYLh49rqCQEMU89hWz4wAATEaxQJPZD+Kcl6CAgACT0wAAzESxQJMdWr9FJQWF6tCrh/qNiTY7DgDARBQLNFlZcbEOrN0oiVNPAcDfUSzgEqnLq78OGTp5omzt25mcBgBgFooFXCL75BmdPZQha1CgxsyZaXYcAIBJKBZwmZSklZKkuLmzFWDhPy0A8Ef86w+Xydi8TYW5t9W2S2cNGh9vdhwAgAkoFnCZirIy7UteJ0kay0GcAOCXKBZwqdQVyZKkQRPi1aZzJ3PDAACaHcUCLnX9/EWd3LNPFotFsXMfMzsOAKCZUSzgcrVX4ox9/DFZAq0mpwEANCeKBVzus207lHftuiLat9PQyRPNjgMAaEYUC7hcVUWl0latkcSVOAHA31As4BZ7lq9WVWWlBsSNVvue3c2OAwBoJhQLuEXulRwd25kqqXrWUwCAf6BYwG1qD+IcPftRBYaEmJwGANAcKBZwm+O79+jm5WyFt26lqKmTzY4DAGgGFAu4jVFVpT0rVkviIE4A8BcUC7jV3lVrVVleod4jh6vzgL5mxwEAuBnFAm6Vf/2GPtu2Q5IUP4+9FgDg6ygWcLvagzijZ05XcFiYyWkAAO5EsYDbnUrbr6tnzyu0ZbhGPTrN7DgAADeiWKBZ1M56ytchAODbKBZoFvtXr1d5aam6DR6o7kMHmx0HAOAmFAs0i6LbeUrf9IkkaSynngKAz6JYoNnUHsQ5YvrDCouwmZwGAOAOFAs0m3Ppmbp88rSCw0IVPXO62XEAAG
5AsUCzqt1rwZU4AcA3USzQrA6s3ajSoiJ16ttbfaJHmB0HAOBiFAs0q9LCIh1ct1kSey0AwBdRLNDsar8OGT51slq2bWNyGgCAK1Es0Oyyjp/U+YwjCgwK0uiER82OAwBwIYoFTJG6vOYgznkJCggIMDkNAMBVKBYwxeGNW1Wcl6923bpqQPwYs+MAAFyEYgFTlJeUat9H6yVxECcA+BKKBUyzZ3myJGnwpHFqFdnB3DAAAJegWMA0OZ+f05n9h2QNDFTsnFlmxwEAuADFAqZK+WClJCl27mxZrFaT0wAAmopiAVNlfvyp8m/cVOvIjho8aZzZcQAATeR0sZgwYYI++ugjZWVlyTAMzZ492x254Ccqy8u1L3mtJCl+HgdxAoC3c7pYhIeHKz09Xc8//7w78sAPpa5YLUkaND5Obbt1MTkNAKApnC4WGzdu1E9/+lMlJye7IQ780c1Ll3V81x5J0vgn55qcBgDQFG4/xiI4OFg2m81hAF9WeyXOSf/ypL637P80auYjsgYGmpwKAOAstxeLhQsXKi8vzz6ysrLc/ZDwQke27dSnS5apvLRUPYYO1oJFP9dPNq/StOe+ppbtmKgMALxFgCSjsRsbhqGEhAStXr36rusEBwcrJCTE/rvNZlNWVpYiIiKUn5/f2IeGjwpv01rx8xI0dv7jatWx+qJZFWVlOrRhq3Yu/UBZx06anBAA/JPNZlNeXt59P7/dXiwaGwz+zRoYqOFTJ2vCgkT1jBpqX/75gcPauTRJn32yQ1WVlSYmBAD/0tDPb77EhkeqrKjQoQ1bdGjDFvUYNlgTnp6vqKkPqU/0CPWJHqFb2Ve0+58fas+Kj1Scl2d2XABADaf3WISHh6tfv36SpMOHD+ull17Stm3bdPPmTV28ePG+27PHAo0V0bGDxs6fo/i5CWrZtvq4i7LiEh1Yu1E7lyYp58xZkxMCgO9y21chkyZN0vbt2+ss/8c//qF//dd/dVkw4G4Cg4M18itTNWFBoroOGmBffjJ1r3a8l6TjO1NkGI3+hg8AUI9mOcaiMSgWcKU+MSM1YUGihk6eYJ9r5PqFS9r1/nLtTV6r0sIikxMCgG+gWMCvtOnSSeOfnKfYx2cpLKL6WiklBYXam7xWu95foRsXL5mcEAC8G8UCfik4LEwxj83QhAWJ6ti7pySpqqpKx3akaOfSJJ3as8/khADgnSgW8GsBAQEaED9GE55O1AMTxtqXZ586o13vL9eBtRtVXlJqYkIA8C4UC6BGh149NP6peRo9+ysKadFCklR0O097Plyt3cs+VO6VHJMTAoDno1gAXxJqa6kxc2Zq/JPz1K5mFtXKigp99skO7XzvA509lGFyQgDwXBQL4C4CLBYNnjROExYkqn9sjH35xaPHtfO9JB3euFWV5eUmJgQAz0OxABqg84C+mvBUokY9+oiCQqvntMm7fkOpSauUmrRK+TdumpwQADwDxQJwQnjrVoqbm6CxX31crSM7SpIqyst1eONW7XwvSZeOHjc5IQCYi2IBNIIl0KrhUx7UhKfnq9eIYfblZw9laOfSJGVu3c7kZwD8EsUCaKLuQx7QhKcTFfXIFAUGBUmScq/k1Ex+tlpFt5n8DID/oFgALmJr305j5z+u+HkJsrVrK6lm8rN1G7Vr6XJdOf25yQkBwP0oFoCLBQYHa+SMhzVhwXx1feCLyc9O7dmvnUs/0NEdKTKqqkxMCADuQ7EA3Kj3qChNWJCoYVMmfTH52cVL2vX+Cu1LXquSgkKTEwKAa1EsgGbQpnMnjfvqE4qd+5haRERIkkoKC7UveZ12vb9c1y8w+RkA30CxAJpRcFioomfO0PgF89Spb2/78qM7dmvne0k6mbrXxHQA0HQUC8AkA+JHa8KC+Ro8aZx92ZUzZ7Vr6XIdWLtBZcUlJqYDgMahWAAma9+jW/XkZwmPKjQ8XJJUlJentBUfafc/P9St7CsmJwSAhqNYAB4itGW4RifM1Pin5qp9926SpK
rKSmV+/Kl2vb9cnx84bG5AAGgAigXgYQIsFj0wYawmPJ2oAXGj7csvHT2hXe8n6dCGraooKzMxIQDcHcUC8GCd+vXR+AXzFDNzhn3ys/wbN5W6PFkpH6xU/vUbJicEAEcUC8ALtGgVobi5szXuq0+odadISdWTn6Vv+lg730vSxSPHTE4IANUoFoAXsQRaNWzKg5qwIFG9Rw63Lz93OFM7lyYpY+s2VVUw+RkA81AsAC/VbfAgTViQqBEzHv5i8rOcq0r550rtWZGswtzbJicE4I8oFoCXs7Vrq/jEORo7/3H75GflJaU6uG6TdixN0pVTZ0xOCMCfUCwAH2ENCtKI6Q9rwtOJ6j54kH35qbT92rk0SUc/3c3kZwDcjmIB+KBeI4ZrwtPVk59ZAwMlSTcuZWnXshXau2qtSvILTE4IwFdRLAAf1rpTpMZ99XHFzU1Qi1bVk5+VFhVp3+r12vX+cl07d8HkhAB8jd8Vi//8z/+ngADp8OGzSk8/q9Ons1XF7mH4uKDQEEXPnK7xT81T5/597cuP7UzRzqXLdTIlTYbRrG9xAD7K74rF1WtL1b59hP33wsISffbZeaXXFI309LPKyDingoJilz0m4En6x43WhKfm6YFJ42SxWCRJOZ+f0673l2v/RxtUVsx/+wAaz6+KhdVq0Te/OV1RUb01PKq3hg3rpRYtQupd98yZbB0+fFYZ6V8UjvPnr7okB+AJ2nXvpvFPztWYOTMV2rJ68rPivHylrVyjXcuW69ZlJj8D4Dy/KhZfZrFY1L9/F0VF9bKXjREj+qhr13b1rp+bW6D09HMOZePIkQsqKWHeBnivkPAWGj37UY1/ap469OwuqXrys8+27dTOpUn6fP8hkxMC8CZ+XSzupl27iDpl44EHuik4OKjOuhUVlTpxIqv6K5SasnH48OfKyclt1sxAUwUEBGjQhLGa+HSiBsSPsS/POn5Su5Yu18H1m5n8DMB9USwaKCgoUA880E1RUb2rx4g+iorq7XC8xp1ycm7Z924cPvy50tPP6sSJLFVwuWV4gci+vTX+qXmKmTVDwWGhkqSCm7eUuiJZKf9cqbxr101OCMBTUSyaqEuXtnXKxoABXewHxd2ptLRcR45cqP4apaZspKefVW5uoQnJgfsLi4hQ3BOzNO7JuWrTuZMkqbK8QulbPtHO9z7QhcyjJicE4GkoFm4QFhaioUN7aERN0Rge1VvDh/dSRESLete/cOFanbJx5swVTv+Dx7BYrRr60ESNXzBPfaNH2pefT/9MO5cmKX3LJ0x+BkASxaLZBAQEqFevjg5lIyqqt3r3jqx3/YKCYmVkOB4ompl5XoWFJc2cHHDU9YEBmrAgUSNnTFVgcLAk6XbONaUkrVTq8mQV3so1NyAAU1EsTNaqVbiGD+9l/zql+jTYngoNDa6zblVVlU6fzv7SgaJndekS33ej+bVs10bx8+ZobOIcRXRoL0kqLy3VofVbtOO9D5R98rTJCQGYgWLhgaxWiwYM6OpQNqKieqlLl/pPg715M79O2Th69ILKyiqaOTn8kTUwUFHTp2jCgkT1GDrYvvz0voPa+V6SjmzfyeRngB+hWHiRDh1aOZSNESN6a9CgbgoKCqyzbnl5hY4fv2QvHLWXML927bYJyeEvekUN0/gF8zR86uQ7Jj+7rN3LViht1RomPwP8AMXCywUHB2rw4B53nJlS/WfbtrZ618/OvllzoOgXx26cPJmlykr+jxKu0yqyg8bOf0Lxc2crvE1rSVJpUbH2f1Q9+dnVs+fNDQjAbSgWPqpbt/Z1yka/fp3rPQ22uLi0+jTYO8pGevpZ5eUVmZAcviQwJETRj07ThKfnO0x+dmL3Hl0+eUZlxcUqKypWaXGxyopLVFZUpLLiEpUWFVffVlx8x88lfKUCeAGKhR8JDw/V0KE9NWJEb4fTYFu2DKt3/bNnc+pcUfTcuaucBotG6TcmWhOeTtTgSePrLbgNUV5S6lA27iwddUtJiU
prlpUVFam0uMReZL68LVcUBVyHYuHnAgIC1KdPJ4eyERXVWz17dqx3/by8IofTYA8f/lyffXZBxcWlzZwc3qptty4a8cgUtWjVSsFhoQoOC1NIizCHP4PDQhV8xzKL1erWTJUVFTUFpG4psZeTOqWkxLGg1LPnpay4mCIOv0OxQL1atw7/4quUmsIxZEiPek+Drays1KlT2XccKFp9oa/Ll2+akBy+KDA4+Eulo4WCw0IVUvOnQylpUb1OiP3nuoWl9ragkPpnN3al+gqIfZlDISlWWVGJyoqLVFZUYr+tbnmpLi6VFZz1Bc9EsUCDBQZaNXBg1zpnpkRGtql3/evX8+pcUfTYsUsqL+cfRHgGi9VqLyYORaWewnK3vSl1t61e1tivexqqsryi/nJyx56Xu30t9OWy80VxKVF5SQl7WdAkFAs0WWRk6y+VjT4aOLCrAgPr7r4uKyvXsWOXdPjw53dcVfScbtzIMyE54D6BISE1xeOL0nHnHpaGlJP6tq292qk73XnwbH2lpPYrn/LiEpWXllYf+1JSooqan8tLS1VWUvtzSfWftctrtuFAXN9FsYBbhIQEaciQHl+aoK2XWrduWe/6ly5ddzhQND39nE6duqwq/vEBHFgCrTWlI8xeXOrds1JvOQmzbxtcT+lpThXl5TWFw7GclJeWqqKkppjcUVTsJeaOcuKwvE6hqbnvklIOzm1mbi0W3/72t/WjH/1InTp1Unp6ul544QXt27fPpcHgXXr06FCnbPTr16XedYuKSpWZec6hbGRknFV+fnEzpwZ8X0BAgIJCQxpcSkLCQhUYElK9TWiogmp+DrrjZ4flNX+aoaqqqrpglJbWKTHlDkXky8tLvritTrmpuzem9s+qSv+ekM9txSIxMVFLlizRc889p7S0NL344ouaN2+eBg4cqGvXrrksGLxfy5ZhGjasp6JqvkYZHtVLw4b1Unh4aL3rnzmTrezsW6qoqFRlZZUqKirvGI6/V9azrL71apfVvT/ntq//Pu9+G3tk4E8CAgIUGBKsoJDq0hFYUziCw2oKyJ3lpPa2O36uvi2kTnEJrFNuqtepvfprc7PvjSl1LCdlJSWqcCgxpY4lpr5yU2+h+WIbT9wb47ZisWfPHu3bt08vvPBC9R0EBOjixYt6++239Zvf/MZlweCbLBaL+vbtdEfZqJ4vpXv3DmZHc7ny8op7lJO6heSL8nP/dSsqqmrK1T3Kl0OZck2xash91h4gaBjGHT/rS8tVZx2goSyB1nrKyR2l5R6F5s6C4rB9SIiCwurfK2OGqqoqVZSW1VtiHPak1LNXpqKkVKkrklVa6NqLITb089up2hcUFKTo6GgtWrTIvswwDG3dulXx8fGNTwu/UVVVpVOnLuvUqctasWK3fXnbtjYNG9ZTbdq0VGCg9Y5h+dLvXyyzWu9+W+2w3uc+7vzdam34urU/1zefS62goMB73o67q6qqalApqW/Z/Ze7bltX35+z297//px5nPqe2xel78t/71/+O7jfbfXd771uc9191JetSlKxDKPI8T5KJKPY8T4CFCBZAmSxWhVgschisSjAYpXFalGA1SqLxfLFbVarw88BFossgVZZLFYFWGtut1hlCbTa76N2G4vVar8/x+dllRQuQ+H6snorec3CWdu367KLi0VDOfWvXvv27RUYGKicnByH5Tk5ORo0aFC92wQHByvkjnPKbbb657qAf7t5M1+ffvqZ2TEaxWKx1C00DqWnIWXl7ssaUqDuuX2Dczn3OLX35Y7y5O5TOgHPU1kzXMPiwvtyltv/d2rhwoX6+c9/7u6HAUxTVVWlsrIqv57OvrZc1ZaMgIAABQTojp8Dan7WHT/fa/n9t3Xucepbp2nbNja3O7Z19XOuXVbry7fVt969bqvvPpy9/8bdR9Pu//73oXqWufI+Gnf/knTrunkzXjtVLK5fv66KigpFRkY6LI+MjNSVK1fq3WbRokV644037L/bbDZlZWU1Ii
oAT0W5AlDLqf2N5eXlOnDggKZMmWJfFhAQoClTpig1NbXebcrKypSfn+8wAACAb3L6q5A33nhD77zzjvbv36+9e/fqxRdfVHh4uP7+97+7Ix8AAPAiTheLpKQkdejQQb/85S/VqVMnHT58WNOnT9fVq1fdkQ8AAHgRLukNAADuq6Gf35zTBQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXIZiAQAAXMbt06bfjc1mM+uhAQCAkxr6ud3sxaI2GFOnAwDgfWw22z0v6d3sc4VIUpcuXVw+T4jNZlNWVpa6du3KHCReitfQ+/EaejdeP+/n7tfQZrPp8uXL91zHlK9C7heqKfLz83lDeDleQ+/Ha+jdeP28n7tew4bcJwdvAgAAl6FYAAAAl/GZYlFaWqqf//znKi0tNTsKGonX0PvxGno3Xj/v5wmvoSkHbwIAAN/kM3ssAACA+SgWAADAZSgWAADAZSgWAADAZbyuWEyYMEEfffSRsrKyZBiGZs+eXWedX/ziF7p8+bKKioq0ZcsW9evXz4SkkFzzerVp00bvvfeebt++rVu3bumvf/2rwsPDm+sp+L3XXntNhmE4jGPHjtlvDwkJ0R//+Eddv35d+fn5WrFihTp27OhwH927d9fatWtVWFionJwc/fa3v5XVam3up+IXmus9N2zYMO3YsUPFxcW6cOGCfvSjH7n1efmT5nrPTZo0SQcOHFBJSYlOnTqlZ555xmXPwfCmMX36dOM//uM/jISEBMMwDGP27NkOt//4xz82bt26ZTz22GPGsGHDjOTkZOPMmTNGSEiI6dn9cbji9Vq/fr1x6NAhY8yYMca4ceOMkydPGkuXLjX9ufnLeO2114zMzEwjMjLSPtq1a2e//U9/+pNx/vx5Y/LkycaoUaOMlJQUY9euXfbbLRaLkZGRYWzevNmIiooypk+fbly9etX4r//6L9Ofmy+O5njP2Ww2Izs723j33XeNwYMHG/PnzzcKCwuNb3zjG6Y/f18YzfGe69Wrl1FQUGD87ne/MwYNGmQ8//zzRnl5uTFt2jRXPAfz/xIbO+p701y+fNn4wQ9+YP89IiLCKC4uNubPn296Xn8fjXm9Bg0aZBiGYURHR9vXeeSRR4zKykqjc+fOpj8nfxivvfaacejQoXpvi4iIMEpLS40nnnjCvmzgwIGGYRhGbGysIVV/0FVUVBgdO3a0r/PNb37TyM3NNYKCgkx/fr483PWee+6554wbN244vH6LFi0yjh07Zvpz9oXRHO+5X//610ZmZqbDfS9btszYsGFDk/N73Vch99K7d2917txZW7dutS/Ly8tTWlqa4uPjTUyG+jTk9YqPj9etW7d04MAB+zpbt25VVVWVYmNjmz2zv+rfv7+ysrJ05swZvffee+revbskKTo6WsHBwQ6v4YkTJ3T+/HmH1zAzM1NXr161r7Np0ya1atVKQ4YMad4n4udc9Z6Lj4/Xjh07VF5ebl9n06ZNGjRokFq3bt08T8bHufs9Fx8f73Afteu44rPSp4pFp06dJEk5OTkOy3Nycuy3wXM05PXq1KmTw5tDkiorK3Xz5k1e02aSlpamZ599VtOnT9e3vvUt9e7dWzt37lTLli3VqVMnlZaW6vbt2w7bfPk1rO81rr0NzcdV7zleU/dqjvfc3dZp1aqVQkNDm5TflNlNAXiPjRs32n/OzMxUWlqazp8/r8TERBUXF5uYDPBN3v6e86k9FleuXJEkRUZGOiyPjIy03wbP0ZDX68qVK3WOdrZarWrbti2vqUlu376tkydPql+/frpy5YpCQkLUqlUrh3W+/BrW9xrX3obm46r3HK9p83LHe+5u69y+fVslJSVNyutTxeLs2bPKzs7WlClT7MtsNptiY2OVmppqYjLUpyGvV2pqqtq0aaNRo0bZ13nooYdksViUlpbW7JkhhYeHq2/fvsrOztaBAwdUVlbm8BoOGDBAPXv2dHgNhw
0bpg4dOtjXmTp1qm7fvq2jR482e35/5qr3XGpqqiZOnKjAwC92ek+dOlXHjx9Xbm5u8zwZP+KO91xqaqrDfdSu46rPStOPgHVmhIeHG1FRUUZUVJRhGIbx4osvGlFRUUb37t0NqfpUqps3bxqzZs0yhg4daqxatYrTTb389Vq/fr1x4MABY/To0cbYsWONEydOcLppM47XX3/dmDhxotGzZ08jPj7e2Lx5s3H16lWjffv2hlR96tu5c+eMBx980Bg1apSxe/duY/fu3fbta09927hxozF8+HBj2rRpRk5ODqebumk0x3suIiLCyM7ONt555x1j8ODBRmJiolFQUMDppi4azfGeqz3d9De/+Y0xcOBA41vf+pb/nm46adIkoz5///vf7ev84he/MLKzs43i4mJjy5YtRv/+/U3P7a/DFa9XmzZtjKVLlxp5eXlGbm6u8X//939GeHi46c/NX8ayZcuMrKwso6SkxLh48aKxbNkyo0+fPvbbQ0JCjD/+8Y/GjRs3jIKCAuPDDz80IiMjHe6jR48exrp164zCwkLj6tWrxuuvv25YrVbTn5svjuZ6zw0bNszYsWOHUVxcbFy8eNH48Y9/bPpz95XRXO+5SZMmGQcPHjRKSkqM06dPG88884xL8jNtOgAAcBmfOsYCAACYi2IBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABchmIBAABc5v8Dy9hk/NClnCoAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "def naive_grad(x, mu):\n", + " # the naive gradient to mu\n", + " # \\nabla_\\mu \\mathbb{E}_q[x^2] = \\mathbb{E}_q[x^2(x-\\mu)]\n", + " return np.mean(x ** 2 * (x - mu))\n", + " \n", + "def reparam_grad(eps, mu):\n", + " #### You need to finish the reparameterization gradient to mu here ####\n", + " # \\nabla_\\mu \\mathbb{E}_q[x^2] = \\mathbb{E}_q[2*(\\varepsilon+\\mu)]\n", + "\t# return np.mean((2-eps)*(mu+eps))\n", + " return np.mean(2*(mu+eps))\n", + " \n", + "def main():\n", + " data_size_list = [10, 100, 500, 1000, 5000]\n", + " sample_num = 100\n", + " mu, sigma = 2.0, 1.0\n", + " # variance of the gradient to mu\n", + " var1 = np.zeros(len(data_size_list))\n", + " var2 = np.zeros(len(data_size_list))\n", + "\n", + " for i, data_size in enumerate(data_size_list):\n", + " estimation1 = np.zeros(sample_num)\n", + " estimation2 = np.zeros(sample_num)\n", + " \n", + " for n in range(sample_num):\n", + " # 1.naive method\n", + " x = np.random.normal(mu, sigma, size=(data_size, ))\n", + " estimation1[n] = naive_grad(x, mu)\n", + " \n", + " # 2.reparameterization method\n", + " eps = np.random.normal(0.0, 1.0, size=(data_size, ))\n", + " x = eps * sigma + mu\n", + " estimation2[n] = reparam_grad(eps, mu)\n", + " var1[i] = np.var(estimation1)\n", + " var2[i] = np.var(estimation2)\n", + " \n", + " print('naive grad variance: {}'.format(var1))\n", + " print('reparameterization grad variance: {}'.format(var2))\n", + " # plot figure\n", + " index = [_ for _ in range(len(data_size_list))]\n", + " plt.plot(index, var1)\n", + " plt.plot(index, var2)\n", + " plt.xticks(index, data_size_list)\n", + " plt.legend(['naive', 'reparam'])\n", + " plt.savefig('demo/reparam.png')\n", + " plt.show()\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 
2. 连续动作空间(小人越悬崖)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "from torch.distributions import MultivariateNormal\n", + "import torch.nn.functional as F " + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "class LittleHuman(nn.Module):\n", + "\n", + " def __init__(self, lr = 0.01) -> None:\n", + " super().__init__()\n", + "\n", + " self.lr = lr\n", + "\n", + " self.param1 = nn.Parameter(torch.zeros(1), requires_grad=True)\n", + " self.param2 = nn.Parameter(torch.zeros(1), requires_grad=True)\n", + " # sigma_theta = \n", + " # sigma_v = \n", + "\n", + " # 只有 torch 本身的方法才会计算计算图\n", + " self.sigma = torch.concat([torch.exp(self.param1) * torch.pi / 2., torch.exp(self.param2) * 10.])\n", + " self.hidden = nn.Sequential(\n", + " nn.Linear(2, 2),\n", + " nn.Tanh()\n", + " )\n", + "\n", + "\n", + " def _get_mu(self, x):\n", + " x = 0.5 * (x + 1)\n", + " x[..., 0] *= 0.5 * torch.pi\n", + " x[..., 1] *= 10.\n", + " \n", + " return x\n", + "\n", + " def policy(self, d):\n", + " x = self.hidden(d)\n", + " mu = self._get_mu(x)\n", + " dist = MultivariateNormal(mu, scale_tril=torch.diag(self.sigma))\n", + "\n", + " return dist\n", + " \n", + " def success(self, a, d) -> bool:\n", + " theta, v = torch.split(a, 1, -1)\n", + " vx = v * torch.cos(theta)\n", + " vy = v * torch.cos(theta)\n", + "\n", + " d1, d2 = torch.split(a, 1, -1)\n", + " t = d1 / vx\n", + " return vy*t - 0.5*9.8*t*t >= d2\n", + "\n", + " def reward(self, action, success):\n", + " v = action[...,-1]\n", + " return success * (100 - v**2)\n", + "\n", + " def update(self, reward, prob):\n", + " \n", + " # print(reward * prob)\n", + " (reward * prob).mean().backward()\n", + " # print(self.param1.grad)\n", + "\n", + " with torch.no_grad():\n", + " self.param1 += self.lr * self.param1.grad\n", + " self.param2 += self.lr * 
self.param2.grad\n", + "\n", + " print(self.hidden[0].weight.grad, self.hidden[0].bias.grad)\n", + " print(self.param1.grad, self.param2.grad)\n", + "\n", + " def run(self, d):\n", + " dist = self.policy(d)\n", + " action = dist.sample()\n", + " success = self.success(action, d)\n", + " with torch.no_grad():\n", + " reward = self.reward(action, success)\n", + " prob = dist.log_prob(action)\n", + " grad = self.update(reward, prob)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[0., 0.],\n", + " [0., 0.]]) tensor([0., 0.])\n", + "tensor([0.]) tensor([0.])\n" + ] + } + ], + "source": [ + "lm = LittleHuman()\n", + "lm.train()\n", + "lm.run(torch.tensor([[[0.2, 0.2]]]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 题⽬3(应⽤实践)\n", + "在课程第⼆讲(解构复杂动作空间)⼏个应⽤中任选⼀个\n", + "\n", + "- ⽕箭回收(离散动作空间)\n", + "- ⽆⼈机姿态控制(连续动作空间)\n", + "- 导航控制(混合动作空间)\n", + "\n", + "根据课程组给出的⽰例代码,训练得到相应的智能体。最终提交需要上传相关训练代码、⽇志截图或\n", + "最终所得的智能体效果视频(replay),具体样式可以参考第⼆讲的⽰例ISSUE。" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Please install latest DI-engine's main branch first\n", + "# And we will release DI-engine v0.4.6 version with stable and tuned configuration of these demos.\n", + "from ding.bonus import PPOF\n", + "\n", + "\n", + "def lunarlander_discrete():\n", + " # Please install lunarlander env first, `pip3 install box2d`\n", + " agent = PPOF(env='lunarlander_discrete', exp_name='./lunarlander_discrete_demo')\n", + " agent.train(step=int(1e5))\n", + " # Classic RL interaction loop and save replay video\n", + " agent.deploy(enable_save_replay=True)\n", + "\n", + "\n", + "def lunarlander_continuous():\n", + " # Please install lunarlander env first, `pip3 install box2d`\n", + " agent = PPOF(env='lunarlander_continuous', 
exp_name='./lunarlander_continuous_demo', seed=314)\n", + " agent.train(step=int(1e5))\n", + " # Batch (Vectorized) evaluation\n", + " agent.batch_evaluate(env_num=4, n_evaluator_episode=8)\n", + "\n", + "\n", + "def rocket_landing():\n", + " # Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling`\n", + " agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo')\n", + " agent.train(step=int(5e6), context='spawn')\n", + "\n", + "\n", + "def drone_fly():\n", + " # Please install gym_pybullet_drones env first, `pip3 install git+https://github.com/zjowowen/gym-pybullet-drones@master`\n", + " agent = PPOF(env='drone_fly', exp_name='./drone_fly_demo')\n", + " agent.train(step=int(5e6))\n", + "\n", + "\n", + "def hybrid_moving():\n", + " # Please install gym_hybrid env first, refer to the doc `https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_hybrid_zh.html`\n", + " agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo')\n", + " agent.train(step=int(5e6))\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " # You can select and run your favorite demo\n", + " # lunarlander_discrete()\n", + " # lunarlander_continuous()\n", + " # rocket_landing()\n", + " # drone_fly()\n", + " hybrid_moving()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "torch11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/my_homework/ch2/homework2_3.py b/my_homework/ch2/homework2_3.py new file mode 100644 index 0000000..a9c1187 --- /dev/null +++ b/my_homework/ch2/homework2_3.py @@ -0,0 +1,56 @@ +# Please install latest DI-engine's main 
branch first +# And we will release DI-engine v0.4.6 version with stable and tuned configuration of these demos. +from ding.bonus import PPOF + + +def lunarlander_discrete(): + # Please install lunarlander env first, `pip3 install box2d` + agent = PPOF(env='lunarlander_discrete', exp_name='./lunarlander_discrete_demo') + agent.train(step=int(1e5)) + # Classic RL interaction loop and save replay video + agent.deploy(enable_save_replay=True) + + +def lunarlander_continuous(): + # Please install lunarlander env first, `pip3 install box2d` + agent = PPOF(env='lunarlander_continuous', exp_name='./lunarlander_continuous_demo', seed=314) + agent.train(step=int(1e5)) + # Batch (Vectorized) evaluation + agent.batch_evaluate(env_num=4, n_evaluator_episode=8) + + +def rocket_landing(): + # Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling` + agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo') + agent.train(step=int(5e6), context='spawn') + +def rocket_landing_deploy(): + # Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling` + agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo') + agent.deploy(ckpt_path="rocket_landing_demo\ckpt\iteration_38400.pth.tar", enable_save_replay=True) + + +def drone_fly(): + # Please install gym_pybullet_drones env first, `pip3 install git+https://github.com/zjowowen/gym-pybullet-drones@master` + agent = PPOF(env='drone_fly', exp_name='./drone_fly_demo') + agent.train(step=int(5e6)) + + +def hybrid_moving(): + # Please install gym_hybrid env first, refer to the doc `https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_hybrid_zh.html` + agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo') + agent.train(step=int(5e6)) + +def hybrid_moving_deploy(): + agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo') + 
agent.deploy(enable_save_replay=True) + +if __name__ == "__main__": + # You can select and run your favorite demo + # lunarlander_discrete() + # lunarlander_continuous() + # rocket_landing() + rocket_landing_deploy() + # drone_fly() + # hybrid_moving() + # hybrid_moving_deploy() diff --git a/my_homework/ch3/homework3.ipynb b/my_homework/ch3/homework3.ipynb new file mode 100644 index 0000000..866dfbf --- /dev/null +++ b/my_homework/ch3/homework3.ipynb @@ -0,0 +1,459 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 代码实践题目" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 题目1(奇偶数预测问题)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "class OddAndEven(nn.Module):\n", + " def __init__(self) -> None:\n", + " super().__init__()\n", + "\n", + " # directly input data\n", + " self.net_direct = nn.Sequential(\n", + " nn.Linear(10, 128),\n", + " nn.ReLU(True),\n", + " nn.Linear(128, 2),\n", + " )\n", + "\n", + " # turn input to binary format,binary encoding length = 9\n", + " self.net_binary = nn.Sequential(\n", + " nn.Linear(10, 128),\n", + " nn.ReLU(True),\n", + " nn.Linear(128, 2),\n", + " )\n", + "\n", + " # use trigonometric function to encode,length = 9\n", + " self.net_trigo = nn.Sequential(\n", + " nn.Linear(10, 128),\n", + " nn.ReLU(True),\n", + " nn.Linear(128, 2),\n", + " )\n", + "\n", + " self.sigmoid = nn.Sigmoid()\n", + "\n", + "\n", + " def _binary_encode(self, x):\n", + " \n", + " def num2binary(x):\n", + " if x == 0:\n", + " return []\n", + " else:\n", + " temp = x % 2\n", + " l = num2binary(x//2)\n", + " l.append(temp)\n", + " return l\n", + "\n", + " binary_encode = []\n", + " for i in range(x.size(0)):\n", + " sub_1 = 
num2binary(x[i])\n", + " while len(sub_1) < 10: sub_1.insert(0, 0)\n", + " binary_encode.append(sub_1)\n", + "\n", + " return torch.tensor(binary_encode, dtype=torch.float)\n", + " \n", + " \n", + " def _trigo_encode(self, x):\n", + " # div_term = torch.exp(torch.arange(0, 10, 2) *\n", + " # -(math.log(10000.0) / 10)).to(x.device)\n", + " trigo_encode = torch.zeros(x.size(0), 10).to(x.device)\n", + " # trigo_encode[..., 0::2] = torch.sin(x * div_term)\n", + " # trigo_encode[..., 1::2] = torch.cos(x * div_term)\n", + " trigo_encode[..., 0::2] = torch.sin(x).repeat(1, 5)\n", + " trigo_encode[..., 1::2] = torch.cos(x).repeat(1, 5)\n", + "\n", + "\n", + " return trigo_encode.to(x.device)\n", + " \n", + " def forward(self, x):\n", + " binary_encode = self._binary_encode(x).to(x.device)\n", + " trigo_encode = self._trigo_encode(x).to(x.device)\n", + " origin_encode = x.repeat(1, 10).to(x.device)\n", + "\n", + " # binary_result = torch.argmax(self.sigmoid(self.net_direct(binary_encode)), dim=-1)\n", + " # trigo_result = torch.argmax(self.sigmoid(self.net_direct(trigo_encode)), dim=-1)\n", + " # origin_result = torch.argmax(self.sigmoid(self.net_direct(origin_encode)), dim=-1)\n", + " binary_result = self.net_binary(binary_encode)\n", + " trigo_result = self.net_trigo(trigo_encode)\n", + " origin_result = self.net_direct(origin_encode)\n", + "\n", + " return binary_result, trigo_result, origin_result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "def main():\n", + " batch_size = 64\n", + " epochs = 200\n", + " net = OddAndEven()\n", + " net.to(device='cuda')\n", + " net.train()\n", + " loss_fn = nn.CrossEntropyLoss()\n", + "\n", + " optimizer = torch.optim.Adam(net.parameters(),lr=1e-3)\n", + " \n", + " for epoch in range(epochs):\n", + " data = torch.randint(0, 1000, size=(batch_size, 1), dtype=torch.float).cuda()\n", + " label = (data % 2 == 0).long().cuda().squeeze()\n", + " b, t, o = net(data)\n", + 
" l1 = loss_fn(b, label)\n", + " l2 = loss_fn(t, label)\n", + " l3 = loss_fn(o, label)\n", + " \n", + " l1.backward()\n", + " l2.backward()\n", + " l3.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + "\n", + " print(f\"epoch:[{epoch:>3d}/{epochs:>3d}] binary_loss:{l1} triangle_loss:{l2} origin_loss:{l3}\")\n", + " \n", + " print('----------------------------- Test -----------------------------' )\n", + " data = torch.randint(0, 10, size=(10, 1), dtype=torch.float).cuda()\n", + " label = (data % 2 == 0).long().cuda().squeeze()\n", + " b, t, o = net(data)\n", + " print('binary_result: ', torch.argmax(b, dim=-1))\n", + " print('triangle_result: ', torch.argmax(t, dim=-1))\n", + " print('origin_result: ', torch.argmax(o, dim=-1))\n", + " print('label: ', label)" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\73106\\AppData\\Local\\Temp\\ipykernel_51568\\3798698966.py:36: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. 
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n", + " l = num2binary(x//2)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch:[ 0/200] binary_loss:0.7349872589111328 triangle_loss:0.6947663426399231 origin_loss:52.24180221557617\n", + "epoch:[ 1/200] binary_loss:0.683316171169281 triangle_loss:0.6998624801635742 origin_loss:28.34395408630371\n", + "epoch:[ 2/200] binary_loss:0.6706985235214233 triangle_loss:0.6971087455749512 origin_loss:9.654135704040527\n", + "epoch:[ 3/200] binary_loss:0.6902305483818054 triangle_loss:0.7240961790084839 origin_loss:13.732337951660156\n", + "epoch:[ 4/200] binary_loss:0.6652613878250122 triangle_loss:0.7089925408363342 origin_loss:22.04283905029297\n", + "epoch:[ 5/200] binary_loss:0.6852871775627136 triangle_loss:0.6977459788322449 origin_loss:23.781091690063477\n", + "epoch:[ 6/200] binary_loss:0.6469615697860718 triangle_loss:0.7086113691329956 origin_loss:18.387901306152344\n", + "epoch:[ 7/200] binary_loss:0.6761390566825867 triangle_loss:0.6878255009651184 origin_loss:11.120370864868164\n", + "epoch:[ 8/200] binary_loss:0.6498953700065613 triangle_loss:0.6898926496505737 origin_loss:3.8951528072357178\n", + "epoch:[ 9/200] binary_loss:0.6452895998954773 triangle_loss:0.6870980858802795 origin_loss:7.197264671325684\n", + "epoch:[ 10/200] binary_loss:0.6411842107772827 triangle_loss:0.6955468058586121 origin_loss:15.669432640075684\n", + "epoch:[ 11/200] binary_loss:0.6168311834335327 triangle_loss:0.691303014755249 origin_loss:17.784526824951172\n", + "epoch:[ 12/200] binary_loss:0.6193327903747559 triangle_loss:0.6875953674316406 origin_loss:15.500490188598633\n", + "epoch:[ 13/200] binary_loss:0.6365929841995239 triangle_loss:0.7006270885467529 origin_loss:15.674912452697754\n", + "epoch:[ 14/200] binary_loss:0.6062679886817932 triangle_loss:0.6948075294494629 
origin_loss:5.189105987548828\n", + "epoch:[ 15/200] binary_loss:0.6067568063735962 triangle_loss:0.6975786685943604 origin_loss:3.1493356227874756\n", + "epoch:[ 16/200] binary_loss:0.608468770980835 triangle_loss:0.6939046382904053 origin_loss:7.675893306732178\n", + "epoch:[ 17/200] binary_loss:0.6114965081214905 triangle_loss:0.6975736021995544 origin_loss:8.78271198272705\n", + "epoch:[ 18/200] binary_loss:0.6031761169433594 triangle_loss:0.6943066716194153 origin_loss:8.025813102722168\n", + "epoch:[ 19/200] binary_loss:0.5792003273963928 triangle_loss:0.6954648494720459 origin_loss:6.359289169311523\n", + "epoch:[ 20/200] binary_loss:0.5873932838439941 triangle_loss:0.6948984265327454 origin_loss:1.4420090913772583\n", + "epoch:[ 21/200] binary_loss:0.576849102973938 triangle_loss:0.6919821500778198 origin_loss:4.236663341522217\n", + "epoch:[ 22/200] binary_loss:0.571341872215271 triangle_loss:0.6960058212280273 origin_loss:6.050136089324951\n", + "epoch:[ 23/200] binary_loss:0.5722900629043579 triangle_loss:0.6991226077079773 origin_loss:6.566732406616211\n", + "epoch:[ 24/200] binary_loss:0.5546126365661621 triangle_loss:0.7047006487846375 origin_loss:4.965843677520752\n", + "epoch:[ 25/200] binary_loss:0.564728319644928 triangle_loss:0.6888614296913147 origin_loss:4.809279441833496\n", + "epoch:[ 26/200] binary_loss:0.5243043303489685 triangle_loss:0.6944065093994141 origin_loss:1.078822374343872\n", + "epoch:[ 27/200] binary_loss:0.5426250100135803 triangle_loss:0.6965641975402832 origin_loss:5.407868385314941\n", + "epoch:[ 28/200] binary_loss:0.5398126244544983 triangle_loss:0.6974223852157593 origin_loss:4.551272392272949\n", + "epoch:[ 29/200] binary_loss:0.5203456282615662 triangle_loss:0.6939593553543091 origin_loss:2.7113089561462402\n", + "epoch:[ 30/200] binary_loss:0.5111867785453796 triangle_loss:0.6940088272094727 origin_loss:2.442976474761963\n", + "epoch:[ 31/200] binary_loss:0.5241106748580933 triangle_loss:0.6938029527664185 
origin_loss:3.008760452270508\n", + "epoch:[ 32/200] binary_loss:0.5260311961174011 triangle_loss:0.6884706616401672 origin_loss:2.7661871910095215\n", + "epoch:[ 33/200] binary_loss:0.5120359063148499 triangle_loss:0.6904221177101135 origin_loss:0.7845602035522461\n", + "epoch:[ 34/200] binary_loss:0.48890921473503113 triangle_loss:0.6909964680671692 origin_loss:2.0516700744628906\n", + "epoch:[ 35/200] binary_loss:0.4827214777469635 triangle_loss:0.6962442994117737 origin_loss:1.3384929895401\n", + "epoch:[ 36/200] binary_loss:0.48885440826416016 triangle_loss:0.7017158269882202 origin_loss:2.092763900756836\n", + "epoch:[ 37/200] binary_loss:0.4982365667819977 triangle_loss:0.6895340085029602 origin_loss:2.3815908432006836\n", + "epoch:[ 38/200] binary_loss:0.4856633245944977 triangle_loss:0.6928762793540955 origin_loss:0.7130910754203796\n", + "epoch:[ 39/200] binary_loss:0.48265963792800903 triangle_loss:0.6963449716567993 origin_loss:2.2626967430114746\n", + "epoch:[ 40/200] binary_loss:0.454795777797699 triangle_loss:0.7114068269729614 origin_loss:2.2706518173217773\n", + "epoch:[ 41/200] binary_loss:0.4513421952724457 triangle_loss:0.6965122222900391 origin_loss:1.0171977281570435\n", + "epoch:[ 42/200] binary_loss:0.4239877164363861 triangle_loss:0.6824066042900085 origin_loss:3.3366150856018066\n", + "epoch:[ 43/200] binary_loss:0.44531551003456116 triangle_loss:0.6963104605674744 origin_loss:2.4402527809143066\n", + "epoch:[ 44/200] binary_loss:0.442457914352417 triangle_loss:0.6910649538040161 origin_loss:1.375940203666687\n", + "epoch:[ 45/200] binary_loss:0.42885127663612366 triangle_loss:0.6793301105499268 origin_loss:1.1944801807403564\n", + "epoch:[ 46/200] binary_loss:0.41018545627593994 triangle_loss:0.6838625073432922 origin_loss:2.426095485687256\n", + "epoch:[ 47/200] binary_loss:0.4177663326263428 triangle_loss:0.7011746168136597 origin_loss:2.045269250869751\n", + "epoch:[ 48/200] binary_loss:0.4092172682285309 
triangle_loss:0.6895523071289062 origin_loss:0.7601649761199951\n", + "epoch:[ 49/200] binary_loss:0.4036576449871063 triangle_loss:0.6964073181152344 origin_loss:3.113070249557495\n", + "epoch:[ 50/200] binary_loss:0.4094778299331665 triangle_loss:0.7039382457733154 origin_loss:3.12691068649292\n", + "epoch:[ 51/200] binary_loss:0.38065823912620544 triangle_loss:0.70387202501297 origin_loss:0.9295028448104858\n", + "epoch:[ 52/200] binary_loss:0.38542598485946655 triangle_loss:0.689373791217804 origin_loss:2.0076301097869873\n", + "epoch:[ 53/200] binary_loss:0.3746945261955261 triangle_loss:0.6892282962799072 origin_loss:1.2521824836730957\n", + "epoch:[ 54/200] binary_loss:0.3558580279350281 triangle_loss:0.7045082449913025 origin_loss:1.554581642150879\n", + "epoch:[ 55/200] binary_loss:0.3838288187980652 triangle_loss:0.6708875298500061 origin_loss:2.4315130710601807\n", + "epoch:[ 56/200] binary_loss:0.3504653573036194 triangle_loss:0.6960404515266418 origin_loss:1.1471725702285767\n", + "epoch:[ 57/200] binary_loss:0.35902509093284607 triangle_loss:0.6887332201004028 origin_loss:1.36702299118042\n", + "epoch:[ 58/200] binary_loss:0.35262101888656616 triangle_loss:0.7100775837898254 origin_loss:1.0076611042022705\n", + "epoch:[ 59/200] binary_loss:0.33532989025115967 triangle_loss:0.7020472884178162 origin_loss:2.4345083236694336\n", + "epoch:[ 60/200] binary_loss:0.32175207138061523 triangle_loss:0.6896390914916992 origin_loss:2.259103775024414\n", + "epoch:[ 61/200] binary_loss:0.31157442927360535 triangle_loss:0.6837162375450134 origin_loss:0.7770151495933533\n", + "epoch:[ 62/200] binary_loss:0.3145330250263214 triangle_loss:0.7004241347312927 origin_loss:3.46669340133667\n", + "epoch:[ 63/200] binary_loss:0.3079836070537567 triangle_loss:0.697003185749054 origin_loss:2.5996756553649902\n", + "epoch:[ 64/200] binary_loss:0.3224138915538788 triangle_loss:0.6847421526908875 origin_loss:1.2387276887893677\n", + "epoch:[ 65/200] binary_loss:0.304223895072937 
triangle_loss:0.6960766315460205 origin_loss:2.0109214782714844\n", + "epoch:[ 66/200] binary_loss:0.2903444170951843 triangle_loss:0.69034343957901 origin_loss:3.183044672012329\n", + "epoch:[ 67/200] binary_loss:0.2922017276287079 triangle_loss:0.6900668144226074 origin_loss:0.8038842678070068\n", + "epoch:[ 68/200] binary_loss:0.2764824330806732 triangle_loss:0.6822840571403503 origin_loss:1.5272748470306396\n", + "epoch:[ 69/200] binary_loss:0.2803954482078552 triangle_loss:0.6820774674415588 origin_loss:2.048795461654663\n", + "epoch:[ 70/200] binary_loss:0.27685508131980896 triangle_loss:0.6681204438209534 origin_loss:0.7206596732139587\n", + "epoch:[ 71/200] binary_loss:0.2825746238231659 triangle_loss:0.6895530819892883 origin_loss:1.203648328781128\n", + "epoch:[ 72/200] binary_loss:0.267378568649292 triangle_loss:0.6949990391731262 origin_loss:0.6912379860877991\n", + "epoch:[ 73/200] binary_loss:0.2624054253101349 triangle_loss:0.7102674245834351 origin_loss:1.3324981927871704\n", + "epoch:[ 74/200] binary_loss:0.2592426538467407 triangle_loss:0.6956061720848083 origin_loss:0.8078610301017761\n", + "epoch:[ 75/200] binary_loss:0.2388114184141159 triangle_loss:0.7085279226303101 origin_loss:0.7872973084449768\n", + "epoch:[ 76/200] binary_loss:0.2498103678226471 triangle_loss:0.7143706679344177 origin_loss:0.7895103096961975\n", + "epoch:[ 77/200] binary_loss:0.2495998740196228 triangle_loss:0.6946998834609985 origin_loss:0.7280941605567932\n", + "epoch:[ 78/200] binary_loss:0.2221311777830124 triangle_loss:0.7052234411239624 origin_loss:1.0244276523590088\n", + "epoch:[ 79/200] binary_loss:0.2232351005077362 triangle_loss:0.703346312046051 origin_loss:0.7086902260780334\n", + "epoch:[ 80/200] binary_loss:0.2105594277381897 triangle_loss:0.6976587176322937 origin_loss:0.7661758065223694\n", + "epoch:[ 81/200] binary_loss:0.2023281455039978 triangle_loss:0.6943191289901733 origin_loss:0.9605141878128052\n", + "epoch:[ 82/200] 
binary_loss:0.22747276723384857 triangle_loss:0.6962918639183044 origin_loss:0.6980372667312622\n", + "epoch:[ 83/200] binary_loss:0.20634843409061432 triangle_loss:0.6876935958862305 origin_loss:0.8451524972915649\n", + "epoch:[ 84/200] binary_loss:0.19815467298030853 triangle_loss:0.6919587254524231 origin_loss:0.8076283931732178\n", + "epoch:[ 85/200] binary_loss:0.2119293510913849 triangle_loss:0.695551335811615 origin_loss:0.6835743188858032\n", + "epoch:[ 86/200] binary_loss:0.18542352318763733 triangle_loss:0.6960914134979248 origin_loss:0.7016379237174988\n", + "epoch:[ 87/200] binary_loss:0.20857815444469452 triangle_loss:0.688342809677124 origin_loss:0.7000477910041809\n", + "epoch:[ 88/200] binary_loss:0.18668517470359802 triangle_loss:0.6933719515800476 origin_loss:0.6942405104637146\n", + "epoch:[ 89/200] binary_loss:0.17689242959022522 triangle_loss:0.6969907879829407 origin_loss:0.725574254989624\n", + "epoch:[ 90/200] binary_loss:0.1765035092830658 triangle_loss:0.6943527460098267 origin_loss:0.7483500242233276\n", + "epoch:[ 91/200] binary_loss:0.16177265346050262 triangle_loss:0.6898742914199829 origin_loss:0.7039273977279663\n", + "epoch:[ 92/200] binary_loss:0.1863662153482437 triangle_loss:0.6932089328765869 origin_loss:0.6911659836769104\n", + "epoch:[ 93/200] binary_loss:0.17570847272872925 triangle_loss:0.6944540739059448 origin_loss:0.7154741287231445\n", + "epoch:[ 94/200] binary_loss:0.16655488312244415 triangle_loss:0.6921955943107605 origin_loss:0.7207168340682983\n", + "epoch:[ 95/200] binary_loss:0.16621196269989014 triangle_loss:0.6977362036705017 origin_loss:0.695478618144989\n", + "epoch:[ 96/200] binary_loss:0.15088799595832825 triangle_loss:0.6874958872795105 origin_loss:0.6733831167221069\n", + "epoch:[ 97/200] binary_loss:0.14286692440509796 triangle_loss:0.6912994384765625 origin_loss:0.8340612053871155\n", + "epoch:[ 98/200] binary_loss:0.14626899361610413 triangle_loss:0.6969191431999207 origin_loss:0.951200008392334\n", + 
"epoch:[ 99/200] binary_loss:0.13898897171020508 triangle_loss:0.6898265480995178 origin_loss:0.7024137377738953\n", + "epoch:[100/200] binary_loss:0.12772198021411896 triangle_loss:0.6935615539550781 origin_loss:0.8891034722328186\n", + "epoch:[101/200] binary_loss:0.13615547120571136 triangle_loss:0.6902524828910828 origin_loss:0.876529335975647\n", + "epoch:[102/200] binary_loss:0.1368696093559265 triangle_loss:0.6901503801345825 origin_loss:1.7799768447875977\n", + "epoch:[103/200] binary_loss:0.11514176428318024 triangle_loss:0.6901337504386902 origin_loss:1.1336190700531006\n", + "epoch:[104/200] binary_loss:0.14130844175815582 triangle_loss:0.687634289264679 origin_loss:2.2398648262023926\n", + "epoch:[105/200] binary_loss:0.12069921940565109 triangle_loss:0.692103385925293 origin_loss:2.6389667987823486\n", + "epoch:[106/200] binary_loss:0.11108091473579407 triangle_loss:0.6862013936042786 origin_loss:1.5090280771255493\n", + "epoch:[107/200] binary_loss:0.11332057416439056 triangle_loss:0.6919528245925903 origin_loss:2.4566643238067627\n", + "epoch:[108/200] binary_loss:0.10871178656816483 triangle_loss:0.687377393245697 origin_loss:5.361363410949707\n", + "epoch:[109/200] binary_loss:0.1323341578245163 triangle_loss:0.6944746375083923 origin_loss:3.039844036102295\n", + "epoch:[110/200] binary_loss:0.095506452023983 triangle_loss:0.7080479860305786 origin_loss:1.127760648727417\n", + "epoch:[111/200] binary_loss:0.11259392648935318 triangle_loss:0.6918506622314453 origin_loss:2.388592481613159\n", + "epoch:[112/200] binary_loss:0.10051442682743073 triangle_loss:0.7013694047927856 origin_loss:0.7327517867088318\n", + "epoch:[113/200] binary_loss:0.09991258382797241 triangle_loss:0.6949799060821533 origin_loss:1.8605810403823853\n", + "epoch:[114/200] binary_loss:0.10168365389108658 triangle_loss:0.6933864951133728 origin_loss:2.4932992458343506\n", + "epoch:[115/200] binary_loss:0.08752864599227905 triangle_loss:0.6823626756668091 
origin_loss:0.7203071117401123\n", + "epoch:[116/200] binary_loss:0.08649759739637375 triangle_loss:0.6858214735984802 origin_loss:2.1679165363311768\n", + "epoch:[117/200] binary_loss:0.08971147239208221 triangle_loss:0.6870915293693542 origin_loss:3.2604293823242188\n", + "epoch:[118/200] binary_loss:0.08288431912660599 triangle_loss:0.6923970580101013 origin_loss:0.7936244010925293\n", + "epoch:[119/200] binary_loss:0.0842982828617096 triangle_loss:0.7073819041252136 origin_loss:2.997469663619995\n", + "epoch:[120/200] binary_loss:0.09680286794900894 triangle_loss:0.696508526802063 origin_loss:3.780165433883667\n", + "epoch:[121/200] binary_loss:0.08652789145708084 triangle_loss:0.6985299587249756 origin_loss:1.7138829231262207\n", + "epoch:[122/200] binary_loss:0.07835863530635834 triangle_loss:0.6903572082519531 origin_loss:2.6759748458862305\n", + "epoch:[123/200] binary_loss:0.08530818670988083 triangle_loss:0.6854627132415771 origin_loss:3.169684410095215\n", + "epoch:[124/200] binary_loss:0.08764059841632843 triangle_loss:0.7012335062026978 origin_loss:1.042251706123352\n", + "epoch:[125/200] binary_loss:0.08478764444589615 triangle_loss:0.7063227295875549 origin_loss:2.8906428813934326\n", + "epoch:[126/200] binary_loss:0.07852668315172195 triangle_loss:0.7049480080604553 origin_loss:5.6561198234558105\n", + "epoch:[127/200] binary_loss:0.0811678022146225 triangle_loss:0.6802017688751221 origin_loss:3.109095573425293\n", + "epoch:[128/200] binary_loss:0.07246676087379456 triangle_loss:0.6935749650001526 origin_loss:0.7487837076187134\n", + "epoch:[129/200] binary_loss:0.07044298201799393 triangle_loss:0.7022777199745178 origin_loss:3.2509870529174805\n", + "epoch:[130/200] binary_loss:0.07041819393634796 triangle_loss:0.7126251459121704 origin_loss:3.229290008544922\n", + "epoch:[131/200] binary_loss:0.06961921602487564 triangle_loss:0.691372811794281 origin_loss:5.487561225891113\n", + "epoch:[132/200] binary_loss:0.06656275689601898 
triangle_loss:0.6888192296028137 origin_loss:1.9402388334274292\n", + "epoch:[133/200] binary_loss:0.0645320937037468 triangle_loss:0.68475741147995 origin_loss:3.5155093669891357\n", + "epoch:[134/200] binary_loss:0.06718369573354721 triangle_loss:0.7006494402885437 origin_loss:7.323747158050537\n", + "epoch:[135/200] binary_loss:0.06353143602609634 triangle_loss:0.6876631379127502 origin_loss:6.278201580047607\n", + "epoch:[136/200] binary_loss:0.06759455800056458 triangle_loss:0.6961510181427002 origin_loss:5.258127212524414\n", + "epoch:[137/200] binary_loss:0.060593247413635254 triangle_loss:0.6949125528335571 origin_loss:0.8664026856422424\n", + "epoch:[138/200] binary_loss:0.06355620920658112 triangle_loss:0.6999309062957764 origin_loss:3.676255941390991\n", + "epoch:[139/200] binary_loss:0.0517202764749527 triangle_loss:0.6957522630691528 origin_loss:6.86445951461792\n", + "epoch:[140/200] binary_loss:0.05425957217812538 triangle_loss:0.6924275755882263 origin_loss:6.270211219787598\n", + "epoch:[141/200] binary_loss:0.05130635201931 triangle_loss:0.6964737176895142 origin_loss:7.5930938720703125\n", + "epoch:[142/200] binary_loss:0.040976837277412415 triangle_loss:0.6947215795516968 origin_loss:3.7717576026916504\n", + "epoch:[143/200] binary_loss:0.06092624366283417 triangle_loss:0.6899179816246033 origin_loss:3.885256052017212\n", + "epoch:[144/200] binary_loss:0.05417744815349579 triangle_loss:0.6956518888473511 origin_loss:6.35058069229126\n", + "epoch:[145/200] binary_loss:0.04716465622186661 triangle_loss:0.6922026872634888 origin_loss:8.708785057067871\n", + "epoch:[146/200] binary_loss:0.03843593969941139 triangle_loss:0.6927590370178223 origin_loss:5.383594512939453\n", + "epoch:[147/200] binary_loss:0.05326106771826744 triangle_loss:0.6861076951026917 origin_loss:2.6733367443084717\n", + "epoch:[148/200] binary_loss:0.06043548136949539 triangle_loss:0.6890475153923035 origin_loss:2.419140577316284\n", + "epoch:[149/200] 
binary_loss:0.04881884902715683 triangle_loss:0.6953604817390442 origin_loss:3.814293622970581\n", + "epoch:[150/200] binary_loss:0.04605145752429962 triangle_loss:0.6958069205284119 origin_loss:3.042513132095337\n", + "epoch:[151/200] binary_loss:0.041860319674015045 triangle_loss:0.696122944355011 origin_loss:0.7048391699790955\n", + "epoch:[152/200] binary_loss:0.037437401711940765 triangle_loss:0.6884347796440125 origin_loss:2.256472587585449\n", + "epoch:[153/200] binary_loss:0.0400431789457798 triangle_loss:0.6935689449310303 origin_loss:4.893984317779541\n", + "epoch:[154/200] binary_loss:0.03959518298506737 triangle_loss:0.6866165995597839 origin_loss:1.3660632371902466\n", + "epoch:[155/200] binary_loss:0.052060458809137344 triangle_loss:0.7079979777336121 origin_loss:1.760554552078247\n", + "epoch:[156/200] binary_loss:0.037945687770843506 triangle_loss:0.6789488196372986 origin_loss:5.268474578857422\n", + "epoch:[157/200] binary_loss:0.041535403579473495 triangle_loss:0.6868088841438293 origin_loss:3.6021039485931396\n", + "epoch:[158/200] binary_loss:0.04050735756754875 triangle_loss:0.6895782351493835 origin_loss:0.789107620716095\n", + "epoch:[159/200] binary_loss:0.038463253527879715 triangle_loss:0.693579375743866 origin_loss:3.370272397994995\n", + "epoch:[160/200] binary_loss:0.03212359547615051 triangle_loss:0.6959198117256165 origin_loss:1.5537272691726685\n", + "epoch:[161/200] binary_loss:0.037256937474012375 triangle_loss:0.7071434855461121 origin_loss:1.950494647026062\n", + "epoch:[162/200] binary_loss:0.03797942027449608 triangle_loss:0.6938918828964233 origin_loss:3.242302656173706\n", + "epoch:[163/200] binary_loss:0.041252825409173965 triangle_loss:0.7100152969360352 origin_loss:0.6823515892028809\n", + "epoch:[164/200] binary_loss:0.03435976058244705 triangle_loss:0.6873098015785217 origin_loss:1.7277344465255737\n", + "epoch:[165/200] binary_loss:0.032651349902153015 triangle_loss:0.6913737058639526 origin_loss:1.5551472902297974\n", 
+ "epoch:[166/200] binary_loss:0.03461290895938873 triangle_loss:0.6935893297195435 origin_loss:1.4802002906799316\n", + "epoch:[167/200] binary_loss:0.04245539754629135 triangle_loss:0.6951685547828674 origin_loss:1.8507494926452637\n", + "epoch:[168/200] binary_loss:0.03427766636013985 triangle_loss:0.6920654773712158 origin_loss:1.2124189138412476\n", + "epoch:[169/200] binary_loss:0.03074006922543049 triangle_loss:0.6985407471656799 origin_loss:3.914273977279663\n", + "epoch:[170/200] binary_loss:0.03159628435969353 triangle_loss:0.6887461543083191 origin_loss:2.1902830600738525\n", + "epoch:[171/200] binary_loss:0.028655314818024635 triangle_loss:0.6959819793701172 origin_loss:0.6954247951507568\n", + "epoch:[172/200] binary_loss:0.03784453496336937 triangle_loss:0.6983382701873779 origin_loss:2.2794580459594727\n", + "epoch:[173/200] binary_loss:0.030401241034269333 triangle_loss:0.6895791888237 origin_loss:2.2745752334594727\n", + "epoch:[174/200] binary_loss:0.02523604966700077 triangle_loss:0.6969538927078247 origin_loss:0.7948993444442749\n", + "epoch:[175/200] binary_loss:0.02768283523619175 triangle_loss:0.6914940476417542 origin_loss:2.5793118476867676\n", + "epoch:[176/200] binary_loss:0.027413194999098778 triangle_loss:0.7008165121078491 origin_loss:3.107913017272949\n", + "epoch:[177/200] binary_loss:0.025515709072351456 triangle_loss:0.6954799890518188 origin_loss:1.4279037714004517\n", + "epoch:[178/200] binary_loss:0.028135864064097404 triangle_loss:0.6901707649230957 origin_loss:2.110584259033203\n", + "epoch:[179/200] binary_loss:0.03260519728064537 triangle_loss:0.688378095626831 origin_loss:3.9851977825164795\n", + "epoch:[180/200] binary_loss:0.024494122713804245 triangle_loss:0.694216251373291 origin_loss:3.6094977855682373\n", + "epoch:[181/200] binary_loss:0.024996260181069374 triangle_loss:0.6849022507667542 origin_loss:0.7197354435920715\n", + "epoch:[182/200] binary_loss:0.025226108729839325 triangle_loss:0.6919670104980469 
origin_loss:2.90372371673584\n", + "epoch:[183/200] binary_loss:0.030825594440102577 triangle_loss:0.697807788848877 origin_loss:2.219836711883545\n", + "epoch:[184/200] binary_loss:0.02299787849187851 triangle_loss:0.6935791969299316 origin_loss:0.9706622958183289\n", + "epoch:[185/200] binary_loss:0.022575298324227333 triangle_loss:0.6921379566192627 origin_loss:3.2727930545806885\n", + "epoch:[186/200] binary_loss:0.024245237931609154 triangle_loss:0.6936569213867188 origin_loss:2.564678192138672\n", + "epoch:[187/200] binary_loss:0.02511601336300373 triangle_loss:0.6916142106056213 origin_loss:1.6069061756134033\n", + "epoch:[188/200] binary_loss:0.02285928465425968 triangle_loss:0.6886249780654907 origin_loss:1.2139039039611816\n", + "epoch:[189/200] binary_loss:0.026819679886102676 triangle_loss:0.6917797923088074 origin_loss:2.4676527976989746\n", + "epoch:[190/200] binary_loss:0.022400056943297386 triangle_loss:0.6969032883644104 origin_loss:1.6492072343826294\n", + "epoch:[191/200] binary_loss:0.02470509707927704 triangle_loss:0.6954671144485474 origin_loss:3.0599381923675537\n", + "epoch:[192/200] binary_loss:0.0231486689299345 triangle_loss:0.6977156400680542 origin_loss:4.135528564453125\n", + "epoch:[193/200] binary_loss:0.024425290524959564 triangle_loss:0.6975620985031128 origin_loss:3.454710006713867\n", + "epoch:[194/200] binary_loss:0.023887356743216515 triangle_loss:0.6981842517852783 origin_loss:1.3243358135223389\n", + "epoch:[195/200] binary_loss:0.018893973901867867 triangle_loss:0.6980413198471069 origin_loss:2.4780077934265137\n", + "epoch:[196/200] binary_loss:0.02293287217617035 triangle_loss:0.6993746757507324 origin_loss:2.5406863689422607\n", + "epoch:[197/200] binary_loss:0.020013099536299706 triangle_loss:0.6974712610244751 origin_loss:0.8364153504371643\n", + "epoch:[198/200] binary_loss:0.01902010291814804 triangle_loss:0.6927831172943115 origin_loss:3.662505626678467\n", + "epoch:[199/200] binary_loss:0.02204802818596363 
triangle_loss:0.6838313937187195 origin_loss:3.501659870147705\n", + "----------------------------- Test -----------------------------\n", + "binary_result: tensor([1, 0, 1, 1, 1, 0, 0, 0, 0, 1], device='cuda:0')\n", + "triangle_result: tensor([0, 0, 1, 0, 1, 0, 0, 1, 1, 0], device='cuda:0')\n", + "origin_result: tensor([1, 0, 1, 1, 1, 1, 0, 0, 0, 1], device='cuda:0')\n", + "label: tensor([1, 0, 1, 1, 1, 0, 0, 0, 0, 1], device='cuda:0')\n" + ] + } + ], + "source": [ + "main()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 总结\n", + "\n", + "### 结果\n", + "\n", + "![](demo/homework3_2_result.png)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 设计\n", + "\n", + "这里为了网络参数公平,另编码长度保证相同,三种方式如下\n", + "\n", + "- 二进制方法:对 [0,999] 范围的数转变成 10 位二进制数\n", + "- 三角函数法:对 sin、cos 进行交替编码,分别尝试了 postion_encoding、直接对原来的数进行 cos、sin\n", + "- 直接数字法:将输入数字重复 10 次" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 分析\n", + "针对三种方案,其中 `二进制` 的方式是可行的\n", + "\n", + "- `二进制`方法:因为二进制最后一位数与数的奇偶性具有强相关性,故经过迭代基本能够无误实现判断\n", + "\n", + "- `三角函数`方法:按道理来说,三角函数具有周期性,只要能够学到周期为1,那应该是能够收敛的,但是我在测试的时候并没有收敛,可能是我设计的编码他的周期不为1,导致出现了奇偶模糊,因为 position_encoding 中有 $PE_{t+\\Delta t} = T_{\\Delta t} PE_{t}$ 的变换关系\n", + "\n", + "- `直接数字`方法:完全不收敛\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 题目2(应用实践" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](demo/homework3_2.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "torch11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
# Please install latest DI-engine's main branch first
import os

from ding.bonus import PPOF


def bipedalwalker():
    """Train a PPOF agent on the ``bipedalwalker`` env for 1e6 steps.

    Please install the bipedalwalker env first: ``pip3 install box2d``.
    Env doc: https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/bipedalwalker_zh.html
    """
    agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
    agent.train(step=int(1e6))


def bipedalwalker_deploy():
    """Deploy the ``bipedalwalker`` agent from a saved checkpoint with replay saving enabled.

    Runs the classic RL interaction loop and saves a replay video.
    """
    # Build the path from components: the previous literal relied on the
    # invalid escape sequences "\c" and "\i" (a DeprecationWarning, later a
    # SyntaxWarning) and hard-coded Windows separators.
    ckpt_path = os.path.join("bipedalwalker_demo", "ckpt", "iteration_155680.pth.tar")
    agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
    agent.deploy(ckpt_path=ckpt_path, enable_save_replay=True)


def evogym_carrier():
    """Train a PPOF agent on the ``evogym_carrier`` env for 1e6 steps.

    Please install the evogym env first, refer to its doc
    (https://github.com/EvolutionGym/evogym#installation), or use the provided
    docker image (opendilab/ding:nightly-evogym).
    Env doc: https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/Evogym_zh.html
    """
    agent = PPOF(env='evogym_carrier', exp_name='./evogym_carrier_demo')
    agent.train(step=int(1e6))


def mario():
    """Train a PPOF agent on the ``mario`` env for 3e6 steps.

    Please install the mario env first: ``pip install gym-super-mario-bros``.
    Env doc: https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_super_mario_bros_zh.html
    """
    agent = PPOF(env='mario', exp_name='./mario_demo')
    agent.train(step=int(3e6))


def mario_deploy():
    """Deploy the ``mario`` agent with replay saving enabled."""
    agent = PPOF(env='mario', exp_name='./mario_demo')
    agent.deploy(enable_save_replay=True)


def di_sheep():
    """Train a PPOF agent with a custom ``SheepModel`` on the ``di_sheep`` env for 1e6 steps.

    Please prepare the di_sheep env and model first: copy the env and model
    files into the current directory. They are placed in
    https://github.com/opendilab/DI-sheep/blob/master/service
    """
    # Imported here because these modules only exist once the DI-sheep files
    # have been copied next to this script.
    from sheep_env import SheepEnv
    from sheep_model import SheepModel

    env = SheepEnv(level=9)
    obs_space = env.observation_space
    model = SheepModel(
        item_obs_size=obs_space['item_obs'].shape[1],
        item_num=obs_space['item_obs'].shape[0],
        item_encoder_type='TF',
        bucket_obs_size=obs_space['bucket_obs'].shape[0],
        global_obs_size=obs_space['global_obs'].shape[0],
        ttorch_return=True,
    )
    agent = PPOF(env='di_sheep', exp_name='./di_sheep_demo', model=model)
    agent.train(step=int(1e6))


def procgen_bigfish():
    """Train a PPOF agent on the ``procgen_bigfish`` env for 1e7 steps.

    Please install the procgen env first: ``pip install procgen``.
    Env doc: https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/procgen_zh.html
    """
    agent = PPOF(env='procgen_bigfish', exp_name='./procgen_bigfish_demo')
    agent.train(step=int(1e7))


if __name__ == "__main__":
    # You can select and run your favorite demo
    # bipedalwalker()
    bipedalwalker_deploy()
    # evogym_carrier()
    # mario()
    # di_sheep()
    # procgen_bigfish()
"markdown", + "metadata": {}, + "source": [ + "### Analyse" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "从上面不同模型大小的输出图像分析模型阐述 RND 网络的过拟合和欠拟合问题\n", + "\n", + "可以看到在模型参数较小的时候,整体的 RND 网络的 reward 是非常大的,说明模型随机性很大,出现欠拟合\n", + "\n", + "随着模型参数量的增加 mse 和 reward 都在往下降,但是当模型逐渐转变到 large 和 very large 的时候 reward_min 无法收敛,发生了过拟合" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## homework 2 " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](demo/image2.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "由于没法制作视频,好像是 metadrive 需要的是 gym=0.19.0 的环境,我安装之后发现运行 deploy 成功但是没法记录视频,因此只能够上传 log" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "log_path = [run-20230312_134251-g1s73ewz]()" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/my_homework/ch4/homework4.py b/my_homework/ch4/homework4.py new file mode 100644 index 0000000..cbac48a --- /dev/null +++ b/my_homework/ch4/homework4.py @@ -0,0 +1,55 @@ +# Please install latest DI-engine's main branch first +from ding.bonus import PPOF + + +def acrobot(): + # Please install acrobot env first, `pip3 install gym` + # You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details + agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo') + agent.train(step=int(1e5)) + +def acrobot_deploy(): + # Please install acrobot env first, `pip3 install gym` + # You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details + agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo') + agent.deploy(enable_save_replay=True) + + +def metadrive(): + # Please install metadrive env 
first, `pip install metadrive-simulator` + # You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/metadrive_zh.html) for more details + agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo') + agent.train(step=int(1e6), context='spawn') + +def metadrive_deploy(): + agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo') + agent.deploy(enable_save_replay=True) + +def metadrive_install_test(): + from metadrive import MetaDriveEnv + env = MetaDriveEnv() + obs = env.reset() + print(obs.shape) # 输出 (259,) + +def minigrid_fourroom(): + # Please install minigrid env first, `pip install gym-minigrid` + # Note: minigrid env doesn't support Windows platform + # You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/minigrid_zh.html) for more details + agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom') + agent.train(step=int(3e6)) + + +def minigrid_fourroom_deploy(): + agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom') + agent.deploy(enable_save_replay=True) + + + + +if __name__ == "__main__": + # acrobot() + # acrobot_deploy() + # metadrive_install_test() + metadrive() + # metadrive_deploy() + # minigrid_fourroom() \ No newline at end of file diff --git a/my_homework/ch4/ppof_ch4_code_p1.py b/my_homework/ch4/ppof_ch4_code_p1.py new file mode 100644 index 0000000..57bc778 --- /dev/null +++ b/my_homework/ch4/ppof_ch4_code_p1.py @@ -0,0 +1,326 @@ +# pip install minigrid +import os.path +from typing import Union, Tuple, Dict, List, Optional +from multiprocessing import Process +import multiprocessing as mp +import random +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import minigrid +import gymnasium as gym +from tqdm import tqdm +from torch.optim.lr_scheduler import ExponentialLR, MultiStepLR +from tensorboardX import SummaryWriter +from minigrid.wrappers 
# Fix every RNG this script controls so that network initialisation and batch
# sampling are repeatable.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_config = dict(
    train_iter=1024,
    train_data_count=128,
    test_data_count=4096,
)


def _rnd_net_config(exp_name: str, hidden_size_list: List[int]) -> dict:
    """Build one RND experiment config; runs differ only in name and layer widths."""
    return dict(
        exp_name=exp_name,
        # presumably the flattened MiniGrid observation size - TODO confirm
        observation_shape=2835,
        hidden_size_list=hidden_size_list,
        learning_rate=1e-3,
        batch_size=64,
        update_per_collect=100,
        obs_norm=True,
        obs_norm_clamp_min=-1,
        obs_norm_clamp_max=1,
        reward_mse_ratio=1e5,
    )


little_RND_net_config = _rnd_net_config("output/ch4/little_rnd_network", [32, 16])
small_RND_net_config = _rnd_net_config("output/ch4/small_rnd_network", [64, 64])
standard_RND_net_config = _rnd_net_config("output/ch4/standard_rnd_network", [128, 64])
large_RND_net_config = _rnd_net_config("output/ch4/large_RND_network", [256, 256])
very_large_RND_net_config = _rnd_net_config("output/ch4/very_large_RND_network", [512, 512])


class FCEncoder(nn.Module):
    """Fully connected encoder: an input layer followed by the hidden stack.

    Every linear layer, including the last one, is followed by the activation.
    """

    def __init__(
            self,
            obs_shape: int,
            hidden_size_list,
            activation: Optional[nn.Module] = None,
    ) -> None:
        """
        Args:
            obs_shape: size of the last dimension of the input.
            hidden_size_list: layer widths; the first entry is the width of
                the input layer, so it must not be empty.
            activation: module applied after every linear layer. ``None``
                (the default) means ``nn.ReLU()``.
        """
        super().__init__()
        self.obs_shape = obs_shape
        # ``None`` used to crash in forward() despite the Optional annotation.
        self.act = nn.ReLU() if activation is None else activation
        self.init = nn.Linear(obs_shape, hidden_size_list[0])

        layers = []
        for in_size, out_size in zip(hidden_size_list[:-1], hidden_size_list[1:]):
            layers.append(nn.Linear(in_size, out_size))
            layers.append(self.act)
        self.main = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` into a feature of width ``hidden_size_list[-1]``."""
        return self.main(self.act(self.init(x)))


class RndNetwork(nn.Module):
    """Random Network Distillation pair: a frozen target and a trainable predictor."""

    def __init__(self, obs_shape: Union[int, list], hidden_size_list: list) -> None:
        super().__init__()
        self.target = FCEncoder(obs_shape, hidden_size_list)
        self.predictor = FCEncoder(obs_shape, hidden_size_list)

        # The target keeps its random initialisation; only the predictor learns.
        for param in self.target.parameters():
            param.requires_grad = False

    def forward(self, obs: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(predict_feature, target_feature)`` for ``obs``."""
        predict_feature = self.predictor(obs)
        with torch.no_grad():
            target_feature = self.target(obs)
        return predict_feature, target_feature


class RunningMeanStd(object):
    """Running mean and variance accumulated batch by batch along axis 0."""

    def __init__(self, epsilon=1e-4, shape=(), device=torch.device('cpu')):
        """
        Args:
            epsilon: initial pseudo-count, keeps the first division well defined.
            shape: shape of the tracked statistic; ``()`` starts from scalars.
            device: device on which tensor-valued ``mean``/``std`` are returned.
        """
        self._epsilon = epsilon
        self._shape = shape
        self._device = device
        self.reset()

    def update(self, x):
        """Fold the batch ``x`` (a numpy array, samples along axis 0) into the statistics."""
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]

        new_count = batch_count + self._count
        mean_delta = batch_mean - self._mean
        new_mean = self._mean + mean_delta * batch_count / new_count
        # this method for calculating new variable might be numerically unstable
        m_a = self._var * self._count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + np.square(mean_delta) * self._count * batch_count / new_count
        new_var = m2 / new_count
        self._mean = new_mean
        self._var = new_var
        self._count = new_count

    def reset(self):
        """Restore mean 0, variance 1 and a count of ``epsilon``."""
        if len(self._shape) > 0:
            self._mean = np.zeros(self._shape, 'float32')
            self._var = np.ones(self._shape, 'float32')
        else:
            self._mean, self._var = 0., 1.
        self._count = self._epsilon

    @property
    def mean(self) -> Union[float, torch.Tensor]:
        """Running mean: a scalar while scalar-valued, otherwise a float32 tensor on ``device``."""
        if np.isscalar(self._mean):
            return self._mean
        return torch.as_tensor(self._mean, dtype=torch.float32, device=self._device)

    @property
    def std(self) -> Union[float, torch.Tensor]:
        """Running standard deviation, with 1e-8 added to the variance before the root."""
        std = np.sqrt(self._var + 1e-8)
        if np.isscalar(std):
            return std
        return torch.as_tensor(std, dtype=torch.float32, device=self._device)


class RndRewardModel():
    """Trains an RND predictor and reports its prediction error as an intrinsic reward."""

    def __init__(self, config) -> None:  # noqa
        """
        Args:
            config: dict with the keys produced by ``_rnd_net_config``.
        """
        self.cfg = config

        # NOTE(review): the exp_name values in this file already start with
        # 'output/ch4/', so this resolves to 'output/ch4/output/ch4/<name>'.
        # Kept as is so existing log locations do not move - confirm intent.
        self.tb_logger = SummaryWriter(os.path.join('output/ch4', config["exp_name"]))
        self.reward_model = RndNetwork(
            obs_shape=config["observation_shape"], hidden_size_list=config["hidden_size_list"]
        ).to(device)

        self.opt = optim.Adam(self.reward_model.predictor.parameters(), config["learning_rate"])
        self.scheduler = ExponentialLR(self.opt, gamma=0.997)

        self.estimate_cnt_rnd = 0
        if self.cfg["obs_norm"]:
            self._running_mean_std_rnd_obs = RunningMeanStd(epsilon=1e-4, device=device)

    def __del__(self):
        # __init__ may have failed before the writer was created.
        logger = getattr(self, "tb_logger", None)
        if logger is not None:
            logger.flush()
            logger.close()

    def _normalize(self, obs: torch.Tensor) -> torch.Tensor:
        """Standardise ``obs`` with the running statistics, then clamp to the configured range."""
        stats = self._running_mean_std_rnd_obs
        obs = (obs - stats.mean) / stats.std
        return torch.clamp(obs, min=self.cfg["obs_norm_clamp_min"], max=self.cfg["obs_norm_clamp_max"])

    def train(self, data) -> None:
        """Run ``update_per_collect`` predictor updates on mini-batches sampled from ``data``.

        Args:
            data: list of observation tensors.

        Raises:
            ValueError: from ``random.sample`` when ``data`` holds fewer than
                ``batch_size`` items.
        """
        for _ in range(self.cfg["update_per_collect"]):
            batch = torch.stack(random.sample(data, self.cfg["batch_size"])).to(device)
            if self.cfg["obs_norm"]:
                # Note: observation normalization: transform obs to mean 0, std 1
                self._running_mean_std_rnd_obs.update(batch.cpu().numpy())
                batch = self._normalize(batch)

            predict_feature, target_feature = self.reward_model(batch)
            loss = F.mse_loss(predict_feature, target_feature.detach())
            self.opt.zero_grad()
            loss.backward()
            self.opt.step()
        # NOTE(review): the learning rate is decayed once per train() call;
        # the source indentation was lost, so confirm this placement.
        self.scheduler.step()

    def estimate(self, data: list) -> List[torch.Tensor]:
        """Estimate the RND intrinsic reward for each observation in ``data``.

        The reward is the per-sample MSE between predictor and target
        features, scaled by ``reward_mse_ratio``. The MSE and the max, mean
        and min reward are logged to TensorBoard.

        Returns:
            One tensor of shape ``(1,)`` per observation. This method used to
            compute the rewards and then discard them.
        """
        obs = torch.stack(data).to(device)
        if self.cfg["obs_norm"]:
            # Note: observation normalization: transform obs to mean 0, std 1
            obs = self._normalize(obs)

        with torch.no_grad():
            self.estimate_cnt_rnd += 1
            predict_feature, target_feature = self.reward_model(obs)
            mse = F.mse_loss(predict_feature, target_feature, reduction='none').mean(dim=1)
            self.tb_logger.add_scalar('rnd_reward/mse', mse.cpu().numpy().mean(), self.estimate_cnt_rnd)

            # The reward is the scaled MSE; no min-max normalization is applied.
            rnd_reward = mse * self.cfg["reward_mse_ratio"]

            self.tb_logger.add_scalar('rnd_reward/rnd_reward_max', rnd_reward.max(), self.estimate_cnt_rnd)
            self.tb_logger.add_scalar('rnd_reward/rnd_reward_mean', rnd_reward.mean(), self.estimate_cnt_rnd)
            self.tb_logger.add_scalar('rnd_reward/rnd_reward_min', rnd_reward.min(), self.estimate_cnt_rnd)

            return list(torch.chunk(rnd_reward, rnd_reward.shape[0], dim=0))


def training(config, train_data, test_data):
    """Train one RND model for ``train_iter`` rounds, evaluating on ``test_data`` after each.

    Args:
        config: RND experiment config dict.
        train_data: one list of transition dicts per training round.
        test_data: list of transition dicts used for every evaluation.
    """
    rnd_reward_model = RndRewardModel(config=config)
    # The evaluation set never changes, so convert it to tensors once.
    test_obs = [torch.Tensor(item["last_observation"]) for item in test_data]
    for i in tqdm(range(train_config["train_iter"])):
        rnd_reward_model.train([torch.Tensor(item["last_observation"]) for item in train_data[i]])
        rnd_reward_model.estimate(test_obs)


def collect_transitions(env, count: int) -> List[Dict]:
    """Collect exactly ``count`` random-policy transitions from ``env``.

    ``env`` must follow the Gymnasium API: ``reset()`` returns ``(obs, info)``
    and ``step()`` returns ``(obs, reward, terminated, truncated, info)``.
    A new episode starts whenever the current one is terminated or truncated.
    """
    transitions = []
    while len(transitions) < count:
        last_observation, _ = env.reset()
        done = False
        while not done and len(transitions) < count:
            action = env.action_space.sample()
            observation, reward, terminated, truncated, _info = env.step(action)
            transitions.append(
                {
                    "last_observation": last_observation,
                    "action": action,
                    "reward": reward,
                    "observation": observation
                }
            )
            last_observation = observation
            done = terminated or truncated
    return transitions


def main(configs: Optional[List[dict]] = None):
    """Collect MiniGrid data once, then train one RND model per config in parallel.

    Args:
        configs: experiment configs to train, one process each. Defaults to
            the large and very large networks; pass the little, small or
            standard configs to reproduce the other runs.
    """
    # Local import: only main() needs the wrapper.
    from minigrid.wrappers import FlatObsWrapper

    if configs is None:
        configs = [large_RND_net_config, very_large_RND_net_config]

    env_obs = FlatObsWrapper(gym.make("MiniGrid-Empty-8x8-v0"))
    try:
        train_data = [
            collect_transitions(env_obs, train_config["train_data_count"])
            for _ in range(train_config["train_iter"])
        ]
        test_data = collect_transitions(env_obs, train_config["test_data_count"])
    finally:
        # Close once, after all collection; the env used to be closed
        # mid-loop and then reset again.
        env_obs.close()

    workers = [Process(target=training, args=(cfg, train_data, test_data)) for cfg in configs]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


if __name__ == "__main__":
    mp.set_start_method('spawn')
    main()
+ cli_version: 0.13.10 + framework: torch + is_jupyter_run: false + is_kaggle_kernel: false + python_version: 3.9.16 + start_time: 1678599771.618358 + t: + 1: + - 1 + - 5 + - 53 + - 55 + 2: + - 1 + - 5 + - 53 + - 55 + 3: + - 23 + 4: 3.9.16 + 5: 0.13.10 + 8: + - 3 + - 5 diff --git a/my_homework/ch4/run-20230312_134251-g1s73ewz/files/output.log b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/output.log new file mode 100644 index 0000000..5d28eb0 --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/output.log @@ -0,0 +1,4 @@ +[03-12 13:42:54] WARNING  If you want to use wandb to visualize the gradient, please set ]8;id=271493;file://E:\miniconda\envs\torch11\lib\site-packages\ding\utils\default_helper.py\default_helper.py]8;;\:]8;id=536110;file://E:\miniconda\envs\torch11\lib\site-packages\ding\utils\default_helper.py#410\410]8;;\ + gradient_logger = True in the config. +[03-12 13:44:15] INFO  Evaluation: Train Iter(0) Env Step(0) Mean Episode Return(0.062) ]8;id=499748;file://E:\miniconda\envs\torch11\lib\site-packages\ding\framework\middleware\functional\evaluator.py\evaluator.py]8;;\:]8;id=375441;file://E:\miniconda\envs\torch11\lib\site-packages\ding\framework\middleware\functional\evaluator.py#370\370]8;;\ +[03-12 13:44:51] INFO  Exceeded maximum number of env_step(3003), program is terminated ]8;id=88754;file://E:\miniconda\envs\torch11\lib\site-packages\ding\framework\middleware\functional\termination_checker.py\termination_checker.py]8;;\:]8;id=541618;file://E:\miniconda\envs\torch11\lib\site-packages\ding\framework\middleware\functional\termination_checker.py#22\22]8;;\ \ No newline at end of file diff --git a/my_homework/ch4/run-20230312_134251-g1s73ewz/files/requirements.txt b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/requirements.txt new file mode 100644 index 0000000..7e4ab09 --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/requirements.txt @@ -0,0 +1,179 @@ +absl-py==1.4.0 +aiohttp==3.8.4 
+aiosignal==1.3.1 +appdirs==1.4.4 +asttokens==2.2.1 +async-timeout==4.0.2 +attrs==22.2.0 +backcall==0.2.0 +backports.functools-lru-cache==1.6.4 +bitmath==1.3.3.1 +box2d==2.3.10 +brotlipy==0.7.0 +cachetools==5.3.0 +certifi==2022.12.7 +cffi==1.15.1 +chardet==4.0.0 +charset-normalizer==2.0.4 +click==7.1.2 +cloudpickle==1.6.0 +cmake==3.25.2 +colorama==0.4.6 +contourpy==1.0.7 +cryptography==39.0.1 +cycler==0.11.0 +debugpy==1.5.1 +decorator==5.1.1 +di-engine==0.4.6 +di-toolkit==0.1.0 +di-treetensor==0.4.0 +dill==0.3.6 +docker-pycreds==0.4.0 +easydict==1.9 +enum-tools==0.9.0.post1 +exceptiongroup==1.1.0 +executing==1.2.0 +flask==1.1.4 +fonttools==4.38.0 +frozenlist==1.3.3 +gitdb==4.0.10 +gitpython==3.1.31 +glfw==2.5.6 +google-auth-oauthlib==0.4.6 +google-auth==2.16.2 +graphviz==0.20.1 +grpcio==1.51.3 +gym-notices==0.0.8 +gym-super-mario-bros==7.4.0 +gym==0.19.0 +gymnasium-notices==0.0.1 +gymnasium==0.27.1 +h5py==3.8.0 +hbutils==0.8.1 +hickle==5.0.2 +idna==3.4 +imageio-ffmpeg==0.4.8 +imageio==2.26.0 +importlib-metadata==6.0.0 +importlib-resources==5.12.0 +iniconfig==2.0.0 +ipykernel==6.15.0 +ipython==8.11.0 +itsdangerous==1.1.0 +jax-jumpy==0.2.0 +jedi==0.18.2 +jinja2==2.11.3 +joblib==1.2.0 +jupyter-client==8.0.3 +jupyter-core==5.2.0 +kiwisolver==1.4.4 +lazy-loader==0.1 +libtorrent==2.0.7 +llvmlite==0.39.1 +lxml==4.9.2 +lz4==4.3.2 +markdown-it-py==2.2.0 +markdown==3.4.1 +markupsafe==2.0.1 +matplotlib-inline==0.1.6 +matplotlib==3.7.0 +mdurl==0.1.2 +metadrive-simulator==0.2.6.0 +minigrid==2.1.1 +mkl-fft==1.3.1 +mkl-random==1.2.2 +mkl-service==2.4.0 +mpire==2.6.0 +multidict==6.0.4 +nes-py==8.2.1 +nest-asyncio==1.5.6 +networkx==3.0 +numba==0.56.4 +numpy==1.23.5 +oauthlib==3.2.2 +openai==0.27.0 +opencv-python-headless==4.7.0.72 +opencv-python==4.7.0.72 +packaging==23.0 +panda3d-gltf==0.13 +panda3d-simplepbr==0.10 +panda3d==1.10.8 +pandas==1.5.3 +parso==0.8.3 +pathtools==0.1.2 +pettingzoo==1.22.3 +pickleshare==0.7.5 +pillow==9.4.0 +pip==23.0.1 +platformdirs==3.0.0 +pluggy==1.0.0 
+prompt-toolkit==3.0.38 +protobuf==3.20.1 +psutil==5.9.0 +pure-eval==0.2.2 +pyasn1-modules==0.2.8 +pyasn1==0.4.8 +pycparser==2.21 +pygame==2.2.0 +pyglet==1.5.21 +pygments==2.14.0 +pynng==0.7.2 +pyopengl==3.1.6 +pyopenssl==23.0.0 +pyparsing==3.0.9 +pysocks==1.7.1 +pytest==7.2.2 +python-dateutil==2.8.2 +pytimeparse==1.1.8 +pytz==2022.7.1 +pywavelets==1.4.1 +pywin32==305 +pyyaml==6.0 +pyzmq==23.2.0 +readerwriterlock==1.0.9 +redis==4.5.1 +requests-oauthlib==1.3.1 +requests==2.28.1 +responses==0.12.1 +rich==13.3.1 +rocket-recycling==0.1 +rsa==4.9 +scikit-image==0.20.0 +scikit-learn==1.2.1 +scipy==1.9.1 +seaborn==0.12.2 +sentry-sdk==1.16.0 +setproctitle==1.3.2 +setuptools==65.6.3 +six==1.16.0 +smmap==5.0.0 +sniffio==1.3.0 +stack-data==0.6.2 +tabulate==0.9.0 +tensorboard-data-server==0.7.0 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.12.0 +tensorboardx==2.6 +threadpoolctl==3.1.0 +tifffile==2023.2.28 +tomli==2.0.1 +torch==1.11.0 +torchaudio==0.11.0 +torchvision==0.12.0 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.9.0 +treevalue==1.4.7 +trueskill==0.4.5 +typing-extensions==4.4.0 +urllib3==1.26.14 +urlobject==2.4.3 +wandb==0.13.10 +wcwidth==0.2.6 +werkzeug==1.0.1 +wheel==0.38.4 +win-inet-pton==1.1.0 +wincertstore==0.2 +yapf==0.29.0 +yarl==1.8.2 +zipp==3.15.0 \ No newline at end of file diff --git a/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-metadata.json b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-metadata.json new file mode 100644 index 0000000..52f22e5 --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-metadata.json @@ -0,0 +1,50 @@ +{ + "os": "Windows-10-10.0.19044-SP0", + "python": "3.9.16", + "heartbeatAt": "2023-03-12T05:42:54.179314", + "startedAt": "2023-03-12T05:42:51.588439", + "docker": null, + "cuda": null, + "args": [], + "state": "running", + "program": "e:\\workspace\\ANM\\PPOxFamily\\ch4\\homework4.py", + "codePath": "ch4\\homework4.py", + "git": { + "remote": 
"https://github.com/GuoPingPan/PPOxFamily.git", + "commit": "5d0b49a096f580bb94d0f3e0b6c7870b19f462ae" + }, + "email": "731061720@qq.com", + "root": "E:/workspace/ANM/PPOxFamily", + "host": "Kindel-PG", + "username": "73106", + "executable": "E:\\miniconda\\envs\\torch11\\python.exe", + "cpu_count": 4, + "cpu_count_logical": 8, + "cpu_freq": { + "current": 2400.0, + "min": 0.0, + "max": 2400.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.0, + "min": 0.0, + "max": 2400.0 + } + ], + "disk": { + "total": 195.31151962280273, + "used": 132.6543083190918 + }, + "gpu": "NVIDIA GeForce GTX 1650", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA GeForce GTX 1650", + "memory_total": 4294967296 + } + ], + "memory": { + "total": 15.922107696533203 + } +} diff --git a/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-summary.json b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-summary.json new file mode 100644 index 0000000..008260e --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/files/wandb-summary.json @@ -0,0 +1 @@ +{"cur_lr": 0.00010000000000000003, "_timestamp": 1678599891.7756824, "_runtime": 120.15732455253601, "_step": 10, "policy_loss": -0.006951711806671127, "value_loss": 4.781930442316376, "entropy_loss": 1.8287189849044965, "adv_max": 1.6704593328030213, "adv_mean": 5.895676820174507e-09, "approx_kl": 0.005069196849818463, "clipfrac": 0.057676630434782605, "value_max": 0.40170730336356664, "value_mean": -0.08156897273299325, "reward": 0.06240813036887266, "train iter": 460, "env step": 3003, "_wandb": {"runtime": 117}} \ No newline at end of file diff --git a/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug-internal.log b/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug-internal.log new file mode 100644 index 0000000..b881d8c --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug-internal.log @@ -0,0 +1,341 @@ +2023-03-12 13:42:51,617 INFO StreamThr :32820 
[internal.py:wandb_internal():87] W&B internal server running at pid: 32820, started at: 2023-03-12 13:42:51.616363 +2023-03-12 13:42:51,618 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status +2023-03-12 13:42:51,621 INFO WriterThread:32820 [datastore.py:open_for_write():85] open: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\run-g1s73ewz.wandb +2023-03-12 13:42:51,624 DEBUG SenderThread:32820 [sender.py:send():336] send: header +2023-03-12 13:42:51,692 DEBUG SenderThread:32820 [sender.py:send():336] send: run +2023-03-12 13:42:53,334 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: check_version +2023-03-12 13:42:53,334 INFO SenderThread:32820 [dir_watcher.py:__init__():216] watching files in: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files +2023-03-12 13:42:53,349 INFO SenderThread:32820 [sender.py:_start_run_threads():1067] run started: g1s73ewz with start time 1678599771.618358 +2023-03-12 13:42:53,350 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:42:53,350 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:42:53,351 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: check_version +2023-03-12 13:42:54,082 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: run_start +2023-03-12 13:42:54,131 DEBUG HandlerThread:32820 [system_info.py:__init__():31] System info init +2023-03-12 13:42:54,131 DEBUG HandlerThread:32820 [system_info.py:__init__():46] System info init done +2023-03-12 13:42:54,131 INFO HandlerThread:32820 [system_monitor.py:start():151] Starting system monitor +2023-03-12 13:42:54,132 INFO SystemMonitor:32820 [system_monitor.py:_start():116] Starting system asset monitoring threads +2023-03-12 13:42:54,132 INFO HandlerThread:32820 [system_monitor.py:probe():172] Collecting system info +2023-03-12 
13:42:54,137 INFO SystemMonitor:32820 [interfaces.py:start():168] Started cpu +2023-03-12 13:42:54,137 INFO SystemMonitor:32820 [interfaces.py:start():168] Started disk +2023-03-12 13:42:54,137 INFO SystemMonitor:32820 [interfaces.py:start():168] Started gpu +2023-03-12 13:42:54,138 INFO SystemMonitor:32820 [interfaces.py:start():168] Started memory +2023-03-12 13:42:54,155 INFO SystemMonitor:32820 [interfaces.py:start():168] Started network +2023-03-12 13:42:54,179 DEBUG HandlerThread:32820 [system_info.py:probe():195] Probing system +2023-03-12 13:42:54,181 DEBUG HandlerThread:32820 [system_info.py:_probe_git():180] Probing git +2023-03-12 13:42:54,184 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:42:54,310 DEBUG HandlerThread:32820 [system_info.py:_probe_git():188] Probing git done +2023-03-12 13:42:54,310 DEBUG HandlerThread:32820 [system_info.py:probe():241] Probing system done +2023-03-12 13:42:54,310 DEBUG HandlerThread:32820 [system_monitor.py:probe():181] {'os': 'Windows-10-10.0.19044-SP0', 'python': '3.9.16', 'heartbeatAt': '2023-03-12T05:42:54.179314', 'startedAt': '2023-03-12T05:42:51.588439', 'docker': None, 'cuda': None, 'args': (), 'state': 'running', 'program': 'e:\\workspace\\ANM\\PPOxFamily\\ch4\\homework4.py', 'codePath': 'ch4\\homework4.py', 'git': {'remote': 'https://github.com/GuoPingPan/PPOxFamily.git', 'commit': '5d0b49a096f580bb94d0f3e0b6c7870b19f462ae'}, 'email': '731061720@qq.com', 'root': 'E:/workspace/ANM/PPOxFamily', 'host': 'Kindel-PG', 'username': '73106', 'executable': 'E:\\miniconda\\envs\\torch11\\python.exe', 'cpu_count': 4, 'cpu_count_logical': 8, 'cpu_freq': {'current': 2400.0, 'min': 0.0, 'max': 2400.0}, 'cpu_freq_per_core': [{'current': 2400.0, 'min': 0.0, 'max': 2400.0}], 'disk': {'total': 195.31151962280273, 'used': 132.6543083190918}, 'gpu': 'NVIDIA GeForce GTX 1650', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA GeForce GTX 1650', 'memory_total': 4294967296}], 
'memory': {'total': 15.922107696533203}} +2023-03-12 13:42:54,310 INFO HandlerThread:32820 [system_monitor.py:probe():182] Finished collecting system info +2023-03-12 13:42:54,310 INFO HandlerThread:32820 [system_monitor.py:probe():185] Publishing system info +2023-03-12 13:42:54,310 DEBUG HandlerThread:32820 [system_info.py:_save_pip():51] Saving list of pip packages installed into the current environment +2023-03-12 13:42:54,311 DEBUG HandlerThread:32820 [system_info.py:_save_pip():67] Saving pip packages done +2023-03-12 13:42:54,311 DEBUG HandlerThread:32820 [system_info.py:_save_conda():74] Saving list of conda packages installed into the current environment +2023-03-12 13:42:54,323 ERROR HandlerThread:32820 [system_info.py:_save_conda():85] Error saving conda packages: [WinError 2] ϵͳҲָļ +Traceback (most recent call last): + File "E:\miniconda\envs\torch11\lib\site-packages\wandb\sdk\internal\system\system_info.py", line 81, in _save_conda + subprocess.call( + File "E:\miniconda\envs\torch11\lib\subprocess.py", line 349, in call + with Popen(*popenargs, **kwargs) as p: + File "E:\miniconda\envs\torch11\lib\subprocess.py", line 951, in __init__ + self._execute_child(args, executable, preexec_fn, close_fds, + File "E:\miniconda\envs\torch11\lib\subprocess.py", line 1420, in _execute_child + hp, ht, pid, tid = _winapi.CreateProcess(executable, args, +FileNotFoundError: [WinError 2] ϵͳҲָļ +2023-03-12 13:42:54,324 DEBUG HandlerThread:32820 [system_info.py:_save_conda():86] Saving conda packages done +2023-03-12 13:42:54,325 INFO HandlerThread:32820 [system_monitor.py:probe():187] Finished publishing system info +2023-03-12 13:42:54,337 INFO Thread-19 :32820 [dir_watcher.py:_on_file_created():275] file/dir created: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-metadata.json +2023-03-12 13:42:54,338 INFO Thread-19 :32820 [dir_watcher.py:_on_file_created():275] file/dir created: 
E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\conda-environment.yaml +2023-03-12 13:42:54,338 INFO Thread-19 :32820 [dir_watcher.py:_on_file_created():275] file/dir created: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-summary.json +2023-03-12 13:42:54,338 INFO Thread-19 :32820 [dir_watcher.py:_on_file_created():275] file/dir created: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\requirements.txt +2023-03-12 13:42:54,342 DEBUG SenderThread:32820 [sender.py:send():336] send: files +2023-03-12 13:42:54,342 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-metadata.json with policy now +2023-03-12 13:42:54,356 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: stop_status +2023-03-12 13:42:54,357 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: stop_status +2023-03-12 13:42:55,075 DEBUG SenderThread:32820 [sender.py:send():336] send: telemetry +2023-03-12 13:42:55,339 INFO Thread-19 :32820 [dir_watcher.py:_on_file_created():275] file/dir created: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log +2023-03-12 13:42:56,029 INFO wandb-upload_0:32820 [upload_job.py:push():138] Uploaded file C:\Users\73106\AppData\Local\Temp\tmpx1i8pn77wandb\9jhqbby0-wandb-metadata.json +2023-03-12 13:42:56,239 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:42:57,096 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:42:57,350 INFO Thread-19 :32820 [dir_watcher.py:_on_file_modified():292] file/dir modified: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log +2023-03-12 13:42:58,289 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:00,350 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported 
+2023-03-12 13:43:02,122 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:02,404 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:04,438 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:06,487 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:07,153 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:08,527 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:09,372 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:09,373 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:10,575 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:12,204 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:12,626 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:14,419 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:14,668 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:16,704 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:17,241 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:18,748 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:19,436 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:20,794 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: 
Not Supported +2023-03-12 13:43:22,306 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:22,833 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:23,840 INFO Thread-19 :32820 [dir_watcher.py:_on_file_modified():292] file/dir modified: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\config.yaml +2023-03-12 13:43:24,483 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:24,945 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:26,959 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:28,359 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:28,986 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:29,517 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:31,030 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:33,076 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:33,391 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:34,556 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:35,119 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:37,173 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:38,413 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:39,215 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported 
+2023-03-12 13:43:39,590 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:41,264 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:43,319 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:43,452 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:44,639 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:45,353 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:47,408 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:48,526 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:49,447 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:49,696 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:51,494 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:53,545 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:53,555 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:54,162 DEBUG SystemMonitor:32820 [system_monitor.py:_start():130] Starting system metrics aggregation loop +2023-03-12 13:43:54,163 DEBUG SenderThread:32820 [sender.py:send():336] send: stats +2023-03-12 13:43:54,751 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:43:55,674 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:57,695 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported 
+2023-03-12 13:43:59,228 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:43:59,710 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:43:59,810 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:01,748 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:03,802 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:04,255 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:04,851 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:05,854 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:07,898 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:09,275 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:09,907 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:09,932 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:11,973 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:14,026 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:14,335 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:14,922 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:16,102 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:18,138 INFO Thread-19 :32820 [dir_watcher.py:_on_file_modified():292] file/dir 
modified: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log +2023-03-12 13:44:18,155 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:19,510 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:19,972 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:20,197 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:22,261 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:24,165 DEBUG SenderThread:32820 [sender.py:send():336] send: stats +2023-03-12 13:44:24,308 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:25,206 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:25,211 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:26,466 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:28,492 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:30,246 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:30,247 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:30,511 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:32,537 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:34,582 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:35,293 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:35,293 DEBUG 
HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:36,640 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:38,683 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:40,318 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:40,318 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:40,729 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:42,773 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:44,822 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:45,367 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:45,368 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:46,864 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:48,905 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:50,421 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:50,421 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:50,949 ERROR gpu :32820 [interfaces.py:monitor():129] Failed to sample metric: Not Supported +2023-03-12 13:44:51,773 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,774 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,775 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,775 DEBUG 
SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,776 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,776 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,779 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,779 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,779 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,780 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,780 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,781 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,781 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: partial_history +2023-03-12 13:44:51,782 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,783 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,783 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,785 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,785 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,785 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,786 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,786 DEBUG SenderThread:32820 [sender.py:send():336] send: history 
+2023-03-12 13:44:51,786 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,787 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,787 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,787 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,788 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,788 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,788 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,789 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,790 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,790 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,791 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,791 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,792 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,792 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,793 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,793 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,794 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,794 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,794 DEBUG SenderThread:32820 
[sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,795 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,796 DEBUG SenderThread:32820 [sender.py:send():336] send: history +2023-03-12 13:44:51,796 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: summary_record +2023-03-12 13:44:51,797 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,853 DEBUG SenderThread:32820 [sender.py:send():336] send: exit +2023-03-12 13:44:51,853 INFO SenderThread:32820 [sender.py:send_exit():559] handling exit code: 0 +2023-03-12 13:44:51,854 INFO SenderThread:32820 [sender.py:send_exit():561] handling runtime: 117 +2023-03-12 13:44:51,856 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,857 INFO SenderThread:32820 [sender.py:send_exit():567] send defer +2023-03-12 13:44:51,858 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,858 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 0 +2023-03-12 13:44:51,858 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,858 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 0 +2023-03-12 13:44:51,859 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 1 +2023-03-12 13:44:51,859 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,859 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 1 +2023-03-12 13:44:51,860 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,860 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 1 +2023-03-12 13:44:51,860 INFO SenderThread:32820 
[sender.py:transition_state():587] send defer: 2 +2023-03-12 13:44:51,860 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,861 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 2 +2023-03-12 13:44:51,861 INFO HandlerThread:32820 [system_monitor.py:finish():161] Stopping system monitor +2023-03-12 13:44:51,875 DEBUG SystemMonitor:32820 [system_monitor.py:_start():137] Finished system metrics aggregation loop +2023-03-12 13:44:51,876 DEBUG SystemMonitor:32820 [system_monitor.py:_start():141] Publishing last batch of metrics +2023-03-12 13:44:51,879 INFO HandlerThread:32820 [interfaces.py:finish():175] Joined cpu +2023-03-12 13:44:51,880 INFO HandlerThread:32820 [interfaces.py:finish():175] Joined disk +2023-03-12 13:44:51,923 INFO HandlerThread:32820 [interfaces.py:finish():175] Joined gpu +2023-03-12 13:44:51,923 INFO HandlerThread:32820 [interfaces.py:finish():175] Joined memory +2023-03-12 13:44:51,923 INFO HandlerThread:32820 [interfaces.py:finish():175] Joined network +2023-03-12 13:44:51,924 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,924 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 2 +2023-03-12 13:44:51,924 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 3 +2023-03-12 13:44:51,924 DEBUG SenderThread:32820 [sender.py:send():336] send: stats +2023-03-12 13:44:51,925 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,925 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 3 +2023-03-12 13:44:51,925 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,925 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 3 +2023-03-12 13:44:51,925 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 4 +2023-03-12 
13:44:51,925 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,925 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 4 +2023-03-12 13:44:51,926 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,926 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 4 +2023-03-12 13:44:51,926 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 5 +2023-03-12 13:44:51,926 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,926 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 5 +2023-03-12 13:44:51,926 DEBUG SenderThread:32820 [sender.py:send():336] send: summary +2023-03-12 13:44:51,927 INFO SenderThread:32820 [sender.py:_save_file():1321] saving file wandb-summary.json with policy end +2023-03-12 13:44:51,927 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,927 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 5 +2023-03-12 13:44:51,927 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 6 +2023-03-12 13:44:51,928 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:51,928 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 6 +2023-03-12 13:44:51,928 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,928 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 6 +2023-03-12 13:44:51,928 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 7 +2023-03-12 13:44:51,928 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:51,928 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 
13:44:51,929 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 7 +2023-03-12 13:44:51,929 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:51,929 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 7 +2023-03-12 13:44:51,956 INFO Thread-19 :32820 [dir_watcher.py:_on_file_modified():292] file/dir modified: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-summary.json +2023-03-12 13:44:52,857 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: poll_exit +2023-03-12 13:44:53,635 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 8 +2023-03-12 13:44:53,636 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: poll_exit +2023-03-12 13:44:53,637 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:53,638 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 8 +2023-03-12 13:44:53,639 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:53,639 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 8 +2023-03-12 13:44:53,806 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 9 +2023-03-12 13:44:53,806 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:53,806 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 9 +2023-03-12 13:44:53,807 DEBUG SenderThread:32820 [sender.py:send():336] send: artifact +2023-03-12 13:44:53,871 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: poll_exit +2023-03-12 13:44:53,980 INFO Thread-19 :32820 [dir_watcher.py:_on_file_modified():292] file/dir modified: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log +2023-03-12 13:44:57,682 INFO SenderThread:32820 
[sender.py:send_artifact():1417] sent artifact job-https___github.com_GuoPingPan_PPOxFamily.git_ch4_homework4.py - {'id': 'QXJ0aWZhY3Q6Mzk0ODE0Nzc4', 'digest': '1077a87f88af546d42f5d124083a1b78', 'state': 'COMMITTED', 'aliases': [{'artifactCollectionName': 'job-https___github.com_GuoPingPan_PPOxFamily.git_ch4_homework4.py', 'alias': 'latest'}, {'artifactCollectionName': 'job-https___github.com_GuoPingPan_PPOxFamily.git_ch4_homework4.py', 'alias': 'v0'}], 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjU2NzY1MDI4', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6Mzk0ODE0Nzc4', 'versionIndex': 0}}, 'version': 'v0'} +2023-03-12 13:44:57,682 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:57,682 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 9 +2023-03-12 13:44:57,682 INFO SenderThread:32820 [dir_watcher.py:finish():362] shutting down directory watcher +2023-03-12 13:44:57,682 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: status_report +2023-03-12 13:44:58,016 INFO SenderThread:32820 [dir_watcher.py:finish():392] scan: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files +2023-03-12 13:44:58,017 INFO SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\conda-environment.yaml conda-environment.yaml +2023-03-12 13:44:58,019 INFO SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\config.yaml config.yaml +2023-03-12 13:44:58,021 INFO SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log output.log +2023-03-12 13:44:58,028 INFO SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\requirements.txt requirements.txt +2023-03-12 13:44:58,034 INFO 
SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-metadata.json wandb-metadata.json +2023-03-12 13:44:58,036 INFO SenderThread:32820 [dir_watcher.py:finish():406] scan save: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-summary.json wandb-summary.json +2023-03-12 13:44:58,042 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 10 +2023-03-12 13:44:58,042 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: poll_exit +2023-03-12 13:44:58,042 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:58,043 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 10 +2023-03-12 13:44:58,047 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:58,047 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 10 +2023-03-12 13:44:58,047 INFO SenderThread:32820 [file_pusher.py:finish():162] shutting down file pusher +2023-03-12 13:44:58,943 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: keepalive +2023-03-12 13:44:59,218 INFO wandb-upload_3:32820 [upload_job.py:push():138] Uploaded file E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\wandb-summary.json +2023-03-12 13:44:59,688 INFO wandb-upload_0:32820 [upload_job.py:push():138] Uploaded file E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\config.yaml +2023-03-12 13:44:59,701 INFO wandb-upload_2:32820 [upload_job.py:push():138] Uploaded file E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\requirements.txt +2023-03-12 13:44:59,779 INFO wandb-upload_1:32820 [upload_job.py:push():138] Uploaded file E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\files\output.log +2023-03-12 13:44:59,991 INFO Thread-18 :32820 [sender.py:transition_state():587] 
send defer: 11 +2023-03-12 13:44:59,991 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:59,991 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 11 +2023-03-12 13:44:59,991 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:59,991 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 11 +2023-03-12 13:44:59,992 INFO SenderThread:32820 [file_pusher.py:join():167] waiting for file pusher +2023-03-12 13:44:59,992 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 12 +2023-03-12 13:44:59,992 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:44:59,992 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 12 +2023-03-12 13:44:59,992 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:44:59,992 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 12 +2023-03-12 13:45:00,962 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 13 +2023-03-12 13:45:00,962 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:45:00,962 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 13 +2023-03-12 13:45:00,962 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:45:00,962 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 13 +2023-03-12 13:45:00,962 INFO SenderThread:32820 [sender.py:transition_state():587] send defer: 14 +2023-03-12 13:45:00,963 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: defer +2023-03-12 13:45:00,963 INFO HandlerThread:32820 [handler.py:handle_request_defer():170] handle defer: 14 +2023-03-12 13:45:00,963 DEBUG SenderThread:32820 [sender.py:send():336] send: final +2023-03-12 
13:45:00,963 DEBUG SenderThread:32820 [sender.py:send():336] send: footer +2023-03-12 13:45:00,963 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: defer +2023-03-12 13:45:00,964 INFO SenderThread:32820 [sender.py:send_request_defer():583] handle sender defer: 14 +2023-03-12 13:45:00,964 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: poll_exit +2023-03-12 13:45:00,965 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: server_info +2023-03-12 13:45:00,965 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: get_summary +2023-03-12 13:45:00,965 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: sampled_history +2023-03-12 13:45:00,966 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: poll_exit +2023-03-12 13:45:00,966 DEBUG SenderThread:32820 [sender.py:send_request():363] send_request: server_info +2023-03-12 13:45:01,721 INFO MainThread:32820 [wandb_run.py:_footer_history_summary_info():3442] rendering history +2023-03-12 13:45:01,723 INFO MainThread:32820 [wandb_run.py:_footer_history_summary_info():3474] rendering summary +2023-03-12 13:45:01,741 INFO MainThread:32820 [wandb_run.py:_footer_sync_info():3398] logging synced files +2023-03-12 13:45:01,743 DEBUG HandlerThread:32820 [handler.py:handle_request():144] handle_request: shutdown +2023-03-12 13:45:01,743 INFO HandlerThread:32820 [handler.py:finish():841] shutting down handler +2023-03-12 13:45:01,966 INFO WriterThread:32820 [datastore.py:close():299] close: E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\run-g1s73ewz.wandb +2023-03-12 13:45:02,730 INFO SenderThread:32820 [sender.py:finish():1493] shutting down sender +2023-03-12 13:45:02,731 INFO SenderThread:32820 [file_pusher.py:finish():162] shutting down file pusher +2023-03-12 13:45:02,731 INFO SenderThread:32820 [file_pusher.py:join():167] waiting for file pusher diff --git 
a/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug.log b/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug.log new file mode 100644 index 0000000..cbb0a65 --- /dev/null +++ b/my_homework/ch4/run-20230312_134251-g1s73ewz/logs/debug.log @@ -0,0 +1,27 @@ +2023-03-12 13:42:51,599 INFO MainThread:36176 [wandb_setup.py:_flush():68] Configure stats pid to 36176 +2023-03-12 13:42:51,599 INFO MainThread:36176 [wandb_setup.py:_flush():68] Loading settings from C:\Users\73106\.config\wandb\settings +2023-03-12 13:42:51,600 INFO MainThread:36176 [wandb_setup.py:_flush():68] Loading settings from E:\workspace\ANM\PPOxFamily\wandb\settings +2023-03-12 13:42:51,601 INFO MainThread:36176 [wandb_setup.py:_flush():68] Loading settings from environment variables: {'_require_service': 'True'} +2023-03-12 13:42:51,602 INFO MainThread:36176 [wandb_setup.py:_flush():68] Inferring run settings from compute environment: {'program_relpath': 'ch4\\homework4.py', 'program': 'e:\\workspace\\ANM\\PPOxFamily\\ch4\\homework4.py'} +2023-03-12 13:42:51,603 INFO MainThread:36176 [wandb_setup.py:_flush():68] Applying login settings: {'anonymous': 'must'} +2023-03-12 13:42:51,603 INFO MainThread:36176 [wandb_init.py:_log_setup():492] Logging user logs to E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\logs\debug.log +2023-03-12 13:42:51,604 INFO MainThread:36176 [wandb_init.py:_log_setup():493] Logging internal logs to E:\workspace\ANM\PPOxFamily\wandb\run-20230312_134251-g1s73ewz\logs\debug-internal.log +2023-03-12 13:42:51,604 INFO MainThread:36176 [wandb_init.py:init():532] calling init triggers +2023-03-12 13:42:51,605 INFO MainThread:36176 [wandb_init.py:init():538] wandb.init called with sweep_config: {} +config: {} +2023-03-12 13:42:51,605 INFO MainThread:36176 [wandb_init.py:init():588] starting backend +2023-03-12 13:42:51,605 INFO MainThread:36176 [wandb_init.py:init():592] setting up manager +2023-03-12 13:42:51,610 INFO MainThread:36176 
[backend.py:_multiprocessing_setup():106] multiprocessing start_methods=spawn, using: spawn +2023-03-12 13:42:51,618 INFO MainThread:36176 [wandb_init.py:init():599] backend started and connected +2023-03-12 13:42:51,620 INFO MainThread:36176 [wandb_init.py:init():687] updated telemetry +2023-03-12 13:42:51,692 INFO MainThread:36176 [wandb_init.py:init():727] communicating run to backend with 60.0 second timeout +2023-03-12 13:42:53,333 INFO MainThread:36176 [wandb_run.py:_on_init():2134] communicating current version +2023-03-12 13:42:54,067 INFO MainThread:36176 [wandb_run.py:_on_init():2143] got version response upgrade_message: "wandb version 0.13.11 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2023-03-12 13:42:54,067 INFO MainThread:36176 [wandb_init.py:init():775] starting run threads in backend +2023-03-12 13:42:54,357 INFO MainThread:36176 [wandb_run.py:_console_start():2114] atexit reg +2023-03-12 13:42:54,357 INFO MainThread:36176 [wandb_run.py:_redirect():1969] redirect: SettingsConsole.WRAP_RAW +2023-03-12 13:42:54,357 INFO MainThread:36176 [wandb_run.py:_redirect():2034] Wrapping output streams. +2023-03-12 13:42:54,357 INFO MainThread:36176 [wandb_run.py:_redirect():2059] Redirects installed. 
+2023-03-12 13:42:54,358 INFO MainThread:36176 [wandb_init.py:init():817] run started, returning control to user process +2023-03-12 13:45:02,818 WARNING MsgRouterThr:36176 [router.py:message_loop():77] message_loop has been closed diff --git "a/my_homework/ch4/\343\200\220PPO\303\227Family\343\200\221+\347\233\230\345\233\275\350\220\215+vol.4+20230312.pdf" "b/my_homework/ch4/\343\200\220PPO\303\227Family\343\200\221+\347\233\230\345\233\275\350\220\215+vol.4+20230312.pdf" new file mode 100644 index 0000000..eb334f0 Binary files /dev/null and "b/my_homework/ch4/\343\200\220PPO\303\227Family\343\200\221+\347\233\230\345\233\275\350\220\215+vol.4+20230312.pdf" differ diff --git a/my_homework/ch5/lstm.py b/my_homework/ch5/lstm.py new file mode 100644 index 0000000..b483882 --- /dev/null +++ b/my_homework/ch5/lstm.py @@ -0,0 +1,197 @@ +""" +Long Short Term Memory (LSTM) is a kind of recurrent neural network that can capture long-short term information. +This document mainly includes: +- Pytorch implementation for LSTM. +- An example to test LSTM. +For beginners, you can refer to to learn the basics about how LSTM works. +""" +from typing import Optional, Union, Tuple, List, Dict +import math +import torch +import torch.nn as nn +from ding.torch_utils import build_normalization + + +class LSTM(nn.Module): + """ + **Overview:** + Implementation of LSTM cell with layer norm. + """ + + def __init__( + self, + input_size: int, + hidden_size: int, + num_layers: int, + norm_type: Optional[str] = 'LN', + dropout: float = 0. + ) -> None: + # Initialize arguments. + super(LSTM, self).__init__() + self.input_size = input_size + self.hidden_size = hidden_size + self.num_layers = num_layers + # Initialize normalization functions. + norm_func = build_normalization(norm_type) + self.norm = nn.ModuleList([norm_func(hidden_size * 4) for _ in range(2 * num_layers)]) + # Initialize LSTM parameters. 
+ self.wx = nn.ParameterList() + self.wh = nn.ParameterList() + dims = [input_size] + [hidden_size] * num_layers + for l in range(num_layers): + self.wx.append(nn.Parameter(torch.zeros(dims[l], dims[l + 1] * 4))) + self.wh.append(nn.Parameter(torch.zeros(hidden_size, hidden_size * 4))) + self.bias = nn.Parameter(torch.zeros(num_layers, hidden_size * 4)) + # Initialize the Dropout Layer. + self.use_dropout = dropout > 0. + if self.use_dropout: + self.dropout = nn.Dropout(dropout) + self._init() + + # Dealing with different types of input and return preprocessed prev_state. + def _before_forward(self, inputs: torch.Tensor, prev_state: Union[None, List[Dict]]) -> torch.Tensor: + seq_len, batch_size = inputs.shape[:2] + # If prev_state is None, it indicates that this is the beginning of a sequence. In this case, prev_state will be initialized as zero. + if prev_state is None: + zeros = torch.zeros(self.num_layers, batch_size, self.hidden_size, dtype=inputs.dtype, device=inputs.device) + prev_state = (zeros, zeros) + # If prev_state is not None, then preprocess it into one batch. + else: + assert len(prev_state) == batch_size + state = [[v for v in prev.values()] for prev in prev_state] + state = list(zip(*state)) + prev_state = [torch.cat(t, dim=1) for t in state] + + return prev_state + + def _init(self): + # Initialize parameters. Each parameter is initialized using a uniform distribution of: $$U(-\sqrt {\frac 1 {HiddenSize}}, -\sqrt {\frac 1 {HiddenSize}})$$ + gain = math.sqrt(1. 
/ self.hidden_size) + for l in range(self.num_layers): + torch.nn.init.uniform_(self.wx[l], -gain, gain) + torch.nn.init.uniform_(self.wh[l], -gain, gain) + if self.bias is not None: + torch.nn.init.uniform_(self.bias[l], -gain, gain) + + def forward( + self, + inputs: torch.Tensor, + prev_state: torch.Tensor, + ) -> Tuple[torch.Tensor, Union[torch.Tensor, list]]: + # The shape of input is: [sequence length, batch size, input size] + seq_len, batch_size = inputs.shape[:2] + prev_state = self._before_forward(inputs, prev_state) + + H, C = prev_state + x = inputs + next_state = [] + for l in range(self.num_layers): + h, c = H[l], C[l] + new_x = [] + for s in range(seq_len): + # Calculate $$z, z^i, z^f, z^o$$ simultaneously. + gate = self.norm[l * 2](torch.matmul(x[s], self.wx[l]) + ) + self.norm[l * 2 + 1](torch.matmul(h, self.wh[l])) + if self.bias is not None: + gate += self.bias[l] + gate = list(torch.chunk(gate, 4, dim=1)) + i, f, o, z = gate + # $$z^i = \sigma (Wx^ix^t + Wh^ih^{t-1})$$ + i = torch.sigmoid(i) + # $$z^f = \sigma (Wx^fx^t + Wh^fh^{t-1})$$ + f = torch.sigmoid(f) + # $$z^o = \sigma (Wx^ox^t + Wh^oh^{t-1})$$ + o = torch.sigmoid(o) + # $$z = tanh(Wxx^t + Whh^{t-1})$$ + z = torch.tanh(z) + # $$c^t = z^f \odot c^{t-1}+z^i \odot z$$ + c = f * c + i * z + # $$h^t = z^o \odot tanh(c^t)$$ + h = o * torch.tanh(c) + new_x.append(h) + next_state.append((h, c)) + x = torch.stack(new_x, dim=0) + # Dropout layer. + if self.use_dropout and l != self.num_layers - 1: + x = self.dropout(x) + next_state = [torch.stack(t, dim=0) for t in zip(*next_state)] + # Return list type, split the next_state . + h, c = next_state + batch_size = h.shape[1] + # Split h with shape [num_layers, batch_size, hidden_size] to a list with length batch_size and each element is a tensor with shape [num_layers, 1, hidden_size]. The same operation is performed on c. 
+ next_state = [torch.chunk(h, batch_size, dim=1), torch.chunk(c, batch_size, dim=1)] + next_state = list(zip(*next_state)) + next_state = [{k: v for k, v in zip(['h', 'c'], item)} for item in next_state] + return x, next_state + + +def pack_data(data: List[torch.Tensor], traj_len: int) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Overview: + You need to pack variable-length data to regular tensor, return tensor and corresponding mask. + If len(data_i) < traj_len, use `null_padding`, + else split the whole sequences info different trajectories. + Returns: + - tensor (:obj:`torch.Tensor`): dtype (torch.float32), shape (traj_len, B, N) + - mask (:obj:`torch.Tensor`): dtype (torch.float32), shape (traj_len, B) + """ + + traj_list = [] + mask_list = [] + + for traj in data: + length, num = traj.shape + # less than traj_len use null padding + if length < traj_len: + pad_traj = torch.concat([traj, torch.zeros(traj_len - length, num)]) + pad_mask = torch.ones(traj_len) + pad_mask[length:] = 0.0 + traj_list.append(pad_traj) + mask_list.append(pad_mask) + # greater than traj_len use split and forward padding + else: + for i in range(0, length, traj_len): + if i+traj_len <= length: + pad_traj = traj[i:i+traj_len] + else: + pad_traj = traj[-traj_len:] + traj_list.append(pad_traj) + mask_list.append(torch.ones(traj_len)) + + traj_pad_and_split = torch.stack(traj_list, dim=1) + mask_pad_and_split = torch.stack(mask_list, dim=1) + + return traj_pad_and_split, mask_pad_and_split + + +def test_lstm(): + seq_len_list = [32, 49, 24, 78, 45] + traj_len = 32 + N = 10 + hidden_size = 32 + num_layers = 2 + + variable_len_data = [torch.rand(s, N) for s in seq_len_list] + input_, mask = pack_data(variable_len_data, traj_len) + assert isinstance(input_, torch.Tensor), type(input_) + batch_size = input_.shape[1] + assert batch_size == 9, "packed data must have 9 trajectories" + lstm = LSTM(N, hidden_size=hidden_size, num_layers=num_layers, norm_type='LN', dropout=0.1) + + prev_state = None 
+ for s in range(traj_len): + input_step = input_[s:s + 1] + output, prev_state = lstm(input_step, prev_state) + + assert output.shape == (1, batch_size, hidden_size) + assert len(prev_state) == batch_size + assert prev_state[0]['h'].shape == (num_layers, 1, hidden_size) + loss = (output * mask.unsqueeze(-1)).mean() + loss.backward() + for _, m in lstm.named_parameters(): + assert isinstance(m.grad, torch.Tensor) + print('finished') + + +if __name__ == '__main__': + test_lstm() diff --git a/my_homework/ch6/h2_mpe/ckpt/ckpt_best.pth.tar b/my_homework/ch6/h2_mpe/ckpt/ckpt_best.pth.tar new file mode 100644 index 0000000..18940a7 Binary files /dev/null and b/my_homework/ch6/h2_mpe/ckpt/ckpt_best.pth.tar differ diff --git a/my_homework/ch6/h2_mpe/ckpt/iteration_0.pth.tar b/my_homework/ch6/h2_mpe/ckpt/iteration_0.pth.tar new file mode 100644 index 0000000..1b86012 Binary files /dev/null and b/my_homework/ch6/h2_mpe/ckpt/iteration_0.pth.tar differ diff --git a/my_homework/ch6/h2_mpe/formatted_total_config.py b/my_homework/ch6/h2_mpe/formatted_total_config.py new file mode 100644 index 0000000..a012272 --- /dev/null +++ b/my_homework/ch6/h2_mpe/formatted_total_config.py @@ -0,0 +1,158 @@ +from easydict import EasyDict + +main_config = dict( + exp_name='ptz_simple_spread_mappo_seed0_230429_135625', + env=dict( + manager=dict( + episode_num=float('inf'), + max_retry=5, + step_timeout=None, + auto_reset=True, + reset_timeout=None, + retry_type='reset', + retry_waiting_time=0.1, + shared_memory=True, + copy_on_get=True, + context='fork', + wait_num=float('inf'), + step_wait_timeout=None, + connect_timeout=60, + reset_inplace=False, + cfg_type='SyncSubprocessEnvManagerDict', + type='subprocess', + ), + stop_value=0, + env_family='mpe', + env_id='simple_spread_v2', + n_agent=3, + n_landmark=3, + max_cycles=25, + agent_obs_only=False, + agent_specific_global_state=True, + continuous_actions=False, + collector_env_num=64, + evaluator_env_num=64, + n_evaluator_episode=64, + ), + 
policy=dict( + model=dict( + action_space='discrete', + agent_num=3, + agent_obs_shape=18, + global_obs_shape=48, + action_shape=5, + ), + learn=dict( + learner=dict( + train_iterations=1000000000, + dataloader=dict( + num_workers=0, + ), + log_policy=True, + hook=dict( + load_ckpt_before_run='', + log_show_after_iter=100, + save_ckpt_after_iter=10000, + save_ckpt_after_run=True, + ), + cfg_type='BaseLearnerDict', + ), + epoch_per_collect=5, + batch_size=3200, + learning_rate=0.0005, + value_weight=0.5, + entropy_weight=0.01, + clip_ratio=0.2, + adv_norm=False, + value_norm=True, + ppo_param_init=True, + grad_clip_type='clip_norm', + grad_clip_value=10, + ignore_done=False, + multi_gpu=False, + ), + collect=dict( + collector=dict( + deepcopy_obs=False, + transform_obs=False, + collect_print_freq=100, + cfg_type='SampleSerialCollectorDict', + type='sample', + ), + unroll_len=1, + discount_factor=0.99, + gae_lambda=0.95, + n_sample=3200, + env_num=64, + ), + eval=dict( + evaluator=dict( + eval_freq=50, + render={'render_freq': -1, 'mode': 'train_iter'}, + cfg_type='InteractionSerialEvaluatorDict', + n_episode=64, + stop_value=0, + ), + env_num=64, + ), + other=dict( + replay_buffer=dict( + type='advanced', + replay_buffer_size=4096, + max_use=float('inf'), + max_staleness=float('inf'), + alpha=0.6, + beta=0.4, + anneal_step=100000, + enable_track_used_data=False, + deepcopy=False, + thruput_controller=dict( + push_sample_rate_limit=dict( + max=float('inf'), + min=0, + ), + window_seconds=30, + sample_min_limit_ratio=1, + ), + monitor=dict( + sampled_data_attr=dict( + average_range=5, + print_freq=200, + ), + periodic_thruput=dict( + seconds=60, + ), + ), + cfg_type='AdvancedReplayBufferDict', + ), + ), + on_policy=True, + cuda=True, + multi_gpu=False, + bp_update_sync=True, + traj_len_inf=False, + priority=False, + priority_IS_weight=False, + recompute_adv=True, + action_space='discrete', + nstep_return=False, + multi_agent=True, + transition_with_policy_data=True, + 
cfg_type='PPOCommandModePolicyDict', + ), +) +main_config = EasyDict(main_config) +main_config = main_config +create_config = dict( + env=dict( + import_names=['dizoo.petting_zoo.envs.petting_zoo_simple_spread_env'], + type='petting_zoo', + ), + env_manager=dict( + cfg_type='SyncSubprocessEnvManagerDict', + type='subprocess', + ), + policy=dict(type='ppo'), +) +create_config = EasyDict(create_config) +create_config = create_config diff --git a/my_homework/ch6/h2_mpe/git_diff.txt b/my_homework/ch6/h2_mpe/git_diff.txt new file mode 100644 index 0000000..aaed5f1 --- /dev/null +++ b/my_homework/ch6/h2_mpe/git_diff.txt @@ -0,0 +1,15 @@ +diff --git a/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py b/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py +index 5eb1095a..3ea3d302 100644 +--- a/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py ++++ b/dizoo/petting_zoo/config/ptz_simple_spread_mappo_config.py +@@ -2,8 +2,8 @@ from easydict import EasyDict + + n_agent = 3 + n_landmark = n_agent +-collector_env_num = 8 +-evaluator_env_num = 8 ++collector_env_num = 64 ++evaluator_env_num = 64 + main_config = dict( + exp_name='ptz_simple_spread_mappo_seed0', + env=dict( \ No newline at end of file diff --git a/my_homework/ch6/h2_mpe/git_log.txt b/my_homework/ch6/h2_mpe/git_log.txt new file mode 100644 index 0000000..d423e38 --- /dev/null +++ b/my_homework/ch6/h2_mpe/git_log.txt @@ -0,0 +1,148 @@ +v0.4.7-9-g6f73fe2c + +commit 6f73fe2c2007c248805000f4d03bf6d9a8d78ade +Author: Ikko Eltociear Ashimine +Date: Fri Apr 28 11:22:44 2023 +0900 + + fix(eltociear): typo in gyn any_trading env (#654) + + defination -> definition + + dizoo/gym_anytrading/envs/README.md | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +commit b094f5ed0e2133e8abc10ff2c996b0532fe90b3f +Author: zjowowen <93968541+zjowowen@users.noreply.github.com> +Date: Thu Apr 27 15:20:44 2023 +0800 + + fix(zjow): fix incompatible gym version bug in Dockerfile.env (#653) + + This dockerfile 
will make mujoco image would having gym version 0.26.2, which is not supported by DI-engine. + + docker/Dockerfile.env | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +commit a82caab559541352a60274af0ed42db1f442ce39 +Author: niuyazhe +Date: Wed Apr 26 15:32:54 2023 +0800 + + fix(nyz): fix ensemble head unittest bug + + ding/model/common/tests/test_head.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +commit 9e7002f74a3ff0805bbb537934863036489cebdc +Author: 蒲源 <48008469+puyuan1996@users.noreply.github.com> +Date: Tue Apr 25 17:54:48 2023 +0800 + + fix(pu): fix last_linear_layer_weight_bias_init_zero in MLP and add its unittest (#650) + + * fix(pu): fix last_linear_layer_weight_bias_init_zero in MLP and add its unittest + + * polish(pu): polish unittest of mlp + + * style(pu): yapf format + + * style(pu): flake8 format + + * polish(pu): polish the output_activation and output_norm in MLP + + * style(pu): polish the annotations in MLP, yapf format + + * style(pu): flake8 style fix + + * fix(pu): fix output_activation and output_norm in MLP + + ding/torch_utils/network/nn_module.py | 62 +++++++++++++----------- + ding/torch_utils/network/tests/test_nn_module.py | 62 ++++++++++++++++++------ + 2 files changed, 79 insertions(+), 45 deletions(-) + +commit aefddace0c5a1c7f437d43e6ef2de40a4d1d4b19 +Author: Super1ce <32703938+Super1ce@users.noreply.github.com> +Date: Tue Apr 25 17:40:45 2023 +0800 + + feature(zc): add EDAC and modify config of td3bc (#639) + + * add EDAC and modify config of td3bc + + * modify edac + + * add conv1d + + * add test_ensemble + + * add encoder + + * add encoder + + * add encoder + + * modify policy_init + + * modify edac + + * add init + + * modify td3_bc and readme + + * remove head in qac + + * modify edac comment + + * modify edac comment + + * modif edac + + * modify edac + + * modify head overview + + * modify example + + * format + + README.md | 1 + + ding/example/edac.py | 42 +++ + ding/model/common/__init__.py | 2 
+- + ding/model/common/head.py | 76 +++++- + ding/model/common/tests/test_head.py | 9 +- + ding/model/template/__init__.py | 1 + + ding/model/template/edac.py | 181 +++++++++++++ + ding/model/template/qac.py | 0 + ding/policy/__init__.py | 1 + + ding/policy/command_mode_policy_instance.py | 6 + + ding/policy/edac.py | 290 +++++++++++++++++++++ + ding/policy/td3_bc.py | 6 + + ding/torch_utils/__init__.py | 0 + ding/torch_utils/network/__init__.py | 2 +- + dizoo/d4rl/config/halfcheetah_expert_cql_config.py | 6 +- + .../d4rl/config/halfcheetah_expert_td3bc_config.py | 10 +- + dizoo/d4rl/config/halfcheetah_medium_cql_config.py | 6 +- + .../d4rl/config/halfcheetah_medium_edac_config.py | 58 +++++ + .../config/halfcheetah_medium_expert_cql_config.py | 6 +- + .../halfcheetah_medium_expert_edac_config.py | 58 +++++ + .../halfcheetah_medium_expert_td3bc_config.py | 10 +- + .../config/halfcheetah_medium_replay_cql_config.py | 6 +- + .../halfcheetah_medium_replay_td3bc_config.py | 8 +- + .../d4rl/config/halfcheetah_medium_td3bc_config.py | 10 +- + dizoo/d4rl/config/halfcheetah_random_cql_config.py | 6 +- + .../d4rl/config/halfcheetah_random_td3bc_config.py | 8 +- + dizoo/d4rl/config/hopper_expert_td3bc_config.py | 6 +- + dizoo/d4rl/config/hopper_medium_edac_config.py | 58 +++++ + .../config/hopper_medium_expert_edac_config.py | 58 +++++ + .../config/hopper_medium_expert_td3bc_config.py | 6 +- + .../config/hopper_medium_replay_td3bc_config.py | 6 +- + dizoo/d4rl/config/hopper_medium_td3bc_config.py | 6 +- + dizoo/d4rl/config/hopper_random_td3bc_config.py | 2 +- + dizoo/d4rl/config/walker2d_expert_cql_config.py | 6 +- + dizoo/d4rl/config/walker2d_expert_td3bc_config.py | 8 +- + dizoo/d4rl/config/walker2d_medium_cql_config.py | 6 +- + .../config/walker2d_medium_expert_cql_config.py | 6 +- + .../config/walker2d_medium_expert_td3bc_config.py | 8 +- + .../config/walker2d_medium_replay_cql_config.py | 6 +- + .../config/walker2d_medium_replay_td3bc_config.py | 8 +- + 
dizoo/d4rl/config/walker2d_medium_td3bc_config.py | 8 +- + dizoo/d4rl/config/walker2d_random_cql_config.py | 6 +- + dizoo/d4rl/config/walker2d_random_td3bc_config.py | 8 +- + dizoo/d4rl/entry/d4rl_edac_main.py | 21 ++ + 44 files changed, 944 insertions(+), 98 deletions(-) \ No newline at end of file diff --git a/my_homework/ch6/h2_mpe/log/collector/collector_logger.txt b/my_homework/ch6/h2_mpe/log/collector/collector_logger.txt new file mode 100644 index 0000000..fa61e25 --- /dev/null +++ b/my_homework/ch6/h2_mpe/log/collector/collector_logger.txt @@ -0,0 +1,252 @@ +[2023-04-29 14:00:40][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2688 +envstep_count: 67200 +train_sample_count: 67200 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 296.14747770898214 +avg_train_sample_per_sec: 296.14747770898214 +avg_episode_per_sec: 11.845899108359285 +collect_time: 226.91397043075958 +reward_mean: -217.99486322996893 +reward_std: 40.789886426358365 +reward_max: -115.53577301212832 +reward_min: -414.28030682599575 +total_envstep_count: 67200 +total_train_sample_count: 67200 +total_episode_count: 2688 +total_duration: 226.91397043075958 +[2023-04-29 14:05:29][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 237.10342375645823 +avg_train_sample_per_sec: 237.10342375645823 +avg_episode_per_sec: 9.484136950258328 +collect_time: 269.9244025499095 +reward_mean: -204.11036846673005 +reward_std: 34.915294736845155 +reward_max: -113.68032928106577 +reward_min: -328.84952915901005 +total_envstep_count: 131200 +total_train_sample_count: 131200 +total_episode_count: 5248 +total_duration: 496.8383729806691 +[2023-04-29 14:10:06][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 
+avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 246.95106290344873 +avg_train_sample_per_sec: 246.95106290344873 +avg_episode_per_sec: 9.878042516137949 +collect_time: 259.16065817875136 +reward_mean: -203.0305983161908 +reward_std: 33.34253434715472 +reward_max: -113.27561146019649 +reward_min: -308.27528385666324 +total_envstep_count: 195200 +total_train_sample_count: 195200 +total_episode_count: 7808 +total_duration: 755.9990311594204 +[2023-04-29 14:14:04][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 290.33792254959866 +avg_train_sample_per_sec: 290.33792254959866 +avg_episode_per_sec: 11.613516901983946 +collect_time: 220.43279581938467 +reward_mean: -199.42708762074133 +reward_std: 32.26345074390525 +reward_max: -111.79729853847049 +reward_min: -302.1158277870628 +total_envstep_count: 259200 +total_train_sample_count: 259200 +total_episode_count: 10368 +total_duration: 976.431826978805 +[2023-04-29 14:19:05][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 226.01024646662924 +avg_train_sample_per_sec: 226.01024646662924 +avg_episode_per_sec: 9.04040985866517 +collect_time: 283.17300211187415 +reward_mean: -198.5036286730239 +reward_std: 32.89677999679214 +reward_max: -110.08446163992775 +reward_min: -298.65060460116956 +total_envstep_count: 323200 +total_train_sample_count: 323200 +total_episode_count: 12928 +total_duration: 1259.6048290906792 +[2023-04-29 14:23:53][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 236.71547256731867 +avg_train_sample_per_sec: 236.71547256731867 
+avg_episode_per_sec: 9.468618902692747 +collect_time: 270.3667796020358 +reward_mean: -194.7217888726822 +reward_std: 32.57204887259738 +reward_max: -112.65667395005265 +reward_min: -297.05066550855605 +total_envstep_count: 387200 +total_train_sample_count: 387200 +total_episode_count: 15488 +total_duration: 1529.971608692715 +[2023-04-29 14:27:43][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 303.09903084069515 +avg_train_sample_per_sec: 303.09903084069515 +avg_episode_per_sec: 12.123961233627806 +collect_time: 211.15211032673196 +reward_mean: -191.45372039848226 +reward_std: 31.822333173425722 +reward_max: -104.79884411776607 +reward_min: -291.83384915496396 +total_envstep_count: 451200 +total_train_sample_count: 451200 +total_episode_count: 18048 +total_duration: 1741.123719019447 +[2023-04-29 14:32:43][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 227.55734997427254 +avg_train_sample_per_sec: 227.55734997427254 +avg_episode_per_sec: 9.102293998970902 +collect_time: 281.2477821842968 +reward_mean: -187.6594948786638 +reward_std: 31.067222150300406 +reward_max: -105.86726943335208 +reward_min: -288.57740855540555 +total_envstep_count: 515200 +total_train_sample_count: 515200 +total_episode_count: 20608 +total_duration: 2022.3715012037437 +[2023-04-29 14:37:29][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 238.97107729311685 +avg_train_sample_per_sec: 238.97107729311685 +avg_episode_per_sec: 9.558843091724674 +collect_time: 267.8148365272629 +reward_mean: -185.0094225879725 +reward_std: 
30.74591188319982 +reward_max: -106.9537674260614 +reward_min: -288.1991909859423 +total_envstep_count: 579200 +total_train_sample_count: 579200 +total_episode_count: 23168 +total_duration: 2290.1863377310065 +[2023-04-29 14:41:15][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 306.75093391652797 +avg_train_sample_per_sec: 306.75093391652797 +avg_episode_per_sec: 12.270037356661119 +collect_time: 208.63832159485932 +reward_mean: -181.7676889409079 +reward_std: 29.868471246448248 +reward_max: -103.96260654410423 +reward_min: -272.1104092430865 +total_envstep_count: 643200 +total_train_sample_count: 643200 +total_episode_count: 25728 +total_duration: 2498.824659325866 +[2023-04-29 14:46:15][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 227.5391924469946 +avg_train_sample_per_sec: 227.5391924469946 +avg_episode_per_sec: 9.101567697879783 +collect_time: 281.2702256333657 +reward_mean: -179.13182512314762 +reward_std: 30.226717439210613 +reward_max: -102.28849076491042 +reward_min: -281.2275785876745 +total_envstep_count: 707200 +total_train_sample_count: 707200 +total_episode_count: 28288 +total_duration: 2780.0948849592314 +[2023-04-29 14:51:05][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 235.06629064834866 +avg_train_sample_per_sec: 235.06629064834866 +avg_episode_per_sec: 9.402651625933945 +collect_time: 272.26362326762484 +reward_mean: -177.12983063968738 +reward_std: 29.151326295813146 +reward_max: -105.29684973095158 +reward_min: -272.0329000952998 +total_envstep_count: 771200 
+total_train_sample_count: 771200 +total_episode_count: 30848 +total_duration: 3052.3585082268564 +[2023-04-29 14:54:50][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 309.03995825680533 +avg_train_sample_per_sec: 309.03995825680533 +avg_episode_per_sec: 12.361598330272212 +collect_time: 207.0929609264878 +reward_mean: -175.39500065516654 +reward_std: 28.659428626551257 +reward_max: -103.40343793654888 +reward_min: -260.95956286062926 +total_envstep_count: 835200 +total_train_sample_count: 835200 +total_episode_count: 33408 +total_duration: 3259.451469153344 +[2023-04-29 14:59:45][sample_serial_collector.py:365][INFO] collect end: +episode_count: 2560 +envstep_count: 64000 +train_sample_count: 64000 +avg_envstep_per_episode: 25.0 +avg_sample_per_episode: 25.0 +avg_envstep_per_sec: 231.60953850571443 +avg_train_sample_per_sec: 231.60953850571443 +avg_episode_per_sec: 9.264381540228577 +collect_time: 276.32713407622003 +reward_mean: -172.36962698622952 +reward_std: 28.43641588623627 +reward_max: -104.05006979728763 +reward_min: -261.3536097951072 +total_envstep_count: 899200 +total_train_sample_count: 899200 +total_episode_count: 35968 +total_duration: 3535.778603229564 diff --git a/my_homework/ch6/h2_mpe/log/evaluator/evaluator_logger.txt b/my_homework/ch6/h2_mpe/log/evaluator/evaluator_logger.txt new file mode 100644 index 0000000..f07b8f3 --- /dev/null +++ b/my_homework/ch6/h2_mpe/log/evaluator/evaluator_logger.txt @@ -0,0 +1,2542 @@ +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -275.9037, current episode: 1 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -538.8339, current episode: 2 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish 
episode, final reward: -268.1921, current episode: 3 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -348.1588, current episode: 4 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -300.2376, current episode: 5 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -273.9619, current episode: 6 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -245.5217, current episode: 7 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -255.3755, current episode: 8 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -238.7327, current episode: 9 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -403.0160, current episode: 10 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -467.5439, current episode: 11 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -350.9550, current episode: 12 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -349.6909, current episode: 13 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -333.2660, current episode: 14 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -192.4466, current episode: 15 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -381.9500, current episode: 16 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 16 finish episode, final reward: -534.1245, current episode: 17 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -221.3578, current episode: 18 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -374.8877, current episode: 19 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -561.9864, current episode: 20 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -501.8416, current episode: 21 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -259.1218, current episode: 22 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -568.3029, current episode: 23 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -235.6255, current episode: 24 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -260.2124, current episode: 25 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -719.9672, current episode: 26 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -192.3016, current episode: 27 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -300.1699, current episode: 28 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -465.4619, current episode: 29 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -256.7880, current episode: 30 +[2023-04-29 
13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -211.3826, current episode: 31 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -307.9787, current episode: 32 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -358.4941, current episode: 33 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -297.6725, current episode: 34 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -347.9472, current episode: 35 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -528.8097, current episode: 36 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -386.6518, current episode: 37 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -613.8024, current episode: 38 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -325.8535, current episode: 39 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -202.0597, current episode: 40 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -338.1969, current episode: 41 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -424.8166, current episode: 42 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -260.0030, current episode: 43 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -296.8323, 
current episode: 44 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -378.5358, current episode: 45 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -194.5858, current episode: 46 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -404.3053, current episode: 47 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -354.0030, current episode: 48 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -283.6507, current episode: 49 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -507.8806, current episode: 50 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -520.9576, current episode: 51 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -456.7448, current episode: 52 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -295.3069, current episode: 53 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -337.1736, current episode: 54 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -278.4070, current episode: 55 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -347.3921, current episode: 56 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -339.0478, current episode: 57 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish 
episode, final reward: -554.2265, current episode: 58 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -226.0882, current episode: 59 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -592.8055, current episode: 60 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -286.0428, current episode: 61 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -215.7761, current episode: 62 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -398.7136, current episode: 63 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -498.7523, current episode: 64 +[2023-04-29 13:56:36][interaction_serial_evaluator.py:279][INFO] ++-------+------------+---------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+---------------------+---------------+---------------+ +| Value | 0.000000 | iteration_0.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+---------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.580551 | 2756.000628 | 110.240025 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | 
++-------+-------------+------------+-------------+-------------+ +| Value | -360.106749 | 121.300715 | -192.301590 | -719.967163 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.4476, current episode: 1 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -187.2263, current episode: 2 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -177.2961, current episode: 3 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -171.5119, current episode: 4 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -147.5460, current episode: 5 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -241.4884, current episode: 6 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -245.2181, current episode: 7 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -215.1408, current episode: 8 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -191.0824, current episode: 9 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -252.8632, current episode: 10 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -311.9903, current episode: 11 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -229.4050, current episode: 12 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 12 finish episode, final reward: -271.4858, current episode: 13 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -230.0103, current episode: 14 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -183.1030, current episode: 15 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -198.0174, current episode: 16 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -274.5836, current episode: 17 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -195.5403, current episode: 18 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -133.5203, current episode: 19 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -211.6881, current episode: 20 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -234.6236, current episode: 21 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -208.9516, current episode: 22 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -231.4382, current episode: 23 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -182.6857, current episode: 24 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -196.2780, current episode: 25 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -312.3509, current episode: 26 +[2023-04-29 
13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -151.8632, current episode: 27 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -219.8410, current episode: 28 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -178.2628, current episode: 29 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -211.1314, current episode: 30 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -172.8125, current episode: 31 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -291.9189, current episode: 32 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -204.8825, current episode: 33 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -197.3047, current episode: 34 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -195.1559, current episode: 35 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -224.7990, current episode: 36 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -173.5211, current episode: 37 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -293.5205, current episode: 38 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -195.1696, current episode: 39 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -253.1041, 
current episode: 40 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -209.0477, current episode: 41 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -169.9447, current episode: 42 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -178.6109, current episode: 43 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -234.1064, current episode: 44 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -258.3249, current episode: 45 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -180.0413, current episode: 46 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -178.9284, current episode: 47 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -200.4158, current episode: 48 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -259.0764, current episode: 49 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -191.4059, current episode: 50 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -278.2798, current episode: 51 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -248.5861, current episode: 52 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -230.0114, current episode: 53 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish 
episode, final reward: -239.5945, current episode: 54 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -217.9220, current episode: 55 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -203.7074, current episode: 56 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -221.8934, current episode: 57 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -162.8643, current episode: 58 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -183.6405, current episode: 59 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -182.5500, current episode: 60 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -203.6483, current episode: 61 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -225.4314, current episode: 62 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -207.0636, current episode: 63 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -161.8516, current episode: 64 +[2023-04-29 13:58:15][interaction_serial_evaluator.py:279][INFO] ++-------+------------+----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+----------------------+---------------+---------------+ +| Value | 50.000000 | iteration_50.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+----------------------+---------------+---------------+ 
++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.583314 | 2742.947930 | 109.717917 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -212.980107 | 38.963933 | -133.520309 | -312.350922 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -187.2281, current episode: 1 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -199.6840, current episode: 2 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -182.9556, current episode: 3 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -181.4420, current episode: 4 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -108.9465, current episode: 5 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -225.6067, current episode: 6 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -235.0110, current episode: 7 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -237.9777, current episode: 8 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 8 finish episode, final reward: -185.7329, current episode: 9 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -246.4638, current episode: 10 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -243.6110, current episode: 11 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -221.6359, current episode: 12 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -251.7029, current episode: 13 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -216.4750, current episode: 14 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -191.6212, current episode: 15 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -154.2946, current episode: 16 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -301.9589, current episode: 17 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -193.3316, current episode: 18 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -160.3873, current episode: 19 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -224.7507, current episode: 20 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -222.7320, current episode: 21 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -178.9668, current episode: 22 +[2023-04-29 
14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -241.5874, current episode: 23 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -164.6184, current episode: 24 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -149.4929, current episode: 25 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -307.6077, current episode: 26 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -143.7532, current episode: 27 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -174.8819, current episode: 28 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -184.1815, current episode: 29 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -190.9121, current episode: 30 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -154.7546, current episode: 31 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -286.4962, current episode: 32 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -192.5477, current episode: 33 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -235.6304, current episode: 34 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -193.3696, current episode: 35 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -200.9325, 
current episode: 36 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -176.6496, current episode: 37 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -268.8667, current episode: 38 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -174.5385, current episode: 39 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -174.3552, current episode: 40 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -168.5679, current episode: 41 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -170.2487, current episode: 42 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -175.0631, current episode: 43 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.1332, current episode: 44 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -208.3241, current episode: 45 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -154.7536, current episode: 46 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -175.6315, current episode: 47 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -175.4105, current episode: 48 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -236.4451, current episode: 49 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish 
episode, final reward: -170.1877, current episode: 50 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -257.9345, current episode: 51 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -252.7052, current episode: 52 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -197.9507, current episode: 53 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -244.6079, current episode: 54 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -225.6282, current episode: 55 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -180.1023, current episode: 56 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -220.6841, current episode: 57 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -161.8015, current episode: 58 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -171.8993, current episode: 59 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -192.9080, current episode: 60 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -220.6704, current episode: 61 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -201.6148, current episode: 62 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -216.1802, current episode: 63 +[2023-04-29 
14:00:25][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -159.0747, current episode: 64 +[2023-04-29 14:00:25][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 100.000000 | iteration_100.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.529882 | 3019.538004 | 120.781520 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -201.550300 | 38.882103 | -108.946518 | -307.607697 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -192.0812, current episode: 2 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -182.9556, current episode: 3 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -172.0809, current episode: 4 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 4 finish episode, final reward: -115.9716, current episode: 5 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -238.5279, current episode: 6 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -221.8655, current episode: 7 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -206.7530, current episode: 8 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -178.2358, current episode: 9 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -218.7804, current episode: 10 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -262.7650, current episode: 11 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -215.1558, current episode: 12 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -262.4443, current episode: 13 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -191.0844, current episode: 14 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -168.1061, current episode: 15 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -156.3011, current episode: 16 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -265.4622, current episode: 17 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -202.0071, current episode: 18 +[2023-04-29 
14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -144.1121, current episode: 19 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -214.5759, current episode: 20 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -221.9673, current episode: 21 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -188.0651, current episode: 22 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -223.0292, current episode: 23 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -148.8257, current episode: 24 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -142.1748, current episode: 25 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -312.3340, current episode: 26 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -143.9546, current episode: 27 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -172.2048, current episode: 28 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -187.7635, current episode: 29 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -199.0601, current episode: 30 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -164.9139, current episode: 31 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -283.4005, 
current episode: 32 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -194.1550, current episode: 33 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -200.3256, current episode: 34 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -184.5029, current episode: 35 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -226.7240, current episode: 36 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -165.3591, current episode: 37 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -258.7562, current episode: 38 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -176.6937, current episode: 39 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -180.9489, current episode: 40 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -176.1530, current episode: 41 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -168.3628, current episode: 42 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -176.4343, current episode: 43 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -201.8598, current episode: 44 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -212.7103, current episode: 45 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish 
episode, final reward: -162.2741, current episode: 46 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -184.3275, current episode: 47 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -174.2825, current episode: 48 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -218.5967, current episode: 49 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -157.1208, current episode: 50 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -227.8741, current episode: 51 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -227.1817, current episode: 52 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -184.0312, current episode: 53 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -253.6058, current episode: 54 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -222.2701, current episode: 55 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -181.2547, current episode: 56 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -191.4113, current episode: 57 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -162.4414, current episode: 58 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -152.1785, current episode: 59 +[2023-04-29 
14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -192.9080, current episode: 60 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -210.6114, current episode: 61 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -204.8181, current episode: 62 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -182.6209, current episode: 63 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -146.5314, current episode: 64 +[2023-04-29 14:02:53][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 150.000000 | iteration_150.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.701749 | 2280.018613 | 91.200745 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -196.273648 | 36.830724 | -115.971642 | -312.333984 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 
14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -217.6694, current episode: 2 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -182.9556, current episode: 3 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -179.5034, current episode: 4 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -236.2090, current episode: 6 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -229.9970, current episode: 7 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -202.0045, current episode: 8 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -180.8807, current episode: 9 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -223.3144, current episode: 10 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -227.0542, current episode: 11 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -223.5144, current episode: 12 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -248.6731, current episode: 13 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -202.0044, current episode: 14 
+[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -163.6766, current episode: 15 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -158.7447, current episode: 16 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -246.4953, current episode: 17 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -200.5168, current episode: 18 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -150.2568, current episode: 19 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -214.5759, current episode: 20 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -216.0586, current episode: 21 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.5844, current episode: 22 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -226.5573, current episode: 23 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -162.8544, current episode: 24 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -139.4200, current episode: 25 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -284.2111, current episode: 26 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -141.8662, current episode: 27 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: 
-169.2080, current episode: 28 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -181.3638, current episode: 29 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -200.0945, current episode: 30 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -175.1742, current episode: 31 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -282.3666, current episode: 32 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -190.1109, current episode: 33 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -217.4866, current episode: 34 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -194.4346, current episode: 35 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -221.9621, current episode: 36 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -176.6436, current episode: 37 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -254.5643, current episode: 38 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -170.8189, current episode: 39 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -193.0464, current episode: 40 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -158.5323, current episode: 41 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 
finish episode, final reward: -173.6675, current episode: 42 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -178.2368, current episode: 43 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -201.8598, current episode: 44 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -202.4931, current episode: 45 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -155.4929, current episode: 46 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -189.4277, current episode: 47 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -174.4784, current episode: 48 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -207.0948, current episode: 49 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -142.2500, current episode: 50 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -243.2396, current episode: 51 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -218.6741, current episode: 52 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -184.0312, current episode: 53 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -249.6307, current episode: 54 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -219.7228, current episode: 55 +[2023-04-29 
14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -178.2935, current episode: 56 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -191.0775, current episode: 57 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -160.3608, current episode: 58 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -145.1126, current episode: 59 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -187.0939, current episode: 60 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -228.8836, current episode: 61 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -192.1092, current episode: 62 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -175.6019, current episode: 63 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -146.5314, current episode: 64 +[2023-04-29 14:05:22][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 200.000000 | iteration_200.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | 
++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.695219 | 2301.431477 | 92.057259 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -195.486174 | 34.693106 | -110.118217 | -284.211060 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -181.0803, current episode: 1 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -211.1574, current episode: 2 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -174.7337, current episode: 3 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -185.1850, current episode: 4 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -114.7999, current episode: 5 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -234.3418, current episode: 6 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -235.2930, current episode: 7 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -232.8680, current episode: 8 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -199.8975, current episode: 9 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -252.4601, 
current episode: 10 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -235.9426, current episode: 11 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -200.9567, current episode: 12 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -252.9538, current episode: 13 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -212.0292, current episode: 14 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -208.3789, current episode: 15 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -173.4365, current episode: 16 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -230.4334, current episode: 17 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -200.0923, current episode: 18 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -175.2290, current episode: 19 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -231.1222, current episode: 20 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -265.4702, current episode: 21 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -198.4274, current episode: 22 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -229.0852, current episode: 23 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish 
episode, final reward: -146.0107, current episode: 24 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -143.3525, current episode: 25 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -301.3060, current episode: 26 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -142.6440, current episode: 27 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -189.2390, current episode: 28 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -174.8356, current episode: 29 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -175.4751, current episode: 30 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -163.0487, current episode: 31 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -271.3767, current episode: 32 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -184.4469, current episode: 33 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -236.3716, current episode: 34 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -196.1071, current episode: 35 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -207.4573, current episode: 36 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -181.9621, current episode: 37 +[2023-04-29 
14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -277.5360, current episode: 38 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -170.5597, current episode: 39 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -159.5184, current episode: 40 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -158.8667, current episode: 41 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -154.5679, current episode: 42 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -173.9567, current episode: 43 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -200.4992, current episode: 44 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -186.6675, current episode: 45 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -153.3205, current episode: 46 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -191.4474, current episode: 47 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -211.9339, current episode: 48 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -218.4199, current episode: 49 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -154.4454, current episode: 50 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -242.3426, 
current episode: 51 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -175.9896, current episode: 52 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -201.2840, current episode: 53 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -250.0280, current episode: 54 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -221.8441, current episode: 55 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -177.5594, current episode: 56 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -199.7704, current episode: 57 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -179.3853, current episode: 58 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -141.8520, current episode: 59 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -210.8811, current episode: 60 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -249.4441, current episode: 61 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -202.9632, current episode: 62 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -209.0303, current episode: 63 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -161.2344, current episode: 64 +[2023-04-29 14:07:49][interaction_serial_evaluator.py:279][INFO] 
++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 250.000000 | iteration_250.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.770248 | 2077.252914 | 83.090117 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -199.755544 | 37.231754 | -114.799942 | -301.306000 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -196.4215, current episode: 1 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -194.9277, current episode: 2 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -166.3999, current episode: 3 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -186.0863, current episode: 4 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -114.7999, current episode: 5 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: 
-244.3763, current episode: 6 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -233.5229, current episode: 7 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -228.2204, current episode: 8 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -192.5429, current episode: 9 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -254.7029, current episode: 10 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -228.8231, current episode: 11 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -214.8451, current episode: 12 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -252.1669, current episode: 13 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -220.4095, current episode: 14 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -204.2755, current episode: 15 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -177.7030, current episode: 16 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -225.0703, current episode: 17 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -219.4565, current episode: 18 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -170.5495, current episode: 19 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish 
episode, final reward: -236.1740, current episode: 20 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -267.4284, current episode: 21 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -198.7756, current episode: 22 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -230.4781, current episode: 23 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -138.3619, current episode: 24 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -150.6696, current episode: 25 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -253.2410, current episode: 26 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -131.1156, current episode: 27 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -185.4775, current episode: 28 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -171.5875, current episode: 29 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -206.1801, current episode: 30 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -175.3547, current episode: 31 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -270.1190, current episode: 32 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -180.9219, current episode: 33 +[2023-04-29 
14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -249.1457, current episode: 34 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -194.3970, current episode: 35 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -205.9067, current episode: 36 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -174.3744, current episode: 37 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -244.8342, current episode: 38 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -190.4943, current episode: 39 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -159.5184, current episode: 40 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -159.0378, current episode: 41 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -148.0535, current episode: 42 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -169.2331, current episode: 43 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.5003, current episode: 44 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -201.1036, current episode: 45 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -160.1663, current episode: 46 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -175.3031, 
current episode: 47 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -201.1599, current episode: 48 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -227.3560, current episode: 49 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -171.2852, current episode: 50 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -274.9165, current episode: 51 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -173.4787, current episode: 52 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -213.6666, current episode: 53 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -239.6660, current episode: 54 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -222.7764, current episode: 55 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -164.2889, current episode: 56 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -217.4065, current episode: 57 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -163.0690, current episode: 58 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -146.5544, current episode: 59 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -210.9312, current episode: 60 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish 
episode, final reward: -230.2037, current episode: 61 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -197.2699, current episode: 62 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -211.1949, current episode: 63 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -172.6543, current episode: 64 +[2023-04-29 14:09:47][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 300.000000 | iteration_300.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.839486 | 1905.928099 | 76.237124 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -199.752056 | 35.793557 | -114.799942 | -274.916504 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -202.9319, current episode: 1 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -182.6266, current 
episode: 2 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -169.8902, current episode: 3 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -202.0789, current episode: 4 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -114.7999, current episode: 5 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -227.6967, current episode: 6 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -221.4775, current episode: 7 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -223.6498, current episode: 8 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -177.1600, current episode: 9 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -219.4634, current episode: 10 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -230.9501, current episode: 11 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.2374, current episode: 12 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -258.2667, current episode: 13 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -226.9432, current episode: 14 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -182.1154, current episode: 15 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: 
-173.2038, current episode: 16 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -199.7006, current episode: 17 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -202.5596, current episode: 18 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -162.0479, current episode: 19 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -230.2970, current episode: 20 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -246.7662, current episode: 21 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -204.9069, current episode: 22 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -210.2217, current episode: 23 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -146.3872, current episode: 24 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -150.9197, current episode: 25 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -278.0017, current episode: 26 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -140.6923, current episode: 27 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -185.9354, current episode: 28 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -172.9278, current episode: 29 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 
finish episode, final reward: -188.0118, current episode: 30 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -167.6129, current episode: 31 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -272.8329, current episode: 32 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -186.5162, current episode: 33 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -219.7244, current episode: 34 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -173.7483, current episode: 35 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -209.3400, current episode: 36 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -175.3538, current episode: 37 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -267.9726, current episode: 38 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -174.4784, current episode: 39 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -155.0486, current episode: 40 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -175.4787, current episode: 41 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -161.0601, current episode: 42 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -175.2188, current episode: 43 +[2023-04-29 
14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -155.3845, current episode: 44 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -207.1568, current episode: 45 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -161.5135, current episode: 46 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -191.3083, current episode: 47 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -192.4593, current episode: 48 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -200.6798, current episode: 49 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -172.0034, current episode: 50 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -234.5556, current episode: 51 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -162.7853, current episode: 52 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -202.0614, current episode: 53 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -235.5857, current episode: 54 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -233.3586, current episode: 55 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -151.8300, current episode: 56 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -201.7011, 
current episode: 57 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -180.2999, current episode: 58 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -158.6285, current episode: 59 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -196.7257, current episode: 60 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -217.7570, current episode: 61 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -186.2294, current episode: 62 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -192.5976, current episode: 63 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -153.9682, current episode: 64 +[2023-04-29 14:11:35][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 350.000000 | iteration_350.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 1.048725 | 1525.662149 | 61.026486 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ 
+| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -194.575672 | 33.350840 | -114.799942 | -278.001678 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -207.9027, current episode: 1 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -167.0500, current episode: 2 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -161.1096, current episode: 3 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -195.3309, current episode: 4 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -108.9465, current episode: 5 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -251.0442, current episode: 6 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -221.4775, current episode: 7 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -222.6647, current episode: 8 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -181.6731, current episode: 9 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -252.6070, current episode: 10 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -243.5552, current episode: 11 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.1063, current episode: 12 +[2023-04-29 
14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -266.8352, current episode: 13 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -233.1039, current episode: 14 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -153.7867, current episode: 15 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -174.6716, current episode: 16 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -224.1025, current episode: 17 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -205.1042, current episode: 18 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -161.2413, current episode: 19 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -230.4367, current episode: 20 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -264.6400, current episode: 21 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.7796, current episode: 22 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -211.4018, current episode: 23 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -142.5181, current episode: 24 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -154.2594, current episode: 25 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -276.6152, 
current episode: 26 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -136.0169, current episode: 27 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -197.1915, current episode: 28 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -191.9259, current episode: 29 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -180.6680, current episode: 30 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -155.9025, current episode: 31 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -271.3767, current episode: 32 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -185.1744, current episode: 33 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -211.9648, current episode: 34 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -173.0769, current episode: 35 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -202.1489, current episode: 36 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -174.3241, current episode: 37 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -247.9803, current episode: 38 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -190.2438, current episode: 39 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish 
episode, final reward: -163.0181, current episode: 40 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -179.8732, current episode: 41 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -162.0468, current episode: 42 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -175.4727, current episode: 43 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -185.2690, current episode: 44 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -202.9673, current episode: 45 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -158.0955, current episode: 46 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -179.0794, current episode: 47 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -204.2491, current episode: 48 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -214.4522, current episode: 49 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -169.9330, current episode: 50 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -257.0569, current episode: 51 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -177.8266, current episode: 52 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -209.3965, current episode: 53 +[2023-04-29 
14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -236.9369, current episode: 54 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -235.9294, current episode: 55 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -154.1952, current episode: 56 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -204.6221, current episode: 57 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -165.4405, current episode: 58 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -158.7669, current episode: 59 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -214.9565, current episode: 60 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -209.8327, current episode: 61 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -199.7447, current episode: 62 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -192.3090, current episode: 63 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -146.0844, current episode: 64 +[2023-04-29 14:13:57][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 400.000000 | iteration_400.pth.tar | 64.000000 | 1600.000000 | 
++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.817595 | 1956.959492 | 78.278380 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -197.039270 | 36.261736 | -108.946518 | -276.615173 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -176.4776, current episode: 2 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -164.9409, current episode: 3 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -198.3102, current episode: 4 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -108.9465, current episode: 5 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -232.8472, current episode: 6 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -218.1895, current episode: 7 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -220.5849, current 
episode: 8 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -174.8468, current episode: 9 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -234.9934, current episode: 10 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -220.1860, current episode: 11 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -212.8183, current episode: 12 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -260.5780, current episode: 13 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -213.4720, current episode: 14 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -156.3100, current episode: 15 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -167.0897, current episode: 16 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -204.2159, current episode: 17 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -202.5596, current episode: 18 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -160.2516, current episode: 19 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -230.2969, current episode: 20 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -225.2017, current episode: 21 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final 
reward: -199.4241, current episode: 22 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -213.0997, current episode: 23 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -150.8300, current episode: 24 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -164.9789, current episode: 25 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -262.2566, current episode: 26 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -124.2999, current episode: 27 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -190.7909, current episode: 28 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -167.2646, current episode: 29 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.4082, current episode: 30 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -141.0441, current episode: 31 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -271.0081, current episode: 32 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -197.5149, current episode: 33 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -211.6372, current episode: 34 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -160.5972, current episode: 35 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 35 finish episode, final reward: -214.4167, current episode: 36 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -168.6259, current episode: 37 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -232.5483, current episode: 38 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -192.0791, current episode: 39 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -159.5184, current episode: 40 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -175.5373, current episode: 41 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -164.4377, current episode: 42 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -185.6742, current episode: 43 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -178.3931, current episode: 44 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -198.0340, current episode: 45 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -152.8577, current episode: 46 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -172.7739, current episode: 47 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -206.3049, current episode: 48 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -210.0471, current episode: 49 +[2023-04-29 
14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -177.2365, current episode: 50 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -229.9539, current episode: 51 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -164.6633, current episode: 52 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -205.6198, current episode: 53 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -222.4246, current episode: 54 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -237.4438, current episode: 55 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -151.7998, current episode: 56 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -220.6093, current episode: 57 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -166.6118, current episode: 58 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -160.8607, current episode: 59 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -214.2297, current episode: 60 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -192.0380, current episode: 61 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -188.3553, current episode: 62 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -182.5524, 
current episode: 63 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.9032, current episode: 64 +[2023-04-29 14:16:28][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 450.000000 | iteration_450.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.745105 | 2147.347009 | 85.893880 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -191.953906 | 32.840458 | -108.946518 | -271.008148 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -173.9875, current episode: 2 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -152.4852, current episode: 3 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -185.2876, current episode: 4 +[2023-04-29 
14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -105.5731, current episode: 5 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -234.1571, current episode: 6 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -218.0876, current episode: 7 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -216.3839, current episode: 8 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -169.6425, current episode: 9 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -234.0063, current episode: 10 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -225.1490, current episode: 11 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -212.3294, current episode: 12 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -255.5986, current episode: 13 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -223.8395, current episode: 14 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -155.8460, current episode: 15 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -157.6037, current episode: 16 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -196.5370, current episode: 17 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -197.5429, current 
episode: 18 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -159.3089, current episode: 19 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -230.2969, current episode: 20 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -226.4105, current episode: 21 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -199.4241, current episode: 22 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -212.6559, current episode: 23 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -148.8299, current episode: 24 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -166.2834, current episode: 25 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -268.0692, current episode: 26 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -116.6146, current episode: 27 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -184.1239, current episode: 28 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -160.9601, current episode: 29 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -164.6981, current episode: 30 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -137.7903, current episode: 31 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, 
final reward: -271.0081, current episode: 32 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -198.7399, current episode: 33 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -202.9499, current episode: 34 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -162.1778, current episode: 35 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -206.5085, current episode: 36 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -175.6275, current episode: 37 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -232.0206, current episode: 38 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -188.8875, current episode: 39 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -171.3562, current episode: 40 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -164.3677, current episode: 41 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -174.7325, current episode: 42 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -185.3737, current episode: 43 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.1332, current episode: 44 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -210.9309, current episode: 45 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 45 finish episode, final reward: -152.8577, current episode: 46 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -158.5112, current episode: 47 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -201.6829, current episode: 48 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -193.5849, current episode: 49 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -163.7445, current episode: 50 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -238.8163, current episode: 51 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -172.4511, current episode: 52 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -188.7710, current episode: 53 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -232.9407, current episode: 54 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -238.0977, current episode: 55 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -157.6080, current episode: 56 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -197.5075, current episode: 57 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -156.7054, current episode: 58 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -155.5956, current episode: 59 +[2023-04-29 
14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -188.5474, current episode: 60 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -187.6421, current episode: 61 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.1189, current episode: 62 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -166.6651, current episode: 63 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -148.4506, current episode: 64 +[2023-04-29 14:18:59][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 500.000000 | iteration_500.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.763085 | 2096.751101 | 83.870044 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -189.419742 | 34.015793 | -105.573143 | -271.008148 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 
14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -180.6680, current episode: 2 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -165.4477, current episode: 3 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -185.2876, current episode: 4 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -108.9465, current episode: 5 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -231.8625, current episode: 6 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -203.8739, current episode: 8 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -175.0804, current episode: 9 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -211.8085, current episode: 10 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -217.2232, current episode: 11 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -212.3100, current episode: 12 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -260.6329, current episode: 13 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -213.8040, current episode: 14 
+[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -157.2435, current episode: 15 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -157.6037, current episode: 16 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -208.9800, current episode: 17 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -197.5429, current episode: 18 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -153.3254, current episode: 19 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -230.2970, current episode: 20 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -222.2881, current episode: 21 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -206.9128, current episode: 22 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -214.2255, current episode: 23 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -137.3778, current episode: 24 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -166.2834, current episode: 25 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -266.5998, current episode: 26 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -118.0935, current episode: 27 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: 
-179.8648, current episode: 28 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -174.4144, current episode: 29 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -175.9927, current episode: 30 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -140.6515, current episode: 31 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -260.7602, current episode: 32 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -199.6722, current episode: 33 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -204.2041, current episode: 34 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.7788, current episode: 35 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -194.1609, current episode: 36 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -170.5404, current episode: 37 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -232.4957, current episode: 38 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -187.5881, current episode: 39 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -185.8441, current episode: 40 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -164.5002, current episode: 41 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 
finish episode, final reward: -177.1750, current episode: 42 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -174.5860, current episode: 43 +[2023-04-29 14:21:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.1332, current episode: 44 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -216.9059, current episode: 45 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -152.8577, current episode: 46 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -180.6828, current episode: 47 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -186.2687, current episode: 48 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -193.2588, current episode: 49 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -156.3129, current episode: 50 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -236.3334, current episode: 51 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -179.2896, current episode: 52 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -195.6810, current episode: 53 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -224.1437, current episode: 54 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -237.4438, current episode: 55 +[2023-04-29 
14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -161.8461, current episode: 56 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -201.5644, current episode: 57 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -160.1515, current episode: 58 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -162.8803, current episode: 59 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -189.9642, current episode: 60 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -180.7295, current episode: 61 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.3378, current episode: 62 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -166.6651, current episode: 63 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -138.5418, current episode: 64 +[2023-04-29 14:21:29][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 550.000000 | iteration_550.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | 
++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.916508 | 1745.755798 | 69.830232 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -189.291521 | 32.200207 | -108.946518 | -266.599792 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -166.9176, current episode: 2 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -160.9663, current episode: 3 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -187.4241, current episode: 4 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -111.4266, current episode: 5 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -230.7540, current episode: 6 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -196.5198, current episode: 8 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -175.0804, current episode: 9 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -203.5374, 
current episode: 10 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -219.7343, current episode: 11 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -198.7857, current episode: 12 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -252.9901, current episode: 13 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -225.8176, current episode: 14 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -158.5839, current episode: 15 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -154.9687, current episode: 16 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -203.7315, current episode: 17 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -197.5429, current episode: 18 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -153.3254, current episode: 19 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -207.7029, current episode: 21 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -207.1065, current episode: 22 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -210.6118, current episode: 23 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish 
episode, final reward: -139.7894, current episode: 24 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -179.2657, current episode: 25 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -267.8303, current episode: 26 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -127.8768, current episode: 27 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -179.5671, current episode: 28 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -162.0813, current episode: 29 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -174.7538, current episode: 30 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -146.8096, current episode: 31 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -260.7602, current episode: 32 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -208.8387, current episode: 33 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -196.6001, current episode: 34 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.7788, current episode: 35 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -197.8358, current episode: 36 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -170.0724, current episode: 37 +[2023-04-29 
14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -223.9591, current episode: 38 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -188.4543, current episode: 39 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -185.8441, current episode: 40 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -156.8642, current episode: 41 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -168.5811, current episode: 42 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -175.0631, current episode: 43 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.1332, current episode: 44 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -206.2518, current episode: 45 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -134.0784, current episode: 46 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -163.5127, current episode: 47 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -159.0772, current episode: 48 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -185.2779, current episode: 49 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -159.8352, current episode: 50 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -219.0620, 
current episode: 51 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -174.7980, current episode: 52 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -197.9849, current episode: 53 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -223.1301, current episode: 54 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -243.1231, current episode: 55 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -160.5587, current episode: 56 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -201.5644, current episode: 57 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -142.1751, current episode: 58 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -167.8103, current episode: 59 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -182.8452, current episode: 60 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -179.9662, current episode: 61 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -188.6189, current episode: 62 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -164.2240, current episode: 63 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.1851, current episode: 64 +[2023-04-29 14:23:28][interaction_serial_evaluator.py:279][INFO] 
++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 600.000000 | iteration_600.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.821933 | 1946.630437 | 77.865217 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -186.579228 | 31.805448 | -111.426567 | -267.830292 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -175.8523, current episode: 2 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -156.7276, current episode: 3 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -202.2514, current episode: 4 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: 
-226.4334, current episode: 6 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -205.8895, current episode: 8 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -169.3161, current episode: 9 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -212.3450, current episode: 10 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -244.7324, current episode: 11 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -198.7857, current episode: 12 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -251.7004, current episode: 13 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -199.5374, current episode: 14 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.7199, current episode: 15 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -171.7012, current episode: 16 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -209.0824, current episode: 17 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -197.9843, current episode: 18 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -153.3254, current episode: 19 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish 
episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -228.0661, current episode: 21 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -199.4241, current episode: 22 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -211.1396, current episode: 23 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -152.1290, current episode: 24 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -178.6073, current episode: 25 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -264.9465, current episode: 26 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -125.6415, current episode: 27 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -178.1540, current episode: 28 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -156.9297, current episode: 29 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -172.9906, current episode: 30 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -164.0517, current episode: 31 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -272.5674, current episode: 32 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -208.4409, current episode: 33 +[2023-04-29 
14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -195.8576, current episode: 34 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.7788, current episode: 35 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -217.6407, current episode: 36 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -174.0180, current episode: 37 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -215.2855, current episode: 38 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -188.9678, current episode: 39 +[2023-04-29 14:25:15][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -185.8441, current episode: 40 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -156.8642, current episode: 41 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -175.7844, current episode: 42 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -178.9596, current episode: 43 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -193.1332, current episode: 44 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -204.1956, current episode: 45 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -165.5448, 
current episode: 47 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -189.4623, current episode: 48 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -186.8172, current episode: 49 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -159.8792, current episode: 50 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -230.6902, current episode: 51 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -158.2355, current episode: 52 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -188.7710, current episode: 53 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -215.2125, current episode: 54 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -231.9248, current episode: 55 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -167.3752, current episode: 56 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -182.4612, current episode: 57 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -146.9785, current episode: 58 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -157.1158, current episode: 59 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -210.0822, current episode: 60 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish 
episode, final reward: -179.9408, current episode: 61 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.3378, current episode: 62 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -159.0016, current episode: 63 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.5542, current episode: 64 +[2023-04-29 14:25:16][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 650.000000 | iteration_650.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.830538 | 1926.463058 | 77.058522 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -188.618032 | 31.735551 | -110.118217 | -272.567352 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -205.2282, current episode: 1 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -174.0161, current 
episode: 2 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -160.9663, current episode: 3 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -207.9501, current episode: 4 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -216.2331, current episode: 6 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -200.2346, current episode: 8 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -170.0483, current episode: 9 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -212.3450, current episode: 10 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -219.6182, current episode: 11 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -202.1429, current episode: 12 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -247.9084, current episode: 13 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -198.8670, current episode: 14 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -154.9499, current episode: 15 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: 
-157.6037, current episode: 16 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -209.5936, current episode: 17 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -198.3148, current episode: 18 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -151.1521, current episode: 19 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -213.3226, current episode: 21 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.2808, current episode: 22 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -207.1161, current episode: 23 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -157.0610, current episode: 24 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -178.3846, current episode: 25 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -259.8378, current episode: 26 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -113.2361, current episode: 27 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -173.8985, current episode: 28 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -156.1971, current episode: 29 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 
finish episode, final reward: -178.3508, current episode: 30 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -167.0222, current episode: 31 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -272.5674, current episode: 32 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -210.0311, current episode: 33 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -193.1470, current episode: 34 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -158.1042, current episode: 35 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -217.5902, current episode: 36 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -174.3800, current episode: 37 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -204.8982, current episode: 38 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -157.3960, current episode: 39 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -171.1446, current episode: 40 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -157.2398, current episode: 41 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -175.7844, current episode: 42 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -173.6908, current episode: 43 +[2023-04-29 
14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -183.9035, current episode: 44 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -212.0320, current episode: 45 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -168.8040, current episode: 47 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -194.9857, current episode: 48 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -186.8172, current episode: 49 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -175.1598, current episode: 50 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -220.8819, current episode: 51 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -140.8836, current episode: 52 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -195.5419, current episode: 53 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -219.8906, current episode: 54 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -225.5750, current episode: 55 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -159.0486, current episode: 56 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -182.4612, 
current episode: 57 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -142.0819, current episode: 58 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -162.3512, current episode: 59 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -201.5598, current episode: 60 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -173.8078, current episode: 61 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -188.6189, current episode: 62 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -167.4108, current episode: 63 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.9032, current episode: 64 +[2023-04-29 14:27:37][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 700.000000 | iteration_700.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.788524 | 2029.106796 | 81.164272 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ 
+| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -186.089235 | 31.294294 | -110.118217 | -272.567352 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -202.0331, current episode: 1 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -174.0161, current episode: 2 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -175.0733, current episode: 3 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -198.0528, current episode: 4 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -222.4870, current episode: 6 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -204.0174, current episode: 8 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -160.2357, current episode: 9 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0739, current episode: 10 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -222.7247, current episode: 11 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -204.6160, current episode: 12 +[2023-04-29 
14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -251.7004, current episode: 13 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -200.3442, current episode: 14 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -153.6646, current episode: 15 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -154.9687, current episode: 16 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -204.1693, current episode: 17 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -198.0946, current episode: 18 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -153.3254, current episode: 19 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -211.2444, current episode: 21 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -207.1065, current episode: 22 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -203.3187, current episode: 23 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -144.9200, current episode: 24 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -178.3846, current episode: 25 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -240.3554, 
current episode: 26 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -112.2357, current episode: 27 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -172.9200, current episode: 28 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -154.8602, current episode: 29 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -172.5743, current episode: 30 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -167.1911, current episode: 31 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -272.5674, current episode: 32 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -206.0666, current episode: 33 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -190.8138, current episode: 34 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.9580, current episode: 35 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -220.1328, current episode: 36 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -168.3968, current episode: 37 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -204.8982, current episode: 38 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -167.5311, current episode: 39 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish 
episode, final reward: -182.9747, current episode: 40 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -156.8060, current episode: 41 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -175.7844, current episode: 42 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -175.0631, current episode: 43 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -183.9035, current episode: 44 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -202.9775, current episode: 45 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -170.1517, current episode: 47 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -169.0260, current episode: 48 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -183.8299, current episode: 49 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -180.3306, current episode: 50 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -229.5909, current episode: 51 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -146.5292, current episode: 52 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -195.5419, current episode: 53 +[2023-04-29 
14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -190.1415, current episode: 54 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -231.3936, current episode: 55 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -166.2302, current episode: 56 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -182.4612, current episode: 57 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -135.8694, current episode: 58 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -157.1158, current episode: 59 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -199.5208, current episode: 60 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -170.2651, current episode: 61 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -188.6189, current episode: 62 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -167.4108, current episode: 63 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.9032, current episode: 64 +[2023-04-29 14:30:06][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 750.000000 | iteration_750.pth.tar | 64.000000 | 1600.000000 | 
++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.997803 | 1603.523171 | 64.140927 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -184.853930 | 30.715020 | -110.118217 | -272.567352 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -202.0331, current episode: 1 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -164.7792, current episode: 2 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -175.0733, current episode: 3 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -189.5615, current episode: 4 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -222.4870, current episode: 6 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -199.6849, current 
episode: 8 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -167.5063, current episode: 9 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -200.0363, current episode: 10 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -206.4339, current episode: 11 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -204.8257, current episode: 12 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -251.7004, current episode: 13 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -200.6557, current episode: 14 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -150.8691, current episode: 15 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -157.6037, current episode: 16 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -185.7656, current episode: 17 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -198.3148, current episode: 18 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -151.1521, current episode: 19 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -242.7674, current episode: 21 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final 
reward: -207.1065, current episode: 22 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -202.7908, current episode: 23 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -156.4275, current episode: 24 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -176.6404, current episode: 25 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -241.0769, current episode: 26 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -120.9160, current episode: 27 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -170.6819, current episode: 28 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -143.9137, current episode: 29 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -159.7933, current episode: 31 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -280.4853, current episode: 32 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -205.2515, current episode: 33 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -190.8138, current episode: 34 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -166.3692, current episode: 35 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 35 finish episode, final reward: -212.8697, current episode: 36 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -166.6988, current episode: 37 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -214.5422, current episode: 38 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -147.9948, current episode: 39 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -182.9747, current episode: 40 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -148.8799, current episode: 41 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -154.6361, current episode: 42 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -167.6011, current episode: 43 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -170.9516, current episode: 44 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -213.7779, current episode: 45 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -160.6108, current episode: 47 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -165.6988, current episode: 48 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -184.2671, current episode: 49 +[2023-04-29 
14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -218.6720, current episode: 51 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -137.3055, current episode: 52 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -195.5419, current episode: 53 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -189.4057, current episode: 54 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -214.0140, current episode: 55 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -158.0799, current episode: 56 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -182.4612, current episode: 57 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -129.0385, current episode: 58 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -158.1857, current episode: 59 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -172.4777, current episode: 60 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -170.3219, current episode: 61 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.4728, current episode: 62 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -167.4108, 
current episode: 63 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -139.4688, current episode: 64 +[2023-04-29 14:32:36][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 800.000000 | iteration_800.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.758741 | 2108.755554 | 84.350222 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -181.900024 | 32.121789 | -110.118217 | -280.485291 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -192.7626, current episode: 1 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -164.7792, current episode: 2 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -160.1466, current episode: 3 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -178.4193, current episode: 4 +[2023-04-29 
14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -226.2391, current episode: 6 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -186.5923, current episode: 8 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -159.4059, current episode: 9 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0739, current episode: 10 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -204.3372, current episode: 11 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -211.3791, current episode: 12 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -243.6096, current episode: 13 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -200.2195, current episode: 14 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -153.2479, current episode: 15 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -161.4033, current episode: 16 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -189.4724, current episode: 17 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -191.3161, current 
episode: 18 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -149.9684, current episode: 19 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -226.1639, current episode: 21 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -206.9128, current episode: 22 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -202.7813, current episode: 23 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -151.9358, current episode: 24 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -167.0545, current episode: 25 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -249.5714, current episode: 26 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -123.0537, current episode: 27 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -181.9475, current episode: 28 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -143.4188, current episode: 29 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -154.3117, current episode: 31 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, 
final reward: -277.2811, current episode: 32 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -192.3409, current episode: 33 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -184.6727, current episode: 34 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -160.2156, current episode: 35 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -197.9537, current episode: 36 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -166.6988, current episode: 37 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -197.3406, current episode: 38 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -158.0904, current episode: 39 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -204.0696, current episode: 40 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -156.4428, current episode: 41 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -148.8089, current episode: 42 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -176.3516, current episode: 43 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -167.0486, current episode: 44 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -221.8593, current episode: 45 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -156.5281, current episode: 47 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -183.1641, current episode: 48 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -184.2671, current episode: 49 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -175.9292, current episode: 50 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -211.4499, current episode: 51 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -136.8948, current episode: 52 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -208.6570, current episode: 53 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -184.8010, current episode: 54 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -213.1514, current episode: 55 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -150.5431, current episode: 56 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -179.4683, current episode: 57 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -140.5269, current episode: 58 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -164.5236, current episode: 59 +[2023-04-29 
14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -172.4777, current episode: 60 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -174.8789, current episode: 61 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.4728, current episode: 62 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -164.3246, current episode: 63 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.9032, current episode: 64 +[2023-04-29 14:35:05][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 850.000000 | iteration_850.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.739490 | 2163.654429 | 86.546177 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -181.300807 | 30.779804 | -113.974457 | -277.281097 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 
14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -192.7626, current episode: 1 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -162.6094, current episode: 2 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -160.1466, current episode: 3 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -181.1174, current episode: 4 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -115.3831, current episode: 5 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -232.8577, current episode: 6 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -186.1234, current episode: 8 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -158.8085, current episode: 9 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -212.3450, current episode: 10 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -205.7306, current episode: 11 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -201.2435, current episode: 12 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -241.5654, current episode: 13 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -200.3985, current episode: 14 
+[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -159.1541, current episode: 16 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -177.3881, current episode: 17 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -191.3161, current episode: 18 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -140.7806, current episode: 19 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -218.9500, current episode: 20 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -209.4292, current episode: 21 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -206.7203, current episode: 22 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -202.7813, current episode: 23 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -130.5688, current episode: 24 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -169.5772, current episode: 25 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -249.5406, current episode: 26 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -132.7934, current episode: 27 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: 
-173.8985, current episode: 28 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -143.2918, current episode: 29 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -155.9268, current episode: 31 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -257.3462, current episode: 32 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -207.4900, current episode: 33 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -186.6446, current episode: 34 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -160.2156, current episode: 35 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -191.7204, current episode: 36 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -166.6988, current episode: 37 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -196.6223, current episode: 38 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -158.0904, current episode: 39 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -183.6741, current episode: 40 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -142.8347, current episode: 41 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 
finish episode, final reward: -153.7031, current episode: 42 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -176.3516, current episode: 43 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -161.5814, current episode: 44 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -216.7848, current episode: 45 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -150.8545, current episode: 47 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -196.8308, current episode: 48 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -184.2671, current episode: 49 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -175.9292, current episode: 50 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -203.6999, current episode: 51 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -137.5531, current episode: 52 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -208.6570, current episode: 53 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -181.5768, current episode: 54 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -207.0468, current episode: 55 +[2023-04-29 
14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -174.9850, current episode: 56 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -170.9492, current episode: 57 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -132.5928, current episode: 58 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -156.9778, current episode: 59 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -173.8398, current episode: 60 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -170.3219, current episode: 61 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -177.2695, current episode: 62 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -172.2374, current episode: 63 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -147.9032, current episode: 64 +[2023-04-29 14:37:06][interaction_serial_evaluator.py:279][INFO] ++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 900.000000 | iteration_900.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | 
++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.802166 | 1994.598824 | 79.783953 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -179.440809 | 29.598483 | -115.383057 | -257.346161 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -192.7626, current episode: 1 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -162.6094, current episode: 2 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -156.7276, current episode: 3 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -181.1174, current episode: 4 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -228.6195, current episode: 6 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -186.1234, current episode: 8 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -158.9388, current episode: 9 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -212.3450, 
current episode: 10 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -205.7306, current episode: 11 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -205.4019, current episode: 12 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -228.3563, current episode: 13 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -188.8482, current episode: 14 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.7199, current episode: 15 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -159.1541, current episode: 16 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -177.3881, current episode: 17 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -199.8528, current episode: 18 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -140.7806, current episode: 19 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -228.9871, current episode: 20 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -219.7014, current episode: 21 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -206.6353, current episode: 22 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -202.7908, current episode: 23 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish 
episode, final reward: -141.7573, current episode: 24 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -171.6973, current episode: 25 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -239.2726, current episode: 26 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -137.2118, current episode: 27 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -173.8985, current episode: 28 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -143.2918, current episode: 29 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -156.3114, current episode: 31 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -271.1989, current episode: 32 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -186.6458, current episode: 33 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -184.6727, current episode: 34 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -160.2156, current episode: 35 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -183.3268, current episode: 36 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -159.0000, current episode: 37 +[2023-04-29 
14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -202.0103, current episode: 38 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -158.0904, current episode: 39 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -161.9726, current episode: 40 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -142.8347, current episode: 41 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -152.7610, current episode: 42 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -176.3516, current episode: 43 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -161.5814, current episode: 44 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -214.8992, current episode: 45 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -148.1983, current episode: 47 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -184.5476, current episode: 48 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -184.2271, current episode: 49 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -203.6999, 
current episode: 51 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -136.0074, current episode: 52 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -205.8456, current episode: 53 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -181.6264, current episode: 54 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -211.6700, current episode: 55 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -174.9850, current episode: 56 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -170.9492, current episode: 57 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -149.5355, current episode: 58 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -157.0863, current episode: 59 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -173.8398, current episode: 60 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -170.2651, current episode: 61 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -188.6189, current episode: 62 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -153.2419, current episode: 63 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -138.5418, current episode: 64 +[2023-04-29 14:38:52][interaction_serial_evaluator.py:279][INFO] 
++-------+------------+-----------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+------------+-----------------------+---------------+---------------+ +| Value | 950.000000 | iteration_950.pth.tar | 64.000000 | 1600.000000 | ++-------+------------+-----------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.985097 | 1624.206164 | 64.968247 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -178.516761 | 29.543728 | -113.974457 | -271.198853 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -192.7626, current episode: 1 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -155.2880, current episode: 2 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -161.0592, current episode: 3 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -173.4111, current episode: 4 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: 
-227.3939, current episode: 6 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -192.9307, current episode: 8 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -160.9806, current episode: 9 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0739, current episode: 10 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -205.7306, current episode: 11 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -209.8560, current episode: 12 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -227.6893, current episode: 13 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -188.8428, current episode: 14 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -159.5803, current episode: 16 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -190.9718, current episode: 17 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -199.8528, current episode: 18 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -156.0153, current episode: 19 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish 
episode, final reward: -218.9500, current episode: 20 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -201.1823, current episode: 21 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.2549, current episode: 22 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -196.0353, current episode: 23 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -118.7018, current episode: 24 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -159.6599, current episode: 25 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -246.2920, current episode: 26 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -133.1812, current episode: 27 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -177.1757, current episode: 28 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -151.2742, current episode: 29 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -165.1011, current episode: 30 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -163.0401, current episode: 31 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -262.5036, current episode: 32 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -186.6458, current episode: 33 +[2023-04-29 
14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -182.9847, current episode: 34 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -161.4581, current episode: 35 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -183.3268, current episode: 36 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -159.0000, current episode: 37 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -214.4334, current episode: 38 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -148.6892, current episode: 39 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -158.9851, current episode: 40 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -144.9855, current episode: 41 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -152.7610, current episode: 42 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -165.6172, current episode: 43 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -156.9933, current episode: 44 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -211.0759, current episode: 45 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -147.1920, 
current episode: 47 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -184.4923, current episode: 48 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -184.7551, current episode: 49 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -175.9292, current episode: 50 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -187.3951, current episode: 51 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -141.5677, current episode: 52 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -200.6345, current episode: 53 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -181.8468, current episode: 54 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -211.6700, current episode: 55 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -146.9053, current episode: 56 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -166.2490, current episode: 57 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -143.7787, current episode: 58 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -166.1858, current episode: 59 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.8443, current episode: 60 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish 
episode, final reward: -181.1017, current episode: 61 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -190.3309, current episode: 62 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -153.2419, current episode: 63 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -138.5418, current episode: 64 +[2023-04-29 14:41:09][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1000.000000 | iteration_1000.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.763529 | 2095.534080 | 83.821363 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -177.008442 | 29.346648 | -113.974457 | -262.503601 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -192.7626, current episode: 1 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -158.1348, current 
episode: 2 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -151.7103, current episode: 3 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -152.1969, current episode: 4 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -114.4441, current episode: 5 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -219.4009, current episode: 6 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -179.2378, current episode: 8 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -159.5027, current episode: 9 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0739, current episode: 10 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -219.1651, current episode: 12 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -216.3454, current episode: 13 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -188.8495, current episode: 14 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: 
-157.4092, current episode: 16 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -171.5723, current episode: 17 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -199.8910, current episode: 18 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -140.8940, current episode: 19 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -212.8246, current episode: 20 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -202.4429, current episode: 21 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.0907, current episode: 22 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -200.7895, current episode: 23 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -118.7018, current episode: 24 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -166.4461, current episode: 25 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -241.4861, current episode: 26 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -133.5219, current episode: 27 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -168.7478, current episode: 28 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -140.7315, current episode: 29 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 
finish episode, final reward: -165.1011, current episode: 30 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -147.6968, current episode: 31 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -230.5580, current episode: 32 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -206.0666, current episode: 33 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -186.6446, current episode: 34 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.2527, current episode: 35 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -175.6511, current episode: 36 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -211.5872, current episode: 38 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -147.9948, current episode: 39 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -137.8004, current episode: 40 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -135.0125, current episode: 41 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -143.0553, current episode: 42 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -161.1012, current episode: 43 +[2023-04-29 
14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -163.6646, current episode: 44 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -214.3938, current episode: 45 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -153.3660, current episode: 47 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -183.4200, current episode: 48 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -187.6818, current episode: 49 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -193.8446, current episode: 51 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -141.5677, current episode: 52 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -212.8944, current episode: 53 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -181.6261, current episode: 54 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -209.9026, current episode: 55 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -148.5122, current episode: 56 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -164.4797, 
current episode: 57 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -139.2711, current episode: 58 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -168.7156, current episode: 59 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.8443, current episode: 60 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -176.3865, current episode: 61 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -179.7322, current episode: 62 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -135.8960, current episode: 64 +[2023-04-29 14:43:37][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1050.000000 | iteration_1050.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.709157 | 2256.200411 | 90.248016 | ++-------+-------------------------+---------------+---------------------+----------------------+ 
++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -173.809232 | 29.389850 | -114.444099 | -241.486115 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -155.2880, current episode: 2 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -151.7103, current episode: 3 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -166.8356, current episode: 4 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -218.7023, current episode: 6 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -179.2378, current episode: 8 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -159.9526, current episode: 9 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -207.5976, current episode: 10 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish 
episode, final reward: -218.2513, current episode: 12 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -213.0169, current episode: 13 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -183.6299, current episode: 14 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -148.9031, current episode: 16 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -177.3881, current episode: 17 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -200.7612, current episode: 18 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -146.5195, current episode: 19 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -212.8246, current episode: 20 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -210.3878, current episode: 21 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.2549, current episode: 22 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -200.7895, current episode: 23 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -124.2646, current episode: 24 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -159.6286, current episode: 25 +[2023-04-29 
14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -252.1357, current episode: 26 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -133.5953, current episode: 27 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -165.1017, current episode: 28 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -162.2838, current episode: 31 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -250.2940, current episode: 32 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -195.8542, current episode: 33 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -186.6446, current episode: 34 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.2527, current episode: 35 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -180.9464, current episode: 36 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -201.9400, current episode: 38 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -147.9948, 
current episode: 39 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -149.8199, current episode: 40 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -153.6921, current episode: 41 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -157.2619, current episode: 42 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -160.6048, current episode: 43 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -161.5814, current episode: 44 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -211.8438, current episode: 45 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -153.3660, current episode: 47 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -177.0210, current episode: 48 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -187.6818, current episode: 49 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -193.4241, current episode: 51 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -145.3812, current episode: 52 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish 
episode, final reward: -205.8456, current episode: 53 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -178.9525, current episode: 54 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -209.9026, current episode: 55 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -155.6166, current episode: 56 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -164.4261, current episode: 57 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -139.2575, current episode: 58 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -150.7935, current episode: 59 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.8443, current episode: 60 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -181.2119, current episode: 61 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -179.1245, current episode: 62 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -127.4616, current episode: 64 +[2023-04-29 14:46:08][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1100.000000 | iteration_1100.pth.tar | 
64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.806099 | 1984.867565 | 79.394703 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -174.539770 | 29.209699 | -113.974457 | -252.135712 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -155.2880, current episode: 2 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -139.8352, current episode: 3 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -157.0084, current episode: 4 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -218.8896, current episode: 6 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final 
reward: -182.8661, current episode: 8 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -159.1038, current episode: 9 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -207.5976, current episode: 10 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -206.2426, current episode: 11 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -218.2158, current episode: 12 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -213.5410, current episode: 13 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -191.1060, current episode: 14 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -148.9031, current episode: 16 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -194.2280, current episode: 18 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -158.0285, current episode: 19 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -217.3980, current episode: 20 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -210.3878, current episode: 21 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 21 finish episode, final reward: -200.6059, current episode: 22 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -196.8128, current episode: 23 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -132.1120, current episode: 24 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -159.9019, current episode: 25 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -246.9557, current episode: 26 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -130.5500, current episode: 27 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -168.5249, current episode: 28 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -162.2838, current episode: 31 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -240.4890, current episode: 32 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -186.5118, current episode: 33 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -186.6446, current episode: 34 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.2527, current episode: 35 +[2023-04-29 
14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -180.9464, current episode: 36 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -202.9936, current episode: 38 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -147.9948, current episode: 39 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -150.0519, current episode: 40 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -156.7593, current episode: 41 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -143.0215, current episode: 42 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -165.6172, current episode: 43 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -168.2603, current episode: 44 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -222.0362, current episode: 45 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -158.3499, current episode: 47 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -172.7937, current episode: 48 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -186.8172, 
current episode: 49 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -194.0005, current episode: 51 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -148.5913, current episode: 52 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -208.6570, current episode: 53 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -179.3147, current episode: 54 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -204.2646, current episode: 55 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -148.5122, current episode: 56 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -182.5685, current episode: 57 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -139.0616, current episode: 58 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -162.2092, current episode: 59 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -173.8398, current episode: 60 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -176.1559, current episode: 61 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -170.9073, current episode: 62 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish 
episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -135.8960, current episode: 64 +[2023-04-29 14:48:39][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1150.000000 | iteration_1150.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.970210 | 1649.126952 | 65.965078 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -174.909772 | 28.533461 | -113.974457 | -246.955658 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -147.9039, current episode: 2 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -155.1293, current episode: 3 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -159.7561, current 
episode: 4 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -208.7302, current episode: 6 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -179.2378, current episode: 8 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -159.5027, current episode: 9 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -208.0907, current episode: 10 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -209.7922, current episode: 11 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.9274, current episode: 12 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -217.0124, current episode: 13 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -182.7017, current episode: 14 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -153.3026, current episode: 16 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -186.4329, current episode: 17 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: 
-199.9807, current episode: 18 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -217.6614, current episode: 20 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -217.4373, current episode: 21 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.2549, current episode: 22 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -194.2063, current episode: 23 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -134.1752, current episode: 24 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -162.9932, current episode: 25 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -243.9038, current episode: 26 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -137.0957, current episode: 27 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -174.1264, current episode: 28 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -179.3573, current episode: 30 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -147.6968, current episode: 31 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 
finish episode, final reward: -233.0936, current episode: 32 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -186.3932, current episode: 33 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -186.6446, current episode: 34 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -171.5744, current episode: 35 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -175.6511, current episode: 36 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -203.8547, current episode: 38 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -157.3960, current episode: 39 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -150.0519, current episode: 40 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -142.9386, current episode: 41 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -149.0013, current episode: 42 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -165.5638, current episode: 43 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -159.9184, current episode: 44 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -209.0801, current episode: 45 +[2023-04-29 
14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -158.3499, current episode: 47 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -172.7937, current episode: 48 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -186.8172, current episode: 49 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -171.3752, current episode: 50 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -194.5736, current episode: 51 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -145.3812, current episode: 52 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -213.8682, current episode: 53 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -179.3147, current episode: 54 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -204.4239, current episode: 55 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -146.9053, current episode: 56 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -164.4261, current episode: 57 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -145.2769, current episode: 58 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -161.8499, 
current episode: 59 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.7421, current episode: 60 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -180.0790, current episode: 61 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -168.1252, current episode: 62 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -135.8960, current episode: 64 +[2023-04-29 14:50:38][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1200.000000 | iteration_1200.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.856177 | 1868.771392 | 74.750856 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -174.781929 | 27.918213 | -113.974457 | -243.903793 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 
14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -147.9039, current episode: 2 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -155.1293, current episode: 3 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -158.7406, current episode: 4 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -211.2402, current episode: 6 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -179.2378, current episode: 8 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -158.7546, current episode: 9 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -207.5976, current episode: 10 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.5658, current episode: 12 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -213.0169, current episode: 13 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -184.9471, current episode: 14 
+[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -156.6470, current episode: 15 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -148.0165, current episode: 16 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -199.2143, current episode: 18 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -219.9733, current episode: 20 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -202.8726, current episode: 21 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -193.4885, current episode: 22 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -201.8015, current episode: 23 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -124.2646, current episode: 24 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -155.5410, current episode: 25 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -245.4584, current episode: 26 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -124.6247, current episode: 27 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: 
-173.8985, current episode: 28 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.6090, current episode: 29 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -179.3573, current episode: 30 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -150.2042, current episode: 31 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -241.1252, current episode: 32 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -179.4966, current episode: 33 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -184.6727, current episode: 34 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.1597, current episode: 35 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -165.0645, current episode: 36 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -199.9833, current episode: 38 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -148.6892, current episode: 39 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -138.0324, current episode: 40 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -142.9386, current episode: 41 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 
finish episode, final reward: -156.7160, current episode: 42 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -165.6172, current episode: 43 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -153.5998, current episode: 44 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -207.9207, current episode: 45 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -150.9262, current episode: 47 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -182.7274, current episode: 48 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -186.8172, current episode: 49 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -186.3110, current episode: 51 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -145.3812, current episode: 52 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -208.6570, current episode: 53 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -181.0365, current episode: 54 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -212.9537, current episode: 55 +[2023-04-29 
14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -138.8726, current episode: 56 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -167.1122, current episode: 57 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -144.2377, current episode: 58 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -162.0726, current episode: 59 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.7421, current episode: 60 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -175.5101, current episode: 61 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -167.2765, current episode: 62 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -135.8960, current episode: 64 +[2023-04-29 14:52:35][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1250.000000 | iteration_1250.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | 
++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.827138 | 1934.380675 | 77.375227 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -172.606087 | 28.731556 | -113.974457 | -245.458405 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -155.7400, current episode: 2 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -144.2926, current episode: 3 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -150.0185, current episode: 4 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -219.4052, current episode: 6 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -178.4157, current episode: 8 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -157.8074, current episode: 9 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -207.5976, 
current episode: 10 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.3127, current episode: 12 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -213.5410, current episode: 13 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -189.6893, current episode: 14 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -156.6470, current episode: 15 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -148.9031, current episode: 16 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -186.5400, current episode: 17 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -180.7221, current episode: 18 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -218.9500, current episode: 20 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -183.5544, current episode: 21 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.0907, current episode: 22 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -201.8015, current episode: 23 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish 
episode, final reward: -119.9998, current episode: 24 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -151.4033, current episode: 25 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -246.8646, current episode: 26 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -122.7166, current episode: 27 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -163.9125, current episode: 28 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -165.7623, current episode: 30 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -151.4237, current episode: 31 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -234.7980, current episode: 32 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -173.6848, current episode: 33 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -182.9847, current episode: 34 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -164.2527, current episode: 35 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -153.3035, current episode: 36 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 
14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -198.6385, current episode: 38 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -159.8161, current episode: 39 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -159.8939, current episode: 40 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -152.9554, current episode: 41 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -156.7160, current episode: 42 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -167.3829, current episode: 43 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -153.5998, current episode: 44 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -202.3308, current episode: 45 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -144.5469, current episode: 47 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -172.7937, current episode: 48 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -187.3594, current episode: 49 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -194.0005, 
current episode: 51 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -145.3812, current episode: 52 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -200.6345, current episode: 53 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -179.3147, current episode: 54 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -209.9026, current episode: 55 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -138.8726, current episode: 56 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -180.6034, current episode: 57 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -142.3003, current episode: 58 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -167.1353, current episode: 59 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.7421, current episode: 60 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -186.2552, current episode: 61 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -167.2765, current episode: 62 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -135.8960, current episode: 64 +[2023-04-29 14:54:45][interaction_serial_evaluator.py:279][INFO] 
++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1300.000000 | iteration_1300.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.850599 | 1881.026562 | 75.241062 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -172.189004 | 28.335101 | -113.974457 | -246.864624 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -155.7771, current episode: 2 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -138.7260, current episode: 3 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -153.1797, current episode: 4 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: 
-203.3348, current episode: 6 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -178.4392, current episode: 8 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -157.8074, current episode: 9 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -207.5976, current episode: 10 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -222.8595, current episode: 12 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -213.0169, current episode: 13 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -189.3464, current episode: 14 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -157.2435, current episode: 15 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -148.5605, current episode: 16 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -180.7221, current episode: 18 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish 
episode, final reward: -218.9500, current episode: 20 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -191.3717, current episode: 21 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -205.2549, current episode: 22 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -190.6321, current episode: 23 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -121.1117, current episode: 24 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -156.7779, current episode: 25 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -240.3455, current episode: 26 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -124.6563, current episode: 27 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -164.8738, current episode: 28 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -147.6968, current episode: 31 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -231.8783, current episode: 32 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -173.6848, current episode: 33 +[2023-04-29 
14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -182.9847, current episode: 34 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -161.4823, current episode: 35 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -161.0175, current episode: 36 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -159.0000, current episode: 37 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -194.7262, current episode: 38 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -148.6892, current episode: 39 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -125.4887, current episode: 40 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -143.0007, current episode: 41 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -156.7160, current episode: 42 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -165.5638, current episode: 43 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -153.5998, current episode: 44 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -188.2004, current episode: 45 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -136.7483, 
current episode: 47 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -177.0210, current episode: 48 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -187.3909, current episode: 49 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -194.5736, current episode: 51 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -136.5013, current episode: 52 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -200.6345, current episode: 53 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -178.5875, current episode: 54 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -201.6701, current episode: 55 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -138.8726, current episode: 56 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -160.9397, current episode: 57 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -138.3115, current episode: 58 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -152.2713, current episode: 59 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.7421, current episode: 60 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish 
episode, final reward: -170.4413, current episode: 61 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -143.6270, current episode: 62 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -130.1074, current episode: 64 +[2023-04-29 14:57:12][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1350.000000 | iteration_1350.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.897986 | 1781.765653 | 71.270626 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -168.974434 | 28.654939 | -110.118217 | -240.345520 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -164.1813, current 
episode: 2 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -138.7260, current episode: 3 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -155.8753, current episode: 4 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -110.1182, current episode: 5 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -202.9938, current episode: 6 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -178.4392, current episode: 8 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -157.8074, current episode: 9 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0531, current episode: 10 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -206.5740, current episode: 12 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -215.4967, current episode: 13 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -189.2379, current episode: 14 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -157.2435, current episode: 15 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: 
-141.6747, current episode: 16 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -161.7969, current episode: 18 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -218.9500, current episode: 20 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -190.7397, current episode: 21 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -193.4885, current episode: 22 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -190.6321, current episode: 23 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -114.1864, current episode: 24 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -161.7627, current episode: 25 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -239.4087, current episode: 26 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -120.1181, current episode: 27 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -164.1403, current episode: 28 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8225, current episode: 29 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 
finish episode, final reward: -168.5965, current episode: 30 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -147.6968, current episode: 31 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -235.6370, current episode: 32 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -175.3656, current episode: 33 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -181.0107, current episode: 34 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -160.8673, current episode: 35 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -152.3166, current episode: 36 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -186.1077, current episode: 38 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -159.8161, current episode: 39 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -128.4115, current episode: 40 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -153.9935, current episode: 41 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -156.7160, current episode: 42 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -173.6908, current episode: 43 +[2023-04-29 
14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -169.1669, current episode: 44 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -189.7011, current episode: 45 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -136.7483, current episode: 47 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -177.0210, current episode: 48 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -187.3594, current episode: 49 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -180.2762, current episode: 51 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -136.5013, current episode: 52 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -200.6345, current episode: 53 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -176.7160, current episode: 54 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -201.8043, current episode: 55 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -138.8726, current episode: 56 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -157.4804, 
current episode: 57 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -138.3115, current episode: 58 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -167.9649, current episode: 59 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -161.7421, current episode: 60 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -172.7622, current episode: 61 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -151.2073, current episode: 62 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -130.1074, current episode: 64 +[2023-04-29 14:59:40][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1400.000000 | iteration_1400.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.813910 | 1965.818939 | 78.632758 | ++-------+-------------------------+---------------+---------------------+----------------------+ 
++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -168.753671 | 27.749428 | -110.118217 | -239.408707 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -183.3539, current episode: 1 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -153.5734, current episode: 2 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -138.7260, current episode: 3 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -155.8753, current episode: 4 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -113.9745, current episode: 5 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -208.6102, current episode: 6 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final reward: -167.8655, current episode: 8 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -153.3068, current episode: 9 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -204.0531, current episode: 10 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish 
episode, final reward: -217.4949, current episode: 12 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -205.9143, current episode: 13 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -180.4530, current episode: 14 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -157.2435, current episode: 15 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -138.3336, current episode: 16 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -161.1284, current episode: 18 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -145.1099, current episode: 19 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -218.9500, current episode: 20 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -176.6572, current episode: 21 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 21 finish episode, final reward: -193.6565, current episode: 22 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -190.6321, current episode: 23 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -113.7316, current episode: 24 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -149.0520, current episode: 25 +[2023-04-29 
15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -239.1797, current episode: 26 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -121.8599, current episode: 27 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -163.9125, current episode: 28 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -138.8142, current episode: 29 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -165.0387, current episode: 30 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -144.4093, current episode: 31 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -239.6880, current episode: 32 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -179.4966, current episode: 33 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -182.9847, current episode: 34 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -161.1666, current episode: 35 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -152.5858, current episode: 36 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -185.3884, current episode: 38 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -159.8161, 
current episode: 39 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -141.8348, current episode: 40 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -153.1418, current episode: 41 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -156.7499, current episode: 42 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -168.1719, current episode: 43 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -170.9516, current episode: 44 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -179.9600, current episode: 45 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -144.5440, current episode: 46 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -137.3573, current episode: 47 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -177.0210, current episode: 48 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -185.7424, current episode: 49 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -167.9897, current episode: 50 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -180.2762, current episode: 51 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -135.9614, current episode: 52 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish 
episode, final reward: -200.6345, current episode: 53 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -178.9525, current episode: 54 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -196.0322, current episode: 55 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -155.6166, current episode: 56 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -157.4804, current episode: 57 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -138.3115, current episode: 58 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -166.1234, current episode: 59 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -160.7244, current episode: 60 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -171.6362, current episode: 61 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -164.7631, current episode: 62 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish episode, final reward: -144.8326, current episode: 63 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -116.5146, current episode: 64 +[2023-04-29 15:01:58][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1450.000000 | iteration_1450.pth.tar | 
64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.858086 | 1864.614972 | 74.584599 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -168.027528 | 27.601254 | -113.731590 | -239.688004 | ++-------+-------------+------------+-------------+-------------+ + + +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 0 finish episode, final reward: -176.5200, current episode: 1 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 1 finish episode, final reward: -153.5734, current episode: 2 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 2 finish episode, final reward: -131.6958, current episode: 3 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 3 finish episode, final reward: -147.5686, current episode: 4 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 4 finish episode, final reward: -124.4536, current episode: 5 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 5 finish episode, final reward: -202.8201, current episode: 6 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 6 finish episode, final reward: -213.4896, current episode: 7 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 7 finish episode, final 
reward: -183.7837, current episode: 8 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 8 finish episode, final reward: -154.2571, current episode: 9 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 9 finish episode, final reward: -200.0363, current episode: 10 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 10 finish episode, final reward: -196.4718, current episode: 11 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 11 finish episode, final reward: -217.4949, current episode: 12 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 12 finish episode, final reward: -205.9143, current episode: 13 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 13 finish episode, final reward: -180.4530, current episode: 14 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 14 finish episode, final reward: -161.1233, current episode: 15 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 15 finish episode, final reward: -141.6747, current episode: 16 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 16 finish episode, final reward: -174.3486, current episode: 17 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 17 finish episode, final reward: -163.3854, current episode: 18 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 18 finish episode, final reward: -154.8893, current episode: 19 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 19 finish episode, final reward: -219.9577, current episode: 20 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 20 finish episode, final reward: -178.0461, current episode: 21 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] 
[EVALUATOR]env 21 finish episode, final reward: -193.5142, current episode: 22 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 22 finish episode, final reward: -190.6321, current episode: 23 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 23 finish episode, final reward: -114.0561, current episode: 24 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 24 finish episode, final reward: -161.8792, current episode: 25 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 25 finish episode, final reward: -241.8034, current episode: 26 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 26 finish episode, final reward: -121.8599, current episode: 27 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 27 finish episode, final reward: -163.9125, current episode: 28 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 28 finish episode, final reward: -143.2918, current episode: 29 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 29 finish episode, final reward: -154.7546, current episode: 30 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 30 finish episode, final reward: -140.7544, current episode: 31 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 31 finish episode, final reward: -239.6880, current episode: 32 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 32 finish episode, final reward: -164.7227, current episode: 33 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 33 finish episode, final reward: -182.9847, current episode: 34 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 34 finish episode, final reward: -161.1666, current episode: 35 +[2023-04-29 
15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 35 finish episode, final reward: -145.4733, current episode: 36 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 36 finish episode, final reward: -156.0917, current episode: 37 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 37 finish episode, final reward: -185.3884, current episode: 38 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 38 finish episode, final reward: -159.8161, current episode: 39 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 39 finish episode, final reward: -122.0331, current episode: 40 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 40 finish episode, final reward: -153.9935, current episode: 41 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 41 finish episode, final reward: -147.5412, current episode: 42 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 42 finish episode, final reward: -161.2910, current episode: 43 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 43 finish episode, final reward: -170.9516, current episode: 44 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 44 finish episode, final reward: -177.3347, current episode: 45 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 45 finish episode, final reward: -137.6972, current episode: 46 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 46 finish episode, final reward: -136.3683, current episode: 47 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 47 finish episode, final reward: -172.9376, current episode: 48 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 48 finish episode, final reward: -185.2804, 
current episode: 49 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 49 finish episode, final reward: -165.9868, current episode: 50 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 50 finish episode, final reward: -180.2762, current episode: 51 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 51 finish episode, final reward: -136.5013, current episode: 52 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 52 finish episode, final reward: -200.6345, current episode: 53 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 53 finish episode, final reward: -179.2731, current episode: 54 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 54 finish episode, final reward: -196.0322, current episode: 55 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 55 finish episode, final reward: -148.0213, current episode: 56 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 56 finish episode, final reward: -152.8068, current episode: 57 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 57 finish episode, final reward: -138.3115, current episode: 58 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 58 finish episode, final reward: -155.1254, current episode: 59 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 59 finish episode, final reward: -160.7635, current episode: 60 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 60 finish episode, final reward: -171.6362, current episode: 61 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 61 finish episode, final reward: -145.1632, current episode: 62 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 62 finish 
episode, final reward: -159.0016, current episode: 63 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:253][INFO] [EVALUATOR]env 63 finish episode, final reward: -138.9592, current episode: 64 +[2023-04-29 15:03:44][interaction_serial_evaluator.py:279][INFO] ++-------+-------------+------------------------+---------------+---------------+ +| Name | train_iter | ckpt_name | episode_count | envstep_count | ++-------+-------------+------------------------+---------------+---------------+ +| Value | 1500.000000 | iteration_1500.pth.tar | 64.000000 | 1600.000000 | ++-------+-------------+------------------------+---------------+---------------+ ++-------+-------------------------+---------------+---------------------+----------------------+ +| Name | avg_envstep_per_episode | evaluate_time | avg_envstep_per_sec | avg_time_per_episode | ++-------+-------------------------+---------------+---------------------+----------------------+ +| Value | 25.000000 | 0.928586 | 1723.050567 | 68.922023 | ++-------+-------------------------+---------------+---------------------+----------------------+ ++-------+-------------+------------+-------------+-------------+ +| Name | reward_mean | reward_std | reward_max | reward_min | ++-------+-------------+------------+-------------+-------------+ +| Value | -167.151066 | 27.481417 | -114.056099 | -241.803360 | ++-------+-------------+------------+-------------+-------------+ + + diff --git a/my_homework/ch6/h2_mpe/log/learner/learner_logger.txt b/my_homework/ch6/h2_mpe/log/learner/learner_logger.txt new file mode 100644 index 0000000..0dc9520 --- /dev/null +++ b/my_homework/ch6/h2_mpe/log/learner/learner_logger.txt @@ -0,0 +1,362 @@ +[2023-04-29 13:56:30][base_learner.py:338][INFO] [RANK0]: DI-engine DRL Policy +MAVAC( + (actor_encoder): Identity() + (critic_encoder): Identity() + (critic_head): Sequential( + (0): Linear(in_features=48, out_features=512, bias=True) + (1): ReLU() + (2): RegressionHead( + (main): Sequential( + (0): 
Linear(in_features=512, out_features=512, bias=True) + (1): ReLU() + ) + (last): Linear(in_features=512, out_features=1, bias=True) + ) + ) + (actor_head): Sequential( + (0): Linear(in_features=18, out_features=256, bias=True) + (1): ReLU() + (2): DiscreteHead( + (Q): Sequential( + (0): Sequential( + (0): Linear(in_features=256, out_features=256, bias=True) + (1): ReLU() + (2): Linear(in_features=256, out_features=256, bias=True) + (3): ReLU() + ) + (1): Sequential( + (0): Linear(in_features=256, out_features=5, bias=True) + ) + ) + ) + ) + (actor): ModuleList( + (0): Identity() + (1): Sequential( + (0): Linear(in_features=18, out_features=256, bias=True) + (1): ReLU() + (2): DiscreteHead( + (Q): Sequential( + (0): Sequential( + (0): Linear(in_features=256, out_features=256, bias=True) + (1): ReLU() + (2): Linear(in_features=256, out_features=256, bias=True) + (3): ReLU() + ) + (1): Sequential( + (0): Linear(in_features=256, out_features=5, bias=True) + ) + ) + ) + ) + ) + (critic): ModuleList( + (0): Identity() + (1): Sequential( + (0): Linear(in_features=48, out_features=512, bias=True) + (1): ReLU() + (2): RegressionHead( + (main): Sequential( + (0): Linear(in_features=512, out_features=512, bias=True) + (1): ReLU() + ) + (last): Linear(in_features=512, out_features=1, bias=True) + ) + ) + ) +) +[2023-04-29 13:56:36][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 13:56:55][base_learner.py:338][INFO] [RANK0]: === Training Iteration 0 Result === +[2023-04-29 13:56:55][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 2075.879639 | 80.492325 | 3990.806885 | ++-------+------------+----------------+-----------------+----------------+ 
++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 1.605870 | -5.583335 | -80.492325 | -0.000000 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.000000 | 0.308324 | 0.078416 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 13:56:55][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/iteration_0.pth.tar +[2023-04-29 13:58:15][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:00:25][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:00:41][base_learner.py:338][INFO] [RANK0]: === Training Iteration 100 Result === +[2023-04-29 14:00:41][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 1.328617 | 1.229656 | 0.225120 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 1.359878 | 119.440120 | -1.273968 | 0.001813 | ++-------+------------------+-------------+--------------+---------------+ 
++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.006326 | -0.873831 | -1.830284 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:02:53][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:05:22][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:05:29][base_learner.py:338][INFO] [RANK0]: === Training Iteration 200 Result === +[2023-04-29 14:05:29][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.853290 | -0.915839 | 0.148980 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 1.194120 | 116.854263 | 0.895214 | 0.001230 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.008703 | -0.676063 | -1.837957 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:10:06][base_learner.py:338][INFO] [RANK0]: === Training Iteration 300 Result === +[2023-04-29 14:10:06][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | 
policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.696598 | -0.744479 | 0.118915 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 1.157619 | 103.689232 | 0.716742 | 0.001688 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.009612 | -0.509467 | -1.839954 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:11:35][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:14:04][base_learner.py:338][INFO] [RANK0]: === Training Iteration 400 Result === +[2023-04-29 14:14:04][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.124338 | 0.084298 | 0.100803 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 1.036151 | 105.315164 | -0.099251 | 0.001614 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | 
value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.016080 | -0.306038 | -1.808327 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:16:28][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:18:59][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:19:06][base_learner.py:338][INFO] [RANK0]: === Training Iteration 500 Result === +[2023-04-29 14:19:06][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.138530 | -0.172593 | 0.084042 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.795740 | 96.393870 | 0.151795 | 0.002708 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.040720 | -0.164231 | -1.776674 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:21:29][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:23:28][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:23:54][base_learner.py:338][INFO] [RANK0]: === Training 
Iteration 600 Result === +[2023-04-29 14:23:54][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.329802 | 0.302324 | 0.065377 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.521076 | 99.549687 | -0.316268 | 0.001452 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.012992 | -0.015066 | -1.731947 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:27:37][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:27:43][base_learner.py:338][INFO] [RANK0]: === Training Iteration 700 Result === +[2023-04-29 14:27:43][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.088881 | 0.064415 | 0.056688 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.387834 | 106.585342 | -0.074249 | 
0.001238 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.017225 | -0.094952 | -1.694315 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:30:06][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:32:37][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:32:43][base_learner.py:338][INFO] [RANK0]: === Training Iteration 800 Result === +[2023-04-29 14:32:43][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.074063 | -0.094149 | 0.045884 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.285545 | 82.474568 | 0.086433 | 0.001356 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.016354 | -0.118351 | -1.672728 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:35:05][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 
14:37:06][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:37:29][base_learner.py:338][INFO] [RANK0]: === Training Iteration 900 Result === +[2023-04-29 14:37:29][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.128098 | -0.148398 | 0.044915 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.215749 | 81.114602 | 0.143275 | 0.000887 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.010985 | 0.026648 | -1.647371 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:38:52][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:41:09][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:41:16][base_learner.py:338][INFO] [RANK0]: === Training Iteration 1000 Result === +[2023-04-29 14:41:16][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.050657 | -0.069202 | 
0.040985 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.194806 | 90.351094 | 0.064764 | 0.001257 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.013996 | -0.156766 | -1.622931 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:43:37][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:46:16][base_learner.py:338][INFO] [RANK0]: === Training Iteration 1100 Result === +[2023-04-29 14:46:16][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.081978 | 0.065850 | 0.035498 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.162093 | 71.864799 | -0.074214 | 0.001236 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.012131 | -0.121981 | -1.602524 | 
++-------+--------------+---------------+----------------+ + +[2023-04-29 14:51:06][base_learner.py:338][INFO] [RANK0]: === Training Iteration 1200 Result === +[2023-04-29 14:51:06][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.029659 | 0.017418 | 0.027345 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.143233 | 77.994717 | -0.024909 | 0.000585 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.010331 | -0.018866 | -1.594278 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:52:35][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:54:45][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:54:51][base_learner.py:338][INFO] [RANK0]: === Training Iteration 1300 Result === +[2023-04-29 14:54:51][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | -0.211276 | -0.222894 | 0.025842 | 
++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.130331 | 74.865286 | 0.217152 | 0.001244 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | ++-------+--------------+---------------+----------------+ +| Value | 0.009678 | 0.028361 | -1.583741 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 14:57:12][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:59:40][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 14:59:46][base_learner.py:338][INFO] [RANK0]: === Training Iteration 1400 Result === +[2023-04-29 14:59:46][learner_hook.py:224][INFO] ++-------+------------+----------------+-----------------+----------------+ +| Name | cur_lr_avg | total_loss_avg | policy_loss_avg | value_loss_avg | ++-------+------------+----------------+-----------------+----------------+ +| Value | 0.000500 | 0.077108 | 0.067353 | 0.021888 | ++-------+------------+----------------+-----------------+----------------+ ++-------+------------------+-------------+--------------+---------------+ +| Name | entropy_loss_avg | adv_max_avg | adv_mean_avg | approx_kl_avg | ++-------+------------------+-------------+--------------+---------------+ +| Value | 0.118881 | 72.534637 | -0.070525 | 0.000768 | ++-------+------------------+-------------+--------------+---------------+ ++-------+--------------+---------------+----------------+ +| Name | clipfrac_avg | value_max_avg | value_mean_avg | 
++-------+--------------+---------------+----------------+ +| Value | 0.011572 | 0.001574 | -1.543682 | ++-------+--------------+---------------+----------------+ + +[2023-04-29 15:01:58][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar +[2023-04-29 15:03:44][base_learner.py:338][INFO] [RANK0]: learner save ckpt in ./ptz_simple_spread_mappo_seed0_230429_135625/ckpt/ckpt_best.pth.tar diff --git a/my_homework/ch6/h2_mpe/log/serial/events.out.tfevents.1682776590.xhztmp1 b/my_homework/ch6/h2_mpe/log/serial/events.out.tfevents.1682776590.xhztmp1 new file mode 100644 index 0000000..fea653b Binary files /dev/null and b/my_homework/ch6/h2_mpe/log/serial/events.out.tfevents.1682776590.xhztmp1 differ diff --git a/my_homework/ch6/h2_mpe/total_config.py b/my_homework/ch6/h2_mpe/total_config.py new file mode 100644 index 0000000..d950336 --- /dev/null +++ b/my_homework/ch6/h2_mpe/total_config.py @@ -0,0 +1,152 @@ +exp_config = { + 'env': { + 'manager': { + 'episode_num': float("inf"), + 'max_retry': 5, + 'step_timeout': None, + 'auto_reset': True, + 'reset_timeout': None, + 'retry_type': 'reset', + 'retry_waiting_time': 0.1, + 'shared_memory': True, + 'copy_on_get': True, + 'context': 'fork', + 'wait_num': float("inf"), + 'step_wait_timeout': None, + 'connect_timeout': 60, + 'reset_inplace': False, + 'cfg_type': 'SyncSubprocessEnvManagerDict', + 'type': 'subprocess' + }, + 'stop_value': 0, + 'import_names': + ['dizoo.petting_zoo.envs.petting_zoo_simple_spread_env'], + 'type': 'petting_zoo', + 'env_family': 'mpe', + 'env_id': 'simple_spread_v2', + 'n_agent': 3, + 'n_landmark': 3, + 'max_cycles': 25, + 'agent_obs_only': False, + 'agent_specific_global_state': True, + 'continuous_actions': False, + 'collector_env_num': 64, + 'evaluator_env_num': 64, + 'n_evaluator_episode': 64 + }, + 'policy': { + 'model': { + 'action_space': 'discrete', + 'agent_num': 3, + 'agent_obs_shape': 18, + 'global_obs_shape': 48, 
+ 'action_shape': 5 + }, + 'learn': { + 'learner': { + 'train_iterations': 1000000000, + 'dataloader': { + 'num_workers': 0 + }, + 'log_policy': True, + 'hook': { + 'load_ckpt_before_run': '', + 'log_show_after_iter': 100, + 'save_ckpt_after_iter': 10000, + 'save_ckpt_after_run': True + }, + 'cfg_type': 'BaseLearnerDict' + }, + 'epoch_per_collect': 5, + 'batch_size': 3200, + 'learning_rate': 0.0005, + 'value_weight': 0.5, + 'entropy_weight': 0.01, + 'clip_ratio': 0.2, + 'adv_norm': False, + 'value_norm': True, + 'ppo_param_init': True, + 'grad_clip_type': 'clip_norm', + 'grad_clip_value': 10, + 'ignore_done': False, + 'multi_gpu': False + }, + 'collect': { + 'collector': { + 'deepcopy_obs': False, + 'transform_obs': False, + 'collect_print_freq': 100, + 'cfg_type': 'SampleSerialCollectorDict', + 'type': 'sample' + }, + 'unroll_len': 1, + 'discount_factor': 0.99, + 'gae_lambda': 0.95, + 'n_sample': 3200, + 'env_num': 64 + }, + 'eval': { + 'evaluator': { + 'eval_freq': 50, + 'render': { + 'render_freq': -1, + 'mode': 'train_iter' + }, + 'cfg_type': 'InteractionSerialEvaluatorDict', + 'n_episode': 64, + 'stop_value': 0 + }, + 'env_num': 64 + }, + 'other': { + 'replay_buffer': { + 'type': 'advanced', + 'replay_buffer_size': 4096, + 'max_use': float("inf"), + 'max_staleness': float("inf"), + 'alpha': 0.6, + 'beta': 0.4, + 'anneal_step': 100000, + 'enable_track_used_data': False, + 'deepcopy': False, + 'thruput_controller': { + 'push_sample_rate_limit': { + 'max': float("inf"), + 'min': 0 + }, + 'window_seconds': 30, + 'sample_min_limit_ratio': 1 + }, + 'monitor': { + 'sampled_data_attr': { + 'average_range': 5, + 'print_freq': 200 + }, + 'periodic_thruput': { + 'seconds': 60 + } + }, + 'cfg_type': 'AdvancedReplayBufferDict' + }, + 'commander': { + 'cfg_type': 'BaseSerialCommanderDict' + } + }, + 'on_policy': True, + 'cuda': True, + 'multi_gpu': False, + 'bp_update_sync': True, + 'traj_len_inf': False, + 'type': 'ppo_command', + 'priority': False, + 'priority_IS_weight': 
False, + 'recompute_adv': True, + 'action_space': 'discrete', + 'nstep_return': False, + 'multi_agent': True, + 'transition_with_policy_data': True, + 'cfg_type': 'PPOCommandModePolicyDict' + }, + 'exp_name': 'ptz_simple_spread_mappo_seed0_230429_135625', + 'seed': 0 +} diff --git a/my_homework/ch6/ppof_ch6_code_p1.py b/my_homework/ch6/ppof_ch6_code_p1.py new file mode 100644 index 0000000..80f6132 --- /dev/null +++ b/my_homework/ch6/ppof_ch6_code_p1.py @@ -0,0 +1,95 @@ +import numpy as np +import torch + + +def get_agent_id_feature(agent_id, agent_num): + agent_id_feature = torch.zeros(agent_num) + agent_id_feature[agent_id] = 1 + return agent_id_feature + + +def get_movement_feature(): + # for simplicity, we use random movement feature here + movement_feature = torch.randint(0, 2, (8, )) + return movement_feature + + +def get_own_feature(): + # for simplicity, we use random own feature here + return torch.randn(10) + + +def get_ally_visible_feature(): + # this function only return the visible feature of one ally + # for simplicity, we use random tensor as ally visible feature while zero tensor as ally invisible feature + if np.random.random() > 0.5: + ally_visible_feature = torch.randn(4) + else: + ally_visible_feature = torch.zeros(4) + return ally_visible_feature + + +def get_enemy_visible_feature(): + # this function only return the visible feature of one enemy + # for simplicity, we use random tensor as enemy visible feature while zero tensor as enemy invisible feature + if np.random.random() > 0.8: + enemy_visible_feature = torch.randn(4) + else: + enemy_visible_feature = torch.zeros(4) + return enemy_visible_feature + + +def get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num): + # You need to implement this function + + agent_id_feature = get_agent_id_feature(agent_id, ally_agent_num+enemy_agent_num) + own_feature = get_own_feature() + ally_visible_features = [] + for i in range(ally_agent_num): + if i != agent_id: + 
ally_visible_features.append(get_ally_visible_feature()) + + enemy_visible_feature = [] + for i in range(enemy_agent_num): + enemy_visible_feature.append(get_enemy_visible_feature()) + + + return torch.cat([agent_id_feature, own_feature, *ally_visible_features, *enemy_visible_feature]) + + +def get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num): + # In many multi-agent environments such as SMAC, the global state is the simplified version of the combination + # of all the agent's independent state, and the concrete implementation depends on the characteris of environment. + # For simplicity, we use random feature here. + ally_center_feature = torch.randn(8) + enemy_center_feature = torch.randn(8) + return torch.cat([ally_center_feature, enemy_center_feature]) + + +def get_as_global_state(agent_id, ally_agent_num, enemy_agent_num): + # You need to implement this function + ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num) + ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num) + + return torch.cat([ind_global_state, ep_global_state]) + + +def test_global_state(): + ally_agent_num = 3 + enemy_agent_num = 5 + # get independent global state, which usually used in decentralized training + for agent_id in range(ally_agent_num): + ind_global_state = get_ind_global_state(agent_id, ally_agent_num, enemy_agent_num) + assert isinstance(ind_global_state, torch.Tensor) + # get environment provide global state, which is the same for all agents, used in centralized training + for agent_id in range(ally_agent_num): + ep_global_state = get_ep_global_state(agent_id, ally_agent_num, enemy_agent_num) + assert isinstance(ep_global_state, torch.Tensor) + # get naive agent-specific global state, which is the specific for each agent, used in centralized training + for agent_id in range(ally_agent_num): + as_global_state = get_as_global_state(agent_id, ally_agent_num, enemy_agent_num) + assert 
isinstance(as_global_state, torch.Tensor) + + +if __name__ == "__main__": + test_global_state()