diff --git a/.buildinfo b/.buildinfo index e1801f3b..055f7d26 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 3d61a9c2c162a7ac452c7ec2cbffeab5 +config: 9e28e034788f88155ad752a56985e689 tags: d77d1c0d9ca2f4c8421862c7c5a0d620 diff --git a/404.html b/404.html index e8fdd53e..a34feb5b 100644 --- a/404.html +++ b/404.html @@ -11,7 +11,7 @@ - + 404 - Page Not Found - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,9 +330,9 @@
    -

    404 - Page Not Found#

    +

    404 - Page Not Found

    -

    The requested page could not be found.#

    +

    The requested page could not be found.

    @@ -415,8 +419,8 @@

    The requested page could not be found. - + + diff --git a/README/index.html b/README/index.html index e4efb355..7fb6c104 100644 --- a/README/index.html +++ b/README/index.html @@ -11,7 +11,7 @@ - + MO-Gymnasium documentation - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,11 +330,11 @@
    -

    MO-Gymnasium documentation#

    +

    MO-Gymnasium documentation

    This folder contains the documentation for MO-Gymnasium.

    For more information about how to contribute to the documentation go to our CONTRIBUTING.md

    -

    Build the Documentation#

    +

    Build the Documentation

    Install the required packages and Gymnasium (or your fork):

    pip install -r docs/requirements.txt
     pip install -e .
    @@ -432,8 +436,8 @@ 

    Build the Documentation - + + diff --git a/_images/minecart-rgb.gif b/_images/minecart-rgb.gif new file mode 100644 index 00000000..39ad4172 Binary files /dev/null and b/_images/minecart-rgb.gif differ diff --git a/_images/mo-lunar-lander-continuous.gif b/_images/mo-lunar-lander-continuous.gif new file mode 100644 index 00000000..2051d754 Binary files /dev/null and b/_images/mo-lunar-lander-continuous.gif differ diff --git a/_static/basic.css b/_static/basic.css index 30fee9d0..f316efcb 100644 --- a/_static/basic.css +++ b/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/_static/doctools.js b/_static/doctools.js index d06a71d7..4d67807d 100644 --- a/_static/doctools.js +++ b/_static/doctools.js @@ -4,7 +4,7 @@ * * Base JavaScript utilities for all Sphinx HTML documentation. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ diff --git a/_static/documentation_options.js b/_static/documentation_options.js index 6fe7b331..59816731 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,5 +1,5 @@ const DOCUMENTATION_OPTIONS = { - VERSION: '1.1.0', + VERSION: '1.2.0', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'dirhtml', diff --git a/_static/language_data.js b/_static/language_data.js index 250f5665..367b8ed8 100644 --- a/_static/language_data.js +++ b/_static/language_data.js @@ -5,7 +5,7 @@ * This script contains the language-specific data used by searchtools.js, * namely the list of stopwords, stemmer, scorer and splitter. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. 
* :license: BSD, see LICENSE for details. * */ @@ -13,7 +13,7 @@ var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; -/* Non-minified version is copied as a separate JS file, is available */ +/* Non-minified version is copied as a separate JS file, if available */ /** * Porter Stemmer diff --git a/_static/pygments.css b/_static/pygments.css index c2e07c71..02b4b128 100644 --- a/_static/pygments.css +++ b/_static/pygments.css @@ -106,17 +106,17 @@ body[data-theme="dark"] .highlight .cp { color: #ff3a3a; font-weight: bold } /* body[data-theme="dark"] .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */ body[data-theme="dark"] .highlight .c1 { color: #ababab; font-style: italic } /* Comment.Single */ body[data-theme="dark"] .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */ -body[data-theme="dark"] .highlight .gd { color: #d22323 } /* Generic.Deleted */ +body[data-theme="dark"] .highlight .gd { color: #ff3a3a } /* Generic.Deleted */ body[data-theme="dark"] .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */ body[data-theme="dark"] .highlight .ges { color: #d0d0d0; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ -body[data-theme="dark"] .highlight .gr { color: #d22323 } /* Generic.Error */ +body[data-theme="dark"] .highlight .gr { color: #ff3a3a } /* Generic.Error */ body[data-theme="dark"] .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */ body[data-theme="dark"] .highlight .gi { color: #589819 } /* Generic.Inserted */ body[data-theme="dark"] .highlight .go { color: #cccccc } /* Generic.Output */ body[data-theme="dark"] .highlight .gp { color: #aaaaaa } /* Generic.Prompt */ body[data-theme="dark"] .highlight .gs { color: #d0d0d0; 
font-weight: bold } /* Generic.Strong */ body[data-theme="dark"] .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */ -body[data-theme="dark"] .highlight .gt { color: #d22323 } /* Generic.Traceback */ +body[data-theme="dark"] .highlight .gt { color: #ff3a3a } /* Generic.Traceback */ body[data-theme="dark"] .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */ body[data-theme="dark"] .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */ body[data-theme="dark"] .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */ @@ -192,17 +192,17 @@ body:not([data-theme="light"]) .highlight .cp { color: #ff3a3a; font-weight: bol body:not([data-theme="light"]) .highlight .cpf { color: #ababab; font-style: italic } /* Comment.PreprocFile */ body:not([data-theme="light"]) .highlight .c1 { color: #ababab; font-style: italic } /* Comment.Single */ body:not([data-theme="light"]) .highlight .cs { color: #e50808; font-weight: bold; background-color: #520000 } /* Comment.Special */ -body:not([data-theme="light"]) .highlight .gd { color: #d22323 } /* Generic.Deleted */ +body:not([data-theme="light"]) .highlight .gd { color: #ff3a3a } /* Generic.Deleted */ body:not([data-theme="light"]) .highlight .ge { color: #d0d0d0; font-style: italic } /* Generic.Emph */ body:not([data-theme="light"]) .highlight .ges { color: #d0d0d0; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ -body:not([data-theme="light"]) .highlight .gr { color: #d22323 } /* Generic.Error */ +body:not([data-theme="light"]) .highlight .gr { color: #ff3a3a } /* Generic.Error */ body:not([data-theme="light"]) .highlight .gh { color: #ffffff; font-weight: bold } /* Generic.Heading */ body:not([data-theme="light"]) .highlight .gi { color: #589819 } /* Generic.Inserted */ body:not([data-theme="light"]) .highlight .go { color: #cccccc } /* Generic.Output */ body:not([data-theme="light"]) .highlight .gp { color: 
#aaaaaa } /* Generic.Prompt */ body:not([data-theme="light"]) .highlight .gs { color: #d0d0d0; font-weight: bold } /* Generic.Strong */ body:not([data-theme="light"]) .highlight .gu { color: #ffffff; text-decoration: underline } /* Generic.Subheading */ -body:not([data-theme="light"]) .highlight .gt { color: #d22323 } /* Generic.Traceback */ +body:not([data-theme="light"]) .highlight .gt { color: #ff3a3a } /* Generic.Traceback */ body:not([data-theme="light"]) .highlight .kc { color: #6ebf26; font-weight: bold } /* Keyword.Constant */ body:not([data-theme="light"]) .highlight .kd { color: #6ebf26; font-weight: bold } /* Keyword.Declaration */ body:not([data-theme="light"]) .highlight .kn { color: #6ebf26; font-weight: bold } /* Keyword.Namespace */ diff --git a/_static/searchtools.js b/_static/searchtools.js index 7918c3fa..b08d58c9 100644 --- a/_static/searchtools.js +++ b/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ @@ -99,7 +99,7 @@ const _displayItem = (item, searchTerms, highlightTerms) => { .then((data) => { if (data) listItem.appendChild( - Search.makeSearchSummary(data, searchTerms) + Search.makeSearchSummary(data, searchTerms, anchor) ); // highlight search terms in the summary if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js @@ -116,8 +116,8 @@ const _finishSearch = (resultCount) => { ); else Search.status.innerText = _( - `Search finished, found ${resultCount} page(s) matching the search query.` - ); + "Search finished, found ${resultCount} page(s) matching the search query." 
+ ).replace('${resultCount}', resultCount); }; const _displayNextItem = ( results, @@ -137,6 +137,22 @@ const _displayNextItem = ( // search finished, update title and status message else _finishSearch(resultCount); }; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; /** * Default splitQuery function. Can be overridden in ``sphinx.search`` with a @@ -160,13 +176,26 @@ const Search = { _queued_query: null, _pulse_status: -1, - htmlToText: (htmlString) => { + htmlToText: (htmlString, anchor) => { const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); - htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. 
Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content const docContent = htmlElement.querySelector('[role="main"]'); - if (docContent !== undefined) return docContent.textContent; + if (docContent) return docContent.textContent; + console.warn( - "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." ); return ""; }, @@ -239,16 +268,7 @@ const Search = { else Search.deferQuery(query); }, - /** - * execute search (requires search index to be loaded) - */ - query: (query) => { - const filenames = Search._index.filenames; - const docNames = Search._index.docnames; - const titles = Search._index.titles; - const allTitles = Search._index.alltitles; - const indexEntries = Search._index.indexentries; - + _parseQuery: (query) => { // stem the search terms and add them to the correct list const stemmer = new Stemmer(); const searchTerms = new Set(); @@ -284,21 +304,38 @@ const Search = { // console.info("required: ", [...searchTerms]); // console.info("excluded: ", [...excludedTerms]); - // array of [docname, title, anchor, descr, score, filename] - let results = []; + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename]. 
+ const normalResults = []; + const nonMainIndexResults = []; + _removeChildren(document.getElementById("search-progress")); - const queryLower = query.toLowerCase(); + const queryLower = query.toLowerCase().trim(); for (const [title, foundTitles] of Object.entries(allTitles)) { - if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { for (const [file, id] of foundTitles) { - let score = Math.round(100 * queryLower.length / title.length) - results.push([ + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ docNames[file], titles[file] !== title ? `${titles[file]} > ${title}` : title, id !== null ? "#" + id : "", null, - score, + score + boost, filenames[file], ]); } @@ -308,46 +345,47 @@ const Search = { // search for explicit entries in index directives for (const [entry, foundEntries] of Object.entries(indexEntries)) { if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { - for (const [file, id] of foundEntries) { - let score = Math.round(100 * queryLower.length / entry.length) - results.push([ + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ docNames[file], titles[file], id ? 
"#" + id : "", null, score, filenames[file], - ]); + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } } } } // lookup as object objectTerms.forEach((term) => - results.push(...Search.performObjectSearch(term, objectTerms)) + normalResults.push(...Search.performObjectSearch(term, objectTerms)) ); // lookup as search terms in fulltext - results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); // let the scorer override scores with a custom scoring function - if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); - - // now sort the results by score (in opposite order of appearance, since the - // display function below uses pop() to retrieve items) and then - // alphabetically - results.sort((a, b) => { - const leftScore = a[4]; - const rightScore = b[4]; - if (leftScore === rightScore) { - // same score: sort alphabetically - const leftTitle = a[1].toLowerCase(); - const rightTitle = b[1].toLowerCase(); - if (leftTitle === rightTitle) return 0; - return leftTitle > rightTitle ? -1 : 1; // inverted is intentional - } - return leftScore > rightScore ? 1 : -1; - }); + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. 
+ let results = [...nonMainIndexResults, ...normalResults]; // remove duplicate search results // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept @@ -361,7 +399,12 @@ const Search = { return acc; }, []); - results = results.reverse(); + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); // for debugging //Search.lastresults = results.slice(); // a copy @@ -466,14 +509,18 @@ const Search = { // add support for partial matches if (word.length > 2) { const escapedWord = _escapeRegExp(word); - Object.keys(terms).forEach((term) => { - if (term.match(escapedWord) && !terms[word]) - arr.push({ files: terms[term], score: Scorer.partialTerm }); - }); - Object.keys(titleTerms).forEach((term) => { - if (term.match(escapedWord) && !titleTerms[word]) - arr.push({ files: titleTerms[word], score: Scorer.partialTitle }); - }); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } } // no match but word was a required one @@ -496,9 +543,8 @@ const Search = { // create the mapping files.forEach((file) => { - if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) - fileMap.get(file).push(word); - else fileMap.set(file, [word]); + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); }); }); @@ -549,8 +595,8 @@ const Search = { * search summary for a given text. keywords is a list * of stemmed words. 
*/ - makeSearchSummary: (htmlText, keywords) => { - const text = Search.htmlToText(htmlText); + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); if (text === "") return null; const textLower = text.toLowerCase(); diff --git a/_static/versioning/versioning_menu.html b/_static/versioning/versioning_menu.html index 1daf4cef..68109cf8 100644 --- a/_static/versioning/versioning_menu.html +++ b/_static/versioning/versioning_menu.html @@ -122,15 +122,33 @@ }); const sortVersions = (a, b) => { - // Alpha versions - if (a.includes("a")) return 1; - if (b.includes("a")) return -1; - if (a.replace("v", "") > b.replace("v", "")) - return -1 - else if (a.replace("v", "") < b.replace("v", "")) - return 1 - else - return 0 + const innerA = a.replace("v", ""); + const innerB = b.replace("v", ""); + + if (innerA.match(/[a-z]/) && !innerB.match(/[a-z]/)) return 1; + if (!innerA.match(/[a-z]/) && innerB.match(/[a-z]/)) return -1; + + const splittedInnerA = innerA.split(""); + const splittedInnerB = innerB.split(""); + + while (splittedInnerA.length && splittedInnerB.length) { + const charA = splittedInnerA.shift(); + const charB = splittedInnerB.shift(); + + if (charA === charB) continue; + else if (charA === ".") return 1; + else if (charB === ".") return -1; + else if (charA.match(/[0-9]/) && charB.match(/[0-9]/)) { + return Number(charB) - Number(charA); + } else { + if (charA < charB) return 1; + else if (charA > charB) return -1; + else return 0; + } + } + if (innerA < innerB) return 1; + else if (innerA > innerB) return -1; + else return 0; } if ((githubUser !== null && githubUser !== "") || (repo !== null && repo !== "")) { @@ -204,4 +222,4 @@ console.error("Invalid versioning configuration"); } -

    \ No newline at end of file +
    diff --git a/_static/videos/minecart-rgb.gif b/_static/videos/minecart-rgb.gif new file mode 100644 index 00000000..39ad4172 Binary files /dev/null and b/_static/videos/minecart-rgb.gif differ diff --git a/_static/videos/mo-lunar-lander-continuous.gif b/_static/videos/mo-lunar-lander-continuous.gif new file mode 100644 index 00000000..2051d754 Binary files /dev/null and b/_static/videos/mo-lunar-lander-continuous.gif differ diff --git a/citing/citing/index.html b/citing/citing/index.html index 0079bb78..587905c2 100644 --- a/citing/citing/index.html +++ b/citing/citing/index.html @@ -11,7 +11,7 @@ - + <no title> - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -401,8 +405,8 @@ - - + + diff --git a/community/community/index.html b/community/community/index.html index 5ac06a12..c1a5ea80 100644 --- a/community/community/index.html +++ b/community/community/index.html @@ -11,7 +11,7 @@ - + Community - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,10 +330,10 @@
    -

    Community#

    +

    Community

    If you want to help us out, reach us, or simply ask questions, you can join the Farama discord server here.

    -

    Acknowledgements#

    +

    Acknowledgements

    Aside from the main contributors, some people have also contributed to the project in various ways. We would like to thank them all for their contributions.

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Breakable-Bottles#

    +

    Breakable-Bottles

    -../../_images/breakable-bottles.gif +../../_images/breakable-bottles.gif +
    @@ -333,7 +338,7 @@

    Breakable-Bottles

    - + @@ -351,12 +356,12 @@

    Breakable-Bottles -

    Description#

    +

    Description

    This environment implements the problems UnbreakableBottles and BreakableBottles defined in Section 4.1.2 of the paper Potential-based multiobjective reinforcement learning approaches to low-impact agents for AI safety.

    -

    Action Space#

    +

    Action Space

    The action space is a discrete space with 3 actions:

    • 0: move left

    • @@ -365,17 +370,20 @@

      Action Space -

      Observation Space#

      +

      Observation Space

      The observation space is a dictionary with 4 keys:

      • location: the current location of the agent

      • bottles_carrying: the number of bottles the agent is currently carrying (0, 1 or 2)

      • -
      • bottles_delivered: the number of bottles the agent has delivered (0 or 1)

      • +
      • bottles_delivered: the number of bottles the agent has delivered (0, 1 or 2)

      • bottles_dropped: for each location, a boolean flag indicating if that location currently contains a bottle

      +

      Note that this observation space is different from that listed in the paper above. In the paper, bottles_delivered’s possible values are listed as (0 or 1), +rather than (0, 1 or 2). This is because the paper did not take the terminal state, in which 2 bottles have been delivered, into account when calculating +the observation space. As such, the observation space of this implementation is larger than specified in the paper, having 360 possible states instead of 240.

    -

    Reward Space#

    +

    Reward Space

    The reward space has 3 dimensions:

    • time penalty: -1 for each time step

    • @@ -384,15 +392,15 @@

      Reward Space -

      Starting State#

      +

      Starting State

      The agent starts at location 0, carrying no bottles, having delivered no bottles and having dropped no bottles.

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent has delivered 2 bottles.

    -

    Arguments#

    +

    Arguments

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    Classic Control#

    +

    Classic Control

    Multi-objective versions of classical Gymnasium’s environments.

  • Observation Space

    Dict(‘bottles_carrying’: Discrete(3), ‘bottles_delivered’: Discrete(2), ‘bottles_dropped’: MultiBinary(3), ‘location’: Discrete(5))

    Dict(‘bottles_carrying’: Discrete(3), ‘bottles_delivered’: Discrete(3), ‘bottles_dropped’: MultiBinary(3), ‘location’: Discrete(5))

    Reward Shape

    (3,)

    @@ -343,12 +347,27 @@

    Classic Control[time_penalty, reverse_penalty, forward_penalty]

    - + + + + + + + + + + + + + + + + - + @@ -356,6 +375,9 @@

    Classic ControlPranav Gupta's Dissertation

    +
    @@ -446,8 +468,8 @@

    Classic Control - + + diff --git a/environments/deep-sea-treasure-concave/index.html b/environments/deep-sea-treasure-concave/index.html index 151b812e..b9dc38bd 100644 --- a/environments/deep-sea-treasure-concave/index.html +++ b/environments/deep-sea-treasure-concave/index.html @@ -11,7 +11,7 @@ - + Deep-Sea-Treasure-Concave - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Deep-Sea-Treasure-Concave#

    +

    Deep-Sea-Treasure-Concave

    -../../_images/deep-sea-treasure-concave.gif +../../_images/deep-sea-treasure-concave.gif +
  • Classic Mountain Car env, but with extra penalties for the forward and reverse actions. From Vamplew et al. 2011.

    mo-mountaincarcontinuous-v0

    mo-mountaincar-3d-v0 **

    Continuous / Discrete

    [time_penalty, move_penalty, speed_objective]

    The forward and backward penalties have been merged into the move penalty and a speed objective has been introduced which gives the positive reward equivalent to the car’s speed at that time step.*

    mo-mountaincar-timemove-v0 **

    Continuous / Discrete

    [time_penalty, move_penalty]

    Class Mountain Car env but an extra penalty for moving backwards or forwards merged into a move penalty.

    mo-mountaincar-timespeed-v0 **

    Continuous / Discrete

    [time_penalty, speed_objective]

    Class Mountain Car env but an extra positive objective of speed which gives the positive reward equivalent to the car’s speed at that time step.*

    mo-mountaincarcontinuous-v0

    Continuous / Continuous

    [time_penalty, fuel_consumption_penalty]

    Continuous Mountain Car env, but with penalties for fuel consumption.

    mo-lunar-lander-v2

    mo-lunar-lander-v2

    Continuous / Discrete or Continuous

    [landed, shaped_reward, main_engine_fuel, side_engine_fuel]

    MO version of the LunarLander-v2 environment. Objectives defined similarly as in Hung et al. 2022.

    @@ -357,15 +362,15 @@

    Deep-Sea-Treasure-Concave -

    Description#

    +

    Description

    The Deep Sea Treasure environment is classic MORL problem in which the agent controls a submarine in a 2D grid world.

    -

    Observation Space#

    +

    Observation Space

    The observation space is a 2D discrete box with values in [0, 10] for the x and y coordinates of the submarine.

    -

    Action Space#

    +

    Action Space

    The actions is a discrete space where:

    • 0: up

    • @@ -375,7 +380,7 @@

      Action Space -

      Reward Space#

      +

      Reward Space

      The reward is 2-dimensional:

      • time penalty: -1 at each time step

      • @@ -383,22 +388,22 @@

        Reward Space -

        Starting State#

        +

        Starting State

        The starting state is always the same: (0, 0)

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent reaches a treasure.

    -

    Arguments#

    +

    Arguments

    • dst_map: the map of the deep sea treasure. Default is the convex map from Yang et al. (2019). To change, use mo_gymnasium.make("DeepSeaTreasure-v0", dst_map=CONCAVE_MAP | MIRRORED_MAP).

    • float_state: if True, the state is a 2D continuous box with values in [0.0, 1.0] for the x and y coordinates of the submarine.

    -

    Credits#

    +

    Credits

    The code was adapted from: Yang’s source. The background art is from https://ansimuz.itch.io/underwater-fantasy-pixel-art-environment. The submarine art was created with the assistance of DALL·E 2.

    @@ -517,8 +522,8 @@

    Credits#< - - + + diff --git a/environments/deep-sea-treasure-mirrored/index.html b/environments/deep-sea-treasure-mirrored/index.html index 26df938a..4ff35b44 100644 --- a/environments/deep-sea-treasure-mirrored/index.html +++ b/environments/deep-sea-treasure-mirrored/index.html @@ -11,7 +11,7 @@ - + Deep-Sea-Treasure-Mirrored - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Deep-Sea-Treasure-Mirrored#

    +

    Deep-Sea-Treasure-Mirrored

    -../../_images/deep-sea-treasure-mirrored.gif +../../_images/deep-sea-treasure-mirrored.gif +
  • @@ -357,15 +362,15 @@

    Deep-Sea-Treasure-Mirrored -

    Description#

    +

    Description

    The Deep Sea Treasure environment is classic MORL problem in which the agent controls a submarine in a 2D grid world.

    -

    Observation Space#

    +

    Observation Space

    The observation space is a 2D discrete box with values in [0, 10] for the x and y coordinates of the submarine.

    -

    Action Space#

    +

    Action Space

    The actions is a discrete space where:

    • 0: up

    • @@ -375,7 +380,7 @@

      Action Space -

      Reward Space#

      +

      Reward Space

      The reward is 2-dimensional:

      • time penalty: -1 at each time step

      • @@ -383,22 +388,22 @@

        Reward Space -

        Starting State#

        +

        Starting State

        The starting state is always the same: (0, 0)

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent reaches a treasure.

    -

    Arguments#

    +

    Arguments

    • dst_map: the map of the deep sea treasure. Default is the convex map from Yang et al. (2019). To change, use mo_gymnasium.make("DeepSeaTreasure-v0", dst_map=CONCAVE_MAP | MIRRORED_MAP).

    • float_state: if True, the state is a 2D continuous box with values in [0.0, 1.0] for the x and y coordinates of the submarine.

    -

    Credits#

    +

    Credits

    The code was adapted from: Yang’s source. The background art is from https://ansimuz.itch.io/underwater-fantasy-pixel-art-environment. The submarine art was created with the assistance of DALL·E 2.

    @@ -507,8 +512,8 @@

    Credits#< - - + + diff --git a/environments/deep-sea-treasure/index.html b/environments/deep-sea-treasure/index.html index 9a4f7271..37335e52 100644 --- a/environments/deep-sea-treasure/index.html +++ b/environments/deep-sea-treasure/index.html @@ -11,7 +11,7 @@ - + Deep-Sea-Treasure - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Deep-Sea-Treasure#

    +

    Deep-Sea-Treasure

    -../../_images/deep-sea-treasure.gif +../../_images/deep-sea-treasure.gif +
  • @@ -357,15 +362,15 @@

    Deep-Sea-Treasure -

    Description#

    +

    Description

    The Deep Sea Treasure environment is a classic MORL problem in which the agent controls a submarine in a 2D grid world.

    -

    Observation Space#

    +

    Observation Space

    The observation space is a 2D discrete box with values in [0, 10] for the x and y coordinates of the submarine.

    -

    Action Space#

    +

    Action Space

    The action space is discrete, where:

    • 0: up

    • @@ -375,7 +380,7 @@

      Action Space -

      Reward Space#

      +

      Reward Space

      The reward is 2-dimensional:

      • time penalty: -1 at each time step

      • @@ -383,22 +388,22 @@

        Reward Space -

        Starting State#

        +

        Starting State

        The starting state is always the same: (0, 0)

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent reaches a treasure.

    -

    Arguments#

    +

    Arguments

    • dst_map: the map of the deep sea treasure. Default is the convex map from Yang et al. (2019). To change, use mo_gymnasium.make("DeepSeaTreasure-v0", dst_map=CONCAVE_MAP | MIRRORED_MAP).

    • float_state: if True, the state is a 2D continuous box with values in [0.0, 1.0] for the x and y coordinates of the submarine.

    -

    Credits#

    +

    Credits

    The code was adapted from: Yang’s source. The background art is from https://ansimuz.itch.io/underwater-fantasy-pixel-art-environment. The submarine art was created with the assistance of DALL·E 2.

    @@ -505,8 +510,8 @@

    Credits#< - - + + diff --git a/environments/fishwood/index.html b/environments/fishwood/index.html index 4acbfed0..389cffcf 100644 --- a/environments/fishwood/index.html +++ b/environments/fishwood/index.html @@ -11,7 +11,7 @@ - + Fishwood - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,7 +321,7 @@
    -

    Fishwood#

    +

    Fishwood

  • @@ -329,8 +333,14 @@

    Fishwood

    - - + + + + + + + + @@ -348,12 +358,12 @@

    Fishwood

    Action Space

    Discrete(2)

    Observation Space

    Discrete(2)

    Observation Shape

    (1,)

    Observation High

    [1]

    Observation Low

    [0]

    Reward Shape

    (2,)

    -

    Description#

    +

    Description

    The FishWood environment is a simple MORL problem in which the agent controls a fisherman who can either fish or go collect wood. From Multi-objective Reinforcement Learning for the Expected Utility of the Return.

    -

    Observation Space#

    +

    Observation Space

    The observation space is a discrete space with two states:

    • 0: fishing

    • @@ -361,7 +371,7 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action space is discrete, where:

      • 0: go fishing

      • @@ -369,7 +379,7 @@

        Action Space -

        Reward Space#

        +

        Reward Space

        The reward is 2-dimensional:

        • 0: +1 if agent is in the woods, with woodproba probability, and 0 otherwise

        • @@ -377,22 +387,22 @@

          Reward Space -

          Starting State#

          +

          Starting State

          Agent starts in the woods

    -

    Termination#

    +

    Termination

    The episode ends after MAX_TS=200 steps

    -

    Arguments#

    +

    Arguments

    • fishproba: probability of catching a fish when fishing

    • woodproba: probability of collecting wood when in the woods

    -

    Credits#

    +

    Credits

    Code provided by Denis Steckelmacher

    @@ -497,8 +507,8 @@

    Credits#< - - + + diff --git a/environments/four-room/index.html b/environments/four-room/index.html index b246ebc8..19e87389 100644 --- a/environments/four-room/index.html +++ b/environments/four-room/index.html @@ -11,7 +11,7 @@ - + Four-Room - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Four-Room#

    +

    Four-Room

    -../../_images/four-room.gif +../../_images/four-room.gif +
    @@ -357,25 +362,25 @@

    Four-Room -

    Description#

    +

    Description

    A discretized version of the gridworld environment introduced in [1]. Here, an agent learns to collect shapes with positive reward, while avoiding those with negative reward, and then travel to a fixed goal. The gridworld is split into four rooms separated by walls with passage-ways.

    -

    References#

    +

    References

    [1] Barreto, André, et al. “Successor Features for Transfer in Reinforcement Learning.” NIPS. 2017.

    -

    Observation Space#

    +

    Observation Space

    The observation contains the 2D position of the agent in the gridworld, plus a binary vector indicating which items were collected.

    -

    Action Space#

    +

    Action Space

    The action space is discrete with 4 actions: left, up, right, down.

    -

    Reward Space#

    +

    Reward Space

    The reward is a 3-dimensional vector with the following components:

    • +1 if collected a blue square, else 0

    • @@ -384,21 +389,21 @@

      Reward Space -

      Starting State#

      +

      Starting State

      The agent starts in the lower left of the map.

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent reaches the goal state, G.

    -

    Arguments#

    +

    Arguments

    • maze: Array containing the gridworld map. See MAZE for an example.

    -

    Credits#

    +

    Credits

    Code adapted from: Mike Gimelfarb’s source.

    @@ -504,8 +509,8 @@

    Credits#< - - + + diff --git a/environments/fruit-tree/index.html b/environments/fruit-tree/index.html index 823036ca..413e6361 100644 --- a/environments/fruit-tree/index.html +++ b/environments/fruit-tree/index.html @@ -11,7 +11,7 @@ - + Fruit-Tree - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Fruit-Tree#

    +

    Fruit-Tree

    -../../_images/fruit-tree.gif +../../_images/fruit-tree.gif +
  • @@ -357,28 +362,28 @@

    Fruit-Tree -

    Description#

    +

    Description

    Full binary tree of depth d=5,6 or 7. Every leaf contains a fruit with a value for the nutrients Protein, Carbs, Fats, Vitamins, Minerals and Water. From Yang et al. 2019.

    -

    Observation Space#

    +

    Observation Space

    Discrete space of size 2^d-1, where d is the depth of the tree.

    -

    Action Space#

    +

    Action Space

    The agent can choose to go left or right at every node. The action space is therefore a discrete space of size 2.

    -

    Reward Space#

    +

    Reward Space

    Each leaf node contains a 6-dimensional vector containing the nutrients of the fruit. The agent receives a reward for each nutrient it collects.

    -

    Starting State#

    +

    Starting State

    The agent starts at the root node (0, 0).

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent reaches a leaf node.

    @@ -481,8 +486,8 @@

    Episode Termination - + + diff --git a/environments/grid-world/index.html b/environments/grid-world/index.html index c5a08d43..4a1e46d8 100644 --- a/environments/grid-world/index.html +++ b/environments/grid-world/index.html @@ -11,7 +11,7 @@ - + Grid-World - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    Grid-World#

    +

    Grid-World

    Environments with discrete observation spaces, e.g., grid-worlds.

  • @@ -471,8 +475,8 @@

    Grid-World - + + diff --git a/environments/minecart-deterministic/index.html b/environments/minecart-deterministic/index.html index 12f4babb..4b865583 100644 --- a/environments/minecart-deterministic/index.html +++ b/environments/minecart-deterministic/index.html @@ -8,10 +8,10 @@ - + - + Minecart-Deterministic - MO-Gymnasium Documentation @@ -215,10 +215,11 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    -

    @@ -357,12 +362,12 @@

    Minecart-Deterministic
    -

    Description#

    +

    Description

    Agent must collect two types of ores and minimize fuel consumption. From Abels et al. 2019.

    -

    Observation Space#

    +

    Observation Space

    The observation is a 7-dimensional vector containing the following information:

    • 2D position of the cart

    • @@ -373,7 +378,7 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action space is a discrete space with 6 actions:

      • 0: Mine

      • @@ -385,7 +390,7 @@

        Action Space -

        Reward Space#

        +

        Reward Space

        The reward is a 3D vector:

        • 0: Quantity of the first minerium that was retrieved to the base (sparse)

        • @@ -394,15 +399,15 @@

          Reward Space -

          Starting State#

          +

          Starting State

          The cart starts at the base on the upper left corner of the map.

    -

    Episode Termination#

    +

    Episode Termination

    The episode ends when the cart returns to the base.

    -

    Arguments#

    +

    Arguments

    • render_mode: The render mode to use. Can be “rgb_array” or “human”.

    image_observation: If True, the observation is an RGB image of the environment.

    • @@ -412,7 +417,7 @@

      Arguments -

      Credits#

      +

      Credits

      The code was refactored from Axel Abels’ source.

    @@ -422,8 +427,30 @@

    Credits#<

    @@ -357,12 +362,12 @@

    Minecart-Rgb -

    Description#

    +

    Description

    Agent must collect two types of ores and minimize fuel consumption. From Abels et al. 2019.

    -

    Observation Space#

    +

    Observation Space

    The observation is a 7-dimensional vector containing the following information:

    • 2D position of the cart

    • @@ -373,7 +378,7 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action space is a discrete space with 6 actions:

      • 0: Mine

      • @@ -385,7 +390,7 @@

        Action Space -

        Reward Space#

        +

        Reward Space

        The reward is a 3D vector:

        • 0: Quantity of the first minerium that was retrieved to the base (sparse)

        • @@ -394,15 +399,15 @@

          Reward Space -

          Starting State#

          +

          Starting State

          The cart starts at the base on the upper left corner of the map.

    -

    Episode Termination#

    +

    Episode Termination

    The episode ends when the cart returns to the base.

    -

    Arguments#

    +

    Arguments

    • render_mode: The render mode to use. Can be “rgb_array” or “human”.

    image_observation: If True, the observation is an RGB image of the environment.

    • @@ -412,7 +417,7 @@

      Arguments -

      Credits#

      +

      Credits

      The code was refactored from Axel Abels’ source.

    @@ -423,7 +428,19 @@

    Credits#<

    @@ -357,12 +362,12 @@

    Minecart

    -

    Description#

    +

    Description

    Agent must collect two types of ores and minimize fuel consumption. From Abels et al. 2019.

    -

    Observation Space#

    +

    Observation Space

    The observation is a 7-dimensional vector containing the following information:

    • 2D position of the cart

    • @@ -373,7 +378,7 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action space is a discrete space with 6 actions:

      • 0: Mine

      • @@ -385,7 +390,7 @@

        Action Space -

        Reward Space#

        +

        Reward Space

        The reward is a 3D vector:

        • 0: Quantity of the first minerium that was retrieved to the base (sparse)

        • @@ -394,15 +399,15 @@

          Reward Space -

          Starting State#

          +

          Starting State

          The cart starts at the base on the upper left corner of the map.

    -

    Episode Termination#

    +

    Episode Termination

    The episode ends when the cart returns to the base.

    -

    Arguments#

    +

    Arguments

    • render_mode: The render mode to use. Can be “rgb_array” or “human”.

    image_observation: If True, the observation is an RGB image of the environment.

    • @@ -412,7 +417,7 @@

      Arguments -

      Credits#

      +

      Credits

      The code was refactored from Axel Abels’ source.

    @@ -422,12 +427,12 @@

    Credits#<

  • Miscellaneous @@ -326,7 +330,7 @@
    -

    Miscellaneous#

    +

    Miscellaneous

    MO-Gymnasium also includes other miscellaneous multi-objective environments:

    @@ -381,7 +385,7 @@

    Miscellaneous - + @@ -390,7 +394,7 @@

    MiscellaneousMO-Lunar-Lander +
    MO-Lunar-Lander-Continuous
    @@ -451,8 +455,8 @@

    Miscellaneous - + + diff --git a/environments/mo-ant-2d/index.html b/environments/mo-ant-2d/index.html index b7a49d8b..62116594 100644 --- a/environments/mo-ant-2d/index.html +++ b/environments/mo-ant-2d/index.html @@ -11,7 +11,7 @@ - + MO-Ant-2D - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Ant-2D#

    +

    MO-Ant-2D

    -_static/videos/mo-ant-2d.gif +_static/videos/mo-ant-2d.gif +
  • @@ -357,12 +362,15 @@

    MO-Ant-2D -

    Description#

    +

    Description

    Multi-objective version of the AntEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘Ant-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-ant-v4’, cost_objective=False) +LinearReward(env, weight=np.array([1.0, 0.0]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 2- or 3-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Ant#

    +

    MO-Ant

    -../../_images/mo-ant.gif +../../_images/mo-ant.gif +
  • @@ -357,12 +362,15 @@

    MO-Ant#
    -

    Description#

    +

    Description

    Multi-objective version of the AntEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘Ant-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-ant-v4’, cost_objective=False) +LinearReward(env, weight=np.array([1.0, 0.0]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 2- or 3-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Halfcheetah#

    +

    MO-Halfcheetah

    -../../_images/mo-halfcheetah.gif +../../_images/mo-halfcheetah.gif +
  • @@ -357,12 +362,15 @@

    MO-Halfcheetah -

    Description#

    +

    Description

    Multi-objective version of the HalfCheetahEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘HalfCheetah-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-halfcheetah-v4’) +LinearReward(env, weight=np.array([1.0, 1.0]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 2-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Highway-Fast#

    +

    MO-Highway-Fast

    -../../_images/mo-highway-fast.gif +../../_images/mo-highway-fast.gif +
  • @@ -423,8 +428,8 @@

    MO-Highway-Fast - + + diff --git a/environments/mo-highway/index.html b/environments/mo-highway/index.html index 5996b505..19c60fda 100644 --- a/environments/mo-highway/index.html +++ b/environments/mo-highway/index.html @@ -8,10 +8,10 @@ - + - + MO-Highway - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Highway#

    +

    MO-Highway

    -../../_images/mo-highway.gif +../../_images/mo-highway.gif +
  • @@ -357,12 +362,12 @@

    MO-Highway -

    Description#

    +

    Description

    Multi-objective version of the HighwayEnv environment.

    See highway-env for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is 3-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Hopper-2D#

    +

    MO-Hopper-2D

    -_static/videos/mo-hopper-2d.gif +_static/videos/mo-hopper-2d.gif +
  • @@ -357,12 +362,15 @@

    MO-Hopper-2D -

    Description#

    +

    Description

    Multi-objective version of the HopperEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘Hopper-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-hopper-v4’, cost_objective=False) +LinearReward(env, weight=np.array([1.0, 0.0]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 3-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Hopper#

    +

    MO-Hopper

    -../../_images/mo-hopper.gif +../../_images/mo-hopper.gif +
  • @@ -357,12 +362,15 @@

    MO-Hopper -

    Description#

    +

    Description

    Multi-objective version of the HopperEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘Hopper-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-hopper-v4’, cost_objective=False) +LinearReward(env, weight=np.array([1.0, 0.0]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 3-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Humanoid#

    +

    MO-Humanoid

    -../../_images/mo-humanoid.gif +../../_images/mo-humanoid.gif +
  • @@ -357,12 +362,12 @@

    MO-Humanoid -

    Description#

    +

    Description

    Multi-objective version of the HumanoidEnv environment.

    See Gymnasium’s env for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is 2-dimensional:

    Environments

    -
    @@ -336,10 +341,10 @@

    MO-Lunar-Lander-Continuous

    - + - + @@ -351,18 +356,18 @@

    MO-Lunar-Lander-Continuous

    - +

    Observation High

    [1.5 1.5 5. 5. 3.14 5. 1. 1. ]

    [ 2.5 2.5 10. 10. 6.28 10. 1. 1. ]

    Observation Low

    [-1.5 -1.5 -5. -5. -3.14 -5. -0. -0. ]

    [ -2.5 -2.5 -10. -10. -6.28 -10. -0. -0. ]

    Reward Shape

    (4,)

    Import

    mo_gymnasium.make("mo-lunar-lander-continuous-v2")

    mo_gymnasium.make("mo-lunar-lander-continuous-v3")

    -

    Description#

    +

    Description

    Multi-objective version of the LunarLander environment.

    See Gymnasium’s env for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is 4-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Lunar-Lander#

    +

    MO-Lunar-Lander

    -../../_images/mo-lunar-lander.gif +../../_images/mo-lunar-lander.gif +
    @@ -336,10 +341,10 @@

    MO-Lunar-Lander

    - + - + @@ -351,18 +356,18 @@

    MO-Lunar-Lander

    - +

    Observation High

    [1.5 1.5 5. 5. 3.14 5. 1. 1. ]

    [ 2.5 2.5 10. 10. 6.28 10. 1. 1. ]

    Observation Low

    [-1.5 -1.5 -5. -5. -3.14 -5. -0. -0. ]

    [ -2.5 -2.5 -10. -10. -6.28 -10. -0. -0. ]

    Reward Shape

    (4,)

    Import

    mo_gymnasium.make("mo-lunar-lander-v2")

    mo_gymnasium.make("mo-lunar-lander-v3")

    -

    Description#

    +

    Description

    Multi-objective version of the LunarLander environment.

    See Gymnasium’s env for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is 4-dimensional:

    • 0: -100 if crash, +100 if lands successfully

    • @@ -378,12 +383,12 @@

      Reward Space - +
      Next
      -
      Miscellaneous
      +
      MO-Lunar-Lander-Continuous
      @@ -467,8 +472,8 @@

      Reward Space - + + diff --git a/environments/mo-mountaincar-3d/index.html b/environments/mo-mountaincar-3d/index.html new file mode 100644 index 00000000..35045fa1 --- /dev/null +++ b/environments/mo-mountaincar-3d/index.html @@ -0,0 +1,662 @@ + + + + + + + + + + + + + + + MO-Mountaincar-3D - MO-Gymnasium Documentation + + + + + + + + + +
      + +
      + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + + + + + + + + +
      + + +
      + + + + + +
      +
      +
      + + + + + Back to top + +
      + +
      + +
      +
      + +
      +

      MO-Mountaincar-3D

      +
      +_static/videos/mo-mountaincar-3d.gif + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

      Action Space

      Discrete(3)

      Observation Shape

      (2,)

      Observation High

      [0.6 0.07]

      Observation Low

      [-1.2 -0.07]

      Reward Shape

      (3,)

      Reward High

      [-1. 0. 1.1]

      Reward Low

      [-1. -1. 0.]

      Import

      mo_gymnasium.make("mo-mountaincar-3d-v0")

      +
      +

      A multi-objective version of the MountainCar environment, where the goal is to reach the top of the mountain.

      +

      See Gymnasium’s env for more information.

      +
      +

      Reward space:

      +

      By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.

      +
        +
      • time penalty: -1.0 for each time step

      • +
      • reverse penalty: -1.0 for each time step the action is 0 (reverse)

      • +
      • forward penalty: -1.0 for each time step the action is 2 (forward)

      • +
      +

      Alternatively, the reward can be changed with the following options:

      +
        +
      • add_speed_objective: Add an extra objective corresponding to the speed of the car.

      • +
      • remove_move_penalty: Remove the reverse and forward objectives.

      • +
      • merge_move_penalty: Merge reverse and forward penalties into a single penalty.

      • +
      +
      +
      + +
      +
      +
      + + +
      +
      + + +
      +
      + +
      +
      + +
      +
      + +
      +
      +
      + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/environments/mo-mountaincar-timemove/index.html b/environments/mo-mountaincar-timemove/index.html new file mode 100644 index 00000000..8e9b9c9d --- /dev/null +++ b/environments/mo-mountaincar-timemove/index.html @@ -0,0 +1,662 @@ + + + + + + + + + + + + + + + MO-Mountaincar-Timemove - MO-Gymnasium Documentation + + + + + + + + + +
      +
      +
      + +
      + +
      +
      + + +
      +
      +
      +
      + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + + + + + + + + +
      + + +
      + + + + + +
      +
      +
      + + + + + Back to top + +
      + +
      + +
      +
      + +
      +

      MO-Mountaincar-Timemove

      +
      +_static/videos/mo-mountaincar-timemove.gif + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

      Action Space

      Discrete(3)

      Observation Shape

      (2,)

      Observation High

      [0.6 0.07]

      Observation Low

      [-1.2 -0.07]

      Reward Shape

      (2,)

      Reward High

      [-1. 0.]

      Reward Low

      [-1. -1.]

      Import

      mo_gymnasium.make("mo-mountaincar-timemove-v0")

      +
      +

      A multi-objective version of the MountainCar environment, where the goal is to reach the top of the mountain.

      +

      See Gymnasium’s env for more information.

      +
      +

      Reward space:

      +

      By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.

      +
        +
      • time penalty: -1.0 for each time step

      • +
      • reverse penalty: -1.0 for each time step the action is 0 (reverse)

      • +
      • forward penalty: -1.0 for each time step the action is 2 (forward)

      • +
      +

      Alternatively, the reward can be changed with the following options:

      +
        +
      • add_speed_objective: Add an extra objective corresponding to the speed of the car.

      • +
      • remove_move_penalty: Remove the reverse and forward objectives.

      • +
      • merge_move_penalty: Merge reverse and forward penalties into a single penalty.

      • +
      +
      +
      + +
      +
      +
      + + +
      +
      + + +
      +
      + +
      +
      + +
      +
      + +
      +
      +
      + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/environments/mo-mountaincar-timespeed/index.html b/environments/mo-mountaincar-timespeed/index.html new file mode 100644 index 00000000..cb088892 --- /dev/null +++ b/environments/mo-mountaincar-timespeed/index.html @@ -0,0 +1,662 @@ + + + + + + + + + + + + + + + MO-Mountaincar-Timespeed - MO-Gymnasium Documentation + + + + + + + + + +
      +
      +
      + +
      + +
      +
      + + +
      +
      +
      +
      + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + + + + + + + + +
      + + +
      + + + + + +
      +
      +
      + + + + + Back to top + +
      + +
      + +
      +
      + +
      +

      MO-Mountaincar-Timespeed

      +
      +_static/videos/mo-mountaincar-timespeed.gif + +
      +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

      Action Space

      Discrete(3)

      Observation Shape

      (2,)

      Observation High

      [0.6 0.07]

      Observation Low

      [-1.2 -0.07]

      Reward Shape

      (2,)

      Reward High

      [-1. 1.1]

      Reward Low

      [-1. 0.]

      Import

      mo_gymnasium.make("mo-mountaincar-timespeed-v0")

      +
      +

      A multi-objective version of the MountainCar environment, where the goal is to reach the top of the mountain.

      +

      See Gymnasium’s env for more information.

      +
      +

      Reward space:

      +

      By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.

      +
        +
      • time penalty: -1.0 for each time step

      • +
      • reverse penalty: -1.0 for each time step the action is 0 (reverse)

      • +
      • forward penalty: -1.0 for each time step the action is 2 (forward)

      • +
      +

      Alternatively, the reward can be changed with the following options:

      +
        +
      • add_speed_objective: Add an extra objective corresponding to the speed of the car.

      • +
      • remove_move_penalty: Remove the reverse and forward objectives.

      • +
      • merge_move_penalty: Merge reverse and forward penalties into a single penalty.

      • +
      +
      +
      + +
      +
      +
      + + +
      +
      + + +
      +
      + +
      +
      + +
      +
      + +
      +
      +
      + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/environments/mo-mountaincar/index.html b/environments/mo-mountaincar/index.html index ed96e5c4..21fefc46 100644 --- a/environments/mo-mountaincar/index.html +++ b/environments/mo-mountaincar/index.html @@ -11,7 +11,7 @@ - + MO-Mountaincar - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
    • Install
    • API
    • Wrappers
    • +
    • Vector Wrappers
    • MORL Baselines

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Mountaincar#

    +

    MO-Mountaincar

    -../../_images/mo-mountaincar.gif +../../_images/mo-mountaincar.gif +
    @@ -359,13 +364,19 @@

    MO-MountaincarGymnasium’s env for more information.

    -

    Reward space:#

    -

    The reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.

    +

    Reward space:

    +

    By default, the reward space is a 3D vector containing the time penalty, and penalties for reversing and going forward.

    • time penalty: -1.0 for each time step

    • reverse penalty: -1.0 for each time step the action is 0 (reverse)

    • forward penalty: -1.0 for each time step the action is 2 (forward)

    +

    Alternatively, the reward can be changed with the following options:

    +
      +
    • add_speed_objective: Add an extra objective corresponding to the speed of the car.

    • +
    • remove_move_penalty: Remove the reverse and forward objectives.

    • +
    • merge_move_penalty: Merge reverse and forward penalties into a single penalty.

    • +
    @@ -462,8 +473,8 @@

    Reward space: - + + diff --git a/environments/mo-mountaincarcontinuous/index.html b/environments/mo-mountaincarcontinuous/index.html index 181ab110..0132d200 100644 --- a/environments/mo-mountaincarcontinuous/index.html +++ b/environments/mo-mountaincarcontinuous/index.html @@ -11,7 +11,7 @@ - + MO-Mountaincarcontinuous - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Mountaincarcontinuous#

    +

    MO-Mountaincarcontinuous

    -../../_images/mo-mountaincarcontinuous.gif +../../_images/mo-mountaincarcontinuous.gif +
  • @@ -359,7 +364,7 @@

    MO-Mountaincarcontinuoussource for more information.

    -

    Reward space:#

    +

    Reward space:

    The reward space is a 2D vector containing the time penalty and the fuel reward.

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Reacher#

    +

    MO-Reacher

    -../../_images/mo-reacher.gif +../../_images/mo-reacher.gif +
  • @@ -357,11 +362,11 @@

    MO-Reacher -

    Description#

    -

    Mujoco version of mo-reacher-v0, based on Reacher-v4 environment.

    +

    Description

    +

    Multi-objective version of the Reacher-v4 environment.

    -

    Observation Space#

    +

    Observation Space

    The observation is 6-dimensional and contains:

    • sin and cos of the angles of the central and elbow joints

    • @@ -369,11 +374,11 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action space is discrete and contains the 3^2=9 possible actions based on applying positive (+1), negative (-1) or zero (0) torque to each of the two joints.

    -

    Reward Space#

    +

    Reward Space

    The reward is 4-dimensional and is defined based on the distance of the tip of the arm and the four target locations. For each i={1,2,3,4} it is computed as:

    @@ -357,12 +362,12 @@

    MO-Supermario -

    Description#

    +

    Description

    Multi-objective version of the SuperMarioBro environment.

    See gym-super-mario-bros for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is a 5-dimensional vector:

    • 0: How far Mario moved in the x position

    • @@ -373,7 +378,7 @@

      Reward Space -

      Episode Termination#

      +

      Episode Termination

      The episode terminates when Mario dies or reaches the flag.

    @@ -473,8 +478,8 @@

    Episode Termination - + + diff --git a/environments/mo-swimmer/index.html b/environments/mo-swimmer/index.html index 1debafa9..48809779 100644 --- a/environments/mo-swimmer/index.html +++ b/environments/mo-swimmer/index.html @@ -11,7 +11,7 @@ - + MO-Swimmer - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Swimmer#

    +

    MO-Swimmer

    -../../_images/mo-swimmer.gif +../../_images/mo-swimmer.gif +
  • @@ -357,12 +362,15 @@

    MO-Swimmer -

    Description#

    +

    Description

    Multi-objective version of the SwimmerEnv environment.

    See Gymnasium’s env for more information.

    +

    The original Gymnasium’s ‘Swimmer-v4’ is recovered by the following linear scalarization:

    +

    env = mo_gym.make(‘mo-swimmer-v4’) +LinearReward(env, weight=np.array([1.0, 1e-4]))

    -

    Reward Space#

    +

    Reward Space

    The reward is 2-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    MO-Walker2D#

    +

    MO-Walker2D

    -../../_images/mo-walker2d.gif +../../_images/mo-walker2d.gif +
  • @@ -357,12 +362,12 @@

    MO-Walker2D -

    Description#

    +

    Description

    Multi-objective version of the Walker2dEnv environment.

    See Gymnasium’s env for more information.

    -

    Reward Space#

    +

    Reward Space

    The reward is 2-dimensional:

    Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    MuJoCo#

    +

    MuJoCo

    Multi-objective versions of Mujoco environments.

  • @@ -466,8 +470,8 @@

    MuJoCo# - + + diff --git a/environments/resource-gathering/index.html b/environments/resource-gathering/index.html index ec85bd29..11289649 100644 --- a/environments/resource-gathering/index.html +++ b/environments/resource-gathering/index.html @@ -11,7 +11,7 @@ - + Resource-Gathering - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Resource-Gathering#

    +

    Resource-Gathering

    -../../_images/resource-gathering.gif +../../_images/resource-gathering.gif +
  • @@ -357,12 +362,12 @@

    Resource-Gathering -

    Description#

    +

    Description

    From “Barrett, Leon & Narayanan, Srini. (2008). Learning all optimal policies with multiple criteria. Proceedings of the 25th International Conference on Machine Learning. 41-47. 10.1145/1390156.1390162.”

    -

    Observation Space#

    +

    Observation Space

    The observation is discrete and consists of 4 elements:

    • 0: The x coordinate of the agent

    • @@ -372,7 +377,7 @@

      Observation Space -

      Action Space#

      +

      Action Space

      The action is discrete and consists of 4 elements:

      • 0: Move up

      • @@ -382,7 +387,7 @@

        Action Space -

        Reward Space#

        +

        Reward Space

        The reward is 3-dimensional:

        • 0: -1 if killed by an enemy, else 0

        • @@ -391,15 +396,15 @@

          Reward Space -

          Starting State#

          +

          Starting State

          The agent starts at the home position with no gold or diamond.

    -

    Episode Termination#

    +

    Episode Termination

    The episode terminates when the agent returns home, or when the agent is killed by an enemy.

    -

    Credits#

    +

    Credits

    The home asset is from https://limezu.itch.io/serenevillagerevamped The gold, enemy and gem assets are from https://ninjikin.itch.io/treasure

    @@ -504,8 +509,8 @@

    Credits#< - - + + diff --git a/environments/water-reservoir/index.html b/environments/water-reservoir/index.html index a3b7dbb0..1168e99d 100644 --- a/environments/water-reservoir/index.html +++ b/environments/water-reservoir/index.html @@ -11,7 +11,7 @@ - + Water-Reservoir - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -317,9 +321,10 @@
    -

    Water-Reservoir#

    +

    Water-Reservoir

    -../../_images/water-reservoir.gif +../../_images/water-reservoir.gif +
  • @@ -357,7 +362,7 @@

    Water-Reservoir -

    Description#

    +

    Description

    A Water reservoir environment. The agent executes a continuous action, corresponding to the amount of water released by the dam.

    A. Castelletti, F. Pianosi and M. Restelli, “Tree-based Fitted Q-iteration for Multi-Objective Markov Decision problems,” @@ -365,16 +370,16 @@

    Description -

    Observation Space#

    +

    Observation Space

    The observation is a float corresponding to the current level of the reservoir.

    -

    Action Space#

    +

    Action Space

    The action is a float corresponding to the amount of water released by the dam. If normalized_action is True, the action is a float between 0 and 1 corresponding to the percentage of water released by the dam.

    -

    Reward Space#

    +

    Reward Space

    There are up to 4 rewards:

    • cost due to excess level wrt a flooding threshold (upstream)

    • @@ -385,11 +390,11 @@

      Reward Space -

      Starting State#

      +

      Starting State

      The reservoir is initialized with a random level between 0 and 160.

    -

    Arguments#

    +

    Arguments

    - render_mode: The render mode to use. Can be 'human', 'rgb_array' or 'ansi'.
     - time_limit: The maximum number of steps until the episode is truncated.
     - nO: The number of objectives to use. Can be 2, 3 or 4.
    @@ -400,7 +405,7 @@ 

    Arguments -

    Credits#

    +

    Credits

    Code from: Mathieu Reymond. Ported from: @@ -508,8 +513,8 @@

    Credits#< - - + + diff --git a/examples/citation/index.html b/examples/citation/index.html index baae2629..89d6c0c3 100644 --- a/examples/citation/index.html +++ b/examples/citation/index.html @@ -11,7 +11,7 @@ - + Citation - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    Citation#

    +

    Citation

    If you use this repository in your research, please cite:

  • Miscellaneous @@ -326,7 +330,7 @@
  • +
    Vector Wrappers
    @@ -416,8 +420,8 @@

    MORL Baselines - + + diff --git a/examples/publications/index.html b/examples/publications/index.html index 679e9a25..04305791 100644 --- a/examples/publications/index.html +++ b/examples/publications/index.html @@ -11,7 +11,7 @@ - + List of Publications - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    List of Publications#

    +

    List of Publications

    MO-Gymnasium (formerly MO-Gym) was first published in:

    Environments

    @@ -232,11 +233,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -323,7 +327,7 @@

    Index

    L

  • @@ -333,15 +337,21 @@

    L

    M

    @@ -412,8 +422,8 @@

    M

    - - + + diff --git a/index.html b/index.html index 899ecde3..6b7864c4 100644 --- a/index.html +++ b/index.html @@ -11,7 +11,7 @@ - + MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -344,12 +348,13 @@

    -MO-HalfCheetah gif +MO-HalfCheetah gif +

    MO-Gymnasium is an open source Python library for developing and comparing multi-objective reinforcement learning algorithms by providing a standard API to communicate between learning algorithms and environments, as well as a standard set of environments compliant with that API. Essentially, the environments follow the standard Gymnasium API, but return vectorized rewards as numpy arrays.

    The documentation website is at mo-gymnasium.farama.org, and we have a public discord server (which we also use to coordinate development work) that you can join here: https://discord.gg/bnJ6kubTg6.

    -

    API#

    +

    API

    As for Gymnasium, the MO-Gymnasium API models environments as simple Python env classes. Creating environment instances and interacting with them is very simple - here’s an example using the “minecart-v0” environment:

    import gymnasium as gym
     import mo_gymnasium as mo_gym
    @@ -363,14 +368,14 @@ 

    API#

    next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs)) # Optionally, you can scalarize the reward function with the LinearReward wrapper -env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2])) +env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))

    For details on multi-objective MDP’s (MOMDP’s) and other MORL definitions, see A practical guide to multi-objective reinforcement learning and planning.

    You can also check more examples in this colab notebook! MO-Gym Demo in Colab

    -

    Install#

    +

    Install

    To install MO-Gymnasium, use:

    -

    Citing#

    +

    Citing

    If you use this repository in your research, please cite:

    @inproceedings{felten_toolkit_2023,
     	author = {Felten, Florian and Alegre, Lucas N. and Now{\'e}, Ann and Bazzan, Ana L. C. and Talbi, El Ghazali and Danoy, Gr{\'e}goire and Silva, Bruno C. {\relax da}},
    @@ -454,8 +459,8 @@ 

    Citing# - + + diff --git a/introduction/api/index.html b/introduction/api/index.html index 9f36acf2..91ab4264 100644 --- a/introduction/api/index.html +++ b/introduction/api/index.html @@ -11,7 +11,7 @@ - + API - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    API#

    +

    API

    As for Gymnasium, the MO-Gymnasium API models environments as simple Python env classes. Creating environment instances and interacting with them is very simple - here’s an example using the “minecart-v0” environment:

    import gymnasium as gym
     import mo_gymnasium as mo_gym
    @@ -340,7 +344,7 @@ 

    API#

    next_obs, vector_reward, terminated, truncated, info = env.step(your_agent.act(obs)) # Optionally, you can scalarize the reward function with the LinearReward wrapper -env = mo_gym.LinearReward(env, weight=np.array([0.8, 0.2, 0.2])) +env = mo_gym.wrappers.LinearReward(env, weight=np.array([0.8, 0.2, 0.2]))

    For details on multi-objective MDP’s (MOMDP’s) and other MORL definitions, see A practical guide to multi-objective reinforcement learning and planning.

    @@ -421,8 +425,8 @@

    API#

    - - + + diff --git a/introduction/install/index.html b/introduction/install/index.html index 549b43b3..d09f6d3b 100644 --- a/introduction/install/index.html +++ b/introduction/install/index.html @@ -11,7 +11,7 @@ - + Install - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,7 +330,7 @@
    -

    Install#

    +

    Install

    To install MO-Gymnasium, use:

  • Miscellaneous @@ -326,9 +330,54 @@
    -

    Release Notes#

    +

    Release Notes

    +
    +

    v1.2.0: MO-Gymnasium 1.2.0 Release: Update Gymnasium to v1.0.0, New Mountaincar Environments, Documentation and Test Improvements, and more

    +

    Released on 2024-10-16 - GitHub - PyPI

    +

    Breaking Changes

    +
      +
    • Similar to Gymnasium v1.0, VecEnvs now differ from normal Envs. The associated wrappers also differ. See Gymnasium 1.0.0 release notes.
    • +
    • Wrappers have been moved to their wrappers subpackage, e.g., from mo_gymnasium import MORecordEpisodeStatistics -> from mo_gymnasium.wrappers import MORecordEpisodeStatistics. Vector wrappers can be found under mo-gymnasium.wrappers.vector. See the tests/ folder or our documentation for example usage.
    • +
    +

    Environments

    + +

    Documentation and Tests

    + +

    Bug Fixes

    + +

    New Contributors

    + +

    Full Changelog: v1.1.0...v1.2.0

    -

    v1.1.0: MO-Gymnasium 1.1.0 Release: New MuJoCo environments, Mirrored Deep Sea Treasure, Fruit Tree rendering, and more#

    +

    v1.1.0: MO-Gymnasium 1.1.0 Release: New MuJoCo environments, Mirrored Deep Sea Treasure, Fruit Tree rendering, and more

    Released on 2024-03-11 - GitHub - PyPI

    Environments

      @@ -352,7 +401,7 @@

      Bug fixes

    Full Changelog: v1.0.1...v1.1.0

    -

    v1.0.1: MO-Gymnasium 1.0.1 Release: Support Gymnasium 0.29, breakable-bottles pygame render, and more#

    +

    v1.0.1: MO-Gymnasium 1.0.1 Release: Support Gymnasium 0.29, breakable-bottles pygame render, and more

    Released on 2023-08-24 - GitHub - PyPI

    Environments

      @@ -379,7 +428,7 @@

      Bug fixes

    Full Changelog: v1.0.0...v1.0.1

    -

    v1.0.0: MO-Gymnasium becomes mature#

    +

    v1.0.0: MO-Gymnasium becomes mature

    Released on 2023-06-12 - GitHub - PyPI

    MO-Gymnasium 1.0.0 Release Notes

    We are thrilled to introduce the mature release of MO-Gymnasium, a standardized API and collection of environments designed for Multi-Objective Reinforcement Learning (MORL).

    @@ -391,7 +440,7 @@

    MO-Gymnasium 1.0.0 Release Notes

    reward = forward_reward - ctrl_cost

    With MORL, users have the flexibility to determine the compromises they desire based on their preferences for each objective. Consequently, the environments in MO-Gymnasium do not have predefined weights. Thus, MO-Gymnasium extends the capabilities of Gymnasium to the multi-objective setting, where the agents receives a vectorial reward.

    For example, here is an illustration of the multiple policies learned by an MORL agent for the mo-halfcheetah domain, balancing between saving battery and speed:

    - +

    This release marks the first mature version of MO-Gymnasium within Farama, indicating that the API is stable, and we have achieved a high level of quality in this library.

    API

    v0.3.4...v1.0.0

    -

    v0.3.4: MO-Gymnasium 0.3.4 Release: Known Pareto Front, improved renders and documentation#

    +

    v0.3.4: MO-Gymnasium 0.3.4 Release: Known Pareto Front, improved renders and documentation

    Released on 2023-03-14 - GitHub - PyPI

    Changelogs

    Environments

    @@ -470,7 +519,7 @@

    New Contributors

    Full Changelog: v0.3.3...0.3.4

    -

    v0.3.3: MO-Gymnasium 0.3.3 Release: Policy Evaluation bug fix, better documentation page#

    +

    v0.3.3: MO-Gymnasium 0.3.3 Release: Policy Evaluation bug fix, better documentation page

    Released on 2023-02-13 - GitHub - PyPI

    New improvements/features

      @@ -489,7 +538,7 @@

      Documentation

    Full Changelog: 0.3.2...v0.3.3

    -

    MO-Gymnasium 0.3.2 Release: Bug fixes, improved webpage#

    +

    MO-Gymnasium 0.3.2 Release: Bug fixes, improved webpage

    Released on 2023-02-03 - GitHub - PyPI

    Bug fixes

      @@ -502,7 +551,7 @@

      Documentation

    Full Changelog: 0.3.1...0.3.2

    -

    MO-Gymnasium 0.3.1 Release: Improved documentation and MuJoco MO-Reacher environment#

    +

    MO-Gymnasium 0.3.1 Release: Improved documentation and MuJoco MO-Reacher environment

    Released on 2023-02-02 - GitHub - PyPI

    This minor release adds "mo-reacher-v4", a MuJoco version of the Reacher environment, fixes a bug in Lunar Lander and improves the library documentation.

    Environments

    @@ -521,7 +570,7 @@

    Bug Fixes

    Full Changelog: 0.3.0...0.3.1

    -

    MO-Gymnasium 0.3.0 Release: Migrate to Gymnasium#

    +

    MO-Gymnasium 0.3.0 Release: Migrate to Gymnasium

    Released on 2023-01-23 - GitHub - PyPI

    This release marks our first release as part of the Farama Foundation. Benefitting the Farama structure, this library should reach a higher level of quality and more integration with the tools from the RL community.

    Breaking changes

    @@ -544,7 +593,7 @@

    Community

    Full Changelog: 0.2.1...0.3.0

    -

    0.2.1#

    +

    0.2.1

    Released on 2022-12-09 - GitHub - PyPI

    • 5 new environments: fishwood-v0 (ESR), mo-MountainCarContinuous-v0, water-reservoir-v0, mo-highway-v0 and mo-highway-fast-v0;
    • @@ -554,15 +603,15 @@

      0.2.1 -

      0.2.0#

      +

      0.2.0

      Released on 2022-09-25 - GitHub - PyPI

      Support for new Gym>=0.26 API

    -

    0.1.2#

    +

    0.1.2

    Released on 2022-09-25 - GitHub - PyPI

    -

    0.1.1#

    +

    0.1.1

    Released on 2022-08-24 - GitHub - PyPI

    @@ -615,6 +664,7 @@

    0.1.1

  • Miscellaneous @@ -389,8 +393,8 @@ - - + + diff --git a/searchindex.js b/searchindex.js index c35fa381..9ee85872 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["404", "README", "citing/citing", "community/community", "environments/breakable-bottles", "environments/classical", "environments/deep-sea-treasure", "environments/deep-sea-treasure-concave", "environments/deep-sea-treasure-mirrored", "environments/fishwood", "environments/four-room", "environments/fruit-tree", "environments/grid-world", "environments/minecart", "environments/minecart-deterministic", "environments/minecart-rgb", "environments/misc", "environments/mo-ant", "environments/mo-ant-2d", "environments/mo-halfcheetah", "environments/mo-highway", "environments/mo-highway-fast", "environments/mo-hopper", "environments/mo-hopper-2d", "environments/mo-humanoid", "environments/mo-lunar-lander", "environments/mo-lunar-lander-continuous", "environments/mo-mountaincar", "environments/mo-mountaincarcontinuous", "environments/mo-reacher", "environments/mo-supermario", "environments/mo-swimmer", "environments/mo-walker2d", "environments/mujoco", "environments/resource-gathering", "environments/water-reservoir", "examples/citation", "examples/morl_baselines", "examples/publications", "index", "introduction/api", "introduction/install", "release_notes", "tutorials/custom_env", "wrappers/wrappers"], "filenames": ["404.md", "README.md", "citing/citing.md", "community/community.md", "environments/breakable-bottles.md", "environments/classical.md", "environments/deep-sea-treasure.md", "environments/deep-sea-treasure-concave.md", "environments/deep-sea-treasure-mirrored.md", "environments/fishwood.md", "environments/four-room.md", "environments/fruit-tree.md", "environments/grid-world.md", "environments/minecart.md", "environments/minecart-deterministic.md", "environments/minecart-rgb.md", "environments/misc.md", "environments/mo-ant.md", "environments/mo-ant-2d.md", 
"environments/mo-halfcheetah.md", "environments/mo-highway.md", "environments/mo-highway-fast.md", "environments/mo-hopper.md", "environments/mo-hopper-2d.md", "environments/mo-humanoid.md", "environments/mo-lunar-lander.md", "environments/mo-lunar-lander-continuous.md", "environments/mo-mountaincar.md", "environments/mo-mountaincarcontinuous.md", "environments/mo-reacher.md", "environments/mo-supermario.md", "environments/mo-swimmer.md", "environments/mo-walker2d.md", "environments/mujoco.md", "environments/resource-gathering.md", "environments/water-reservoir.md", "examples/citation.md", "examples/morl_baselines.md", "examples/publications.md", "index.md", "introduction/api.md", "introduction/install.md", "release_notes.md", "tutorials/custom_env.md", "wrappers/wrappers.md"], "titles": ["404 - Page Not Found", "MO-Gymnasium documentation", "<no title>", "Community", "Breakable-Bottles", "Classic Control", "Deep-Sea-Treasure", "Deep-Sea-Treasure-Concave", "Deep-Sea-Treasure-Mirrored", "Fishwood", "Four-Room", "Fruit-Tree", "Grid-World", "Minecart", "Minecart-Deterministic", "Minecart-Rgb", "Miscellaneous", "MO-Ant", "MO-Ant-2D", "MO-Halfcheetah", "MO-Highway", "MO-Highway-Fast", "MO-Hopper", "MO-Hopper-2D", "MO-Humanoid", "MO-Lunar-Lander", "MO-Lunar-Lander-Continuous", "MO-Mountaincar", "MO-Mountaincarcontinuous", "MO-Reacher", "MO-Supermario", "MO-Swimmer", "MO-Walker2D", "MuJoCo", "Resource-Gathering", "Water-Reservoir", "Citation", "MORL Baselines", "List of Publications", "API", "API", "Install", "Release Notes", "Creating a custom environment", "Wrappers"], "terms": {"thi": [1, 4, 36, 39, 40, 41, 42, 43, 44], "folder": 1, "contain": [1, 4, 10, 11, 12, 13, 14, 15, 27, 28, 29, 37], "For": [1, 29, 39, 40, 42, 44], "more": [1, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 39, 40], "inform": [1, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 36, 39], "about": 1, "how": [1, 13, 14, 15, 30, 37], "contribut": [1, 3, 4, 42], 
"go": [1, 9, 11, 22, 23, 27], "our": [1, 42, 43], "md": [1, 42], "instal": 1, "requir": 1, "packag": [1, 42], "your": [1, 36, 39], "fork": 1, "pip": [1, 39, 41], "r": [1, 44], "doc": [1, 42], "txt": 1, "e": [1, 2, 4, 6, 7, 8, 12, 28, 36, 39, 42], "To": [1, 6, 7, 8, 39, 41], "onc": 1, "cd": 1, "make": [1, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 40, 42, 43, 44], "dirhtml": 1, "rebuild": 1, "automat": [1, 42, 43], "everi": [1, 11, 12], "time": [1, 4, 6, 7, 8, 12, 13, 14, 15, 16, 27, 28, 30, 44], "chang": [1, 6, 7, 8, 42], "i": [1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 37, 39, 40, 42, 43, 44], "made": [1, 42], "sphinx": 1, "autobuild": 1, "b": 1, "_build": 1, "mo": [2, 3, 5, 13, 14, 15, 16, 33, 37, 38, 39, 40, 41, 43, 44], "gymnasium": [2, 5, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 31, 32, 37, 38, 39, 40, 41, 43, 44], "formerli": [2, 38], "gym": [2, 30, 38, 39, 40, 42], "appear": 2, "first": [2, 13, 14, 15, 35, 38, 42, 44], "follow": [2, 10, 13, 14, 15, 39, 40, 42, 43], "workshop": [2, 38], "public": [2, 39], "inproceed": [2, 36, 39], "alegr": [2, 12, 36, 38, 39], "2022bnaic": 2, "author": [2, 36, 39], "luca": [2, 36, 39], "n": [2, 36, 39], "florian": [2, 36, 39], "felten": [2, 12, 36, 38, 39], "el": [2, 36, 39], "ghazali": [2, 36, 39], "talbi": [2, 36, 39], "gr": [2, 36, 39], "goir": [2, 36, 39], "danoi": [2, 36, 39], "ann": [2, 36, 39], "now": [2, 36, 39, 42], "ana": [2, 36, 39], "l": [2, 36, 39, 44], "c": [2, 36, 39], "bazzan": [2, 36, 39], "bruno": [2, 36, 39], "relax": [2, 36, 39], "da": [2, 36, 39], "silva": [2, 36, 39], "titl": [2, 36, 39], "A": [2, 4, 10, 16, 17, 18, 21, 27, 28, 35, 36, 38, 39, 40, 44], "librari": [2, 38, 39, 42], "multi": [2, 5, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 35, 36, 38, 39, 40, 42, 43], "object": [2, 5, 9, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 30, 31, 32, 33, 35, 36, 38, 39, 40, 42, 43], "reinforc": [2, 4, 9, 10, 36, 38, 39, 40, 42], "learn": [2, 4, 9, 10, 34, 36, 38, 39, 40, 42], "environ": [2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 44], "booktitl": [2, 36, 39], "proceed": [2, 34, 36, 39], "34th": 2, "benelux": 2, "confer": [2, 34, 35, 36, 39], "artifici": 2, "intellig": 2, "bnaic": [2, 38], "benelearn": 2, "2022": [2, 5, 12, 38, 42], "year": [2, 36, 39], "If": [3, 13, 14, 15, 17, 18, 22, 23, 35, 36, 39, 43], "you": [3, 36, 39, 40, 41, 42, 43], "want": 3, "help": 3, "u": 3, "out": [3, 35], "reach": [3, 6, 7, 8, 10, 11, 12, 16, 27, 28, 30, 42], "simpli": 3, "ask": 3, "question": 3, "can": [3, 4, 9, 11, 13, 14, 15, 35, 39, 40, 41, 42], "join": [3, 39], "farama": [3, 13, 14, 15, 39, 42], "discord": [3, 39], "server": [3, 39], "here": [3, 10, 39, 40, 42], "asid": 3, "from": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 34, 35, 42, 43, 44], "main": [3, 13, 14, 15, 25, 26], "contributor": [3, 42], "some": [3, 39, 41, 43], "peopl": 3, "have": [3, 4, 12, 39, 42], "also": [3, 16, 39, 40, 42], "project": 3, "variou": [3, 37], "wai": [3, 10], "we": [3, 39, 42, 43], "would": 3, "like": [3, 39, 41, 44], "thank": 3, "them": [3, 4, 39, 40], "all": [3, 17, 18, 34, 39, 41, 42, 44], "The": [3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 42, 43], "minecart": [3, 16, 39, 40, 42], "v0": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 27, 28, 29, 30, 33, 34, 35, 39, 40], "env": [3, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 31, 32, 33, 39, 40, 42, 43, 44], "refactor": [3, 13, 14, 15], "http": [3, 4, 6, 7, 8, 13, 14, 15, 34, 35, 39], "github": [3, 13, 14, 15, 42], "com": [3, 13, 14, 15], "axelabel": 3, "dynmorl": 3, "deep": [3, 12], "sea": [3, 12], "treasur": [3, 4, 12, 34], "fruit": [3, 12], "tree": [3, 12, 35], 
"supermario": [3, 16], "ar": [3, 4, 13, 14, 15, 16, 34, 35, 42, 43, 44], "base": [3, 4, 13, 14, 15, 29, 33, 35, 38, 42], "runzheyang": 3, "morl": [3, 6, 7, 8, 9, 39, 40, 42], "four": [3, 12, 29], "room": [3, 12], "mike": [3, 10], "gimelfarb": [3, 10], "successor": [3, 10], "featur": [3, 10, 42], "transfer": [3, 10], "fishwood": [3, 12, 42], "code": [3, 6, 7, 8, 9, 10, 13, 14, 15, 35, 42], "wa": [3, 4, 6, 7, 8, 13, 14, 15, 38, 42], "provid": [3, 9, 39, 42], "deni": [3, 9], "steckelmach": [3, 9], "conor": 3, "f": [3, 35], "hay": 3, "water": [3, 11, 12, 16, 42], "reservoir": [3, 16, 42], "mathieu": [3, 35], "reymond": [3, 35], "discret": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 25, 27, 29, 30, 33, 34, 42], "3": [4, 6, 7, 8, 10, 13, 14, 15, 17, 18, 20, 21, 22, 23, 25, 26, 27, 29, 30, 34, 35], "dict": 4, "bottles_carri": 4, "bottles_deliv": [4, 12], "2": [4, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 40], "bottles_drop": 4, "multibinari": 4, "locat": [4, 12, 29], "5": [4, 11, 12, 13, 14, 15, 20, 21, 25, 26, 30, 34, 42], "shape": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 43, 44], "high": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 42], "0": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 40, 44], "50": [4, 42], "low": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35], "inf": [4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 30, 31, 32, 35], "1": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 44], "import": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 40, 42, 43], "mo_gymnasium": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 39, 40, 42, 43, 44], "implement": [4, 37, 43], "problem": [4, 6, 7, 8, 9, 35, 42], "unbreakablebottl": 4, "breakablebottl": 4, "defin": [4, 5, 16, 29, 43], "section": 4, "4": [4, 6, 7, 8, 10, 13, 14, 15, 24, 25, 26, 29, 30, 34, 35], "paper": 4, "potenti": [4, 12, 42], "multiobject": 4, "approach": 4, "impact": 4, "agent": [4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 34, 35, 42], "ai": 4, "safeti": 4, "move": [4, 30, 31, 34, 42], "left": [4, 6, 7, 8, 10, 11, 13, 14, 15, 34], "right": [4, 6, 7, 8, 10, 11, 13, 14, 15, 20, 34], "pick": 4, "up": [4, 6, 7, 8, 10, 34, 35], "dictionari": [4, 12], "kei": 4, "current": [4, 6, 7, 8, 35], "number": [4, 35], "carri": 4, "ha": [4, 30, 42], "deliv": [4, 12], "each": [4, 6, 7, 8, 11, 13, 14, 15, 27, 28, 29, 42], "boolean": [4, 44], "flag": [4, 17, 18, 22, 23, 30, 34, 43], "indic": [4, 10, 34, 42], "dimens": [4, 42], "penalti": [4, 5, 6, 7, 8, 12, 27, 28, 30], "step": [4, 6, 7, 8, 9, 27, 28, 30, 35, 38, 39, 40, 42, 43], "bottle_reward": 4, "while": [4, 10, 12, 16], "multipl": [4, 34, 37, 38, 42, 43, 44], "small": [4, 42], "probabl": [4, 9], "drop": [4, 42], "appli": [4, 29, 44], "ground": 4, "when": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 30, 34, 42, 43], "size": [4, 11, 43], "prob_drop": 4, "time_penalti": [4, 5, 12], "unbreakable_bottl": 4, "true": [4, 6, 7, 8, 13, 14, 15, 35, 44], "which": [4, 6, 7, 8, 9, 10, 39, 42, 43, 44], "again": 4, "so": 4, "outcom": 4, "revers": [4, 5, 27], "otherwis": [4, 9, 30], "cannot": 4, "origin": [4, 39, 40, 42], "robert": 4, "klassert": 4, "home": [4, 34], "asset": [4, 34, 42], "limezu": [4, 34], "itch": [4, 6, 7, 8, 34], "io": [4, 6, 7, 8, 34], "serenevillagerevamp": [4, 34], "gold": [4, 12, 34], "enemi": [4, 12, 16, 30, 34], "gem": [4, 12, 34], "ninjikin": [4, 34], "pixel": [4, 6, 7, 8, 42], "art": [4, 6, 7, 8, 42], "creat": [4, 6, 7, 8, 39, 40, 42], "assist": [4, 6, 7, 8], "dall": [4, 6, 7, 8], "version": [5, 10, 12, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 42], "": [5, 6, 7, 8, 10, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 31, 32, 39, 40, 44], "ob": [5, 12, 16, 33, 39, 40, 42], "action": [5, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 42, 43], "space": [5, 12, 16, 21, 33, 42, 43], "descript": [5, 12, 16, 33], "mountaincar": [5, 28], "continu": [5, 6, 7, 8, 16, 28, 33, 35, 42], "reverse_penalti": 5, "forward_penalti": 5, "mountain": [5, 27, 28, 42], "car": [5, 42], "extra": 5, "forward": [5, 19, 22, 23, 24, 27, 31, 32], "vamplew": [5, 12], "et": [5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 33, 38], "al": [5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 33, 38], "2011": 5, "mountaincarcontinu": [5, 42], "fuel_consumption_penalti": 5, "fuel": [5, 13, 14, 15, 16, 25, 26, 28], "consumpt": [5, 13, 14, 15, 16], "lunar": [5, 42], "lander": [5, 42], "v2": [5, 25, 26, 42], "land": [5, 25, 26], "shaped_reward": 5, "main_engine_fuel": 5, "side_engine_fuel": 5, "lunarland": [5, 25, 26], "similarli": [5, 16], "hung": 5, "11": [6, 7, 22, 23, 42], "23": [6, 42], "7": [6, 11, 12, 13, 14, 15, 42], "classic": [6, 7, 8, 43], "control": [6, 7, 8, 9, 17, 18, 19, 22, 23, 24, 31, 32], "submarin": [6, 7, 8, 12], "2d": [6, 7, 8, 10, 13, 14, 15, 28, 42, 44], "grid": [6, 7, 8], "world": [6, 7, 8], "box": [6, 7, 8, 17, 18, 19, 22, 23, 24, 26, 28, 31, 32, 35], "valu": [6, 7, 8, 11, 12, 38], "10": [6, 7, 8, 11, 12, 34, 35], "x": [6, 7, 8, 17, 18, 22, 23, 24, 30, 31, 32, 34], "y": [6, 7, 8, 17, 18, 34], "coordin": [6, 7, 8, 34, 39], "where": [6, 7, 8, 9, 11, 27, 28, 42], "down": [6, 7, 8, 10, 34], "dimension": [6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 29, 30, 31, 32, 34], "posit": [6, 7, 8, 10, 13, 14, 15, 29, 30, 34], "alwai": [6, 7, 8], "same": [6, 7, 8], "dst_map": [6, 7, 8], "map": [6, 7, 8, 10, 12, 13, 14, 15], "default": [6, 7, 8, 13, 14, 15, 35], "convex": [6, 7, 8], "yang": [6, 7, 8, 11, 12, 16], "2019": [6, 7, 8, 11, 12, 13, 14, 15, 16], 
"us": [6, 7, 8, 13, 14, 15, 35, 36, 37, 38, 39, 40, 41, 42, 43], "deepseatreasur": [6, 7, 8], "concave_map": [6, 7, 8], "mirrored_map": [6, 7, 8], "float_stat": [6, 7, 8], "adapt": [6, 7, 8, 10], "sourc": [6, 7, 8, 10, 12, 13, 14, 15, 28, 39], "background": [6, 7, 8, 35], "ansimuz": [6, 7, 8], "underwat": [6, 7, 8], "fantasi": [6, 7, 8], "124": [7, 8], "20": [8, 42], "simpl": [9, 39, 40], "fisherman": 9, "either": 9, "fish": [9, 12], "collect": [9, 10, 11, 12, 13, 14, 15, 16, 30, 34, 42], "wood": [9, 12], "expect": 9, "util": [9, 42], "return": [9, 13, 14, 15, 34, 39, 42, 43, 44], "two": [9, 13, 14, 15, 16, 29, 30, 35], "woodproba": 9, "fishproba": 9, "episod": [9, 35, 44], "end": [9, 13, 14, 15], "after": [9, 42, 44], "max_t": 9, "200": 9, "catch": 9, "14": [10, 25, 26, 42], "13": [10, 42], "gridworld": [10, 12], "introduc": [10, 42], "an": [10, 30, 34, 35, 39, 40, 42, 43, 44], "avoid": [10, 16], "those": 10, "neg": [10, 28, 29], "travel": 10, "fix": 10, "goal": [10, 12, 27, 28], "split": 10, "separ": 10, "wall": 10, "passag": 10, "barreto": 10, "andr\u00e9": 10, "nip": 10, "2017": 10, "plu": 10, "binari": [10, 11, 12], "vector": [10, 11, 13, 14, 15, 27, 28, 30, 39, 42, 43, 44], "item": [10, 12], "were": 10, "compon": [10, 42, 44], "blue": 10, "squar": 10, "els": [10, 30, 34], "green": 10, "triangl": 10, "red": 10, "circl": 10, "lower": 10, "g": [10, 12, 42], "maze": 10, "arrai": [10, 39, 40, 42, 43, 44], "see": [10, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 39, 40, 42, 43], "exampl": [10, 37, 39, 40, 42], "63": 11, "6": [11, 12, 13, 14, 15, 19, 27, 28, 29, 32], "full": [11, 12, 42], "depth": [11, 12], "d": [11, 12, 38], "leaf": [11, 12], "nutrient": [11, 12], "protein": [11, 12], "carb": [11, 12], "fat": [11, 12], "vitamin": [11, 12], "miner": [11, 12], "chose": 11, "node": 11, "therefor": 11, "receiv": [11, 42], "root": [11, 42], "observ": [12, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 42, 43], "must": [12, 13, 14, 15, 
16, 43], "take": 12, "account": 12, "taken": 12, "concav": [12, 42], "2010": 12, "mirror": 12, "harder": 12, "dst": [12, 42], "resourc": [12, 42], "gather": [12, 42], "chanc": 12, "kill": [12, 30, 34], "barret": 12, "narayanan": [12, 34], "2008": [12, 34], "fish_amount": 12, "wood_amount": 12, "esr": [12, 42], "light": 12, "fire": 12, "eat": 12, "roijer": 12, "2018": 12, "breakabl": 12, "bottl": 12, "cell": 12, "destin": 12, "2021": 12, "nutri1": 12, "nutri6": 12, "item1": 12, "item2": 12, "item3": 12, "three": 12, "differ": 12, "type": [12, 13, 14, 15, 16, 42], "ores": [13, 14, 15, 16], "minim": [13, 14, 15, 16], "abel": [13, 14, 15, 16], "cart": [13, 14, 15], "speed": [13, 14, 15, 16, 20, 42], "sin": [13, 14, 15, 29], "co": [13, 14, 15, 29], "orient": [13, 14, 15], "porcentag": [13, 14, 15], "capac": [13, 14, 15], "fill": [13, 14, 15], "image_observ": [13, 14, 15], "3d": [13, 14, 15, 27], "imag": [13, 14, 15, 16, 35], "mine": [13, 14, 15], "acceler": [13, 14, 15], "brake": [13, 14, 15], "none": [13, 14, 15, 35, 42, 44], "quantiti": [13, 14, 15], "minerium": [13, 14, 15], "retriev": [13, 14, 15], "spars": [13, 14, 15], "second": [13, 14, 15], "consum": [13, 14, 15], "dens": [13, 14, 15], "upper": [13, 14, 15], "corner": [13, 14, 15], "render_mod": [13, 14, 15, 35], "render": [13, 14, 15, 35, 43], "mode": [13, 14, 15, 35], "rgb_arrai": [13, 14, 15, 35], "human": [13, 14, 15, 35], "rgb": [13, 14], "frame_skip": [13, 14, 15], "mani": [13, 14, 15], "repeat": [13, 14, 15], "incremental_frame_skip": [13, 14, 15], "whether": [13, 14, 15, 35], "increment": [13, 14, 15], "config": [13, 14, 15], "path": [13, 14, 15], "json": [13, 14, 15], "configur": [13, 14, 15], "file": [13, 14, 15, 42, 43], "foundat": [13, 14, 15, 42], "blob": [13, 14, 15], "mine_config": [13, 14, 15], "axel": [13, 14, 15], "480": 15, "255": [15, 30], "includ": [16, 39, 41], "other": [16, 17, 18, 22, 23, 39, 40, 42, 44], "cost_flood": 16, "deficit_wat": 16, "execut": [16, 35], "correspond": [16, 35], 
"amount": [16, 35], "releas": [16, 35], "dam": [16, 35], "pianosi": [16, 35], "2013": 16, "ore1": 16, "ore2": 16, "highwai": [16, 42], "fast": [16, 42], "right_lan": 16, "collis": [16, 20], "neighbour": 16, "vehicl": 16, "stai": 16, "rightest": 16, "lane": [16, 20], "x_po": 16, "death": 16, "coin": [16, 30], "warn": [16, 42], "supermariobrosenv": 16, "support": 16, "limit": 16, "8": [17, 18, 25, 26, 31, 35, 39, 40, 42], "float32": [17, 18, 19, 22, 23, 24, 26, 28, 31, 32, 35], "27": [17, 18, 42], "v4": [17, 18, 19, 22, 23, 24, 29, 31, 32, 33, 42], "antenv": [17, 18], "veloc": [17, 18, 24, 29, 31, 32, 33], "cost": [17, 18, 19, 22, 23, 24, 25, 26, 31, 32, 35], "cost_object": [17, 18, 22, 23], "set": [17, 18, 22, 23, 39, 42, 43], "fals": [17, 18, 22, 23], "ad": [17, 18, 22, 23, 43], "healthi": [17, 18], "17": [19, 24, 32], "halfcheetahenv": 19, "run": [19, 24, 32, 43, 44], "highwayenv": 20, "reward": [21, 39, 40, 42, 43, 44], "highwayfastenv": 21, "hopperenv": [22, 23], "axi": [22, 23], "jump": [22, 23], "z": [22, 23], "376": 24, "humanoidenv": 24, "100": [25, 26, 30, 44], "crash": [25, 26], "successfulli": [25, 26], "engin": [25, 26], "side": [25, 26], "07": [27, 28], "top": [27, 28], "norm": 28, "9": 29, "mujoco": [29, 39, 41], "angl": 29, "central": 29, "elbow": 29, "joint": [29, 35], "angular": 29, "possibl": 29, "zero": [29, 42], "torqu": 29, "distanc": [29, 43], "tip": 29, "arm": 29, "target": 29, "comput": [29, 42, 43], "r_i": 29, "finger_tip_coord": 29, "target_i": 29, "256": 30, "240": 30, "25": [30, 42], "supermariobro": 30, "super": 30, "mario": [30, 42], "bro": 30, "far": 30, "much": 30, "pass": [30, 42], "between": [30, 35, 39, 42, 44], "di": 30, "point": 30, "swimmerenv": 31, "walker2denv": 32, "reacher": 33, "target_1": 33, "target_2": 33, "target_3": 33, "target_4": 33, "hopper": [33, 42], "height": 33, "energi": 33, "halfcheetah": [33, 42], "similar": 33, "xu": 33, "2020": 33, "walker2d": 33, "ant": 33, "x_veloc": [33, 42], "y_veloc": 33, "swimmer": 
33, "humanoid": 33, "humonoid": 33, "barrett": 34, "leon": 34, "srini": 34, "optim": [34, 38, 42], "polici": [34, 38], "criteria": 34, "25th": 34, "intern": [34, 35], "machin": 34, "41": [34, 42], "47": [34, 42], "1145": 34, "1390156": 34, "1390162": 34, "consist": 34, "element": [34, 42], "diamond": 34, "castelletti": 35, "m": 35, "restelli": 35, "fit": 35, "q": 35, "iter": [35, 44], "markov": 35, "decis": 35, "2012": 35, "neural": [35, 36, 39], "network": 35, "ijcnn": 35, "brisban": 35, "qld": 35, "australia": 35, "pp": 35, "doi": 35, "1109": 35, "6252759": 35, "float": [35, 43, 44], "level": [35, 42], "normalized_act": 35, "percentag": 35, "There": 35, "due": 35, "excess": 35, "wrt": 35, "flood": 35, "threshold": 35, "upstream": 35, "deficit": 35, "suppli": 35, "demand": 35, "hydroelectr": 35, "downstream": 35, "By": 35, "onli": 35, "initi": [35, 43], "random": 35, "160": 35, "ansi": 35, "time_limit": 35, "maximum": 35, "until": 35, "truncat": [35, 39, 40, 42, 43], "nO": 35, "penal": 35, "select": 35, "bound": [35, 42], "normal": [35, 42, 44], "initial_st": 35, "port": 35, "simon": 35, "parisi": 35, "sky": 35, "paulina": 35, "riva": 35, "opengameart": 35, "org": [35, 39], "content": 35, "repositori": [36, 37, 39, 43], "research": [36, 38, 39], "pleas": [36, 38, 39], "cite": 36, "felten_toolkit_2023": [36, 39], "toolkit": [36, 38, 39], "reliabl": [36, 38, 39], "benchmark": [36, 38, 39], "37th": [36, 39], "process": [36, 39], "system": [36, 39, 41], "neurip": [36, 38, 39], "2023": [36, 38, 39, 42], "algorithm": [37, 39], "under": [37, 42], "api": [37, 42, 43], "It": [37, 39, 40, 42, 43], "wrapper": [37, 39, 40, 42], "publish": 38, "submiss": 38, "open": [38, 39], "pull": [38, 43], "request": 38, "add": [38, 42], "miss": [38, 42], "entri": 38, "sampl": 38, "effici": 38, "via": 38, "gener": [38, 42, 43], "improv": 38, "priorit": 38, "aama": 38, "hyperparamet": 38, "modem": 38, "leverag": 38, "approxim": 38, "model": [38, 39, 40], "distribut": 38, "pareto": [38, 43], 
"cai": 38, "welfar": 38, "fair": 38, "fan": 38, "person": 38, "budget": 38, "ivanov": 38, "2024": [38, 42], "decomposit": 38, "taxonomi": 38, "framework": 38, "guarante": 38, "align": 38, "rodriguez": 38, "soto": 38, "mofl": 38, "feder": 38, "hartmann": 38, "standard": [39, 42], "suit": 39, "python": [39, 40, 42], "develop": 39, "compar": 39, "commun": [39, 42], "well": [39, 42, 43], "compliant": 39, "essenti": [39, 43], "numpi": [39, 40, 42, 43, 44], "document": [39, 43], "websit": [39, 42], "work": 39, "gg": 39, "bnj6kubtg6": 39, "As": [39, 40], "class": [39, 40, 44], "instanc": [39, 40, 42], "interact": [39, 40], "veri": [39, 40, 43], "mo_gym": [39, 40, 42], "np": [39, 40, 42], "info": [39, 40, 42, 43, 44], "reset": [39, 40, 42, 43], "vector_reward": [39, 40, 42], "next_ob": [39, 40, 42], "termin": [39, 40, 42, 43], "your_ag": [39, 40, 42], "act": [39, 40, 42], "option": [39, 40, 42, 43], "scalar": [39, 40, 42, 44], "function": [39, 40, 42], "linearreward": [39, 40, 42], "weight": [39, 40, 42, 44], "detail": [39, 40], "mdp": [39, 40, 42], "momdp": [39, 40, 42], "definit": [39, 40], "practic": [39, 40], "guid": [39, 40], "plan": [39, 40], "check": [39, 40], "colab": [39, 40], "notebook": [39, 40], "doe": [39, 41, 44], "depend": [39, 41], "famili": [39, 41], "problemat": [39, 41], "certain": [39, 41], "one": [39, 41], "03": 42, "pypi": 42, "lucasalegr": 42, "87": 42, "ffelten": 42, "79": 42, "unwrap": 42, "access": 42, "reward_spac": [42, 43], "77": 42, "fruit_tre": 42, "tomekst": 42, "81": 42, "group": 42, "83": 42, "80": 42, "updat": 42, "citat": 42, "86": 42, "unpin": 42, "kallinteri": 42, "andrea": 42, "84": 42, "changelog": 42, "08": [42, 44], "24": 42, "75": 42, "momaxandskipobserv": 42, "76": 42, "modifi": 42, "part": [42, 43], "info_dict": 42, "ianleongudri": 42, "69": 42, "order": 42, "wrap": [42, 44], "morecordepisodestatist": 42, "70": 42, "73": 42, "tuto": 42, "custom": 42, "creation": 42, "72": 42, "test": 42, "worker": 42, "67": 42, "pf": 42, "cc": 
42, "determinist": 42, "74": 42, "06": 42, "12": 42, "thrill": 42, "design": 42, "expand": 42, "capabl": 42, "rl": 42, "scenario": 42, "need": 42, "mai": 42, "conflict": 42, "repres": 42, "distinct": 42, "In": 42, "context": 42, "trade": 42, "off": 42, "combin": 42, "linearli": 42, "predefin": 42, "shown": 42, "snippet": 42, "ctrl_cost": 42, "self": [42, 43], "control_cost": 42, "forward_reward": 42, "_forward_reward_weight": 42, "With": 42, "user": 42, "flexibl": 42, "determin": 42, "compromis": 42, "thei": [42, 44], "desir": 42, "prefer": 42, "consequ": 42, "do": 42, "thu": 42, "extend": 42, "vectori": 42, "illustr": 42, "domain": 42, "balanc": 42, "save": 42, "batteri": 42, "mark": 42, "within": [42, 43], "stabl": 42, "achiev": 42, "qualiti": 42, "allow": 42, "fall": 42, "back": 42, "singl": 42, "rang": 42, "literatur": 42, "inher": 42, "exhaust": 42, "list": 42, "avail": [42, 44], "addition": 42, "tailor": 42, "monormalizereward": 42, "linearwrapp": 42, "transform": 42, "bump": 42, "51": 42, "52": 42, "accept": 42, "55": 42, "pre": 42, "commit": 42, "spell": 42, "mistak": 42, "56": 42, "method": [42, 43], "57": 42, "59": 42, "60": 42, "61": 42, "65": 42, "caus": 42, "randint": 42, "deprec": 42, "53": 42, "edit": 42, "button": 42, "58": 42, "62": 42, "logo": 42, "64": 42, "pareto_front": [42, 43], "get": 42, "45": 42, "43": 42, "baselin": 42, "readm": 42, "40": 42, "link": 42, "mgoulao": 42, "42": 42, "enabl": 42, "46": 42, "02": 42, "ezpickl": 42, "34": 42, "37": 42, "convers": 42, "33": 42, "eval_mo": 42, "observationspac": 42, "dtype": 42, "35": 42, "gif": 42, "minor": 42, "directli": [42, 44], "26": 42, "pydoc": 42, "31": 42, "hotfix": 42, "vecenv": 42, "30": 42, "01": 42, "benefit": 42, "structur": 42, "should": [42, 43], "higher": 42, "integr": 42, "tool": 42, "break": 42, "been": 42, "renam": 42, "previous": 42, "call": 42, "reli": 42, "instead": 42, "16": 42, "deep_sea_treasur": 42, "gt": 42, "18": 42, "life": 42, "ci": 42, "setup": 42, "19": 42, "init": 
42, "Of": 42, "conduct": 42, "21": 42, "09": 42, "revamp": 42, "lint": 42, "bib": 42, "few": [42, 44], "bugfix": 42, "tutori": 43, "goe": 43, "through": 43, "sinc": [43, 44], "close": 43, "ti": 43, "refer": 43, "its": 43, "inherit": 43, "__init__": 43, "constructor": 43, "action_spac": 43, "observation_spac": 43, "attribut": 43, "moreov": 43, "deal": 43, "reward_dim": 43, "integ": 43, "seed": 43, "kwarg": 43, "perform": 43, "next": 43, "gamma": [43, 44], "discount": [43, 44], "front": 43, "known": 43, "oject": 43, "metric": 43, "invert": 43, "igd": 43, "registri": 43, "done": 43, "line": 43, "py": 43, "directori": 43, "registr": 43, "id": 43, "my_env_v0": 43, "entry_point": 43, "my_env_dir": 43, "my_env_fil": 43, "myenv": 43, "pytest": 43, "inspir": 44, "modul": 44, "ndarrai": 44, "dot": 44, "product": 44, "idx": 44, "int": 44, "99": 44, "epsilon": 44, "1e": 44, "index": 44, "touch": 44, "min_r": 44, "max_r": 44, "clip": 44, "min": 44, "max": 44, "env_fn": 44, "callabl": 44, "copi": 44, "bool": 44, "serial": 44, "deque_s": 44, "keep": 44, "track": 44, "cumul": 44, "length": 44, "complet": 44, "look": 44, "dr": 44, "contrari": 44, "t": 44, "elaps": 44, "begin": 44, "output": 44, "form": 44, "care": 44, "befor": 44, "morewordstatist": 44, "final_observ": 44, "num": 44, "_final_observ": 44, "final_info": 44, "_final_info": 44, "num_env": 44, "dim_reward": 44, "_episod": 44}, "objects": {"mo_gymnasium": [[44, 0, 1, "", "LinearReward"], [44, 0, 1, "", "MOClipReward"], [44, 0, 1, "", "MONormalizeReward"], [44, 0, 1, "", "MORecordEpisodeStatistics"], [44, 0, 1, "", "MOSyncVectorEnv"]]}, "objtypes": {"0": "py:class"}, "objnames": {"0": ["py", "class", "Python class"]}, "titleterms": {"404": 0, "page": [0, 42], "Not": 0, "found": 0, "The": 0, "request": 0, "could": 0, "mo": [1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 42], "gymnasium": [1, 42], "document": [1, 42], "build": 1, "commun": 3, "acknowledg": 3, "breakabl": [4, 42], "bottl": [4, 42], 
"descript": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 29, 30, 31, 32, 34, 35], "action": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 29, 34, 35], "space": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35], "observ": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 29, 34, 35], "reward": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35], "start": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 34, 35], "state": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 34, 35], "episod": [4, 6, 7, 8, 10, 11, 13, 14, 15, 30, 34], "termin": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 30, 34], "argument": [4, 6, 7, 8, 9, 10, 13, 14, 15, 35], "credit": [4, 6, 7, 8, 9, 10, 13, 14, 15, 34, 35], "classic": 5, "control": 5, "deep": [6, 7, 8, 42], "sea": [6, 7, 8, 42], "treasur": [6, 7, 8, 42], "concav": 7, "mirror": [8, 42], "fishwood": 9, "four": 10, "room": 10, "refer": 10, "fruit": [11, 42], "tree": [11, 42], "grid": 12, "world": 12, "minecart": [13, 14, 15], "determinist": 14, "rgb": 15, "miscellan": 16, "ant": [17, 18], "2d": [18, 23], "halfcheetah": 19, "highwai": [20, 21], "fast": 21, "hopper": [22, 23], "humanoid": 24, "lunar": [25, 26], "lander": [25, 26], "continu": 26, "mountaincar": 27, "mountaincarcontinu": 28, "reacher": [29, 42], "supermario": 30, "swimmer": 31, "walker2d": 32, "mujoco": [33, 42], "resourc": 34, "gather": 34, "water": 35, "reservoir": 35, "citat": 36, "morl": 37, "baselin": 37, "list": 38, "public": 38, "api": [39, 40], "instal": [39, 41], "cite": 39, "releas": 42, "note": 42, "v1": 42, "1": [42, 43], "0": 42, "new": [42, 43], "environ": [42, 43], "render": 42, "more": 42, "support": 42, "29": 42, "pygam": 42, "becom": 42, "matur": 42, "v0": 42, "3": [42, 43], "4": [42, 43], "known": 42, "pareto": 42, "front": 42, "improv": 42, "polici": 42, "evalu": 42, "bug": 42, "fix": 42, "better": 42, "2": [42, 43], "webpag": 42, "migrat": 42, "creat": 43, "custom": 43, "class": 
43, "regist": 43, "test": 43, "instanti": 43, "your": 43, "wrapper": 44, "linearreward": 44, "monormalizereward": 44, "moclipreward": 44, "mosyncvectorenv": 44, "morecordepisodestatist": 44}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx": 60}, "alltitles": {"404 - Page Not Found": [[0, "page-not-found"]], "The requested page could not be found.": [[0, "the-requested-page-could-not-be-found"]], "MO-Gymnasium documentation": [[1, "mo-gymnasium-documentation"]], "Build the Documentation": [[1, "build-the-documentation"]], "Community": [[3, "community"]], "Acknowledgements": [[3, "acknowledgements"]], "Breakable-Bottles": [[4, "breakable-bottles"]], "Description": [[4, "description"], [6, "description"], [7, "description"], [8, "description"], [9, "description"], [10, "description"], [11, "description"], [13, "description"], [14, "description"], [15, "description"], [17, "description"], [18, "description"], [19, "description"], [20, "description"], [22, "description"], [23, "description"], [24, "description"], [25, "description"], [26, "description"], [29, "description"], [30, "description"], [31, "description"], [32, "description"], [34, "description"], [35, "description"]], "Action Space": [[4, "action-space"], [6, "action-space"], [7, "action-space"], [8, "action-space"], [9, "action-space"], [10, "action-space"], [11, "action-space"], [13, "action-space"], [14, "action-space"], [15, "action-space"], [29, "action-space"], [34, "action-space"], [35, "action-space"]], "Observation Space": [[4, "observation-space"], [6, "observation-space"], [7, "observation-space"], [8, "observation-space"], [9, "observation-space"], [10, "observation-space"], [11, "observation-space"], [13, "observation-space"], [14, "observation-space"], 
[15, "observation-space"], [29, "observation-space"], [34, "observation-space"], [35, "observation-space"]], "Reward Space": [[4, "reward-space"], [6, "reward-space"], [7, "reward-space"], [8, "reward-space"], [9, "reward-space"], [10, "reward-space"], [11, "reward-space"], [13, "reward-space"], [14, "reward-space"], [15, "reward-space"], [17, "reward-space"], [18, "reward-space"], [19, "reward-space"], [20, "reward-space"], [22, "reward-space"], [23, "reward-space"], [24, "reward-space"], [25, "reward-space"], [26, "reward-space"], [29, "reward-space"], [30, "reward-space"], [31, "reward-space"], [32, "reward-space"], [34, "reward-space"], [35, "reward-space"]], "Starting State": [[4, "starting-state"], [6, "starting-state"], [7, "starting-state"], [8, "starting-state"], [9, "starting-state"], [10, "starting-state"], [11, "starting-state"], [13, "starting-state"], [14, "starting-state"], [15, "starting-state"], [34, "starting-state"], [35, "starting-state"]], "Episode Termination": [[4, "episode-termination"], [6, "episode-termination"], [7, "episode-termination"], [8, "episode-termination"], [10, "episode-termination"], [11, "episode-termination"], [13, "episode-termination"], [14, "episode-termination"], [15, "episode-termination"], [30, "episode-termination"], [34, "episode-termination"]], "Arguments": [[4, "arguments"], [6, "arguments"], [7, "arguments"], [8, "arguments"], [9, "arguments"], [10, "arguments"], [13, "arguments"], [14, "arguments"], [15, "arguments"], [35, "arguments"]], "Credits": [[4, "credits"], [6, "credits"], [7, "credits"], [8, "credits"], [9, "credits"], [10, "credits"], [13, "credits"], [14, "credits"], [15, "credits"], [34, "credits"], [35, "credits"]], "Classic Control": [[5, "classic-control"]], "Deep-Sea-Treasure": [[6, "deep-sea-treasure"]], "Deep-Sea-Treasure-Concave": [[7, "deep-sea-treasure-concave"]], "Deep-Sea-Treasure-Mirrored": [[8, "deep-sea-treasure-mirrored"]], "Fishwood": [[9, "fishwood"]], "Termination": [[9, 
"termination"]], "Four-Room": [[10, "four-room"]], "References": [[10, "references"]], "Fruit-Tree": [[11, "fruit-tree"]], "Grid-World": [[12, "grid-world"]], "Minecart": [[13, "minecart"]], "Minecart-Deterministic": [[14, "minecart-deterministic"]], "Minecart-Rgb": [[15, "minecart-rgb"]], "Miscellaneous": [[16, "miscellaneous"]], "MO-Ant": [[17, "mo-ant"]], "MO-Ant-2D": [[18, "mo-ant-2d"]], "MO-Halfcheetah": [[19, "mo-halfcheetah"]], "MO-Highway": [[20, "mo-highway"]], "MO-Highway-Fast": [[21, "mo-highway-fast"]], "MO-Hopper": [[22, "mo-hopper"]], "MO-Hopper-2D": [[23, "mo-hopper-2d"]], "MO-Humanoid": [[24, "mo-humanoid"]], "MO-Lunar-Lander": [[25, "mo-lunar-lander"]], "MO-Lunar-Lander-Continuous": [[26, "mo-lunar-lander-continuous"]], "MO-Mountaincar": [[27, "mo-mountaincar"]], "Reward space:": [[27, "reward-space"], [28, "reward-space"]], "MO-Mountaincarcontinuous": [[28, "mo-mountaincarcontinuous"]], "MO-Reacher": [[29, "mo-reacher"]], "MO-Supermario": [[30, "mo-supermario"]], "MO-Swimmer": [[31, "mo-swimmer"]], "MO-Walker2D": [[32, "mo-walker2d"]], "MuJoCo": [[33, "mujoco"]], "Resource-Gathering": [[34, "resource-gathering"]], "Water-Reservoir": [[35, "water-reservoir"]], "Citation": [[36, "citation"]], "MORL Baselines": [[37, "morl-baselines"]], "List of Publications": [[38, "list-of-publications"]], "API": [[39, "api"], [40, "api"]], "Install": [[39, "install"], [41, "install"]], "Citing": [[39, "citing"]], "Release Notes": [[42, "release-notes"]], "v1.1.0: MO-Gymnasium 1.1.0 Release: New MuJoCo environments, Mirrored Deep Sea Treasure, Fruit Tree rendering, and more": [[42, "release-v1-1-0"]], "v1.0.1: MO-Gymnasium 1.0.1 Release: Support Gymnasium 0.29, breakable-bottles pygame render, and more": [[42, "release-v1-0-1"]], "v1.0.0: MO-Gymnasium becomes mature": [[42, "release-v1-0-0"]], "v0.3.4: MO-Gymnasium 0.3.4 Release: Known Pareto Front, improved renders and documentation": [[42, "release-v0-3-4"]], "v0.3.3: MO-Gymnasium 0.3.3 Release: Policy Evaluation 
bug fix, better documentation page": [[42, "release-v0-3-3"]], "MO-Gymnasium 0.3.2 Release: Bug fixes, improved webpage": [[42, "release-0-3-2"]], "MO-Gymnasium 0.3.1 Release: Improved documentation and MuJoco MO-Reacher environment": [[42, "release-0-3-1"]], "MO-Gymnasium 0.3.0 Release: Migrate to Gymnasium": [[42, "release-0-3-0"]], "0.2.1": [[42, "release-0-2-1"]], "0.2.0": [[42, "release-0-2-0"]], "0.1.2": [[42, "release-0-1-2"]], "0.1.1": [[42, "release-0-1-1"]], "Creating a custom environment": [[43, "creating-a-custom-environment"]], "1. Create a new environment class": [[43, "create-a-new-environment-class"]], "2. Register the environment": [[43, "register-the-environment"]], "3. Test the environment": [[43, "test-the-environment"]], "4. Instantiate your environment": [[43, "instantiate-your-environment"]], "Wrappers": [[44, "wrappers"]], "LinearReward": [[44, "linearreward"]], "MONormalizeReward": [[44, "monormalizereward"]], "MOClipReward": [[44, "moclipreward"]], "MOSyncVectorEnv": [[44, "mosyncvectorenv"]], "MORecordEpisodeStatistics": [[44, "morecordepisodestatistics"]]}, "indexentries": {"linearreward (class in mo_gymnasium)": [[44, "mo_gymnasium.LinearReward"]], "moclipreward (class in mo_gymnasium)": [[44, "mo_gymnasium.MOClipReward"]], "monormalizereward (class in mo_gymnasium)": [[44, "mo_gymnasium.MONormalizeReward"]], "morecordepisodestatistics (class in mo_gymnasium)": [[44, "mo_gymnasium.MORecordEpisodeStatistics"]], "mosyncvectorenv (class in mo_gymnasium)": [[44, "mo_gymnasium.MOSyncVectorEnv"]]}}) \ No newline at end of file +Search.setIndex({"alltitles": {"0.1.1": [[45, "release-0-1-1"]], "0.1.2": [[45, "release-0-1-2"]], "0.2.0": [[45, "release-0-2-0"]], "0.2.1": [[45, "release-0-2-1"]], "1. Create a new environment class": [[46, "create-a-new-environment-class"]], "2. Register the environment": [[46, "register-the-environment"]], "3. Test the environment": [[46, "test-the-environment"]], "4. 
Instantiate your environment": [[46, "instantiate-your-environment"]], "404 - Page Not Found": [[0, null]], "API": [[42, null], [43, null]], "Acknowledgements": [[3, "acknowledgements"]], "Action Space": [[4, "action-space"], [6, "action-space"], [7, "action-space"], [8, "action-space"], [9, "action-space"], [10, "action-space"], [11, "action-space"], [13, "action-space"], [14, "action-space"], [15, "action-space"], [32, "action-space"], [37, "action-space"], [38, "action-space"]], "Arguments": [[4, "arguments"], [6, "arguments"], [7, "arguments"], [8, "arguments"], [9, "arguments"], [10, "arguments"], [13, "arguments"], [14, "arguments"], [15, "arguments"], [38, "arguments"]], "Breakable-Bottles": [[4, null]], "Build the Documentation": [[1, "build-the-documentation"]], "Citation": [[39, null]], "Citing": [[42, "citing"]], "Classic Control": [[5, null]], "Community": [[3, null]], "Creating a custom environment": [[46, null]], "Credits": [[4, "credits"], [6, "credits"], [7, "credits"], [8, "credits"], [9, "credits"], [10, "credits"], [13, "credits"], [14, "credits"], [15, "credits"], [37, "credits"], [38, "credits"]], "Deep-Sea-Treasure": [[6, null]], "Deep-Sea-Treasure-Concave": [[7, null]], "Deep-Sea-Treasure-Mirrored": [[8, null]], "Description": [[4, "description"], [6, "description"], [7, "description"], [8, "description"], [9, "description"], [10, "description"], [11, "description"], [13, "description"], [14, "description"], [15, "description"], [17, "description"], [18, "description"], [19, "description"], [20, "description"], [22, "description"], [23, "description"], [24, "description"], [25, "description"], [26, "description"], [32, "description"], [33, "description"], [34, "description"], [35, "description"], [37, "description"], [38, "description"]], "Episode Termination": [[4, "episode-termination"], [6, "episode-termination"], [7, "episode-termination"], [8, "episode-termination"], [10, "episode-termination"], [11, "episode-termination"], [13, 
"episode-termination"], [14, "episode-termination"], [15, "episode-termination"], [33, "episode-termination"], [37, "episode-termination"]], "Fishwood": [[9, null]], "Four-Room": [[10, null]], "Fruit-Tree": [[11, null]], "Grid-World": [[12, null]], "Install": [[42, "install"], [44, null]], "LinearReward": [[48, "linearreward"]], "List of Publications": [[41, null]], "MO-Ant": [[17, null]], "MO-Ant-2D": [[18, null]], "MO-Gymnasium 0.3.0 Release: Migrate to Gymnasium": [[45, "release-0-3-0"]], "MO-Gymnasium 0.3.1 Release: Improved documentation and MuJoco MO-Reacher environment": [[45, "release-0-3-1"]], "MO-Gymnasium 0.3.2 Release: Bug fixes, improved webpage": [[45, "release-0-3-2"]], "MO-Gymnasium documentation": [[1, null]], "MO-Halfcheetah": [[19, null]], "MO-Highway": [[20, null]], "MO-Highway-Fast": [[21, null]], "MO-Hopper": [[22, null]], "MO-Hopper-2D": [[23, null]], "MO-Humanoid": [[24, null]], "MO-Lunar-Lander": [[25, null]], "MO-Lunar-Lander-Continuous": [[26, null]], "MO-Mountaincar": [[27, null]], "MO-Mountaincar-3D": [[28, null]], "MO-Mountaincar-Timemove": [[29, null]], "MO-Mountaincar-Timespeed": [[30, null]], "MO-Mountaincarcontinuous": [[31, null]], "MO-Reacher": [[32, null]], "MO-Supermario": [[33, null]], "MO-Swimmer": [[34, null]], "MO-Walker2D": [[35, null]], "MOClipReward": [[48, "moclipreward"]], "MOMaxAndSkipObservation": [[48, "momaxandskipobservation"]], "MONormalizeReward": [[48, "monormalizereward"]], "MORL Baselines": [[40, null]], "MORecordEpisodeStatistics": [[47, "morecordepisodestatistics"], [48, "morecordepisodestatistics"]], "MOSyncVectorEnv": [[47, "mosyncvectorenv"]], "Minecart": [[13, null]], "Minecart-Deterministic": [[14, null]], "Minecart-Rgb": [[15, null]], "Miscellaneous": [[16, null]], "MuJoCo": [[36, null]], "Observation Space": [[4, "observation-space"], [6, "observation-space"], [7, "observation-space"], [8, "observation-space"], [9, "observation-space"], [10, "observation-space"], [11, "observation-space"], [13, 
"observation-space"], [14, "observation-space"], [15, "observation-space"], [32, "observation-space"], [37, "observation-space"], [38, "observation-space"]], "References": [[10, "references"]], "Release Notes": [[45, null]], "Resource-Gathering": [[37, null]], "Reward Space": [[4, "reward-space"], [6, "reward-space"], [7, "reward-space"], [8, "reward-space"], [9, "reward-space"], [10, "reward-space"], [11, "reward-space"], [13, "reward-space"], [14, "reward-space"], [15, "reward-space"], [17, "reward-space"], [18, "reward-space"], [19, "reward-space"], [20, "reward-space"], [22, "reward-space"], [23, "reward-space"], [24, "reward-space"], [25, "reward-space"], [26, "reward-space"], [32, "reward-space"], [33, "reward-space"], [34, "reward-space"], [35, "reward-space"], [37, "reward-space"], [38, "reward-space"]], "Reward space:": [[27, "reward-space"], [28, "reward-space"], [29, "reward-space"], [30, "reward-space"], [31, "reward-space"]], "Starting State": [[4, "starting-state"], [6, "starting-state"], [7, "starting-state"], [8, "starting-state"], [9, "starting-state"], [10, "starting-state"], [11, "starting-state"], [13, "starting-state"], [14, "starting-state"], [15, "starting-state"], [37, "starting-state"], [38, "starting-state"]], "Termination": [[9, "termination"]], "The requested page could not be found.": [[0, "the-requested-page-could-not-be-found"]], "Vector Wrappers": [[47, null]], "Water-Reservoir": [[38, null]], "Wrappers": [[48, null]], "v0.3.3: MO-Gymnasium 0.3.3 Release: Policy Evaluation bug fix, better documentation page": [[45, "release-v0-3-3"]], "v0.3.4: MO-Gymnasium 0.3.4 Release: Known Pareto Front, improved renders and documentation": [[45, "release-v0-3-4"]], "v1.0.0: MO-Gymnasium becomes mature": [[45, "release-v1-0-0"]], "v1.0.1: MO-Gymnasium 1.0.1 Release: Support Gymnasium 0.29, breakable-bottles pygame render, and more": [[45, "release-v1-0-1"]], "v1.1.0: MO-Gymnasium 1.1.0 Release: New MuJoCo environments, Mirrored Deep Sea Treasure, 
Fruit Tree rendering, and more": [[45, "release-v1-1-0"]], "v1.2.0: MO-Gymnasium 1.2.0 Release: Update Gymnasium to v1.0.0, New Mountaincar Environments, Documentation and Test Improvements, and more": [[45, "release-v1-2-0"]]}, "docnames": ["404", "README", "citing/citing", "community/community", "environments/breakable-bottles", "environments/classical", "environments/deep-sea-treasure", "environments/deep-sea-treasure-concave", "environments/deep-sea-treasure-mirrored", "environments/fishwood", "environments/four-room", "environments/fruit-tree", "environments/grid-world", "environments/minecart", "environments/minecart-deterministic", "environments/minecart-rgb", "environments/misc", "environments/mo-ant", "environments/mo-ant-2d", "environments/mo-halfcheetah", "environments/mo-highway", "environments/mo-highway-fast", "environments/mo-hopper", "environments/mo-hopper-2d", "environments/mo-humanoid", "environments/mo-lunar-lander", "environments/mo-lunar-lander-continuous", "environments/mo-mountaincar", "environments/mo-mountaincar-3d", "environments/mo-mountaincar-timemove", "environments/mo-mountaincar-timespeed", "environments/mo-mountaincarcontinuous", "environments/mo-reacher", "environments/mo-supermario", "environments/mo-swimmer", "environments/mo-walker2d", "environments/mujoco", "environments/resource-gathering", "environments/water-reservoir", "examples/citation", "examples/morl_baselines", "examples/publications", "index", "introduction/api", "introduction/install", "release_notes", "tutorials/custom_env", "wrappers/vector_wrappers", "wrappers/wrappers"], "envversion": {"sphinx": 62, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2}, "filenames": ["404.md", "README.md", "citing/citing.md", "community/community.md", 
"environments/breakable-bottles.md", "environments/classical.md", "environments/deep-sea-treasure.md", "environments/deep-sea-treasure-concave.md", "environments/deep-sea-treasure-mirrored.md", "environments/fishwood.md", "environments/four-room.md", "environments/fruit-tree.md", "environments/grid-world.md", "environments/minecart.md", "environments/minecart-deterministic.md", "environments/minecart-rgb.md", "environments/misc.md", "environments/mo-ant.md", "environments/mo-ant-2d.md", "environments/mo-halfcheetah.md", "environments/mo-highway.md", "environments/mo-highway-fast.md", "environments/mo-hopper.md", "environments/mo-hopper-2d.md", "environments/mo-humanoid.md", "environments/mo-lunar-lander.md", "environments/mo-lunar-lander-continuous.md", "environments/mo-mountaincar.md", "environments/mo-mountaincar-3d.md", "environments/mo-mountaincar-timemove.md", "environments/mo-mountaincar-timespeed.md", "environments/mo-mountaincarcontinuous.md", "environments/mo-reacher.md", "environments/mo-supermario.md", "environments/mo-swimmer.md", "environments/mo-walker2d.md", "environments/mujoco.md", "environments/resource-gathering.md", "environments/water-reservoir.md", "examples/citation.md", "examples/morl_baselines.md", "examples/publications.md", "index.md", "introduction/api.md", "introduction/install.md", "release_notes.md", "tutorials/custom_env.md", "wrappers/vector_wrappers.md", "wrappers/wrappers.md"], "indexentries": {"linearreward (class in mo_gymnasium.wrappers)": [[48, "mo_gymnasium.wrappers.LinearReward", false]], "moclipreward (class in mo_gymnasium.wrappers)": [[48, "mo_gymnasium.wrappers.MOClipReward", false]], "momaxandskipobservation (class in mo_gymnasium.wrappers)": [[48, "mo_gymnasium.wrappers.MOMaxAndSkipObservation", false]], "monormalizereward (class in mo_gymnasium.wrappers)": [[48, "mo_gymnasium.wrappers.MONormalizeReward", false]], "morecordepisodestatistics (class in mo_gymnasium.wrappers)": [[48, 
"mo_gymnasium.wrappers.MORecordEpisodeStatistics", false]], "morecordepisodestatistics (class in mo_gymnasium.wrappers.vector)": [[47, "mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics", false]], "mosyncvectorenv (class in mo_gymnasium.wrappers.vector)": [[47, "mo_gymnasium.wrappers.vector.MOSyncVectorEnv", false]]}, "objects": {"mo_gymnasium.wrappers": [[48, 0, 1, "", "LinearReward"], [48, 0, 1, "", "MOClipReward"], [48, 0, 1, "", "MOMaxAndSkipObservation"], [48, 0, 1, "", "MONormalizeReward"], [48, 0, 1, "", "MORecordEpisodeStatistics"]], "mo_gymnasium.wrappers.vector": [[47, 0, 1, "", "MORecordEpisodeStatistics"], [47, 0, 1, "", "MOSyncVectorEnv"]]}, "objnames": {"0": ["py", "class", "Python class"]}, "objtypes": {"0": "py:class"}, "terms": {"": [4, 5, 6, 7, 8, 10, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 34, 35, 42, 43, 45, 48], "0": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 42, 43, 47, 48], "01": 45, "02": 45, "03": 45, "06": 45, "07": [27, 28, 29, 30, 31], "08": [45, 48], "09": 45, "1": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 47, 48], "10": [6, 7, 8, 11, 12, 25, 26, 37, 38, 45], "100": [25, 26, 33, 45, 47, 48], "101": 45, "103": 45, "105": 45, "106": 45, "107": 45, "11": [6, 7, 22, 23, 45], "1109": 38, "1145": 37, "12": 45, "124": [7, 8], "13": [10, 45], "1390156": 37, "1390162": 37, "14": [10, 45], "16": 45, "160": 38, "17": [19, 24, 35], "18": 45, "19": 45, "1e": [34, 48], "2": [4, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 42, 43, 47], "20": [8, 45], "200": 9, "2008": [12, 37], "2010": 12, "2011": 5, "2012": 38, "2013": 16, "2017": 10, "2018": 12, "2019": [6, 7, 8, 11, 12, 13, 14, 15, 16], "2020": 36, "2021": 12, "2022": [2, 5, 12, 41, 45], "2022bnaic": 2, "2023": [39, 41, 42, 45], "2024": [41, 45], "21": 45, 
"23": [6, 45], "24": 45, "240": [4, 33], "25": [33, 45], "255": [15, 33], "256": 33, "25th": 37, "26": 45, "27": [17, 18, 45], "28": [25, 26], "2d": [6, 7, 8, 10, 13, 14, 15, 31, 45, 47], "3": [4, 6, 7, 8, 10, 13, 14, 15, 17, 18, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 32, 33, 37, 38, 47], "30": 45, "31": 45, "33": 45, "34": 45, "34th": 2, "35": 45, "360": 4, "37": 45, "376": 24, "37th": [39, 42], "3d": [5, 13, 14, 15, 27, 29, 30], "4": [4, 6, 7, 8, 10, 13, 14, 15, 24, 25, 26, 32, 33, 34, 37, 38, 47, 48], "40": 45, "41": [37, 45], "42": [45, 47], "43": 45, "45": 45, "46": 45, "47": [37, 45], "480": 15, "5": [4, 11, 12, 13, 14, 15, 20, 21, 25, 26, 33, 37, 45], "50": [4, 45], "51": 45, "52": 45, "53": 45, "55": 45, "56": 45, "57": 45, "58": 45, "59": 45, "6": [11, 12, 13, 14, 15, 19, 25, 26, 27, 28, 29, 30, 31, 32, 35], "60": 45, "61": 45, "62": 45, "6252759": 38, "63": 11, "64": 45, "65": 45, "67": 45, "69": 45, "7": [6, 11, 12, 13, 14, 15, 45, 47], "70": 45, "72": 45, "73": 45, "74": 45, "75": 45, "76": 45, "77": 45, "79": 45, "8": [17, 18, 25, 26, 34, 38, 42, 43, 45], "80": 45, "81": 45, "83": 45, "84": 45, "86": 45, "87": 45, "89": 45, "9": 32, "90": 45, "91": 45, "92": 45, "93": 45, "94": 45, "95": 45, "97": 45, "99": [45, 48], "A": [2, 4, 10, 16, 17, 18, 21, 27, 28, 29, 30, 31, 38, 39, 41, 42, 43, 48], "As": [4, 42, 43], "At": 47, "But": 48, "By": [27, 28, 29, 30, 38], "For": [1, 32, 42, 43, 45, 47], "If": [3, 13, 14, 15, 17, 18, 22, 23, 38, 39, 42, 46], "In": [4, 45], "It": [40, 42, 43, 45, 46], "Of": 45, "The": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 31, 32, 33, 34, 35, 37, 38, 42, 45, 46, 47], "There": 38, "To": [1, 6, 7, 8, 42, 44], "With": 45, "_": 47, "__init__": 46, "_build": 1, "_episod": 47, "_forward_reward_weight": 45, "_modul": 48, "aama": 41, "abel": [13, 14, 15, 16], "about": [1, 5], "abov": 4, "acceler": [13, 14, 15], "accept": 45, "access": [45, 47], "account": [4, 12], "achiev": 45, "act": [42, 43, 
45], "action": [5, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 36, 45, 46, 47], "action_spac": [46, 47], "acttyp": 48, "ad": [17, 18, 22, 23, 45, 46, 47], "adapt": [6, 7, 8, 10], "add": [27, 28, 29, 30, 41, 45], "add_speed_object": [27, 28, 29, 30], "addit": 5, "addition": 45, "after": [9, 45, 48], "again": 4, "agent": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 37, 38, 45], "ai": 4, "al": [5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 36, 41], "alegr": [2, 12, 39, 41, 42], "algorithm": [40, 42], "align": 41, "all": [3, 17, 18, 37, 42, 44, 45, 47, 48], "allow": 45, "also": [3, 16, 42, 43, 45], "altern": [27, 28, 29, 30], "alwai": [6, 7, 8], "amount": [16, 38], "amp": 45, "an": [5, 10, 27, 28, 29, 30, 33, 37, 38, 42, 43, 45, 46, 48], "ana": [2, 39, 42], "andrea": 45, "andr\u00e9": 10, "angl": 32, "angular": 32, "ani": 47, "ann": [2, 39, 42], "ansi": 38, "ansimuz": [6, 7, 8], "ant": 36, "antenv": [17, 18], "api": [40, 45, 46], "appear": 2, "appli": [4, 32, 47], "approach": 4, "approxim": 41, "ar": [3, 4, 13, 14, 15, 16, 37, 38, 45, 46, 47, 48], "arm": 32, "arrai": [10, 17, 18, 19, 22, 23, 34, 42, 43, 45, 46, 47, 48], "art": [4, 6, 7, 8, 45], "artifici": 2, "asid": 3, "ask": 3, "asset": [4, 37, 45], "assist": [4, 6, 7, 8], "associ": 45, "atari_wrapp": 48, "attribut": 46, "australia": 38, "author": [2, 39, 42], "autobuild": 1, "automat": [1, 45, 46], "avail": [45, 47, 48], "averag": 48, "avoid": [10, 16], "axel": [13, 14, 15], "axelabel": 3, "axi": [22, 23], "b": 1, "back": 45, "background": [6, 7, 8, 38], "backward": 5, "balanc": 45, "barret": 12, "barreto": 10, "barrett": 37, "base": [3, 4, 13, 14, 15, 32, 36, 38, 41, 45, 48], "baselin": 45, "baselines3": 48, "batteri": 45, "bazzan": [2, 39, 42], "becaus": 4, "been": [4, 5, 45], "befor": 47, "begin": [47, 48], "behind": 5, "benchmark": [39, 41, 42], "benefit": 45, "benelearn": 2, "benelux": 2, "between": [33, 38, 42, 45, 48], "bib": 45, "binari": [10, 11, 12], "blob": [13, 14, 15], 
"blue": 10, "bnaic": [2, 41], "bnj6kubtg6": 42, "booktitl": [2, 39, 42], "bool": 47, "boolean": [4, 47], "both": 48, "both_norm_env": 48, "bottl": 12, "bottle_reward": 4, "bottles_carri": 4, "bottles_deliv": [4, 12], "bottles_drop": 4, "bound": [38, 45], "box": [6, 7, 8, 17, 18, 19, 22, 23, 24, 26, 31, 34, 35, 38], "brake": [13, 14, 15], "break": 45, "breakabl": 12, "breakablebottl": 4, "brisban": 38, "bro": 33, "bruno": [2, 39, 42], "budget": 41, "buffer": 47, "buffer_length": [47, 48], "bugfix": 45, "bump": 45, "button": 45, "c": [2, 39, 42], "cai": 41, "calcul": 4, "call": 45, "callabl": 47, "can": [3, 4, 9, 11, 13, 14, 15, 27, 28, 29, 30, 38, 42, 43, 44, 45, 47, 48], "cannot": 4, "capabl": 45, "capac": [13, 14, 15], "car": [5, 27, 28, 29, 30, 45], "carb": [11, 12], "care": 47, "carri": 4, "cart": [13, 14, 15], "castelletti": 38, "catch": 9, "caus": 45, "cc": 45, "cd": 1, "cell": 12, "central": 32, "certain": [42, 44], "chanc": 12, "chang": [1, 6, 7, 8, 27, 28, 29, 30, 45], "changelog": 45, "check": [42, 43, 45], "chose": 11, "ci": 45, "circl": 10, "citat": 45, "cite": 39, "class": [5, 42, 43, 47, 48], "classic": [6, 7, 8, 46], "clip": 48, "close": 46, "co": [13, 14, 15, 32], "code": [3, 6, 7, 8, 9, 10, 13, 14, 15, 38, 45, 48], "coin": [16, 33], "colab": [42, 43], "collect": [9, 10, 11, 12, 13, 14, 15, 16, 33, 37, 45], "collis": [16, 20], "com": [3, 13, 14, 15], "combin": 45, "commit": 45, "common": 48, "commun": [42, 45], "compar": 42, "complet": 48, "compliant": 42, "compon": [10, 45, 48], "compromis": 45, "comput": [32, 45, 46], "concav": [12, 45], "concave_map": [6, 7, 8], "conduct": 45, "confer": [2, 37, 38, 39, 42], "config": [13, 14, 15], "configur": [13, 14, 15], "conflict": 45, "conor": 3, "consequ": 45, "consist": 37, "constructor": 46, "consum": [13, 14, 15], "consumpt": [5, 13, 14, 15, 16], "contain": [1, 4, 10, 11, 12, 13, 14, 15, 27, 28, 29, 30, 31, 32, 40], "content": 38, "context": 45, "continu": [5, 6, 7, 8, 16, 31, 36, 38, 45], "contribut": [1, 
3, 4, 45], "contributor": [3, 45], "control": [6, 7, 8, 9, 17, 18, 19, 22, 23, 24, 34, 35], "control_cost": 45, "converg": 5, "convers": 45, "convex": [6, 7, 8], "coordin": [6, 7, 8, 37, 42], "copi": 47, "corner": [13, 14, 15], "correct": 45, "correspond": [16, 27, 28, 29, 30, 38], "cost": [17, 18, 19, 22, 23, 24, 25, 26, 34, 35, 38], "cost_flood": 16, "cost_object": [17, 18, 22, 23], "crash": [25, 26], "creat": [4, 6, 7, 8, 42, 43, 45], "creation": 45, "criteria": 37, "ctrl_cost": 45, "cumul": [47, 48], "current": [4, 6, 7, 8, 38], "custom": 45, "d": [11, 12, 41], "da": [2, 39, 42], "dall": [4, 6, 7, 8], "dam": [16, 38], "danoi": [2, 39, 42], "deal": 46, "death": 16, "decis": 38, "decomposit": 41, "deep": [3, 12, 47, 48], "deep_sea_treasur": 45, "deepseatreasur": [6, 7, 8], "default": [6, 7, 8, 13, 14, 15, 27, 28, 29, 30, 38], "deficit": 38, "deficit_wat": 16, "defin": [4, 5, 16, 32, 46], "definit": [42, 43], "deliv": [4, 12], "demand": 38, "deni": [3, 9], "dens": [13, 14, 15], "depend": [42, 44], "deprec": 45, "depth": [11, 12], "deque_s": 47, "descript": [5, 12, 16, 36], "design": [45, 47], "desir": 45, "destin": 12, "detail": [5, 42, 43], "determin": 45, "determinist": 45, "develop": 42, "deviat": 48, "di": 33, "diamond": 37, "dict": 4, "dictionari": [4, 12], "did": 4, "differ": [4, 12, 45], "dim_reward": 47, "dimens": [4, 45], "dimension": [6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 32, 33, 34, 35, 37], "directli": [45, 47, 48], "directori": 46, "dirhtml": 1, "disc_episode_return": 45, "discord": [3, 42], "discount": [46, 47, 48], "discret": [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 25, 27, 28, 29, 30, 32, 33, 36, 37, 45], "dissert": 5, "distanc": [32, 46], "distinct": 45, "distribut": 41, "do": 45, "doc": [1, 45], "document": [42, 46], "doe": [42, 44, 48], "doi": 38, "domain": 45, "done": [46, 47], "dot": 48, "down": [6, 7, 8, 10, 37], "downstream": 38, "dr": [47, 48], "drop": [4, 45], "dst": [12, 45], "dst_map": [6, 7, 
8], "dtype": [45, 47], "due": [5, 38], "dynmorl": 3, "e": [1, 2, 4, 6, 7, 8, 12, 31, 39, 42, 45], "each": [4, 6, 7, 8, 11, 13, 14, 15, 27, 28, 29, 30, 31, 32, 45, 47], "eat": 12, "edit": 45, "effici": 41, "either": 9, "el": [2, 39, 42], "elaps": [47, 48], "elbow": 32, "element": [37, 45], "els": [10, 33, 37], "en": 48, "enabl": 45, "end": [9, 13, 14, 15, 47], "enemi": [4, 12, 16, 33, 37], "energi": 36, "engin": [25, 26], "entri": 41, "entry_point": 46, "env": [3, 5, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 34, 35, 36, 42, 43, 45, 46, 47, 48], "env_fn": 47, "environ": [2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 41, 42, 43, 44, 47], "episod": [9, 38, 47, 48], "epsilon": 48, "equival": 5, "error": 45, "esr": [12, 45], "essenti": [42, 46], "et": [5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 36, 41], "eval_mo": 45, "everi": [1, 11, 12, 48], "exampl": [10, 40, 42, 43, 45, 47, 48], "except": 48, "excess": 38, "execut": [16, 38], "exhaust": 45, "expand": 45, "expect": 9, "extend": 45, "extra": [5, 27, 28, 29, 30], "extract": 48, "ezpickl": 45, "f": [3, 38], "fair": 41, "fall": 45, "fals": [17, 18, 22, 23, 47], "famili": [42, 44], "fan": 41, "fantasi": [6, 7, 8], "far": 33, "farama": [3, 13, 14, 15, 42, 45], "fast": [16, 45], "fat": [11, 12], "featur": [3, 10, 45], "feder": 41, "felten": [2, 12, 39, 41, 42], "felten_toolkit_2023": [39, 42], "few": [45, 47, 48], "ffelten": 45, "file": [13, 14, 15, 45, 46], "fill": [13, 14, 15], "finger_tip_coord": 32, "fire": 12, "first": [2, 13, 14, 15, 38, 41, 45, 47], "fish": [9, 12], "fish_amount": 12, "fisherman": 9, "fishproba": 9, "fishwood": [3, 12, 45], "fit": 38, "fix": 10, "flag": [4, 17, 18, 22, 23, 33, 37, 46], "flexibl": 45, "float": [38, 46, 47, 48], "float32": [17, 18, 19, 22, 23, 24, 26, 31, 34, 35, 38, 47], "float_stat": [6, 7, 8], "flood": 38, "florian": [2, 39, 42], "folder": [1, 45], "follow": [2, 10, 13, 14, 
15, 17, 18, 19, 22, 23, 27, 28, 29, 30, 34, 42, 43, 45, 46], "forgotten": 45, "fork": 1, "form": 47, "formerli": [2, 41], "forward": [5, 19, 22, 23, 24, 27, 28, 29, 30, 34, 35], "forward_penalti": 5, "forward_reward": 45, "found": 45, "foundat": [13, 14, 15, 45], "four": [3, 12, 32], "frame": 48, "frame_skip": [13, 14, 15], "frameskip": 48, "framework": 41, "from": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 37, 38, 45, 46, 47, 48], "front": 46, "fruit": [3, 12], "fruit_tre": 45, "fuel": [5, 13, 14, 15, 16, 25, 26, 31], "fuel_consumption_penalti": 5, "full": [11, 12, 45], "function": [42, 43, 45], "g": [10, 12, 45], "gamma": [46, 47, 48], "gather": [12, 45], "gem": [4, 12, 37], "gener": [41, 45, 46], "get": 45, "gg": 42, "ghazali": [2, 39, 42], "gif": 45, "gimelfarb": [3, 10], "github": [3, 13, 14, 15, 45], "give": 5, "given": 48, "go": [1, 9, 11, 22, 23, 27, 28, 29, 30], "goal": [10, 12, 27, 28, 29, 30, 31], "goe": 46, "goir": [2, 39, 42], "gold": [4, 12, 37], "gr": [2, 39, 42], "green": 10, "grid": [6, 7, 8], "gridworld": [10, 12], "ground": 4, "group": 45, "gt": 45, "guarante": 41, "guid": [42, 43], "gupta": 5, "gym": [2, 33, 41, 42, 43, 45], "gymnasium": [2, 5, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 27, 28, 29, 30, 34, 35, 40, 41, 42, 43, 44, 46, 47, 48], "ha": [4, 5, 33, 45, 47], "halfcheetah": [36, 45], "halfcheetahenv": 19, "harder": 12, "hartmann": 41, "have": [3, 4, 5, 12, 42, 45, 48], "hay": 3, "healthi": [17, 18], "heavili": 48, "height": 36, "help": 3, "here": [3, 10, 42, 43, 45], "high": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 45], "higher": 45, "highwai": [16, 45], "highwayenv": 20, "highwayfastenv": 21, "home": [4, 37], "hopper": [36, 45], "hopperenv": [22, 23], "hotfix": 45, "how": [1, 13, 14, 15, 33, 40, 45], "html": 48, "http": [3, 4, 6, 7, 8, 13, 14, 15, 37, 38, 42, 48], "human": [13, 14, 15, 38], "humanoid": 36, "humanoidenv": 24, "humonoid": 36, 
"hung": 5, "hydroelectr": 38, "hyperparamet": 41, "i": [1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 42, 43, 45, 46, 47, 48], "ianleongudri": 45, "id": 46, "idx": 48, "igd": 46, "ijcnn": 38, "illustr": 45, "imag": [13, 14, 15, 16, 38], "image_observ": [13, 14, 15], "impact": 4, "implement": [4, 40, 46], "import": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 42, 43, 45, 46, 47, 48], "improv": 41, "includ": [16, 42, 44], "inconsist": 45, "increment": [13, 14, 15], "incremental_frame_skip": [13, 14, 15], "index": [47, 48], "indic": [4, 10, 37, 45, 47], "inf": [4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 32, 33, 34, 35, 38], "info": [42, 43, 45, 46, 47, 48], "info_dict": 45, "inform": [1, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 39, 42], "inher": 45, "inherit": 46, "init": 45, "initi": [38, 46], "initial_st": 38, "inproceed": [2, 39, 42], "inspir": 48, "instal": 1, "instanc": [42, 43, 45], "instead": [4, 45], "int": [47, 48], "int32": 47, "integ": 46, "integr": 45, "intellig": 2, "interact": [42, 43], "intern": [37, 38], "introduc": [5, 10, 45], "invert": 46, "io": [4, 6, 7, 8, 37, 48], "issu": 45, "itch": [4, 6, 7, 8, 37], "item": [10, 12], "item1": 12, "item2": 12, "item3": 12, "iter": [38, 47], "its": 46, "ivanov": 41, "j": 45, "johnson": 45, "join": [3, 42], "joint": [32, 38], "json": [13, 14, 15], "jump": [22, 23], "kallinteri": 45, "katze2664": 45, "keep": [47, 48], "kei": [4, 47], "kill": [12, 33, 37], "klassert": 4, "known": 46, "kwarg": 46, "l": [2, 39, 42, 47, 48], "lack": 5, "lambda": 47, "land": [5, 25, 26], "lander": [5, 45], "lane": [16, 20], "larger": 4, "last": [47, 48], "leaf": [11, 12], "learn": [2, 4, 9, 10, 37, 39, 41, 42, 43, 45], "left": [4, 6, 7, 8, 10, 11, 13, 14, 15, 37], "length": [47, 48], "length_queu": 47, "leon": 37, "level": [38, 45], 
"leverag": 41, "librari": [2, 41, 42, 45], "life": 45, "light": 12, "like": [3, 42, 44, 48], "limezu": [4, 37], "limit": 16, "line": 46, "linear": [17, 18, 19, 22, 23, 34], "linearli": 45, "linearreward": [17, 18, 19, 22, 23, 34, 42, 43, 45], "linearwrapp": 45, "link": 45, "lint": 45, "list": [4, 45], "literatur": 45, "local": 5, "locat": [4, 12, 32], "logo": 45, "look": 48, "low": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38], "lower": 10, "luca": [2, 39, 42], "lucasalegr": 45, "lunar": [5, 45], "lunarland": [5, 25, 26], "m": 38, "machin": 37, "made": [1, 45], "mai": 45, "main": [3, 13, 14, 15, 25, 26], "main_engine_fuel": 5, "make": [1, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 42, 43, 45, 46, 47, 48], "mani": [13, 14, 15, 47], "map": [6, 7, 8, 10, 12, 13, 14, 15, 45], "mario": [33, 45], "mark": 45, "markov": 38, "master": 48, "match": 45, "mathieu": [3, 38], "max": 48, "max_r": 48, "max_t": 9, "maxandskipenv": 48, "maxima": 5, "maximum": 38, "maze": 10, "md": [1, 45], "mdp": [42, 43, 45], "mean": 48, "merg": [5, 27, 28, 29, 30], "merge_move_penalti": [27, 28, 29, 30], "method": [45, 46], "metric": 46, "mgoulao": 45, "mike": [3, 10], "min": 48, "min_r": 48, "mine": [13, 14, 15], "mine_config": [13, 14, 15], "minecart": [3, 16, 42, 43, 45], "miner": [11, 12], "minerium": [13, 14, 15], "minim": [13, 14, 15, 16], "minor": 45, "mirror": 12, "mirrored_map": [6, 7, 8], "miss": [41, 45], "mistak": 45, "mo": [2, 3, 5, 13, 14, 15, 16, 36, 40, 41, 42, 43, 44, 46, 47, 48], "mo_gym": [17, 18, 19, 22, 23, 34, 42, 43, 45, 47, 48], "mo_gymnasium": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 42, 43, 45, 46, 47, 48], "mode": [13, 14, 15, 38], "model": [41, 42, 43], "modem": 41, "modifi": 45, "modul": [47, 48], "mofl": 41, "momaxandskipobserv": 45, "momdp": 
[42, 43, 45], "monormalizereward": 45, "more": [1, 5, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 42, 43], "morecordepisodestatist": 45, "moreov": [46, 47], "morewordstatist": 47, "morl": [3, 6, 7, 8, 9, 42, 43, 45], "most": 47, "mountain": [5, 27, 28, 29, 30, 31, 45], "mountaincar": [5, 31], "mountaincarcontinu": [5, 45], "move": [4, 5, 33, 34, 37, 45, 48], "move_penalti": 5, "much": 33, "mujoco": [42, 44], "multi": [2, 5, 9, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 45, 46], "multibinari": 4, "multiobject": 4, "multipl": [4, 37, 40, 41, 45, 46, 47], "must": [12, 13, 14, 15, 16, 46], "my_env_dir": 46, "my_env_fil": 46, "my_env_v0": 46, "myenv": 46, "mypi": 45, "n": [2, 39, 42], "nO": 38, "narayanan": [12, 37], "ndarrai": 48, "need": 45, "neg": [10, 31, 32], "neighbour": 16, "network": 38, "neural": [38, 39, 42], "neurip": [39, 41, 42], "next": 46, "next_ob": [42, 43, 45], "ninjikin": [4, 37], "nip": 10, "node": 11, "none": [13, 14, 15, 38, 45, 48], "norm": 31, "norm_treasure_env": 48, "normal": [38, 45, 47, 48], "normalized_act": 38, "note": [4, 48], "notebook": [42, 43], "now": [2, 39, 42, 45], "np": [17, 18, 19, 22, 23, 34, 42, 43, 45], "num": 47, "num_env": 47, "number": [4, 38], "numpi": [42, 43, 45, 46], "nutri1": 12, "nutri6": 12, "nutrient": [11, 12], "ob": [5, 12, 16, 36, 42, 43, 45, 47], "object": [2, 5, 9, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38, 39, 41, 42, 43, 45, 46], "observ": [12, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 45, 46, 48], "observation_spac": 46, "observationspac": 45, "obstyp": 48, "off": 45, "oject": 46, "onc": 1, "one": [42, 44, 45, 48], "onli": [38, 48], "open": [41, 42], "opengameart": 38, "optim": [37, 41, 45], "option": [27, 28, 29, 30, 42, 43, 45, 46], "order": 45, "ore1": 16, "ore2": 16, "ores": [13, 14, 15, 16], "org": [38, 42], "orient": [13, 14, 15], 
"origin": [4, 17, 18, 19, 22, 23, 34, 42, 43, 45], "other": [16, 17, 18, 22, 23, 42, 43, 45, 48], "otherwis": [4, 9, 33], "our": [1, 45, 46], "out": [3, 38], "outcom": 4, "output": 47, "packag": [1, 45], "paper": 4, "pareto": [41, 46], "pareto_front": [45, 46], "parisi": 38, "part": [45, 46], "pass": [33, 45], "passag": 10, "path": [13, 14, 15], "paulina": 38, "penal": 38, "penalti": [4, 5, 6, 7, 8, 12, 27, 28, 29, 30, 31, 33], "peopl": 3, "percentag": 38, "perform": [45, 46], "person": 41, "pf": 45, "pianosi": [16, 38], "pick": 4, "pip": [1, 42, 44], "pixel": [4, 6, 7, 8, 45], "plan": [42, 43], "pleas": [39, 41, 42], "plu": 10, "point": 33, "polici": [37, 41], "porcentag": [13, 14, 15], "port": 38, "posit": [5, 6, 7, 8, 10, 13, 14, 15, 32, 33, 37], "possibl": [4, 32], "potenti": [4, 12, 45], "pp": 38, "practic": [42, 43], "pranav": 5, "pranavg23": 45, "pre": 45, "predefin": 45, "prefer": 45, "prevent": 5, "previous": 45, "priorit": 41, "prob_drop": 4, "probabl": [4, 9], "problem": [4, 6, 7, 8, 9, 38, 45], "problemat": [42, 44], "proceed": [2, 37, 39, 42], "process": [39, 42], "product": 48, "project": 3, "protein": [11, 12], "provid": [3, 9, 42, 45, 47], "pseudo": 45, "public": [2, 42], "publish": 41, "pull": [41, 46], "py": [45, 46], "pydoc": 45, "pypi": 45, "pytest": 46, "python": [42, 43, 45], "q": 38, "qld": 38, "qualiti": 45, "quantiti": [13, 14, 15], "question": 3, "r": [1, 47, 48], "r_i": 32, "randint": 45, "random": 38, "rang": [45, 47], "rather": 4, "reach": [3, 6, 7, 8, 10, 11, 12, 16, 27, 28, 29, 30, 31, 33, 45], "reacher": 36, "read": 5, "readm": 45, "readthedoc": 48, "reason": 5, "rebuild": 1, "receiv": [11, 45], "recent": 47, "recov": [17, 18, 19, 22, 23, 34], "red": 10, "refactor": [3, 13, 14, 15], "refer": 46, "registr": 46, "registri": 46, "reinforc": [2, 4, 9, 10, 39, 41, 42, 43, 45], "reinject": 48, "relax": [2, 39, 42], "releas": [16, 38], "reli": 45, "reliabl": [39, 41, 42], "remov": [27, 28, 29, 30], "remove_move_penalti": [27, 28, 29, 30], 
"renam": 45, "render": [13, 14, 15, 38, 46], "render_mod": [13, 14, 15, 38], "repeat": [13, 14, 15], "repositori": [39, 40, 42, 46], "repres": 45, "request": 41, "requir": 1, "research": [39, 41, 42], "reservoir": [3, 16, 45], "reset": [42, 43, 45, 46, 47, 48], "resourc": [12, 45], "respect": 47, "restelli": 38, "retriev": [13, 14, 15], "return": [9, 13, 14, 15, 37, 42, 45, 46, 48], "return_queu": 47, "revamp": 45, "revers": [4, 5, 27, 28, 29, 30], "reverse_penalti": 5, "reward": [5, 21, 42, 43, 45, 46, 47, 48], "reward_dim": 46, "reward_spac": [45, 46], "reymond": [3, 38], "rgb": [13, 14], "rgb_arrai": [13, 14, 15, 38], "right": [4, 6, 7, 8, 10, 11, 13, 14, 15, 20, 37], "right_lan": 16, "rightest": 16, "riva": 38, "rl": 45, "rnd": 45, "robert": 4, "rodriguez": 41, "roijer": 12, "room": [3, 12], "root": [11, 45], "run": [19, 24, 35, 46, 47], "runzheyang": 3, "safeti": 4, "same": [6, 7, 8], "sampl": [41, 47], "save": 45, "scalar": [17, 18, 19, 22, 23, 34, 42, 43, 45, 48], "scenario": 45, "scott": 45, "sea": [3, 12, 47, 48], "sebimarkgraf": 45, "second": [13, 14, 15], "section": 4, "see": [10, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 34, 35, 42, 43, 45, 46], "seed": [46, 47], "select": 38, "self": [45, 46], "sens": 48, "separ": 10, "serenevillagerevamp": [4, 37], "serial": 47, "server": [3, 42], "set": [17, 18, 22, 23, 42, 45, 46], "setup": 45, "shape": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 46, 47], "shaped_reward": 5, "should": [45, 46], "shown": 45, "side": [25, 26], "side_engine_fuel": 5, "signal": 5, "silva": [2, 39, 42], "similar": [36, 45, 47], "similarli": [5, 16], "simon": 38, "simpl": [9, 42, 43], "simpli": 3, "sin": [13, 14, 15, 32], "sinc": [46, 47, 48], "singl": [27, 28, 29, 30, 45], "size": [4, 11, 46], "skip": 48, "sky": 38, "small": [4, 45], "smooth": 48, "snippet": 45, "so": 4, "some": [3, 42, 44, 46], "soto": 41, "sourc": [6, 7, 8, 10, 12, 
13, 14, 15, 31, 42], "space": [5, 12, 16, 21, 36, 45, 46], "spars": [13, 14, 15], "specif": 47, "specifi": 4, "speed": [5, 13, 14, 15, 16, 20, 27, 28, 29, 30, 45], "speed_object": 5, "spell": 45, "sphinx": 1, "split": 10, "squar": 10, "srini": 37, "stabil": 48, "stabl": [45, 48], "stable_baselines3": 48, "stai": 16, "standard": [42, 45, 48], "static": 5, "statist": 47, "stats_kei": [47, 48], "steckelmach": [3, 9], "step": [4, 5, 6, 7, 8, 9, 27, 28, 29, 30, 31, 33, 38, 41, 42, 43, 45, 46, 47], "store": 47, "str": [47, 48], "structur": 45, "sub": 47, "submarin": [6, 7, 8, 12], "submiss": 41, "subpackag": 45, "successfulli": [25, 26], "successor": [3, 10], "suit": 42, "super": 33, "supermario": [3, 16], "supermariobro": 33, "supermariobrosenv": 16, "suppli": 38, "support": 16, "swimmer": 36, "swimmerenv": 34, "system": [39, 42, 44], "t": [47, 48], "tailor": 45, "take": [4, 12], "taken": 12, "talbi": [2, 39, 42], "target": 32, "target_1": 36, "target_2": 36, "target_3": 36, "target_4": 36, "target_i": 32, "taxonomi": 41, "termin": [42, 43, 45, 46, 47], "th": 48, "than": 4, "thank": 3, "thei": [45, 47, 48], "them": [3, 4, 5, 42, 43], "therefor": 11, "thi": [1, 4, 39, 42, 43, 44, 45, 46, 47, 48], "those": 10, "thought": 45, "three": [12, 45], "threshold": 38, "thrill": 45, "through": 46, "thu": 45, "ti": 46, "time": [1, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 27, 28, 29, 30, 31, 33, 47, 48], "time_limit": 38, "time_penalti": [4, 5, 12], "timemov": 5, "timespe": 5, "timondesch": 45, "tip": 32, "titl": [2, 39, 42], "tomekst": 45, "tool": 45, "toolkit": [39, 41, 42], "top": [27, 28, 29, 30, 31], "torqu": 32, "touch": 48, "track": [47, 48], "trade": 45, "train": 48, "transfer": [3, 10], "transform": 45, "travel": 10, "treasur": [3, 4, 12, 37, 47, 48], "tree": [3, 12, 38], "triangl": 10, "true": [4, 6, 7, 8, 13, 14, 15, 38, 47], "truncat": [38, 42, 43, 45, 46, 47], "tuto": 45, "tutori": 46, "two": [9, 13, 14, 15, 16, 32, 33, 38, 48], "txt": 1, "type": [12, 13, 14, 15, 16, 45], 
"u": 3, "unbreakable_bottl": 4, "unbreakablebottl": 4, "under": [40, 45], "underwat": [6, 7, 8], "unpin": 45, "until": 38, "unwrap": 45, "up": [4, 6, 7, 8, 10, 37, 38], "upper": [13, 14, 15], "upstream": 38, "us": [6, 7, 8, 13, 14, 15, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48], "usag": 45, "user": 45, "util": [9, 45], "v0": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 21, 27, 28, 29, 30, 31, 33, 36, 37, 38, 42, 43, 47, 48], "v2": [5, 45], "v3": [25, 26, 45], "v4": [17, 18, 19, 22, 23, 24, 32, 34, 35, 36, 45], "valu": [4, 6, 7, 8, 11, 12, 41], "vamplew": [5, 12], "variabl": 47, "variou": [3, 40], "vecenv": 45, "vector": [10, 11, 13, 14, 15, 27, 28, 29, 30, 31, 33, 42, 45, 46, 48], "vector_reward": [42, 43, 45], "vectorenv": 47, "vectori": 45, "vehicl": 16, "veloc": [17, 18, 24, 32, 34, 35, 36], "veri": [42, 43, 46], "version": [5, 10, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45], "via": [41, 47], "vitamin": [11, 12], "wa": [3, 4, 5, 6, 7, 8, 13, 14, 15, 41, 45], "wai": [3, 10], "walker2d": 36, "walker2denv": 35, "wall": 10, "want": 3, "warn": [16, 45], "water": [3, 11, 12, 16, 45], "we": [3, 42, 45, 46], "websit": [42, 45], "weight": [17, 18, 19, 22, 23, 34, 42, 43, 45, 48], "welfar": 41, "well": [42, 45, 46], "were": 10, "when": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 33, 37, 45, 46], "where": [6, 7, 8, 9, 11, 27, 28, 29, 30, 31, 45], "whether": [13, 14, 15, 38], "which": [4, 5, 6, 7, 8, 9, 10, 42, 45, 46, 47, 48], "while": [4, 10, 12, 16], "within": [45, 46, 47], "wood": [9, 12], "wood_amount": 12, "woodproba": 9, "work": [42, 47], "worker": 45, "workshop": [2, 41], "world": [6, 7, 8], "would": 3, "wrap": [45, 47], "wrapped_env": 47, "wrapper": [40, 42, 43, 45], "wrt": 38, "x": [6, 7, 8, 17, 18, 22, 23, 24, 33, 34, 35, 37], "x_po": 16, "x_veloc": [36, 45], "xu": 36, "y": [6, 7, 8, 17, 18, 37], "y_veloc": 36, "yang": [6, 7, 8, 11, 12, 16], "year": [2, 39, 42], "you": [3, 39, 42, 43, 44, 45, 46], "your": [1, 39, 
42], "your_ag": [42, 43, 45], "z": [22, 23], "zero": [32, 45]}, "titles": ["404 - Page Not Found", "MO-Gymnasium documentation", "<no title>", "Community", "Breakable-Bottles", "Classic Control", "Deep-Sea-Treasure", "Deep-Sea-Treasure-Concave", "Deep-Sea-Treasure-Mirrored", "Fishwood", "Four-Room", "Fruit-Tree", "Grid-World", "Minecart", "Minecart-Deterministic", "Minecart-Rgb", "Miscellaneous", "MO-Ant", "MO-Ant-2D", "MO-Halfcheetah", "MO-Highway", "MO-Highway-Fast", "MO-Hopper", "MO-Hopper-2D", "MO-Humanoid", "MO-Lunar-Lander", "MO-Lunar-Lander-Continuous", "MO-Mountaincar", "MO-Mountaincar-3D", "MO-Mountaincar-Timemove", "MO-Mountaincar-Timespeed", "MO-Mountaincarcontinuous", "MO-Reacher", "MO-Supermario", "MO-Swimmer", "MO-Walker2D", "MuJoCo", "Resource-Gathering", "Water-Reservoir", "Citation", "MORL Baselines", "List of Publications", "API", "API", "Install", "Release Notes", "Creating a custom environment", "Vector Wrappers", "Wrappers"], "titleterms": {"0": 45, "1": [45, 46], "2": [45, 46], "29": 45, "2d": [18, 23], "3": [45, 46], "3d": 28, "4": [45, 46], "404": 0, "Not": 0, "The": 0, "acknowledg": 3, "action": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 32, 37, 38], "ant": [17, 18], "api": [42, 43], "argument": [4, 6, 7, 8, 9, 10, 13, 14, 15, 38], "baselin": 40, "becom": 45, "better": 45, "bottl": [4, 45], "breakabl": [4, 45], "bug": 45, "build": 1, "citat": 39, "cite": 42, "class": 46, "classic": 5, "commun": 3, "concav": 7, "continu": 26, "control": 5, "could": 0, "creat": 46, "credit": [4, 6, 7, 8, 9, 10, 13, 14, 15, 37, 38], "custom": 46, "deep": [6, 7, 8, 45], "descript": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 32, 33, 34, 35, 37, 38], "determinist": 14, "document": [1, 45], "environ": [45, 46], "episod": [4, 6, 7, 8, 10, 11, 13, 14, 15, 33, 37], "evalu": 45, "fast": 21, "fishwood": 9, "fix": 45, "found": 0, "four": 10, "front": 45, "fruit": [11, 45], "gather": 37, "grid": 12, "gymnasium": [1, 45], "halfcheetah": 19, 
"highwai": [20, 21], "hopper": [22, 23], "humanoid": 24, "improv": 45, "instal": [42, 44], "instanti": 46, "known": 45, "lander": [25, 26], "linearreward": 48, "list": 41, "lunar": [25, 26], "matur": 45, "migrat": 45, "minecart": [13, 14, 15], "mirror": [8, 45], "miscellan": 16, "mo": [1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 45], "moclipreward": 48, "momaxandskipobserv": 48, "monormalizereward": 48, "more": 45, "morecordepisodestatist": [47, 48], "morl": 40, "mosyncvectorenv": 47, "mountaincar": [27, 28, 29, 30, 45], "mountaincarcontinu": 31, "mujoco": [36, 45], "new": [45, 46], "note": 45, "observ": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 32, 37, 38], "page": [0, 45], "pareto": 45, "polici": 45, "public": 41, "pygam": 45, "reacher": [32, 45], "refer": 10, "regist": 46, "releas": 45, "render": 45, "request": 0, "reservoir": 38, "resourc": 37, "reward": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38], "rgb": 15, "room": 10, "sea": [6, 7, 8, 45], "space": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38], "start": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 37, 38], "state": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 37, 38], "supermario": 33, "support": 45, "swimmer": 34, "termin": [4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 33, 37], "test": [45, 46], "timemov": 29, "timespe": 30, "treasur": [6, 7, 8, 45], "tree": [11, 45], "updat": 45, "v0": 45, "v1": 45, "vector": 47, "walker2d": 35, "water": 38, "webpag": 45, "world": 12, "wrapper": [47, 48], "your": 46}}) \ No newline at end of file diff --git a/tutorials/custom_env/index.html b/tutorials/custom_env/index.html index d06175b0..5e0ae121 100644 --- a/tutorials/custom_env/index.html +++ b/tutorials/custom_env/index.html @@ -11,7 +11,7 @@ - + Creating a custom environment - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -327,10 +331,10 @@
    -

    Creating a custom environment#

    +

    Creating a custom environment

    This tutorials goes through the steps of creating a custom environment for MO-Gymnasium. Since MO-Gymnasium is closely tied to Gymnasium, we will refer to its documentation for some parts.

    -

    1. Create a new environment class#

    +

    1. Create a new environment class

    Create an environment class that inherits from gymnasium.Env. The class must implement the following methods:

    • __init__(self, ...) - The constructor of the class. It should initialize the environment and set the self.action_space andself.observation_space attributes as in classical Gymnasium (see Spaces. Moreover, since we are dealing with multiple objective/rewards, you should define a self.reward_space attribute that defines the shape of the vector rewards returned by the environment, as well as self.reward_dim which is an integer defining the size of the reward vector.

    • @@ -341,7 +345,7 @@

      1. Create a new environment class -

      2. Register the environment#

      +

      2. Register the environment

      Register the environment in the registry. This is done by adding the following line to the __init__.py file in your env directory:

      from gymnasium.envs.registration import register
       register(
      @@ -352,11 +356,11 @@ 

      2. Register the environment -

      3. Test the environment#

      +

      3. Test the environment

      If your environment is registered within the MO-Gymnasium repository (step 2), it should be automatically pulled for testing when you run pytest.

    -

    4. Instantiate your environment#

    +

    4. Instantiate your environment

    See our API documentation, but essentially:

    import mo_gymnasium
     env = mo_gymnasium.make('my_env_v0')
    @@ -461,8 +465,8 @@ 

    4. Instantiate your environment - + + diff --git a/wrappers/vector_wrappers/index.html b/wrappers/vector_wrappers/index.html new file mode 100644 index 00000000..d0fee156 --- /dev/null +++ b/wrappers/vector_wrappers/index.html @@ -0,0 +1,713 @@ + + + + + + + + + + + + + + + Vector Wrappers - MO-Gymnasium Documentation + + + + + + + + + +
    + +
    + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark mode + + + + + + + + + + + + + + + + + +
    + + +
    + + + + + +
    +
    +
    + + + + + Back to top + +
    + +
    + +
    +
    + +
    +

    Vector Wrappers

    +

    Similar to the normal wrappers, MO-Gymnasium provides a few wrappers that are specifically designed to work with vectorized environments. They are all available directly from the mo_gymnasium.wrappers.vector module.

    +
    +

    MOSyncVectorEnv

    +
    +
    +class mo_gymnasium.wrappers.vector.MOSyncVectorEnv(env_fns: Iterator[callable], copy: bool = True)
    +

    Vectorized environment that serially runs multiple environments.

    +

    Example

    +
    >>> import mo_gymnasium as mo_gym
    +
    +
    +
    >>> envs = mo_gym.wrappers.vector.MOSyncVectorEnv([
    +...     lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)
    +... ])
    +>>> envs
    +MOSyncVectorEnv(num_envs=4)
    +>>> obs, infos = envs.reset()
    +>>> obs
    +array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=int32)
    +>>> _ = envs.action_space.seed(42)
    +>>> actions = envs.action_space.sample()
    +>>> obs, rewards, terminateds, truncateds, infos = envs.step([0, 1, 2, 3])
    +>>> obs
    +array([[0, 0], [1, 0], [0, 0], [0, 3]], dtype=int32)
    +>>> rewards
    +array([[0., -1.], [0.7, -1.], [0., -1.], [0., -1.]], dtype=float32)
    +>>> terminateds
    +array([False,  True, False, False])
    +
    +
    +
    + +
    +
    +

    MORecordEpisodeStatistics

    +
    +
    +class mo_gymnasium.wrappers.vector.MORecordEpisodeStatistics(env: VectorEnv, gamma: float = 1.0, buffer_length: int = 100, stats_key: str = 'episode')
    +

    This wrapper will keep track of cumulative rewards and episode lengths.

    +

    At the end of any episode within the vectorized env, the statistics of the episode +will be added to info using the key episode, and the _episode key +is used to indicate the environment index which has a terminated or truncated episode.

    +
    +

    For a vectorized environments the output will be in the form of (be careful to first wrap the env into vector before applying MORewordStatistics):

    +
    >>> infos = { 
    +...     "episode": {
    +...         "r": "<array of cumulative reward for each done sub-environment (2d array, shape (num_envs, dim_reward))>",
    +...         "dr": "<array of discounted reward for each done sub-environment (2d array, shape (num_envs, dim_reward))>",
    +...         "l": "<array of episode length for each done sub-environment (array)>",
    +...         "t": "<array of elapsed time since beginning of episode for each done sub-environment (array)>"
    +...     },
    +...     "_episode": "<boolean array of length num-envs>"
    +... }
    +
    +
    +
    +

    Moreover, the most recent rewards and episode lengths are stored in buffers that can be accessed via +wrapped_env.return_queue and wrapped_env.length_queue respectively.

    +
    +
    Variables:
    +
      +
    • return_queue – The cumulative rewards of the last deque_size-many episodes

    • +
    • length_queue – The lengths of the last deque_size-many episodes

    • +
    +
    +
    +
    + +
    +
    + +
    +
    + +
    + +
    +
    +
    + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/wrappers/wrappers/index.html b/wrappers/wrappers/index.html index a73dbd8c..22a8655f 100644 --- a/wrappers/wrappers/index.html +++ b/wrappers/wrappers/index.html @@ -8,10 +8,10 @@ - + - + Wrappers - MO-Gymnasium Documentation @@ -215,6 +215,7 @@
  • Install
  • API
  • Wrappers
  • +
  • Vector Wrappers
  • MORL Baselines
  • Environments

    @@ -234,11 +235,14 @@
  • MO-Mountaincar
  • MO-Mountaincarcontinuous
  • MO-Lunar-Lander
  • +
  • MO-Lunar-Lander-Continuous
  • Miscellaneous @@ -326,77 +330,73 @@
    -

    Wrappers#

    -

    A few wrappers inspired from Gymnasium’s wrappers are available in MO-Gymnasium. They are all available directly from the mo_gymnasium module.

    +

    Wrappers

    +

    A few wrappers inspired from Gymnasium’s wrappers are available in MO-Gymnasium. They are all available directly from the mo_gymnasium.wrappers module.

    -

    LinearReward#

    +

    LinearReward

    -
    -class mo_gymnasium.LinearReward(env: Env, weight: ndarray | None = None)#
    +
    +class mo_gymnasium.wrappers.LinearReward(env: Env, weight: ndarray | None = None)

    Makes the env return a scalar reward, which is the dot-product between the reward vector and the weight vector.

    -

    MONormalizeReward#

    +

    MONormalizeReward

    -
    -class mo_gymnasium.MONormalizeReward(env: Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-08)#
    +
    +class mo_gymnasium.wrappers.MONormalizeReward(env: Env, idx: int, gamma: float = 0.99, epsilon: float = 1e-08)

    Wrapper to normalize the reward component at index idx. Does not touch other reward components.

    +

    This code is heavily inspired on Gymnasium’s except that it extracts the reward component at given idx, normalizes it, and reinjects it.

    +

    (!) This smoothes the moving average of the reward, which can be useful for training stability. But it does not “normalize” the reward in the sense of making it have a mean of 0 and a standard deviation of 1.

    +

    Example

    +
    >>> import mo_gymnasium as mo_gym
    +>>> from mo_gymnasium.wrappers import MONormalizeReward
    +>>> env = mo_gym.make("deep-sea-treasure-v0")
    +>>> norm_treasure_env = MONormalizeReward(env, idx=0)
    +>>> both_norm_env = MONormalizeReward(norm_treasure_env, idx=1)
    +>>> both_norm_env.reset() # This one normalizes both rewards
    +
    +
    -

    MOClipReward#

    +

    MOClipReward

    -
    -class mo_gymnasium.MOClipReward(env: Env, idx: int, min_r, max_r)#
    +
    +class mo_gymnasium.wrappers.MOClipReward(env: Env, idx: int, min_r, max_r)

    Clip reward[idx] to [min, max].

    -
    -
    -

    MOSyncVectorEnv#

    -
    -
    -class mo_gymnasium.MOSyncVectorEnv(env_fns: Iterator[callable], copy: bool = True)#
    -

    Vectorized environment that serially runs multiple environments.

    -
    -
    -

    MORecordEpisodeStatistics#

    +

    MORecordEpisodeStatistics

    -
    -class mo_gymnasium.MORecordEpisodeStatistics(env: Env, gamma: float = 1.0, deque_size: int = 100)#
    +
    +class mo_gymnasium.wrappers.MORecordEpisodeStatistics(env: Env, gamma: float = 1.0, buffer_length: int = 100, stats_key: str = 'episode')

    This wrapper will keep track of cumulative rewards and episode lengths.

    After the completion of an episode, info will look like this:

    >>> info = {
     ...     "episode": {
     ...         "r": "<cumulative reward (array)>",
     ...         "dr": "<discounted reward (array)>",
    -...         "l": "<episode length (scalar)>", # contrary to Gymnasium, these are not a numpy array
    +...         "l": "<episode length (scalar)>",
     ...         "t": "<elapsed time since beginning of episode (scalar)>"
     ...     },
     ... }
     
    -

    For a vectorized environments the output will be in the form of (be careful to first wrap the env into vector before applying MORewordStatistics):

    -
    >>> infos = {
    -...     "final_observation": "<array of length num-envs>",
    -...     "_final_observation": "<boolean array of length num-envs>",
    -...     "final_info": "<array of length num-envs>",
    -...     "_final_info": "<boolean array of length num-envs>",
    -...     "episode": {
    -...         "r": "<array of cumulative reward (2d array, shape (num_envs, dim_reward))>",
    -...         "dr": "<array of discounted reward (2d array, shape (num_envs, dim_reward))>",
    -...         "l": "<array of episode length (array)>",
    -...         "t": "<array of elapsed time since beginning of episode (array)>"
    -...     },
    -...     "_episode": "<boolean array of length num-envs>"
    -... }
    -
    -
    +
    + +
    +
    +

    MOMaxAndSkipObservation

    +
    +
    +class mo_gymnasium.wrappers.MOMaxAndSkipObservation(env: Env[ObsType, ActType], skip: int = 4)
    +

    This wrapper will return only every skip-th frame (frameskipping) and return the max between the two last observations.

    +

    Note: This wrapper is based on the wrapper from stable-baselines3: https://stable-baselines3.readthedocs.io/en/master/_modules/stable_baselines3/common/atari_wrappers.html#MaxAndSkipEnv

    @@ -407,12 +407,12 @@

    MORecordEpisode