def add_sankey(self, fail_action: Union[int, str] = "raise") -> None:
    """Queue the computation that prepares data for Sankey diagrams.

    A single task is added under the key ``message::sankey``; it applies ``concat``
    to the keys ``out::pyam`` and ``in::pyam``.

    Parameters
    ----------
    fail_action : "raise" or int
        :mod:`logging` level or level name, passed to the `fail` argument of
        :meth:`.Reporter.add_queue`.
    """
    # NOTE The base version needs only this one task; more can be appended to the
    #      queue below later on.
    sankey_task = (
        ("message::sankey", "concat", "out::pyam", "in::pyam"),
        {"strict": True},
    )
    self.add_queue([sankey_task], fail=fail_action)


def test_reporter_add_sankey(test_mp, request):
    """:meth:`.Reporter.add_sankey` makes the key ``message::sankey`` available."""
    # Solved Westeros scenario (including emissions) as the basis for reporting
    westeros = make_westeros(
        test_mp, emissions=True, solve=True, quiet=True, request=request
    )
    rep = Reporter.from_scenario(westeros)

    # Westeros-specific configuration: '-' is a reserved character in pint
    unit_config = {"replace": {"-": ""}}
    configure(units=unit_config)

    # Add Sankey calculation(s)
    rep.add_sankey()

    # The new key must be known to the Reporter
    assert rep.check_keys("message::sankey")
def test_map_for_sankey(test_mp, request):
    """:func:`.map_for_sankey` maps Westeros flows, with and without exclusions."""
    # NB: only a pyam.IamDataFrame shaped like the result of these setup steps is
    #     really needed, so this could probably be simplified
    rep = Reporter.from_scenario(make_westeros(test_mp, solve=True, request=request))
    rep.configure(units={"replace": {"-": ""}})
    rep.add_sankey()
    df = rep.get("message::sankey")

    # Expected mapping: variable name -> (source, target)
    expected_all = {
        "in|final|electricity|bulb|standard": ("final|electricity", "bulb|standard"),
        "in|secondary|electricity|grid|standard": (
            "secondary|electricity",
            "grid|standard",
        ),
        "out|final|electricity|grid|standard": ("grid|standard", "final|electricity"),
        "out|secondary|electricity|coal_ppl|standard": (
            "coal_ppl|standard",
            "secondary|electricity",
        ),
        "out|secondary|electricity|wind_ppl|standard": (
            "wind_ppl|standard",
            "secondary|electricity",
        ),
        "out|useful|light|bulb|standard": ("bulb|standard", "useful|light"),
    }
    # Same expectation, minus every flow that starts or ends at final|electricity
    expected_without_final_electricity = {}
    for name, flow in expected_all.items():
        if "final|electricity" in flow:
            continue
        expected_without_final_electricity[name] = flow

    # Without `exclude`, all variables are mapped
    assert map_for_sankey(df, year=700, region="Westeros") == expected_all

    # With `exclude`, flows touching the excluded node are dropped
    observed = map_for_sankey(
        df, year=700, region="Westeros", exclude=["final|electricity"]
    )
    assert observed == expected_without_final_electricity
def map_for_sankey(
    iam_df: "IamDataFrame",
    year: int,
    region: str,
    exclude: Optional[List[Optional[str]]] = None,
) -> Dict[str, Tuple[Union[List, Any, LiteralString], Union[List, Any, LiteralString]]]:
    """Map each flow variable to its (source, target) pair to enable Sankey plots.

    Parameters
    ----------
    iam_df : :class:`pyam.IamDataFrame`
        The IAMC-format DataFrame holding the data to plot as Sankey diagrams.
    year : int
        The year to display in the Sankey diagram.
    region : str
        The region to display in the Sankey diagram. A trailing ``*`` is appended
        before the value is passed to :meth:`pyam.IamDataFrame.filter`.
    exclude : list of str, optional
        Sources or targets to leave out: every flow whose source *or* target is in
        this list is dropped. Defaults to :obj:`None`, i.e. showing all flows.

    Returns
    -------
    mapping : dict
        A mapping from variable names to a tuple of (source, target).
    """
    # Avoid a mutable default argument: a fresh list per call instead of one shared
    # list object that lives on the function
    excluded = [] if exclude is None else exclude

    mapping = {}
    for var in iam_df.filter(region=region + "*", year=year).variable:
        # Split the variable name only once; reuse the result for test and value
        flow = get_source_and_target(var)
        if not exclude_flow(flow, excluded):
            mapping[var] = flow
    return mapping


def get_source_and_target(
    variable: str,
) -> Tuple[Union[List, Any, LiteralString], Union[List, Any, LiteralString]]:
    """Get source and target for the `variable` flow.

    Example: ``"in|final|electricity|bulb|standard"`` gives
    ``("final|electricity", "bulb|standard")``.
    """
    start_idx, end_idx = set_start_and_end_index(variable)
    source = get_variable_components(variable, start_idx, join=True)
    target = get_variable_components(variable, end_idx, join=True)
    return source, target


def set_start_and_end_index(variable: str) -> Tuple[List[int], List[int]]:
    """Get indices of source and target in variable name.

    Component 0 of `variable` gives the direction. For ``"in"``, components 1–2 are
    the source and 3–4 the target; for any other value (e.g. ``"out"``) the two
    pairs are swapped.
    """
    first_pair, second_pair = [1, 2], [3, 4]
    if get_variable_components(variable, 0) == "in":
        return first_pair, second_pair
    return second_pair, first_pair


def exclude_flow(
    flow: Tuple[Union[List, Any, LiteralString], Union[List, Any, LiteralString]],
    exclude: List[Optional[str]],
) -> bool:
    """Return :obj:`True` if the source or target of `flow` is in `exclude`."""
    return flow[0] in exclude or flow[1] in exclude
Build the baseline scenario using data stored in Excel files to populate sets and parameters: diff --git a/tutorial/westeros/westeros_sankey.ipynb b/tutorial/westeros/westeros_sankey.ipynb new file mode 100644 index 000000000..329fa813f --- /dev/null +++ b/tutorial/westeros/westeros_sankey.ipynb @@ -0,0 +1,258 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Westeros Tutorial - Introducing Sankey diagrams\n", + "\n", + "Sankey diagrams are a useful technique to visualize energy flow accounts.\n", + "\n", + "This tutorial introduces the Sankey feature provided by the ``pyam`` package.\n", + "\n", + "\n", + "**Pre-requisites**\n", + "- You have the *MESSAGEix* framework installed and working.\n", + " In particular, you should have installed ``message_ix``, ``pyam``, and ``plotly``.\n", + "- Complete tutorial Part 1 (``westeros_baseline.ipynb``) and Introducing Reporting (``westeros_report.ipynb``)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start as usual by connecting to a database and loading a scenario. Note that we do not `clone()` the scenario here because we do not intend to make any changes to it. " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import ixmp\n", + "\n", + "from message_ix import Scenario\n", + "\n", + "mp = ixmp.Platform()\n", + "scenario = Scenario(mp, model=\"Westeros Electrified\", scenario=\"baseline\")\n", + "\n", + "# Ensure the scenario has a solution\n", + "if not scenario.has_solution():\n", + " scenario.solve(quiet=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we create the `Reporter` object. Since ``\"-\"`` is a reserved character in the unit-handling package [pint](https://github.com/hgrecco/pint), we need to replace it with ``\"\"``."
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from message_ix.report import Reporter\n", + "\n", + "rep = Reporter.from_scenario(scenario)\n", + "\n", + "rep.configure(units={\"replace\": {\"-\": \"\"}})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This `Reporter` already includes everything we need to construct the `pyam.IamDataFrame` required for plotting Sankey diagrams! In other words, it includes the input and output flows in the IAMC format (`in::pyam` and `out::pyam`, respectively). We can start the calculation manually:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from genno.operator import concat\n", + "\n", + "pyam_out = rep.get(\"out::pyam\")\n", + "pyam_in = rep.get(\"in::pyam\")\n", + "\n", + "concat(pyam_out, pyam_in)\n", + "\n", + "# Please note: if you don't use the convenience function below, you need to store the\n", + "# result of concat(pyam_out, pyam_in) as df here!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or we can use a built-in convenience function. This will also add the calculation to the `Reporter`, so the same calculation would not need to be repeated if it's used anywhere else, saving us time and memory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "rep.add_sankey()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting `pyam.IamDataFrame` is accessible through the key `message::sankey`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df = rep.get(\"message::sankey\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can use the utility function `map_for_sankey(iam_df, year, region, exclude=[])` to create the mapping required for the `figures.sankey()` function of the `pyam` package. Each Sankey diagram will depict one year and region, which we have to provide to the function. In some models it might be necessary to exclude variables and flows to get meaningful Sankey diagrams; for this, you can use `exclude` as detailed below. But let's start with all of them!" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from message_ix.util.sankey import map_for_sankey\n", + "\n", + "mapping = map_for_sankey(df, year=700, region=\"Westeros\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pyam function `pyam.figures.sankey()` returns a `plotly` figure object of our desired Sankey diagram that can be further modified. However, it can currently only handle data for single years, so we need to ensure that the input data we provide is filtered for the same year we filtered for above.\n", + "\n", + "Finally, we can plot it as an interactive diagram!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyam.figures import sankey\n", + "\n", + "fig = sankey(df=df.filter(year=700), mapping=mapping)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With hundreds of variables, you can imagine this diagram getting crowded! We can use the `exclude` parameter of `map_for_sankey()` to exclude variables we are not interested in:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "mapping_without_wind_ppl_standard = map_for_sankey(\n", + " df,\n", + " year=700,\n", + " region=\"Westeros\",\n", + " exclude=[\"wind_ppl|standard\"],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, we can display the figure as before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = sankey(df=df.filter(year=700), mapping=mapping_without_wind_ppl_standard)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can pick any variable for this, even if it's in the middle of another flow! 
And for this scenario, you can pick other years, too:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mapping_without_final_electricity = map_for_sankey(\n", + " df, year=720, region=\"Westeros\", exclude=[\"final|electricity\"]\n", + ")\n", + "fig = sankey(df=df.filter(year=720), mapping=mapping_without_final_electricity)\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And lastly, as always, please do not forget to close the database ;-) " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "mp.close_db()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}