diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 24a7501c9..caa5774b6 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -12,6 +12,8 @@ All changes - :mod:`message_ix` is tested and compatible with `Python 3.13 `__ (:pull:`881`). - Support for Python 3.8 is dropped (:pull:`881`), as it has reached end-of-life. +- Add :meth:`.Reporter.add_sankey` and :mod:`.tools.sankey` to create Sankey diagrams from solved scenarios (:pull:`770`). + The :file:`westeros_sankey.ipynb` :ref:`tutorial ` shows how to use this feature. - Add option to :func:`.util.copy_model` from a non-default location of model files (:pull:`877`). .. _v3.9.0: diff --git a/doc/api.rst b/doc/api.rst index dfe05ade0..e17e468a6 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -300,6 +300,8 @@ Utility methods .. automodule:: message_ix.util :members: expand_dims, copy_model, make_df +.. automodule:: message_ix.util.sankey + :members: map_for_sankey Testing utilities ----------------- diff --git a/doc/conf.py b/doc/conf.py index ebf9bfb2f..57a466553 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -185,6 +185,7 @@ def local_inv(name: str, *parts: str) -> Optional[str]: "message_doc": ("https://docs.messageix.org/projects/global/en/latest/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), "pint": ("https://pint.readthedocs.io/en/stable/", None), + "plotly": ("https://plotly.com/python-api-reference", None), "plotnine": ("https://plotnine.org", None), "pyam": ("https://pyam-iamc.readthedocs.io/en/stable/", None), "python": ("https://docs.python.org/3/", None), diff --git a/doc/reporting.rst b/doc/reporting.rst index b6ea7516f..540f20952 100644 --- a/doc/reporting.rst +++ b/doc/reporting.rst @@ -215,6 +215,7 @@ These automatic contents are prepared using: .. 
def add_sankey(
    self,
    year: int,
    node: str,
    exclude: Union[list[str], None] = None,
) -> str:
    """Add the tasks required to produce a Sankey diagram.

    See :func:`.map_for_sankey` for the meaning of the `node` and `exclude`
    parameters.

    Parameters
    ----------
    year : int
        The period (year) to be plotted.
    node : str
        Passed to :func:`.map_for_sankey`.
    exclude : list of str, optional
        Passed to :func:`.map_for_sankey`. If omitted or :any:`None`, nothing is
        excluded. The list is copied when this method is called, so later changes
        to the caller's list do not affect the tasks that were added.

    Returns
    -------
    str
        A key like :py:`"sankey figure a1b2c"`, where the last part is a unique hash
        of the arguments `year`, `node`, and `exclude`. Calling
        :meth:`.Reporter.get` with this key triggers generation of a
        :class:`plotly.Figure <plotly.graph_objects.Figure>` with the Sankey
        diagram.

    See also
    --------
    map_for_sankey
    pyam.figures.sankey
    """
    from warnings import filterwarnings

    from genno import KeySeq
    from genno.caching import hash_args
    from pyam import IamDataFrame
    from pyam.figures import sankey

    from message_ix.tools.sankey import map_for_sankey

    # Never share a mutable default between calls, and take a private copy: the
    # tasks added below run later (on Reporter.get()), so they must not observe
    # changes the caller makes to their own list after this method returns
    exclude = [] if exclude is None else list(exclude)

    # Silence a warning raised by pyam-iamc 3.0.0 with pandas 2.2.3.
    # NB this installs a filter in the global `warnings` state at call time
    filterwarnings("ignore", "Downcasting behavior", FutureWarning, "pyam.figures")

    # Sequence of similar Keys for individual operations; use a unique hash of the
    # arguments to avoid conflicts between multiple calls
    unique = hash_args(year, node, exclude)[:6]
    k = KeySeq(f"message sankey {unique}")

    # Concatenate 'out' and 'in' data
    self.add(k[0], "concat", "out::pyam", "in::pyam", strict=True)
    # `df` argument to pyam.figures.sankey()
    self.add(k[1], partial(IamDataFrame.filter, year=year), k[0])
    # `mapping` argument to pyam.figures.sankey()
    self.add(k[2], map_for_sankey, k[1], node=node, exclude=exclude)
    # Generate the plotly.Figure object; return the key
    return str(self.add(f"sankey figure {unique}", sankey, k[1], k[2]))
class TestReporter:
    """Tests of :class:`.Reporter` methods."""

    def test_add_sankey(self, test_mp, request) -> None:
        """:meth:`.Reporter.add_sankey` runs and returns a key that exists."""
        # Reporter for a solved Westeros scenario; "-" is replaced by "" so that
        # the units can be handled
        reporter = Reporter.from_scenario(
            make_westeros(test_mp, solve=True, quiet=True, request=request),
            units={"replace": {"-": ""}},
        )

        # Method runs
        result = reporter.add_sankey(year=700, node="Westeros")

        # The returned key has the expected form…
        assert result.startswith("sankey figure ")

        # …and refers to a task that exists in the Reporter
        assert reporter.check_keys(result)
from typing import Optional


def exclude_flow(flow: tuple[str, str], exclude: list[str]) -> bool:
    """Return :any:`True` if either the source or target of `flow` is in `exclude`."""
    return flow[0] in exclude or flow[1] in exclude


def get_source_and_target(variable: str) -> tuple[str, str]:
    """Get source and target for the `variable` flow.

    The two returned strings are the components of `variable` at the positions
    given by :func:`get_start_and_end_index`, each joined into a single string.
    """
    start_idx, end_idx = get_start_and_end_index(variable)
    return (
        get_variable_components(variable, start_idx, join=True),
        get_variable_components(variable, end_idx, join=True),
    )


def get_start_and_end_index(variable: str) -> tuple[list[int], list[int]]:
    """Get indices of source and target in variable name.

    If the first component of `variable` is "in", components 1–2 are the source
    and components 3–4 the target. For any other first component, the two pairs
    are swapped.
    """
    return (
        ([1, 2], [3, 4])
        if get_variable_components(variable, 0) == "in"
        else ([3, 4], [1, 2])
    )


def map_for_sankey(
    iam_df: "pyam.IamDataFrame", node: str, exclude: Optional[list[str]] = None
) -> dict[str, tuple[str, str]]:
    """Map variable names to (source, target) pairs for a Sankey diagram.

    Parameters
    ----------
    iam_df : :class:`pyam.IamDataFrame`
        Data to plot as Sankey diagram.
    node : str
        The node (MESSAGEix) or region (pyam) to plot.
    exclude : list[str], optional
        Flows to omit from the diagram: a variable is dropped if its source or its
        target appears in this list. By default, nothing is excluded.

    Returns
    -------
    dict
        Mapping from variable names to 2-tuples of their (source, target). If the
        mapping is empty, a warning is logged.
    """
    # Avoid a mutable default argument; keep `exclude` a list so that the log
    # message below reads the same as when an empty list is passed explicitly
    exclude = [] if exclude is None else exclude

    result = {}
    # NB the filter pattern is `node` followed by "*"
    for var in iam_df.filter(region=node + "*").variable:
        # Compute the (source, target) pair once; use it both to test for
        # exclusion and as the mapping value
        flow = get_source_and_target(var)
        if not exclude_flow(flow, exclude):
            result[var] = flow

    if not result:
        log.warning(
            f"No mapping entries generated for {node=}, {exclude=} and data:\n"
            + repr(iam_df)
        )

    return result
module = [ - "pyam", + "pyam.*", "scipy.*", # Indirectly via ixmp; this should be a subset of the list in ixmp's pyproject.toml "jpype", diff --git a/tutorial/README.rst b/tutorial/README.rst index 5ea76bdf0..414bedb85 100644 --- a/tutorial/README.rst +++ b/tutorial/README.rst @@ -83,6 +83,8 @@ From the command line $ jupyter notebook +.. _tutorial-westeros: + Westeros Electrified ==================== @@ -164,6 +166,10 @@ framework, such as used in global research applications of |MESSAGEix|. module to ‘report’ results, e.g. do post-processing, plotting, and other calculations (:tut:`westeros/westeros_report.ipynb`). + #. After familiarizing yourself with ‘reporting’, learn how to quickly assess + variable flows by plotting Sankey diagrams + (:tut:`westeros/westeros_sankey.ipynb`). + #. Build the baseline scenario using data stored in Excel files to populate sets and parameters: diff --git a/tutorial/westeros/westeros_sankey.ipynb b/tutorial/westeros/westeros_sankey.ipynb new file mode 100644 index 000000000..ba8fcfb75 --- /dev/null +++ b/tutorial/westeros/westeros_sankey.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Westeros Tutorial - Introducing Sankey diagrams\n", + "\n", + "Sankey diagrams are a useful technique to visualize energy flow accounts.\n", + "This tutorial demonstrates how to produce Sankey diagrams from the solution of a MESSAGEix Scenario object, using features provided by [`plotly`](https://plotly.com/python/) via [`pyam-iamc`](https://pyam-iamc.readthedocs.io).\n", + "\n", + "\n", + "**Pre-requisites**\n", + "- You have the *MESSAGEix* framework installed and working.\n", + " In particular, you should have installed `message_ix[report,sankey]`, which installs the dependencies `pyam` and `plotly`.\n", + "- Complete tutorials Part 1 (`westeros_baseline.ipynb`) and “Introducing Reporting” (`westeros_report.ipynb`)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start as usual by connecting to a database and loading the solved \"baseline\" scenario of the \"Westeros Electrified\" MESSAGE model.\n", + "(Note that we do not `clone()` the scenario here because we do not intend to make any changes to it.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "import ixmp\n", + "\n", + "from message_ix import Scenario\n", + "\n", + "mp = ixmp.Platform()\n", + "\n", + "try:\n", + " scenario = Scenario(mp, model=\"Westeros Electrified\", scenario=\"baseline\")\n", + "\n", + " # Ensure the scenario has a solution\n", + " if not scenario.has_solution():\n", + " scenario.solve(quiet=True)\n", + "except ValueError:\n", + " # The scenario doesn't exist → use a utility function to create it\n", + " from message_ix.testing import make_westeros\n", + "\n", + " scenario = make_westeros(mp, solve=True, quiet=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we create the `Reporter` object from the solved scenario:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from message_ix.report import Reporter\n", + "\n", + "rep = Reporter.from_scenario(\n", + " scenario,\n", + " # Reporter uses the Python package 'pint' to handle units.\n", + " # \"-\", used in the Westeros tutorial, is not a defined SI\n", + " # unit. 
We tell the Reporter to replace it with \"\"\n", + " # (unitless) everywhere it appears.\n", + " units={\"replace\": {\"-\": \"\"}},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The `add_sankey()` method\n", + "\n", + "The code uses [`pyam.figures.sankey()`](https://pyam-iamc.readthedocs.io/en/stable/api/plotting.html#pyam.figures.sankey) under the hood which (as of `pyam-iamc` version 3.0.0) supports only one year (MESSAGE time period) and one region (MESSAGE `node`).\n", + "Our model is already a single-node model, so we use its one node, and choose to prepare our first Sankey diagram for the **year 700**:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "key = rep.add_sankey(year=700, node=\"Westeros\")\n", + "key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This returns a *key*.\n", + "As explained in the “Introducing Reporting” tutorial, nothing has happened yet; no data has been retrieved from the Scenario.\n", + "The key identifies a task that will trigger all these steps and return the created diagram.\n", + "Let's now do that:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig = rep.get(key)\n", + "type(fig)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The diagram is created!\n", + "It is a `plotly.Figure` object.\n", + "A Jupyter notebook, like this one, can provide interactive display of this figure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This diagram alternates between showing `{technology}|{mode}` (for example: `coal_ppl|standard`) and `{level}|{commodity}` (for example, `secondary|electricity`).\n", + "By mousing over the colored areas, we can see that:\n", 
+ "\n", + "- 61.1 units of (level=secondary, commodity=electricity) are produced in (year=700, node=Westeros); of these, 47.4 units are supplied by (technology=coal_ppl, mode=standard) and 13.7 units are supplied by (technology=wind_ppl, mode=standard).\n", + "- All of the (secondary, electricity) is consumed as an input to (technology=grid, mode=standard).\n", + "- …and so on.\n", + "\n", + "## Simplifying the diagram\n", + "\n", + "Large models like [`MESSAGEix-GLOBIOM`](https://docs.messageix.org/models) can include hundreds of (technology, mode) and (level, commodity) combinations.\n", + "You can imagine that this diagram could get very crowded!\n", + "To exclude flows we are not interested in, we can use the `exclude` parameter of `add_sankey()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "key2 = rep.add_sankey(year=700, node=\"Westeros\", exclude=[\"wind_ppl|standard\"])\n", + "key2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice this key is different from the previous key.\n", + "This allows to prepare multiple diagrams, and later generate one or more of them, without conflict.\n", + "\n", + "Next, we can display the figure as before:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rep.get(key2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Compare this diagram to the first one and notice that `wind_ppl|standard` does not appear any more.\n", + "\n", + "You can pick any variable for this, even if it's in the middle of the overall flow!\n", + "And, for any scenario like this one with multiple periods, you can pick other years, too:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "key3 = rep.add_sankey(year=720, node=\"Westeros\", exclude=[\"final|electricity\"])\n", + "print(key3)\n", + 
"rep.get(key3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Omitting `final|electricity` splits this Sankey diagram in two, so Plotly automatically arranges the two parts on top of one another.\n", + "\n", + "## Under the hood\n", + "\n", + "This section gives a step-by-step explanation of the atomic tasks that are prepared by `add_sankey()`.\n", + "You may wish to read this section to get a better understanding of how the code operates, or if you want to build your own code to do something different.\n", + "\n", + "The function we want to use, `pyam.figures.sankey()`, takes two arguments: `df` and `mapping`.\n", + "\n", + "After calling `Reporter.from_scenario()`, `rep` already has keys for `in::pyam` and `out::pyam`.\n", + "These give, respectively, the total (level, commodity) inputs to, and outputs from, each (technology, mode), in the IAMC data structure and as a `pyam.IamDataFrame` object.\n", + "\n", + "The first step is to concatenate these two objects together:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from genno.operator import concat\n", + "\n", + "df_all = concat(rep.get(\"in::pyam\"), rep.get(\"out::pyam\"))\n", + "df_all" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "…and then select the one year to be plotted:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = df_all.filter(year=700)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, to prepare the `mapping` argument, we use the function `message_ix.tools.sankey.map_for_sankey()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from message_ix.tools.sankey import map_for_sankey\n", + "\n", + "mapping = map_for_sankey(\n", + " df,\n", + " node=\"Westeros\",\n", + " exclude=[\"wind_ppl|standard\"],\n", 
+ ")\n", + "mapping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we generate the plot:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pyam.figures import sankey\n", + "\n", + "fig = sankey(df=df, mapping=mapping)\n", + "fig" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see this is the same as the second example diagram in the tutorial.\n", + "\n", + "We can also visualize the steps created by `add_sankey()`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(rep.describe(key2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This also shows how the core MESSAGE parameters `input` and `output`, and solution variable `ACT`, are retrieved, multiplied, summed on some dimensions, and mapped into the IAMC data structure understood by `pyam`, leading up to the `concat()` step with which we started this section.\n", + "\n", + "Lastly, as always, please do not forget to close the database 😉" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "mp.close_db()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.13", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}