From d91ede1be0ffbe0d3f8946a50b4fb97ea482a0ea Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 21 Jun 2024 13:57:28 +0200 Subject: [PATCH 01/49] Add initial virtual cluster tests --- Makefile | 7 + integration_test/cluster_test/component.py | 39 ++++ integration_test/cluster_test/dispatch.sh | 12 + integration_test/cluster_test/dispatch.ymmsl | 15 ++ .../cluster_test/implementations.ymmsl | 8 + integration_test/cluster_test/multiple.sh | 12 + integration_test/cluster_test/multiple.ymmsl | 32 +++ integration_test/cluster_test/settings.ymmsl | 5 + integration_test/cluster_test/single.sh | 12 + integration_test/cluster_test/single.ymmsl | 10 + integration_test/conftest.py | 4 + integration_test/test_cluster.Dockerfile | 9 + integration_test/test_cluster.py | 207 ++++++++++++++++++ tox.ini | 21 +- 14 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 integration_test/cluster_test/component.py create mode 100755 integration_test/cluster_test/dispatch.sh create mode 100644 integration_test/cluster_test/dispatch.ymmsl create mode 100644 integration_test/cluster_test/implementations.ymmsl create mode 100755 integration_test/cluster_test/multiple.sh create mode 100644 integration_test/cluster_test/multiple.ymmsl create mode 100644 integration_test/cluster_test/settings.ymmsl create mode 100755 integration_test/cluster_test/single.sh create mode 100644 integration_test/cluster_test/single.ymmsl create mode 100644 integration_test/test_cluster.Dockerfile create mode 100644 integration_test/test_cluster.py diff --git a/Makefile b/Makefile index c54cc9b6..91045897 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,9 @@ endif .PHONY: test test: test_python test_scripts test_cpp test_fortran +.PHONY: test_all +test_all: test test_cluster + .PHONY: test_python_only test_python_only: MUSCLE_TEST_PYTHON_ONLY=1 tox @@ -37,6 +40,10 @@ test_cpp: cpp test_fortran: fortran_tests cd libmuscle/fortran && $(MAKE) test +.PHONY: test_cluster +test_cluster: + tox -e cluster + .PHONY: test_scripts test_scripts: cd scripts && $(MAKE) test diff --git a/integration_test/cluster_test/component.py b/integration_test/cluster_test/component.py new file mode 100644 index 00000000..e14d0523 --- /dev/null +++ b/integration_test/cluster_test/component.py @@ -0,0 +1,39 @@ +import logging + +from libmuscle import Instance, Message +from ymmsl import Operator + + +def component() -> None: + """A simple dummy component. + + This sends and receives on all operators, allowing different coupling patterns + with a single program. 
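+
+    For example, single.ymmsl runs one instance of it on its own,
+    dispatch.ymmsl chains two instances via final_out -> init_in, and
+    multiple.ymmsl couples six instances in a ring via inter_out ->
+    inter_in, all with this same code.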
+ """ + instance = Instance({ + Operator.F_INIT: ['init_in'], + Operator.O_I: ['inter_out'], + Operator.S: ['inter_in'], + Operator.O_F: ['final_out']}) + + while instance.reuse_instance(): + # F_INIT + steps = instance.get_setting('steps', 'int') + + instance.receive('init_in', default=Message(0.0)) + + for step in range(steps): + # O_I + instance.send('inter_out', Message(step)) + + # S + instance.receive('inter_in', default=Message(0.0)) + + # O_F + instance.send('final_out', Message(steps)) + + +if __name__ == '__main__': + logging.basicConfig() + logging.getLogger().setLevel(logging.INFO) + component() diff --git a/integration_test/cluster_test/dispatch.sh b/integration_test/cluster_test/dispatch.sh new file mode 100755 index 00000000..10fb1fb9 --- /dev/null +++ b/integration_test/cluster_test/dispatch.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +#SBATCH --time=0:1:00 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=2 + +set -e + +source /home/cerulean/venv/bin/activate + +muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/dispatch.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl + diff --git a/integration_test/cluster_test/dispatch.ymmsl b/integration_test/cluster_test/dispatch.ymmsl new file mode 100644 index 00000000..a786e2a9 --- /dev/null +++ b/integration_test/cluster_test/dispatch.ymmsl @@ -0,0 +1,15 @@ +ymmsl_version: v0.1 + +model: + name: dispatch + components: + c1: component + c2: component + conduits: + c1.final_out: c2.init_in + +resources: + c1: + threads: 1 + c2: + threads: 1 diff --git a/integration_test/cluster_test/implementations.ymmsl b/integration_test/cluster_test/implementations.ymmsl new file mode 100644 index 00000000..04737a2f --- /dev/null +++ b/integration_test/cluster_test/implementations.ymmsl @@ -0,0 +1,8 @@ +ymmsl_version: v0.1 + +implementations: + component: + virtual_env: /home/cerulean/venv + executable: python + args: + - /home/cerulean/cluster_test/component.py diff --git a/integration_test/cluster_test/multiple.sh b/integration_test/cluster_test/multiple.sh new file mode 100755 index 00000000..a5122dd2 --- /dev/null +++ b/integration_test/cluster_test/multiple.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +#SBATCH --time=0:1:00 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 + +set -e + +source /home/cerulean/venv/bin/activate + +muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/multiple.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl + diff --git a/integration_test/cluster_test/multiple.ymmsl b/integration_test/cluster_test/multiple.ymmsl new file mode 100644 index 00000000..60260aad --- /dev/null +++ b/integration_test/cluster_test/multiple.ymmsl @@ -0,0 +1,32 @@ +ymmsl_version: v0.1 + +model: + name: multiple + components: + c1: component + c2: component + c3: component + c4: component + c5: component + c6: component + conduits: + c1.inter_out: c2.inter_in + c2.inter_out: c3.inter_in + c3.inter_out: c4.inter_in + c4.inter_out: c5.inter_in + c5.inter_out: c6.inter_in + c6.inter_out: c1.inter_in + +resources: + c1: + threads: 1 + c2: + threads: 1 + c3: + threads: 1 + c4: + threads: 1 + c5: + threads: 1 + c6: + threads: 1 diff --git a/integration_test/cluster_test/settings.ymmsl b/integration_test/cluster_test/settings.ymmsl new file mode 100644 index 00000000..be4fb16f --- /dev/null +++ b/integration_test/cluster_test/settings.ymmsl @@ -0,0 +1,5 @@ +ymmsl_version: v0.1 + +settings: + muscle_remote_log_level: DEBUG + steps: 10 
diff --git a/integration_test/cluster_test/single.sh b/integration_test/cluster_test/single.sh new file mode 100755 index 00000000..8197854e --- /dev/null +++ b/integration_test/cluster_test/single.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +#SBATCH --time=0:1:00 +#SBATCH --ntasks=1 +#SBATCH --cpus-per-task=1 + +set -e + +source /home/cerulean/venv/bin/activate + +muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/single.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl + diff --git a/integration_test/cluster_test/single.ymmsl b/integration_test/cluster_test/single.ymmsl new file mode 100644 index 00000000..304579fc --- /dev/null +++ b/integration_test/cluster_test/single.ymmsl @@ -0,0 +1,10 @@ +ymmsl_version: v0.1 + +model: + name: single + components: + c1: component + +resources: + c1: + threads: 1 diff --git a/integration_test/conftest.py b/integration_test/conftest.py index 18ab5ce4..78ac48e5 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -28,6 +28,10 @@ 'MUSCLE_ENABLE_CPP_MPI' not in os.environ, reason='MPI support was not detected') +skip_unless_cluster = pytest.mark.skipif( + 'MUSCLE_TEST_CLUSTER' not in os.environ, + reason='Cluster tests were not explicitly enabled') + @pytest.fixture def yatiml_log_warning(): diff --git a/integration_test/test_cluster.Dockerfile b/integration_test/test_cluster.Dockerfile new file mode 100644 index 00000000..5e2cf213 --- /dev/null +++ b/integration_test/test_cluster.Dockerfile @@ -0,0 +1,9 @@ +FROM ghcr.io/naturalhpc/cerulean-test-docker-images/cerulean-fake-slurm-23-11:latest + +RUN apt-get update && \ + apt-get install -y python3-venv libopenmpi-dev + +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /home/cerulean + diff --git a/integration_test/test_cluster.py b/integration_test/test_cluster.py new file mode 100644 index 00000000..7cf06112 --- /dev/null +++ b/integration_test/test_cluster.py @@ -0,0 +1,207 @@ +# This ensures that pytest can import this module in the non-cluster test env +# in which these dependencies don't exist, because these tests won' be run. 
+try: + import cerulean +except ImportError: + pass + +import logging +from pathlib import Path +import pytest +import time + +from .conftest import skip_unless_cluster + + +logger = logging.getLogger(__name__) + + +def _run(term, timeout, command): + exit_code, out, err = term.run(timeout, command, []) + if exit_code != 0: + logger.error(err) + assert exit_code == 0 + return out + + +@pytest.fixture(scope='session') +def local_term(): + return cerulean.LocalTerminal() + + +@pytest.fixture(scope='session') +def local_fs(): + return cerulean.LocalFileSystem() + + +@pytest.fixture(scope='session') +def virtual_cluster_image(local_term): + IMAGE_NAME = 'muscle3_test_cluster' + _run(local_term, 180, ( + f'docker buildx build -t {IMAGE_NAME}' + ' -f integration_test/test_cluster.Dockerfile .')) + return IMAGE_NAME + + +def _ssh_term(timeout_msg): + cred = cerulean.PasswordCredential('cerulean', 'kingfisher') + ready = False + start = time.monotonic() + while not ready: + if (time.monotonic() - start) > 60.0: + raise Exception(timeout_msg) + + try: + term = cerulean.SshTerminal('localhost', 10022, cred) + ready = True + except Exception: + time.sleep(3.0) + + return term + + +@pytest.fixture(scope='session') +def virtual_cluster_container(local_term, virtual_cluster_image): + # clean up stray container from previous run, if any + _run(local_term, 60, 'docker rm -f muscle3_test_slurm') + + _run(local_term, 60, ( + 'docker run -d --name muscle3_test_slurm -p 10022:22' + f' {virtual_cluster_image}')) + + _ssh_term('Virtual cluster container start timed out') + yield None + + # _run(local_term, 60, 'docker rm -f muscle3_test_slurm') + + +@pytest.fixture(scope='session') +def setup_connection(virtual_cluster_container): + # Session-wide connection used for container setup actions only + # Tests each have their own connection, see virtual_cluster() below + term = _ssh_term('Connection to virtual cluster container timed out') + with cerulean.SftpFileSystem(term, True) as fs: + yield term, fs + + +@pytest.fixture(scope='session') +def repo_root(local_fs): + root_dir = Path(__file__).parents[1] + return local_fs / str(root_dir) + + +@pytest.fixture(scope='session') +def muscle3_venv(repo_root, setup_connection): + remote_term, remote_fs = setup_connection + + _run(remote_term, 10, 'python3 -m venv /home/cerulean/venv') + in_venv = 'source /home/cerulean/venv/bin/activate && ' + _run(remote_term, 30, ( + f'/bin/bash -c "{in_venv} python3 -m pip install pip wheel setuptools"')) + + muscle3_tgt = remote_fs / 'home/cerulean/muscle3' + muscle3_tgt.mkdir() + (muscle3_tgt / 'libmuscle').mkdir() + + for f in ( + 'muscle3', 'libmuscle/python', 'setup.py', 'MANIFEST.in', 'LICENSE', + 'NOTICE', 'VERSION', 'README.rst'): + cerulean.copy(repo_root / f, muscle3_tgt / f) + + _run(remote_term, 60, f'/bin/bash -c "{in_venv} pip install ./muscle3"') + return in_venv + + +@pytest.fixture(scope='session') +def create_remote_test_files(repo_root, setup_connection): + remote_term, remote_fs = setup_connection + + remote_home = remote_fs / 'home' / 'cerulean' + + cerulean.copy( + repo_root / 'integration_test' / 'cluster_test', remote_home, + copy_permissions=True) + + +@pytest.fixture +def virtual_cluster(virtual_cluster_container, muscle3_venv, create_remote_test_files): + term = _ssh_term('Connection to vitrual cluster container timed out') + with cerulean.SftpFileSystem(term, True) as fs: + sched = cerulean.SlurmScheduler(term) + yield term, fs, sched + + +@pytest.fixture +def remote_home(virtual_cluster): + _, remote_fs, 
_ = virtual_cluster + return remote_fs / 'home' / 'cerulean' + + +@pytest.fixture +def remote_test_files(remote_home): + return remote_home / 'cluster_test' + + +@pytest.fixture +def remote_out_dir(remote_home): + return remote_home / 'test_results' + + +def _make_job(name, remote_test_files, remote_out_dir): + job_dir = remote_out_dir / f'test_{name}' + + job = cerulean.JobDescription() + job.name = name + job.working_directory = job_dir + job.command = remote_test_files / f'{name}.sh' + job.stdout_file = job_dir / 'stdout.txt' + job.stderr_file = job_dir / 'stderr.txt' + job.queue_name = 'debug' + job.time_reserved = 60 + job.system_out_file = job_dir / 'sysout.txt' + job.system_err_file = job_dir / 'syserr.txt' + + return job + + +_SCHED_OVERHEAD = 60 + + +@skip_unless_cluster +def test_single(virtual_cluster, remote_test_files, remote_out_dir): + remote_term, remote_fs, sched = virtual_cluster + + job = _make_job('single', remote_test_files, remote_out_dir) + job.num_nodes = 1 + job.mpi_processes_per_node = 1 + job.extra_scheduler_options = '--ntasks-per-core=1' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + +@skip_unless_cluster +def test_dispatch(virtual_cluster, remote_test_files, remote_out_dir): + remote_term, remote_fs, sched = virtual_cluster + + job = _make_job('dispatch', remote_test_files, remote_out_dir) + job.num_nodes = 2 + job.mpi_processes_per_node = 1 + job.extra_scheduler_options = '--ntasks-per-core=1' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + +@skip_unless_cluster +def test_multiple(virtual_cluster, remote_test_files, remote_out_dir): + remote_term, remote_fs, sched = virtual_cluster + + job = _make_job('multiple', remote_test_files, remote_out_dir) + job.num_nodes = 3 + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 diff --git a/tox.ini b/tox.ini index e9d89a3e..548c5a64 100644 --- a/tox.ini +++ b/tox.ini @@ -4,10 +4,11 @@ skip_missing_interpreters = true [testenv] deps = - mypy flake8 + mypy pytest pytest-cov + requests # missing dependency in cerulean... types-psutil ymmsl @@ -22,6 +23,24 @@ commands = pytest {posargs} flake8 libmuscle/python/libmuscle integration_test scripts/ +[testenv:cluster] +deps = + cerulean + docker + pytest + pytest-cov + requests # missing dependency in cerulean... 
+ types-psutil + ymmsl + +setenv = + MUSCLE_TEST_CLUSTER=1 + +commands = + pytest -k 'test_cluster' {posargs} + # pytest --log-cli-level=DEBUG -s -k 'test_cluster' {posargs} + + [gh-actions] python = 3.7: py37 From c7f0329a1c2677a342fb87a605f46d7e831497f3 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 17 Jul 2024 18:05:05 +0200 Subject: [PATCH 02/49] Refactor QCGPJInstantiator --- .../libmuscle/manager/instance_manager.py | 6 +-- .../python/libmuscle/manager/instantiator.py | 43 +++++++++++++++++- .../libmuscle/manager/qcgpj_instantiator.py | 44 +++---------------- 3 files changed, 51 insertions(+), 42 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 8d06c45e..4241b17e 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -11,9 +11,9 @@ from libmuscle.manager.instance_registry import InstanceRegistry from libmuscle.manager.instantiator import ( CancelAllRequest, CrashedResult, InstantiatorRequest, - InstantiationRequest, ProcessStatus, ShutdownRequest) + InstantiationRequest, Process, ProcessStatus, ShutdownRequest) from libmuscle.manager.logger import last_lines -from libmuscle.manager.qcgpj_instantiator import Process, QCGPJInstantiator +from libmuscle.manager.qcgpj_instantiator import QCGPJInstantiator from libmuscle.manager.run_dir import RunDir from libmuscle.planner.planner import Planner, Resources @@ -61,7 +61,7 @@ class InstanceManager: def __init__( self, configuration: Configuration, run_dir: RunDir, instance_registry: InstanceRegistry) -> None: - """Create a ProcessManager. + """Create an InstanceManager. Args: configuration: The global configuration diff --git a/libmuscle/python/libmuscle/manager/instantiator.py b/libmuscle/python/libmuscle/manager/instantiator.py index 9afca712..41fa5124 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -1,9 +1,10 @@ import enum import logging import multiprocessing as mp +import os from pathlib import Path import traceback -from typing import Optional +from typing import Dict, Optional from ymmsl import Implementation, Reference, ResourceRequirements @@ -133,3 +134,43 @@ def emit(self, record: logging.LogRecord) -> None: record.exc_info = None self._queue.put(record) + + +def reconfigure_logging(queue: mp.Queue) -> None: + """Reconfigure logging to send to queue. + + This reconfigures the logging subsystem to intercept all log + messages and send them to the given queue, rather than to the + previously configured handler. + """ + root_logger = logging.getLogger() + for h in list(root_logger.handlers): + root_logger.removeHandler(h) + + handler = QueueingLogHandler(queue) + root_logger.addHandler(handler) + + +def create_instance_env( + instance: Reference, overlay: Dict[str, str]) -> Dict[str, str]: + """Creates an environment for an instance. + + This takes the current (manager) environment variables and makes + a copy, then adds or extends it according to the overlay given. + + Keys from overlay that start with will have the corresponding + value appended to the matching (by key, without the +) value in + env, otherwise the value in env gets overwritten. 
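+
+    For example (illustrative), if the manager environment has
+    PATH=/usr/bin, then an overlay of {'+PATH': ':/opt/bin', 'FOO': 'bar'}
+    yields an environment with PATH=/usr/bin:/opt/bin, FOO=bar, and
+    MUSCLE_INSTANCE set to the instance name.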
+ """ + env = os.environ.copy() + env['MUSCLE_INSTANCE'] = str(instance) + + for key, value in overlay.items(): + if key.startswith('+'): + if key[1:] in env: + env[key[1:]] += value + else: + env[key[1:]] = value + else: + env[key] = value + return env diff --git a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py index 9b5836d4..ae58089b 100644 --- a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py +++ b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py @@ -26,8 +26,8 @@ from ymmsl import ExecutionModel, MPICoresResReq, Reference, ThreadedResReq from libmuscle.manager.instantiator import ( - CancelAllRequest, CrashedResult, InstantiationRequest, Process, - ProcessStatus, QueueingLogHandler, ShutdownRequest) + CancelAllRequest, CrashedResult, create_instance_env, InstantiationRequest, + Process, ProcessStatus, reconfigure_logging, ShutdownRequest) from libmuscle.planner.planner import Resources @@ -95,7 +95,7 @@ class QCGPJInstantiator(mp.Process): def __init__( self, resources: mp.Queue, requests: mp.Queue, results: mp.Queue, log_records: mp.Queue, run_dir: Path) -> None: - """Create a QCGPJProcessManager. + """Create a QCGPJInstantiator. Args: resources: Queue for returning the available resources @@ -103,7 +103,7 @@ def __init__( results: Queue to communicate finished processes over log_messages: Queue to push log messages to """ - super().__init__(name='QCGPJProcessManager') + super().__init__(name='QCGPJInstantiator') self._resources_out = resources self._requests_in = requests self._results_out = results @@ -120,7 +120,7 @@ def run(self) -> None: qcgpj_dir.mkdir(exist_ok=True) os.chdir(qcgpj_dir) - self._reconfigure_logging() + reconfigure_logging(self._log_records_out) # Executor needs to be instantiated before we go async qcg_config: Dict[str, str] = {qcg_Config.AUX_DIR: str(qcgpj_dir)} @@ -196,15 +196,6 @@ async def _main(self) -> None: _logger.debug('Stopping executor') await self._executor.stop() - def _reconfigure_logging(self) -> None: - """Reconfigure logging to send to log_records_out.""" - root_logger = logging.getLogger() - for h in list(root_logger.handlers): - root_logger.removeHandler(h) - - handler = QueueingLogHandler(self._log_records_out) - root_logger.addHandler(handler) - def _send_resources(self) -> None: """Converts and sends QCG available resources.""" resources = Resources() @@ -247,7 +238,7 @@ def _create_job( """Creates a QCG allocation and job for a request.""" total_cores = sum(map(len, request.resources.cores.values())) - env = self._create_env(request.instance, request.implementation.env) + env = create_instance_env(request.instance, request.implementation.env) if request.implementation.script: execution = self._qcg_job_execution_with_script(request, env) @@ -272,29 +263,6 @@ def _create_job( qcg_iteration = qcg_SchedulingIteration(sjob, None, None, resources, []) return qcg_allocation, qcg_iteration - def _create_env( - self, instance: Reference, overlay: Dict[str, str] - ) -> Dict[str, str]: - """Updates the environment with the implementation's env. - - This updates env in-place. Keys from overlay that start with - + will have the corresponding value appended to the matching - (by key, without the +) value in env, otherwise the value in - env gets overwritten. 
- """ - env = os.environ.copy() - env['MUSCLE_INSTANCE'] = str(instance) - - for key, value in overlay.items(): - if key.startswith('+'): - if key[1:] in env: - env[key[1:]] += value - else: - env[key[1:]] = value - else: - env[key] = value - return env - def _qcg_job_execution_with_script( self, request: InstantiationRequest, env: Dict[str, str] ) -> qcg_JobExecution: From 5cbe41799759ba999e649ae99761acd5b056d674 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 19:07:42 +0200 Subject: [PATCH 03/49] Improve docstsrings in Instantiator --- libmuscle/python/libmuscle/manager/instantiator.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/instantiator.py b/libmuscle/python/libmuscle/manager/instantiator.py index 41fa5124..db83a52d 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -72,7 +72,12 @@ class InstantiationRequest(InstantiatorRequest): Attributes: instance: The name of the instance implementation: The implementation to start for it - resources: The resources to start it on + res_req: The resource requirements for this instance + resources: The specific resources to start it on + instance_dir: The main directory for this instance + work_dir: The directory in which to start it + stdout_path: Path of file to redirect stdout to + stderr_path: Path of file to redirect stderr to """ def __init__( self, instance: Reference, implementation: Implementation, @@ -85,7 +90,7 @@ def __init__( instance: The name of the instance implementation: The implementation to start for it res_req: The resource requirements for this instance - resources: The resources to instantiate on + resources: The specific resources to instantiate on instance_dir: The main directory for this instance work_dir: The directory in which to start it stdout_path: Path of file to redirect stdout to @@ -158,7 +163,7 @@ def create_instance_env( This takes the current (manager) environment variables and makes a copy, then adds or extends it according to the overlay given. - Keys from overlay that start with will have the corresponding + Keys from overlay that start with + will have the corresponding value appended to the matching (by key, without the +) value in env, otherwise the value in env gets overwritten. 
""" From 9d54910842e69997fb40a5159d13f85438204f9f Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:05:00 +0200 Subject: [PATCH 04/49] Add ports and implementations to cluster test ymmsl files --- integration_test/cluster_test/dispatch.ymmsl | 13 +++++-- integration_test/cluster_test/multiple.ymmsl | 37 ++++++++++++++++---- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/integration_test/cluster_test/dispatch.ymmsl b/integration_test/cluster_test/dispatch.ymmsl index a786e2a9..d8b5a715 100644 --- a/integration_test/cluster_test/dispatch.ymmsl +++ b/integration_test/cluster_test/dispatch.ymmsl @@ -3,8 +3,17 @@ ymmsl_version: v0.1 model: name: dispatch components: - c1: component - c2: component + c1: + ports: + f_init: init_in + o_f: final_out + implementation: component_python + c2: + ports: + f_init: init_in + o_f: final_out + implementation: component_python + conduits: c1.final_out: c2.init_in diff --git a/integration_test/cluster_test/multiple.ymmsl b/integration_test/cluster_test/multiple.ymmsl index 60260aad..64cb8b42 100644 --- a/integration_test/cluster_test/multiple.ymmsl +++ b/integration_test/cluster_test/multiple.ymmsl @@ -3,12 +3,37 @@ ymmsl_version: v0.1 model: name: multiple components: - c1: component - c2: component - c3: component - c4: component - c5: component - c6: component + c1: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + c2: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + c3: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + c4: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + c5: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + c6: + ports: + o_i: inter_out + s: inter_in + implementation: component_python + conduits: c1.inter_out: c2.inter_in c2.inter_out: c3.inter_in From 3b0ec5691c297fb4ddebc9eba30b9cefe22acafd Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:18:43 +0200 Subject: [PATCH 05/49] Move fake cluster into subdirectory --- integration_test/fake_cluster/Dockerfile | 46 +++++ integration_test/fake_cluster/__init__.py | 0 integration_test/fake_cluster/slurm.conf | 163 ++++++++++++++++++ .../fake_cluster/start-services.sh | 70 ++++++++ integration_test/test_cluster.Dockerfile | 9 - 5 files changed, 279 insertions(+), 9 deletions(-) create mode 100644 integration_test/fake_cluster/Dockerfile create mode 100644 integration_test/fake_cluster/__init__.py create mode 100644 integration_test/fake_cluster/slurm.conf create mode 100644 integration_test/fake_cluster/start-services.sh delete mode 100644 integration_test/test_cluster.Dockerfile diff --git a/integration_test/fake_cluster/Dockerfile b/integration_test/fake_cluster/Dockerfile new file mode 100644 index 00000000..523b137e --- /dev/null +++ b/integration_test/fake_cluster/Dockerfile @@ -0,0 +1,46 @@ +FROM ghcr.io/naturalhpc/cerulean-test-docker-images/cerulean-fake-slurm-23-11:latest + +RUN apt-get update && \ + apt-get remove -y openmpi-bin && \ + apt-get install -y python3-venv gcc g++ gfortran git build-essential xz-utils \ + bzip2 cmake + +RUN cd /opt && \ + git clone --depth=100 --branch=releases/v0.22 https://github.com/spack/spack.git + +RUN . /opt/spack/share/spack/setup-env.sh && \ + spack config add "modules:default:enable:[tcl]" && \ + spack install lmod && \ + echo >>/etc/profile && \ + echo ". $(spack location -i lmod)/lmod/lmod/init/bash" >>/etc/profile && \ + echo ". 
/opt/spack/share/spack/setup-env.sh" >>/etc/profile + +# OpenMPI uses libmunge from munge, which needs to look for the munge unix socket +# in /run because that's where the apt-get installed munge we're actually running +# puts it. Munge doesn't have a configuration file, but it does have a compiled-in +# constant that can be set when building. So that's what we do here. +RUN bash -l -c 'spack install munge localstatedir=/' +RUN bash -l -c 'spack install openmpi+legacylaunchers+pmi schedulers=slurm' +RUN bash -l -c 'spack install mpich+slurm' +RUN bash -l -c 'spack install intel-oneapi-mpi' + +# Enable Spack when running ssh -c +RUN echo >>/etc/ssh/sshd_config && \ + echo 'SetEnv BASH_ENV=/etc/profile' >>/etc/ssh/sshd_config + +# Point workers to muscle3-headnode +COPY integration_test/fake_cluster/slurm.conf /usr/local/etc/slurm/slurm.conf + +# Replace start-up scripts so we can run nodes separately +COPY integration_test/fake_cluster/start-services.sh /etc/start-services.sh +RUN chmod +x /etc/start-services.sh + +# Disable ssh debug output +RUN sed -i -e 's/^LogLevel DEBUG3$//' /etc/ssh/sshd_config +RUN sed -i -e 's^Subsystem sftp /usr/lib/openssh/sftp-server -l DEBUG3^Subsystem sftp /usr/lib/openssh/sftp-server^' /etc/ssh/sshd_config + + +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /home/cerulean + diff --git a/integration_test/fake_cluster/__init__.py b/integration_test/fake_cluster/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/integration_test/fake_cluster/slurm.conf b/integration_test/fake_cluster/slurm.conf new file mode 100644 index 00000000..1959f614 --- /dev/null +++ b/integration_test/fake_cluster/slurm.conf @@ -0,0 +1,163 @@ +# slurm.conf file generated by configurator.html. +# Put this file on all nodes of your cluster. +# See the slurm.conf man page for more information. 
+# +ControlMachine=muscle3-headnode +#ControlAddr= +#BackupController= +#BackupAddr= +# +AuthType=auth/munge +#CheckpointType=checkpoint/none +CredType=cred/none +CryptoType=crypto/openssl +JobCredentialPrivateKey=/usr/local/etc/slurm/slurm.key +JobCredentialPublicCertificate=/usr/local/etc/slurm/slurm.cert +#DisableRootJobs=NO +#EnforcePartLimits=NO +#Epilog= +#EpilogSlurmctld= +#FirstJobId=1 +#MaxJobId=999999 +#GresTypes= +#GroupUpdateForce=0 +GroupUpdateTime=2 +#JobCheckpointDir=/var/slurm/checkpoint +#JobCredentialPrivateKey= +#JobCredentialPublicCertificate= +#JobFileAppend=0 +#JobRequeue=1 +#JobSubmitPlugins=1 +#KillOnBadExit=0 +#Licenses=foo*4,bar +# don't send any emails: +MailProg=/bin/true +#MaxJobCount=5000 +#MaxStepCount=40000 +#MaxTasksPerNode=128 +MpiDefault=none +#MpiParams=ports=#-# +#PluginDir= +#PlugStackConfig= +#PrivateData=jobs +ProctrackType=proctrack/linuxproc +#Prolog= +#PrologSlurmctld= +#PropagatePrioProcess=0 +#PropagateResourceLimits= +#PropagateResourceLimitsExcept= +ReturnToService=1 +#SallocDefaultCommand= +#SlurmctldPidFile=/var/run/slurmctld.pid +SlurmctldPort=6817 +SlurmdPidFile=/var/run/slurmd.%n.pid +SlurmdPort=6818 +SlurmdSpoolDir=/var/spool/slurmd.%n +SlurmUser=root +SlurmdUser=root +#SrunEpilog= +#SrunProlog= +StateSaveLocation=/var/spool/slurmctld/state +SwitchType=switch/none +#TaskEpilog= +TaskPlugin=task/none +#TaskPluginParam= +#TaskProlog= +#TopologyPlugin=topology/tree +#TmpFs=/tmp +#TrackWCKey=no +#TreeWidth= +#UnkillableStepProgram= +#UsePAM=0 +# +# +# TIMERS +BatchStartTimeout=2 +#CompleteWait=0 +EpilogMsgTime=1 +#GetEnvTimeout=2 +#HealthCheckInterval=0 +#HealthCheckProgram= +InactiveLimit=0 +KillWait=2 +MessageTimeout=2 +#ResvOverRun=0 +MinJobAge=2 +#OverTimeLimit=0 +SlurmctldTimeout=2 +SlurmdTimeout=2 +#UnkillableStepTimeout=60 +#VSizeFactor=0 +Waittime=0 +# +# +# SCHEDULING +#DefMemPerCPU=0 +#MaxMemPerCPU=0 +#SchedulerRootFilter=1 +SchedulerTimeSlice=5 +SchedulerType=sched/backfill +SchedulerParameters=bf_interval=1,bf_resolution=1,sched_interval=1 +SelectType=select/linear +#SelectTypeParameters= +# +# +# JOB PRIORITY +#PriorityType=priority/basic +#PriorityDecayHalfLife= +#PriorityCalcPeriod= +#PriorityFavorSmall= +#PriorityMaxAge= +#PriorityUsageResetPeriod= +#PriorityWeightAge= +#PriorityWeightFairshare= +#PriorityWeightJobSize= +#PriorityWeightPartition= +#PriorityWeightQOS= +# +# +# LOGGING AND ACCOUNTING +#AccountingStorageEnforce=0 +AccountingStorageType=accounting_storage/slurmdbd +AccountingStoragePort=6819 +AccountingStorageUser=root +AccountingStoreFlags=job_comment +ClusterName=mycluster +#DebugFlags= +#JobCompHost=localhost +#JobCompLoc=slurm_acct_db +JobCompLoc=/var/log/slurm/job_completions +JobCompType=jobcomp/filetxt +#JobCompPass=xenon-slurm-pw +#JobCompPort= +#JobCompUser=root +JobAcctGatherFrequency=2 +JobAcctGatherType=jobacct_gather/linux +SlurmctldDebug=3 +#SlurmctldLogFile= +SlurmdDebug=3 +SlurmdLogFile=/var/log/slurm/slurmd.%n.log +#SlurmSchedLogFile= +#SlurmSchedLogLevel= +# +# +# POWER SAVE SUPPORT FOR IDLE NODES (optional) +#SuspendProgram= +#ResumeProgram= +#SuspendTimeout= +#ResumeTimeout= +#ResumeRate= +#SuspendExcNodes= +#SuspendExcParts= +#SuspendRate= +#SuspendTime= +# +# +# COMPUTE NODES +NodeName=muscle3-node-0 Procs=2 NodeAddr=muscle3-node-0 Port=17001 State=UNKNOWN +NodeName=muscle3-node-1 Procs=2 NodeAddr=muscle3-node-1 Port=17002 State=UNKNOWN +NodeName=muscle3-node-2 Procs=2 NodeAddr=muscle3-node-2 Port=17003 State=UNKNOWN +NodeName=muscle3-node-3 Procs=2 NodeAddr=muscle3-node-3 Port=17004 
State=UNKNOWN +NodeName=muscle3-node-4 Procs=2 NodeAddr=muscle3-node-4 Port=17005 State=UNKNOWN +PartitionName=debug Nodes=muscle3-node-[0-4] Default=YES MaxTime=INFINITE State=UP +PartitionName=batch Nodes=muscle3-node-[0-2] Default=NO MaxTime=INFINITE State=UP diff --git a/integration_test/fake_cluster/start-services.sh b/integration_test/fake_cluster/start-services.sh new file mode 100644 index 00000000..4f131964 --- /dev/null +++ b/integration_test/fake_cluster/start-services.sh @@ -0,0 +1,70 @@ +#!/bin/bash +echo -e "\nstarting syslog-ng..." +syslog-ng + + +echo -e "\nstarting munged..." +setuser munge /usr/sbin/munged --foreground > /var/log/munged.out.log 2> /var/log/munged.err.log & + +echo -n -e "\nwaiting for munged to start..." +while [ ! -e /run/munge/munge.socket.2 ] ; do + sleep 1 + echo '.' +done +echo + + +NODENAME=$(hostname) + +if [ "a${NODENAME}" == "amuscle3-headnode" ] ; then + # Run as a headnode + echo -e "\nstarting mariadb..." + setuser mysql /usr/bin/mariadbd-safe >/var/log/mariadb.out.log 2>/var/log/mariadb.err.log & + + echo -n -e "\nwaiting for mariadb to start..." + while ! nc -z localhost 3306 ; do + sleep 1 + echo '.' + done + echo + + + echo -e "\nstarting slurmdbd..." + /usr/local/sbin/slurmdbd -D >/var/log/slurmdbd.out.log 2>/var/log/slurmdbd.err.log & + + echo -n -e "\nwaiting for slurmdbd to start..." + while ! nc -z localhost 6819 ; do + sleep 1 + echo '.' + done + echo + + + echo -e "\nstarting slurmctld..." + /usr/local/sbin/slurmctld -D -c -vvvv > /var/log/slurmctld.out.log 2> /var/log/slurmctld.err.log & + + echo -n -e "\nwaiting for slurmctld to start..." + while ! nc -z localhost 6817 ; do + sleep 1 + echo '.' + done + echo + + + echo -e "\nmaking accounting readable to users..." + /bin/chmod -R og+rX /var/log/slurm + +else + # Run as a compute node + + echo -e "\nstarting compute node..." + /usr/local/sbin/slurmd -D -N ${NODENAME} > /var/log/slurmd.out.log 2> /var/log/slurmd.err.log & +fi + +echo -e "\nstarting sshd..." 
+/usr/sbin/sshd -De > /var/log/sshd.out.log 2> /var/log/sshd.err.log & + +echo -e "\nStartup complete" + +sleep infinity + diff --git a/integration_test/test_cluster.Dockerfile b/integration_test/test_cluster.Dockerfile deleted file mode 100644 index 5e2cf213..00000000 --- a/integration_test/test_cluster.Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM ghcr.io/naturalhpc/cerulean-test-docker-images/cerulean-fake-slurm-23-11:latest - -RUN apt-get update && \ - apt-get install -y python3-venv libopenmpi-dev - -RUN apt-get clean && rm -rf /var/lib/apt/lists/* - -WORKDIR /home/cerulean - From 83406ccbc3abc5204240027230429086c36312e5 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:36:38 +0200 Subject: [PATCH 06/49] Add shared filesystem to fake cluster --- integration_test/cluster_test/implementations.ymmsl | 4 ++-- integration_test/cluster_test/multiple.sh | 6 ++++-- integration_test/cluster_test/single.sh | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/integration_test/cluster_test/implementations.ymmsl b/integration_test/cluster_test/implementations.ymmsl index 04737a2f..c90db7f9 100644 --- a/integration_test/cluster_test/implementations.ymmsl +++ b/integration_test/cluster_test/implementations.ymmsl @@ -2,7 +2,7 @@ ymmsl_version: v0.1 implementations: component: - virtual_env: /home/cerulean/venv + virtual_env: /home/cerulean/shared/venv executable: python args: - - /home/cerulean/cluster_test/component.py + - /home/cerulean/shared/cluster_test/component.py diff --git a/integration_test/cluster_test/multiple.sh b/integration_test/cluster_test/multiple.sh index a5122dd2..225caa43 100755 --- a/integration_test/cluster_test/multiple.sh +++ b/integration_test/cluster_test/multiple.sh @@ -6,7 +6,9 @@ set -e -source /home/cerulean/venv/bin/activate +source /home/cerulean/shared/venv/bin/activate -muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/multiple.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/multiple.ymmsl $CT/settings.ymmsl $CT/implementations.ymmsl diff --git a/integration_test/cluster_test/single.sh b/integration_test/cluster_test/single.sh index 8197854e..1a3d0984 100755 --- a/integration_test/cluster_test/single.sh +++ b/integration_test/cluster_test/single.sh @@ -6,7 +6,9 @@ set -e -source /home/cerulean/venv/bin/activate +source /home/cerulean/shared/venv/bin/activate -muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/single.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/single.ymmsl $CT/settings.ymmsl $CT/implementations.ymmsl From a4638723d32a59537d5b78599da6b441953b407f Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:38:12 +0200 Subject: [PATCH 07/49] Clean up slurm script --- integration_test/cluster_test/multiple.sh | 4 ---- integration_test/cluster_test/single.sh | 4 ---- 2 files changed, 8 deletions(-) diff --git a/integration_test/cluster_test/multiple.sh b/integration_test/cluster_test/multiple.sh index 225caa43..49093155 100755 --- a/integration_test/cluster_test/multiple.sh +++ b/integration_test/cluster_test/multiple.sh @@ -1,9 +1,5 @@ #!/bin/bash -#SBATCH --time=0:1:00 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 - set -e source /home/cerulean/shared/venv/bin/activate diff 
--git a/integration_test/cluster_test/single.sh b/integration_test/cluster_test/single.sh index 1a3d0984..00f7e0b9 100755 --- a/integration_test/cluster_test/single.sh +++ b/integration_test/cluster_test/single.sh @@ -1,9 +1,5 @@ #!/bin/bash -#SBATCH --time=0:1:00 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=1 - set -e source /home/cerulean/shared/venv/bin/activate From 3ded0199ca8cbd64e9967f4815313278e497a062 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:40:46 +0200 Subject: [PATCH 08/49] Add initial NativeInstantiator --- .../libmuscle/manager/instance_manager.py | 8 + .../libmuscle/native_instantiator/__init__.py | 0 .../native_instantiator.py | 230 ++++++++++++++ .../native_instantiator/process_manager.py | 68 +++++ .../native_instantiator/resource_detector.py | 45 +++ .../native_instantiator/run_script.py | 244 +++++++++++++++ .../libmuscle/native_instantiator/slurm.py | 280 ++++++++++++++++++ .../test/test_process_manager.py | 120 ++++++++ .../native_instantiator/test/test_slurm.py | 72 +++++ setup.py | 1 + tox.ini | 7 +- 11 files changed, 1073 insertions(+), 2 deletions(-) create mode 100644 libmuscle/python/libmuscle/native_instantiator/__init__.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/native_instantiator.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/process_manager.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/resource_detector.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/run_script.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/slurm.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/test/test_process_manager.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/test/test_slurm.py diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 4241b17e..bc6e8edd 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -15,6 +15,7 @@ from libmuscle.manager.logger import last_lines from libmuscle.manager.qcgpj_instantiator import QCGPJInstantiator from libmuscle.manager.run_dir import RunDir +from libmuscle.native_instantiator.native_instantiator import NativeInstantiator from libmuscle.planner.planner import Planner, Resources @@ -77,9 +78,16 @@ def __init__( self._results_in: Queue[_ResultType] = Queue() self._log_records_in: Queue[logging.LogRecord] = Queue() + # TODO: Instantiator factory function + # TODO: Add argument that specifies whether to use QCG or not + ''' self._instantiator = QCGPJInstantiator( self._resources_in, self._requests_out, self._results_in, self._log_records_in, self._run_dir.path) + ''' + self._instantiator = NativeInstantiator( + self._resources_in, self._requests_out, self._results_in, + self._log_records_in, self._run_dir.path) self._instantiator.start() self._log_handler = LogHandlingThread(self._log_records_in) diff --git a/libmuscle/python/libmuscle/native_instantiator/__init__.py b/libmuscle/python/libmuscle/native_instantiator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py new file mode 100644 index 00000000..d34d5482 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -0,0 +1,230 @@ +import logging +import multiprocessing as mp 
+from os import chdir +from pathlib import Path +import queue +import sys +from time import sleep +import traceback +from typing import Dict, List, Optional + +from libmuscle.manager.instantiator import ( + CancelAllRequest, CrashedResult, create_instance_env, InstantiationRequest, + Process, ProcessStatus, reconfigure_logging, ShutdownRequest) +from libmuscle.native_instantiator.process_manager import ProcessManager +from libmuscle.native_instantiator.resource_detector import ResourceDetector +from libmuscle.native_instantiator.run_script import make_script, prep_resources +from libmuscle.planner.planner import Resources +from ymmsl import MPICoresResReq, MPINodesResReq, ResourceRequirements, ThreadedResReq + + +_logger = logging.getLogger(__name__) + + +class NativeInstantiator(mp.Process): + """Instantiates instances on the local machine.""" + def __init__( + self, resources: mp.Queue, requests: mp.Queue, results: mp.Queue, + log_records: mp.Queue, run_dir: Path) -> None: + """Create a NativeInstantiator + + Args: + resources: Queue for returning the available resources + requests: Queue to take requests from + results: Queue to communicate finished processes over + log_messages: Queue to push log messages to + run_dir: Run directory for the current run + """ + super().__init__(name='NativeInstantiator') + self._resources_out = resources + self._requests_in = requests + self._results_out = results + self._log_records_out = log_records + self._run_dir = run_dir + + self._resource_detector = ResourceDetector() + self._process_manager = ProcessManager() + self._processes: Dict[str, Process] = dict() + + def run(self) -> None: + """Entry point for the process""" + try: + m3_dir = self._run_dir / 'muscle3' + m3_dir.mkdir(exist_ok=True) + chdir(m3_dir) + + reconfigure_logging(self._log_records_out) + self._send_resources() + self._main() + + except: # noqa + for line in traceback.format_exception(*sys.exc_info()): + _logger.error(line) + self._resources_out.put(CrashedResult()) + self._results_out.put(CrashedResult()) + + def _main(self) -> None: + """Main function for the background process. + + This accepts requests for instantiating jobs, stopping them, or shutting down. + Results of finished jobs are returned via the results queue. 
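+
+        Each iteration of the main loop handles any queued requests, then
+        reports processes that failed to start or that have finished. After
+        a shutdown request, the loop keeps going until all remaining
+        processes are done.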
+ """ + shutting_down = False + done = False + while not done: + while not shutting_down: + try: + request = self._requests_in.get_nowait() + if isinstance(request, ShutdownRequest): + _logger.debug('Got ShutdownRequest') + shutting_down = True + + elif isinstance(request, CancelAllRequest): + _logger.debug('Got CancelAllRequest') + self._process_manager.cancel_all() + _logger.debug('Done CancelAllRequest') + + elif isinstance(request, InstantiationRequest): + if not shutting_down: + self._instantiate(request) + + except queue.Empty: + break + + self._report_failed_processes() + self._report_finished_processes() + + if shutting_down: + _logger.debug(f'Done: {self._processes}') + done = not self._processes + + if not done: + sleep(0.1) + + def _send_resources(self) -> None: + """Detect resources and report them to the manager.""" + resources = Resources() + + res = zip(self._resource_detector.nodes, self._resource_detector.cores_per_node) + for node, num_cores in res: + resources.cores[node] = set(range(num_cores)) + + self._resources_out.put(resources) + + def _instantiate(self, request: InstantiationRequest) -> None: + """Instantiate an implementation according to the request.""" + name = str(request.instance) + + env = create_instance_env(request.instance, request.implementation.env) + self._add_resources(env, request.res_req) + + rankfile: Optional[Path] = None + if self._resource_detector.on_cluster(): + _logger.debug('On cluster...') + rankfile_contents, resource_env = prep_resources( + request.implementation.execution_model, request.resources) + + _logger.debug(f'Rankfile: {rankfile_contents}') + _logger.debug(f'Resource env: {resource_env}') + + if rankfile_contents: + rankfile = self._write_rankfile(request, rankfile_contents) + + if resource_env: + env.update(resource_env) + + # env['MUSCLE_THREADS_PER_MPI_PROCESS'] = str( + # request.res_req.threads_per_mpi_process) + # env['MUSCLE_OPENMPI_RANK_FILE'] = str(rank_file) + # env['MUSCLE_INTELMPI_RESOURCES'] = ' '.join(mpi_res_args) + + run_script_file = self._write_run_script(request, rankfile) + args = [str(run_script_file)] + + self._processes[name] = Process(request.instance, request.resources) + + try: + self._process_manager.start( + name, request.work_dir, args, env, + request.stdout_path, request.stderr_path) + self._processes[name].status = ProcessStatus.RUNNING + + except Exception as e: + self._processes[name].status = ProcessStatus.ERROR + self._processes[name].error_msg = f'Instance failed to start: {e}' + + def _write_rankfile(self, request: InstantiationRequest, rankfile: str) -> Path: + """Create and write out the rankfile and return its location. + + Also known as a machinefile or hostfile depending on the MPI implementation. 
+ """ + rankfile_file = request.instance_dir / 'rankfile' + + with rankfile_file.open('w') as f: + f.write(rankfile) + + return rankfile_file + + def _write_run_script( + self, request: InstantiationRequest, rankfile: Optional[Path]) -> Path: + """Create and write out the run script and return its location.""" + if request.implementation.script: + run_script = request.implementation.script + else: + run_script = make_script( + request.implementation, request.res_req, + not self._resource_detector.on_cluster(), rankfile) + + run_script_file = request.instance_dir / 'run_script.sh' + + with run_script_file.open('w') as f: + f.write(run_script) + + run_script_file.chmod(0o700) + return run_script_file + + def _add_resources( + self, env: Dict[str, str], res_req: ResourceRequirements) -> None: + """Add resource env vars to the given env.""" + if isinstance(res_req, ThreadedResReq): + num_threads = res_req.threads + elif isinstance(res_req, (MPICoresResReq, MPINodesResReq)): + num_threads = res_req.threads_per_mpi_process + + env['MUSCLE_THREADS'] = str(num_threads) + env['OMP_NUM_THREADS'] = str(num_threads) + + num_mpi_processes: Optional[int] = None + if isinstance(res_req, MPICoresResReq): + num_mpi_processes = res_req.mpi_processes + elif isinstance(res_req, MPINodesResReq): + num_mpi_processes = res_req.nodes * res_req.mpi_processes_per_node + + if num_mpi_processes is not None: + env['MUSCLE_MPI_PROCESSES'] = str(num_mpi_processes) + + def _report_failed_processes(self) -> None: + """Get processes that failed to start and report their status.""" + failed_processes: List[str] = list() + + for name, process in self._processes.items(): + if process.status == ProcessStatus.ERROR: + self._results_out.put(process) + failed_processes.append(name) + + for name in failed_processes: + del self._processes[name] + + def _report_finished_processes(self) -> None: + """Get finished processes and report back their status.""" + for name, exit_code in self._process_manager.get_finished(): + process = self._processes[name] + if process.status == ProcessStatus.RUNNING: + if exit_code == 0: + process.status = ProcessStatus.SUCCESS + else: + process.status = ProcessStatus.ERROR + process.error_msg = 'Instance returned a non-zero exit code' + process.exit_code = exit_code + self._results_out.put(process) + del self._processes[name] diff --git a/libmuscle/python/libmuscle/native_instantiator/process_manager.py b/libmuscle/python/libmuscle/native_instantiator/process_manager.py new file mode 100644 index 00000000..bfd8f3ca --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/process_manager.py @@ -0,0 +1,68 @@ +import logging +from pathlib import Path +from subprocess import Popen +from typing import Dict, List, Tuple + + +_logger = logging.getLogger(__name__) + + +class ProcessManager: + """Manages a set of running processes.""" + def __init__(self) -> None: + """Create a ProcessManager.""" + self._processes: Dict[str, Popen] = dict() + + def start( + self, name: str, work_dir: Path, args: List[str], env: Dict[str, str], + stdout: Path, stderr: Path) -> None: + """Start a process. + + The files that the output is directed to will be overwritten if they already + exist. 
+ + Args: + name: Name under which this process will be known + work_dir: Working directory in which to start + args: Executable and arguments to run + env: Environment variables to set + stdout: File to redirect stdout to + stderr: File to redirect stderr to + + Raises: + RuntimeError: If there is already a process with the given name. + OSError: If the process could not be started. + """ + if name in self._processes: + raise RuntimeError(f'Process {name} already exists') + _logger.debug(f'Starting process {args} with env {env} in {work_dir}') + with stdout.open('w') as out, stderr.open('w') as err: + self._processes[name] = Popen( + args, cwd=work_dir, env=env, stdout=out, stderr=err) + + def cancel_all(self) -> None: + """Stops all running processes. + + This does not wait for them to terminate, it just sends the signal to kill + them. + """ + for process in self._processes.values(): + process.kill() + + def get_finished(self) -> List[Tuple[str, int]]: + """Returns names and exit codes of finished processes. + + This returns all processes that have finished running since the previous call; + each started process will be returned exactly once. + """ + result: List[Tuple[str, int]] = list() + + for name, process in self._processes.items(): + exit_code = process.poll() + if exit_code is not None: + result.append((name, exit_code)) + + for name, _ in result: + del self._processes[name] + + return result diff --git a/libmuscle/python/libmuscle/native_instantiator/resource_detector.py b/libmuscle/python/libmuscle/native_instantiator/resource_detector.py new file mode 100644 index 00000000..8ff22db9 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/resource_detector.py @@ -0,0 +1,45 @@ +from enum import Enum +import logging +from os import sched_getaffinity + +from libmuscle.native_instantiator import slurm + + +_logger = logging.getLogger(__name__) + + +class Scheduler(Enum): + NONE = 0 + SLURM = 1 + + +class ResourceDetector: + """Detects available compute resources. + + This detects whether we're running locally or in a SLURM allocation, and returns + available resources on request. + """ + def __init__(self) -> None: + """Create a ResourceDetector. + + Detects available resources and initialises the object, which can then be + queried. 
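+
+        On a laptop with four cores (illustrative), this results in
+        nodes == ['localhost'] and cores_per_node == [4]; in a SLURM
+        allocation, the node list and core counts are taken from SLURM
+        instead.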
+ """ + if slurm.in_slurm_allocation(): + _logger.info('Detected a SLURM allocation') + self.scheduler = Scheduler.SLURM + self.nodes = slurm.get_nodes() + self.cores_per_node = slurm.get_cores_per_node() + _logger.info( + f'We have {len(self.nodes)} nodes and a total of' + f' {sum(self.cores_per_node)} cores available') + else: + _logger.info('Running locally without a cluster scheduler') + self.scheduler = Scheduler.NONE + self.nodes = ['localhost'] + self.cores_per_node = [len(sched_getaffinity(0))] + _logger.info(f'We have {sum(self.cores_per_node)} cores available') + + def on_cluster(self) -> bool: + _logger.debug(f'On cluster: {self.scheduler}') + return self.scheduler != Scheduler.NONE diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py new file mode 100644 index 00000000..62aa7f77 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -0,0 +1,244 @@ +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +from libmuscle.planner.planner import Resources +from ymmsl import ( + ExecutionModel, Implementation, MPICoresResReq, MPINodesResReq, + ResourceRequirements, ThreadedResReq) + + +def openmpi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: + """Create resource description for OpenMPI mpirun + + Args: + resources: The resources to describe + + Return: + The contents of the rankfile, and a set of environment variables + """ + ranklines: List[str] = list() + all_cores = ( + (node, core) for node, cores in resources.cores.items() for core in cores) + + for i, (node, core) in enumerate(all_cores): + ranklines.append(f'rank {i}={node} slot={core}') + + rankfile = '\n'.join(ranklines) + '\n' + + return rankfile, dict() + + +def impi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: + """Create resource description for Intel MPI mpirun + + Args: + resources: The resources to describe + + Return: + The contents of the machinefile, and a set of environment variables + """ + # I_MPI_PIN_PROCESSOR_LIST=0,1,5,6 + # pins rank 0 to core 0, rank 1 to core 1, rank 2 to core 5, rank 3 to core 6 + raise NotImplementedError() + + +def mpich_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: + """Create resource description for MPICH mpirun + + Args: + resources: The resources to describe + + Return: + The contents of the machinefile, and a set of environment variables + """ + # No env vars, but rankfile + raise NotImplementedError() + + +def srun_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: + """Create resource description for srun + + Args: + resources: The resources to describe + + Return: + The contents of the hostfile, and a set of environment variables + """ + # SLURM_HOSTFILE to point to the rankfile + # CPU_BIND=verbose,mask_cpu=0x01,0x02,0x04,0x01 to specify cores 0,1,2,0 for ranks + # 0-3 + raise NotImplementedError() + + +def prep_resources( + model: ExecutionModel, resources: Resources + ) -> Tuple[str, Dict[str, str]]: + """Create resource description for the given execution model. + + Args: + model: The execution model to generate a description for + resources: The resources to describe + + Return: + The contents of the rank/machine/hostfile, and a set of environment variables. 
+ """ + if model == ExecutionModel.DIRECT: + return '', dict() + elif model == ExecutionModel.OPENMPI: + return openmpi_prep_resources(resources) + elif model == ExecutionModel.INTELMPI: + return impi_prep_resources(resources) + elif model == ExecutionModel.SRUNMPI: + return srun_prep_resources(resources) + # elif model == ExecutionModel.MPICH: + # return mpich_prep_resources(resources) + raise RuntimeError( + f'Impossible execution model {model}, please create an issue on GitHub') + + +def num_mpi_tasks(res_req: ResourceRequirements) -> int: + """Determine the number of MPI tasks to be started. + + For non-MPI resource requirements, returns 1. + + Args: + res_req: Resource requirements to analyse. + """ + if isinstance(res_req, ThreadedResReq): + return 1 + elif isinstance(res_req, MPICoresResReq): + return res_req.mpi_processes + elif isinstance(res_req, MPINodesResReq): + return res_req.nodes * res_req.mpi_processes_per_node + raise RuntimeError('Invalid ResourceRequirements') + + +def local_command(implementation: Implementation) -> str: + """Make a format string for the command to run. + + This interprets the execution_model and produces an appropriate shell command to + start the implementation. This function produces commands for running locally: + pinning is disabled and there's only one node. + + Args: + implementation: The implementation to start. + + Return: + A format string with embedded {ntasks} and {rankfile}. + """ + if implementation.execution_model == ExecutionModel.DIRECT: + fstr = '{command} {args}' + elif implementation.execution_model == ExecutionModel.OPENMPI: + # Native name is orterun for older and prterun for newer OpenMPI. + # So we go with mpirun, which works for either. + fstr = 'mpirun -np {{ntasks}} --oversubscribe {command} {args}' + elif implementation.execution_model == ExecutionModel.INTELMPI: + fstr = 'mpirun -n {{ntasks}} {command} {args}' + elif implementation.execution_model == ExecutionModel.SRUNMPI: + fstr = 'srun -n {{ntasks}} -m arbitrary {command} {args}' + # elif implementation.execution_model == ExecutionModel.MPICH + # fstr = 'mpiexec -n {{ntasks}} {command} {args}' + + if implementation.args is None: + args = '' + elif isinstance(implementation.args, str): + args = implementation.args + elif isinstance(implementation.args, list): + args = ' '.join(implementation.args) + + return fstr.format( + command=implementation.executable, + args=args + ) + + +def cluster_command(implementation: Implementation) -> str: + """Make a format string for the command to run. + + This interprets the execution_model and produces an appropriate shell command to + start the implementation. This function produces commands for running on a cluster, + with processes distributed across nodes and CPU pinning enabled. + + Args: + implementation: The implementation to start. + + Return: + A format string with embedded {ntasks} and {rankfile}. + """ + if implementation.execution_model == ExecutionModel.DIRECT: + fstr = '{command} {args}' + elif implementation.execution_model == ExecutionModel.OPENMPI: + # Native name is orterun for older and prterun for newer OpenMPI. + # So we go with mpirun, which works for either. 
+ fstr = ( + 'mpirun -v -np {{ntasks}}' + ' -d --debug-daemons' + ' --rankfile {{rankfile}} --oversubscribe' + # ' --map-by rankfile:file={{rankfile}}:oversubscribe' + ' --display-map --display-allocation {command} {args}') + # ' --bind-to core --display-map --display-allocation {command} {args}') + elif implementation.execution_model == ExecutionModel.INTELMPI: + fstr = 'mpirun -n {{ntasks}} -machinefile {{rankfile}} {command} {args}' + elif implementation.execution_model == ExecutionModel.SRUNMPI: + fstr = 'srun -n {{ntasks}} -m arbitrary {command} {args}' + # elif implementation.execution_model == ExecutionModel.MPICH + # fstr = 'mpiexec -n {{ntasks}} -f {{rankfile}} {command} {args}' + + if implementation.args is None: + args = '' + elif isinstance(implementation.args, str): + args = implementation.args + elif isinstance(implementation.args, list): + args = ' '.join(implementation.args) + + return fstr.format( + command=implementation.executable, + args=args + ) + + +def make_script( + implementation: Implementation, res_req: ResourceRequirements, + local: bool, rankfile: Optional[Path] = None) -> str: + """Make a launch script for a given implementation. + + Args: + implementation: The implementation to launch + res_req: The job's resource requirements + local: Whether this is to run locally (True) or on a cluster (False) + rankfile: Location of the rankfile, if any + + Return: + A string with embedded newlines containing the shell script. + """ + lines: List[str] = list() + + lines.append('#!/bin/bash') + lines.append('') + + # The environment is passed when starting the script, rather than as a set of + # export statements here. + + if implementation.modules: + if isinstance(implementation.modules, str): + lines.append(f'module load {implementation.modules}') + else: + for module in implementation.modules: + lines.append(f'module load {module}') + lines.append('') + + if implementation.virtual_env: + lines.append(f'. {implementation.virtual_env}/bin/activate') + lines.append('') + + if local: + cmd = local_command(implementation) + else: + cmd = cluster_command(implementation) + + ntasks = num_mpi_tasks(res_req) + lines.append(cmd.format(ntasks=ntasks, rankfile=rankfile)) + + lines.append('') + + return '\n'.join(lines) diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py new file mode 100644 index 00000000..59258cc9 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -0,0 +1,280 @@ +from itertools import product +import logging +import os +from parsimonious import Grammar, NodeVisitor +from parsimonious.nodes import Node +from typing import Any, cast, List, Sequence, Tuple + + +_logger = logging.getLogger(__name__) + + +_node_range_expression_grammar = Grammar( + """ + nre = nre_parts ("," nre_parts)* + nre_parts = nre_part+ + nre_part = identifier ("[" index_set "]")? + index_set = index_range ("," index_range)* + index_range = integer ("-" integer)? + identifier = ~"[a-z 0-9 _-]+"i + integer = padded_int / int + int = ~"[0-9]+" + padded_int = ~"0[0-9]+" + """ + ) + + +class NREVisitor(NodeVisitor): + """Processes a parsed NRE and produces a list of nodes. + + Node range expressions are used by SLURM to describe collections of nodes. See + parse_slurm_nodelist() below. 
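To make the grammar above a little less abstract, here is roughly how it gets used (an illustrative snippet, not part of the patch): parsimonious parses the expression into a tree, and the visitor defined below flattens that tree into plain node names.

    tree = _node_range_expression_grammar.parse('tux[1-2],alpha')
    names = NREVisitor().visit(tree)
    # names == ['tux1', 'tux2', 'alpha']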
+ """ + def visit_nre( + self, node: Node, + visited_children: Tuple[List[str], Sequence[Tuple[Any, List[str]]]] + ) -> List[str]: + """Return a list of nodes corresponding to the NRE.""" + nodes = visited_children[0].copy() + for _, more_nodes in visited_children[1]: + nodes.extend(more_nodes) + return nodes + + def visit_nre_parts( + self, node: Node, visited_children: Sequence[Tuple[str, List[str]]] + ) -> List[str]: + """Return a list of node ids for the part.""" + fmt = ''.join([c[0] + '{}' for c in visited_children]) + index_lists = [c[1] for c in visited_children] + return [fmt.format(*idxs) for idxs in product(*index_lists)] + + def visit_nre_part( + self, node: Node, visited_children: Tuple[ + str, Sequence[Tuple[Any, List[str], Any]]] + ) -> Tuple[str, List[str]]: + """Return the identifier part and a list of indexes for the set.""" + identifier = visited_children[0] + if not visited_children[1]: + index_set = [''] + else: + index_set = visited_children[1][0][1] + return identifier, index_set + + def visit_index_set( + self, node: Node, + visited_children: Tuple[List[str], Sequence[Tuple[Any, List[str]]]] + ) -> List[str]: + """Return a list of indexes corresponding to the set.""" + indexes = visited_children[0].copy() + for _, more_indexes in visited_children[1]: + indexes.extend(more_indexes) + return indexes + + def visit_index_range( + self, node: Node, + visited_children: Tuple[ + Tuple[int, int], + Sequence[ + Tuple[Any, Tuple[int, int]] + ]] + ) -> List[str]: + """Return a list of indexes corresponding to the range.""" + + def format_str(width: int) -> str: + if width == -1: + return '{}' + return f'{{:0{width}}}' + + start_value, width = visited_children[0] + if visited_children[1]: + end_value, _ = visited_children[1][0][1] + fmt = format_str(width) + return [fmt.format(i) for i in range(start_value, end_value + 1)] + + fmt = format_str(width) + return [fmt.format(start_value)] + + def visit_identifier(self, node: Node, _: Sequence[Any]) -> str: + return node.text + + def visit_integer( + self, node: Node, visited_children: Sequence[Tuple[int, int]] + ) -> Tuple[int, int]: + """Returns the value of the int, and a field width or -1.""" + return visited_children[0] + + def visit_int(self, node: Node, _: Sequence[Any]) -> Tuple[int, int]: + """Returns the value and a field width of -1.""" + return int(node.text), -1 + + def visit_padded_int(self, node: Node, _: Sequence[Any]) -> Tuple[int, int]: + """Returns the value of the int and the field width.""" + return int(node.text), len(node.text) + + def generic_visit( + self, node: Node, visited_children: Sequence[Any]) -> Sequence[Any]: + return visited_children + + +_nre_visitor = NREVisitor() + + +def parse_slurm_nodelist(s: str) -> List[str]: + """Parse a SLURM node range expression and produce node names. + + Exactly what the syntax is for a "node range expression" isn't entirely + clear. Some examples are given throughout the documentation: + + linux[00-17] + lx[10-20] + tux[2,1-2] + tux[1-2,2] + tux[1-3] + linux[0-64,128] + alpha,beta,gamma + lx[15,18,32-33] + linux[0000-1023] + rack[0-63]_blade[0-41] + + unit[0-31]rack is invalid + + If a range uses leading zeros, then so should the generated indexes. + See _node_range_expression_grammar above for my best guess at the + correct grammar. + + This function takes a string containing an NRE and returns the + corresponding list of node names. 
+ """ + ast = _node_range_expression_grammar.parse(s) + return cast(List[str], _nre_visitor.visit(ast)) + + +_nodes_cores_expression_grammar = Grammar( + """ + nce = nce_run ("," nce_run)* + nce_run = int ("(" run_length ")")? + run_length = "x" int + int = ~"[0-9]+" + """ + ) + + +class NCEVisitor(NodeVisitor): + """Processes a parsed NCE and produces a list of cpu counts per node. + + Nodes cores expressions are used by SLURM to describe cores on a collection of + nodes. See parse_slurm_nodes_cores() below. + """ + def visit_nce( + self, node: Node, + visited_children: Tuple[List[int], Sequence[Tuple[Any, List[int]]]] + ) -> List[int]: + """Return a list of nodes corresponding to the NRE.""" + nodes_cores = visited_children[0].copy() + for _, more_nodes_cores in visited_children[1]: + nodes_cores.extend(more_nodes_cores) + return nodes_cores + + def visit_nce_run( + self, node: Node, + visited_children: Tuple[int, Sequence[Tuple[Any, int, Any]]] + ) -> List[int]: + """Return a list of core counts produced by this run.""" + num_cores = visited_children[0] + result = [num_cores] + + if visited_children[1]: + result *= visited_children[1][0][1] + + return result + + def visit_run_length( + self, node: Node, visited_children: Tuple[str, int]) -> int: + """Return the number of repetitions.""" + return visited_children[1] + + def visit_int(self, node: Node, _: Sequence[Any]) -> int: + """Returns the value as an int""" + return int(node.text) + + def generic_visit( + self, node: Node, visited_children: Sequence[Any]) -> Sequence[Any]: + return visited_children + + +_nce_visitor = NCEVisitor() + + +def parse_slurm_nodes_cores(s: str) -> List[int]: + """Parse a SLURM nodes cores expression and produce node names. + + The sbatch documentation page describes the format under + SLURM_JOB_CPUS_PER_NODE as CPU_count[(xnumber_of_nodes)][,CPU_count + [(xnumber_of_nodes)] ...]. and gives the example of '72(x2),36' describing a set of + three nodes, the first two with 72 cores and the third with 36. + + See _nodes_cores_expression_grammar above for the corresponding grammar. + + This function takes a string containing an NCE and returns the corresponding list of + node names. + """ + ast = _nodes_cores_expression_grammar.parse(s) + return cast(List[int], _nce_visitor.visit(ast)) + + +def in_slurm_allocation() -> bool: + """Check whether we're in a SLURM allocation. + + Returns true iff SLURM was detected. + """ + return 'SLURM_JOB_ID' in os.environ + + +def get_nodes() -> List[str]: + """Get a list of node names from SLURM_JOB_NODELIST. + + This inspects SLURM_JOB_NODELIST or SLURM_NODELIST and returns an + expanded list of node names. + + If SLURM_JOB_NODELIST is "node[020-023]" then this returns + ["node020", "node021", "node022", "node023"]. + """ + nodelist = os.environ.get('SLURM_JOB_NODELIST') + if not nodelist: + nodelist = os.environ.get('SLURM_NODELIST') + if not nodelist: + raise RuntimeError('SLURM_(JOB_)NODELIST not set, are we running locally?') + + _logger.debug(f'SLURM node list: {nodelist}') + + return parse_slurm_nodelist(nodelist) + + +def get_cores_per_node() -> List[int]: + """Return the number of CPU cores per node. + + This returns a list with the number of cores of each node in the result of + get_nodes(), which gets read from SLURM_JOB_CPUS_PER_NODE. 
+ """ + sjcpn = os.environ.get('SLURM_JOB_CPUS_PER_NODE') + _logger.debug(f'SLURM_JOB_CPUS_PER_NODE: {sjcpn}') + + if sjcpn: + return parse_slurm_nodes_cores(sjcpn) + else: + scon = os.environ.get('SLURM_CPUS_ON_NODE') + _logger.debug(f'SLURM_CPUS_ON_NODE: {scon}') + + snn = os.environ.get('SLURM_JOB_NUM_NODES') + if not snn: + snn = os.environ.get('SLURM_NNODES') + _logger.debug(f'SLURM num nodes: {snn}') + + if scon and snn: + return [int(scon)] * int(snn) + + raise RuntimeError( + 'SLURM_JOB_CPUS_PER_NODE is not set in the environment, and also' + ' SLURM_CPUS_ON_NODE is missing or neither SLURM_JOB_NUM_NODES nor' + ' SLURM_NNODES is set. Please create an issue on GitHub with the output' + ' of "sbatch --version" on this cluster.') diff --git a/libmuscle/python/libmuscle/native_instantiator/test/test_process_manager.py b/libmuscle/python/libmuscle/native_instantiator/test/test_process_manager.py new file mode 100644 index 00000000..93dabcfb --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/test/test_process_manager.py @@ -0,0 +1,120 @@ +from time import monotonic, sleep + +import pytest + +from libmuscle.native_instantiator.process_manager import ProcessManager + + +@pytest.fixture +def lpm(): + return ProcessManager() + + +def _poll_completion(lpm, num_jobs): + completed_jobs = list() + while len(completed_jobs) < num_jobs: + done = lpm.get_finished() + while not done: + sleep(0.1) + done = lpm.get_finished() + completed_jobs.extend(done) + + return completed_jobs + + +def test_run_process(lpm, tmp_path): + lpm.start( + 'test', tmp_path, ['bash', '-c', 'exit 0'], {}, + tmp_path / 'out', tmp_path / 'err') + completed_jobs = _poll_completion(lpm, 1) + assert completed_jobs[0] == ('test', 0) + + +def test_existing_process(lpm, tmp_path): + lpm.start( + 'test', tmp_path, ['bash', '-c', 'exit 0'], {}, + tmp_path / 'out', tmp_path / 'err') + with pytest.raises(RuntimeError): + lpm.start( + 'test', tmp_path, ['bash', '-c', 'exit 0'], {}, + tmp_path / 'out', tmp_path / 'err') + + completed_jobs = _poll_completion(lpm, 1) + + assert completed_jobs[0] == ('test', 0) + + +def test_env(lpm, tmp_path): + env = {'ENVVAR': 'TESTING123'} + lpm.start( + 'test', tmp_path, ['bash', '-c', 'echo ${ENVVAR}'], env, + tmp_path / 'out', tmp_path / 'err') + _poll_completion(lpm, 1) + + with (tmp_path / 'out').open('r') as f: + lines = f.readlines() + + assert lines[0] == 'TESTING123\n' + + +def test_exit_code(lpm, tmp_path): + lpm.start( + 'test_exit_code', tmp_path, ['bash', '-c', 'exit 3'], {}, + tmp_path / 'out', tmp_path / 'err') + done = lpm.get_finished() + while not done: + sleep(0.02) + done = lpm.get_finished() + + assert done[0] == ('test_exit_code', 3) + + +def test_multiple(lpm, tmp_path): + for i in range(3): + lpm.start( + f'test_{i}', tmp_path, ['bash', '-c', 'sleep 1'], {}, + tmp_path / f'out{i}', tmp_path / f'err{i}') + + completed_jobs = _poll_completion(lpm, 3) + + assert sorted(completed_jobs) == [('test_0', 0), ('test_1', 0), ('test_2', 0)] + + +def test_cancel_all(lpm, tmp_path): + begin_time = monotonic() + + for i in range(2): + lpm.start( + f'test_{i}', tmp_path, ['bash', '-c', 'sleep 1'], {}, + tmp_path / f'out{i}', tmp_path / f'err{i}') + + lpm.cancel_all() + + completed_jobs = _poll_completion(lpm, 2) + + end_time = monotonic() + + assert sorted(completed_jobs) == [('test_0', -9), ('test_1', -9)] + assert end_time - begin_time < 1.0 + + +def test_output_redirect(lpm, tmp_path): + lpm.start( + 'test', tmp_path, ['bash', '-c', 'ls'], {}, + tmp_path / 'out', tmp_path / 
'err') + _poll_completion(lpm, 1) + with (tmp_path / 'out').open('r') as f: + assert f.readlines() + with (tmp_path / 'err').open('r') as f: + assert f.readlines() == [] + + +def test_error_redirect(lpm, tmp_path): + lpm.start( + 'test', tmp_path, ['bash', '-c', 'ls 1>&2'], {}, + tmp_path / 'out', tmp_path / 'err') + _poll_completion(lpm, 1) + with (tmp_path / 'out').open('r') as f: + assert f.readlines() == [] + with (tmp_path / 'err').open('r') as f: + assert f.readlines() diff --git a/libmuscle/python/libmuscle/native_instantiator/test/test_slurm.py b/libmuscle/python/libmuscle/native_instantiator/test/test_slurm.py new file mode 100644 index 00000000..d3610b65 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/test/test_slurm.py @@ -0,0 +1,72 @@ +from libmuscle.native_instantiator.slurm import ( + parse_slurm_nodelist, parse_slurm_nodes_cores) + +import pytest + + +NRES_ = [ + # from various bits of SLURM documentation + ( + 'linux[00-17]', [ + 'linux00', 'linux01', 'linux02', 'linux03', 'linux04', 'linux05', + 'linux06', 'linux07', 'linux08', 'linux09', 'linux10', 'linux11', + 'linux12', 'linux13', 'linux14', 'linux15', 'linux16', 'linux17']), + ( + 'lx[10-20]', [ + 'lx10', 'lx11', 'lx12', 'lx13', 'lx14', 'lx15', 'lx16', 'lx17', 'lx18', + 'lx19', 'lx20']), + ('tux[2,1-2]', ['tux2', 'tux1', 'tux2']), + ('tux[1-2,2]', ['tux1', 'tux2', 'tux2']), + ('tux[1-3]', ['tux1', 'tux2', 'tux3']), + ( + 'linux[0-64,128]', [ + 'linux0', 'linux1', 'linux2', 'linux3', 'linux4', 'linux5', 'linux6', + 'linux7', 'linux8', 'linux9', 'linux10', 'linux11', 'linux12', + 'linux13', 'linux14', 'linux15', 'linux16', 'linux17', 'linux18', + 'linux19', 'linux20', 'linux21', 'linux22', 'linux23', 'linux24', + 'linux25', 'linux26', 'linux27', 'linux28', 'linux29', 'linux30', + 'linux31', 'linux32', 'linux33', 'linux34', 'linux35', 'linux36', + 'linux37', 'linux38', 'linux39', 'linux40', 'linux41', 'linux42', + 'linux43', 'linux44', 'linux45', 'linux46', 'linux47', 'linux48', + 'linux49', 'linux50', 'linux51', 'linux52', 'linux53', 'linux54', + 'linux55', 'linux56', 'linux57', 'linux58', 'linux59', 'linux60', + 'linux61', 'linux62', 'linux63', 'linux64', 'linux128']), + ('alpha,beta,gamma', ['alpha', 'beta', 'gamma']), + ('lx[15,18,32-33]', ['lx15', 'lx18', 'lx32', 'lx33']), + ('linux[0000-1023]', [f'linux{i:04}' for i in range(1024)]), + ( + 'rack[0-63]_blade[0-41]', [ + f'rack{i}_blade{j}' for i in range(64) for j in range(42)]), + # my additions + ('linux', ['linux']), + ('linux[0]', ['linux0']), + ('linux[0,1]', ['linux0', 'linux1']), + ('linux[0-2]', ['linux0', 'linux1', 'linux2']), + ( + 'rack[00-12,14]_blade[0-2],alpha,tux[1-3,6]', ( + [f'rack{i:02}_blade{j}' for i in range(13) for j in range(3)] + [ + 'rack14_blade0', 'rack14_blade1', 'rack14_blade2', 'alpha', + 'tux1', 'tux2', 'tux3', 'tux6'])), + ('node-0', ['node-0']), + ('node-[0-3]', ['node-0', 'node-1', 'node-2', 'node-3']), + ] + + +@pytest.mark.parametrize('nre,expected', NRES_) +def test_parse_slurm_nodelist(nre, expected): + assert parse_slurm_nodelist(nre) == expected + + +NCES_ = [ + ('8', [8]), + ('8(x2)', [8, 8]), + ('16,24', [16, 24]), + ('16,24(x3)', [16, 24, 24, 24]), + ('1(x1),2', [1, 2]), + ('72(x2),36', [72, 72, 36]) + ] + + +@pytest.mark.parametrize('nce,expected', NCES_) +def test_parse_slurm_nodes_cores(nce, expected): + assert parse_slurm_nodes_cores(nce) == expected diff --git a/setup.py b/setup.py index a8d3fda7..d31fa790 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ 'matplotlib>=3,<4', 'msgpack>=1,<2', 
'psutil>=5.0.0', + 'parsimonious', "numpy>=1.22", 'qcg-pilotjob==0.13.1', 'typing_extensions>=4.4.0,<5', diff --git a/tox.ini b/tox.ini index 6291e5f5..970daf2d 100644 --- a/tox.ini +++ b/tox.ini @@ -4,11 +4,13 @@ skip_missing_interpreters = true [testenv] deps = + cerulean # not actually used for these non-cluster tests flake8 mypy pytest pytest-cov requests # missing dependency in cerulean... + types-parsimonious types-psutil ymmsl @@ -30,6 +32,7 @@ deps = pytest pytest-cov requests # missing dependency in cerulean... + types-parsimonious types-psutil ymmsl @@ -37,8 +40,8 @@ setenv = MUSCLE_TEST_CLUSTER=1 commands = - pytest -k 'test_cluster' {posargs} - # pytest --log-cli-level=DEBUG -s -k 'test_cluster' {posargs} + pytest -k 'test_cluster' --log-disable=paramiko.transport {posargs} + # pytest --log-cli-level=DEBUG --log-disable=paramiko.transport --log-disable=paramiko.transport.sftp --log-disable=cerulean.copy_files -s -k 'test_cluster' {posargs} [gh-actions] From 8776e99476bd685785eff83a091665979c24bbf5 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:46:20 +0200 Subject: [PATCH 09/49] Refactor cluster tests into subdirectory --- integration_test/cluster_test/__init__.py | 0 integration_test/cluster_test/conftest.py | 200 +++++++++++++++++ integration_test/cluster_test/test_cluster.py | 206 +++++++++++++++++ integration_test/conftest.py | 4 - integration_test/test_cluster.py | 207 ------------------ 5 files changed, 406 insertions(+), 211 deletions(-) create mode 100644 integration_test/cluster_test/__init__.py create mode 100644 integration_test/cluster_test/conftest.py create mode 100644 integration_test/cluster_test/test_cluster.py delete mode 100644 integration_test/test_cluster.py diff --git a/integration_test/cluster_test/__init__.py b/integration_test/cluster_test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py new file mode 100644 index 00000000..a4f5cba4 --- /dev/null +++ b/integration_test/cluster_test/conftest.py @@ -0,0 +1,200 @@ +import logging +import os +from pathlib import Path +from tempfile import TemporaryDirectory +import time + +import cerulean +import pytest + + +logger_ = logging.getLogger(__name__) + + +REMOTE_SHARED = '/home/cerulean/shared' + + +skip_unless_cluster = pytest.mark.skipif( + 'MUSCLE_TEST_CLUSTER' not in os.environ, + reason='Cluster tests were not explicitly enabled') + + +def run_cmd(term, timeout, command): + exit_code, out, err = term.run(timeout, command, []) + if exit_code != 0: + logger_.error(err) + assert exit_code == 0 + return out + + +@pytest.fixture(scope='session') +def local_term(): + return cerulean.LocalTerminal() + + +@pytest.fixture(scope='session') +def local_fs(): + return cerulean.LocalFileSystem() + + +@pytest.fixture(scope='session') +def fake_cluster_image(local_term): + IMAGE_NAME = 'muscle3_test_cluster' + run_cmd(local_term, 5400, ( + f'docker buildx build -t {IMAGE_NAME}' + ' -f integration_test/fake_cluster/Dockerfile .')) + return IMAGE_NAME + + +def ssh_term(timeout_msg): + cred = cerulean.PasswordCredential('cerulean', 'kingfisher') + ready = False + start = time.monotonic() + while not ready: + if (time.monotonic() - start) > 60.0: + raise Exception(timeout_msg) + + try: + term = cerulean.SshTerminal('localhost', 10022, cred) + ready = True + except Exception: + time.sleep(3.0) + + return term + + +@pytest.fixture(scope='session') +def shared_dir(): + # Note that pytest's tmp_path is 
function-scoped, so cannot be used here + with TemporaryDirectory(ignore_cleanup_errors=True) as tmp_dir: + path = Path(tmp_dir) + path.chmod(0o1777) + yield path + + +@pytest.fixture(scope='session') +def cleanup_docker(local_term): + for i in range(5): + node_name = f'muscle3-node-{i}' + run_cmd(local_term, 60, f'docker rm -f {node_name}') + + run_cmd(local_term, 60, 'docker rm -f muscle3-headnode') + run_cmd(local_term, 60, 'docker network rm -f muscle3-net') + + +@pytest.fixture(scope='session') +def fake_cluster_network(local_term, cleanup_docker): + name = 'muscle3-net' + run_cmd(local_term, 60, f'docker network create {name}') + yield name + run_cmd(local_term, 60, 'docker network rm -f muscle3-net') + + +@pytest.fixture(scope='session') +def fake_cluster_nodes( + local_term, fake_cluster_image, fake_cluster_network, shared_dir): + + node_names = list() + + for i in range(5): + node_name = f'muscle3-node-{i}' + ssh_port = 10030 + i + + run_cmd(local_term, 60, ( + f'docker run -d --name={node_name} --hostname={node_name}' + f' --network={fake_cluster_network} -p {ssh_port}:22' + f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' + f' {fake_cluster_image}')) + + node_names.append(node_name) + + yield None + + run_cmd(local_term, 60, f'docker rm -f {" ".join(node_names)}') + + +@pytest.fixture(scope='session') +def fake_cluster_headnode( + local_term, fake_cluster_image, fake_cluster_network, fake_cluster_nodes, + shared_dir): + + run_cmd(local_term, 60, ( + 'docker run -d --name=muscle3-headnode --hostname=muscle3-headnode' + f' --network={fake_cluster_network} -p 10022:22' + f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' + f' {fake_cluster_image}')) + + ssh_term('Virtual cluster container start timed out') + yield None + + run_cmd(local_term, 60, 'docker rm -f muscle3-headnode') + + +@pytest.fixture(scope='session') +def setup_connection(fake_cluster_headnode): + # Session-wide connection used for container setup actions only + # Tests each have their own connection, see fake_cluster() below + term = ssh_term('Connection to virtual cluster container timed out') + with cerulean.SftpFileSystem(term, True) as fs: + yield term, fs + + # We abuse this to clean up the contents of the shared directory. + # Because it's been made inside of the container, it has a different owner + # than what we're running with on the host, and the host user cannot remove + # the files. 
+ + run_cmd(term, 60, f'rm -rf {REMOTE_SHARED}/*') + + +@pytest.fixture(scope='session') +def repo_root(local_fs): + root_dir = Path(__file__).parents[2] + return local_fs / str(root_dir) + + +@pytest.fixture(scope='session') +def remote_source(repo_root, setup_connection): + remote_term, remote_fs = setup_connection + + muscle3_tgt = remote_fs / 'home' / 'cerulean' / 'muscle3' + muscle3_tgt.mkdir() + (muscle3_tgt / 'libmuscle').mkdir() + + for f in ( + 'muscle3', 'libmuscle', 'scripts', 'docs', 'setup.py', 'Makefile', + 'MANIFEST.in', 'LICENSE', 'NOTICE', 'VERSION', 'README.rst'): + cerulean.copy( + repo_root / f, muscle3_tgt / f, overwrite='always', copy_into=False) + + return muscle3_tgt + + +@pytest.fixture(scope='session') +def muscle3_venv(repo_root, remote_source, setup_connection): + remote_term, remote_fs = setup_connection + + run_cmd(remote_term, 10, f'python3 -m venv {REMOTE_SHARED}/venv') + in_venv = f'source {REMOTE_SHARED}/venv/bin/activate && ' + + run_cmd(remote_term, 30, ( + f'/bin/bash -c "{in_venv} python3 -m pip install pip wheel setuptools"')) + + run_cmd(remote_term, 60, f'/bin/bash -c "{in_venv} pip install {remote_source}"') + return in_venv + + +@pytest.fixture(scope='session') +def muscle3_native_openmpi(remote_source, setup_connection): + remote_term, remote_fs = setup_connection + + prefix = remote_fs / REMOTE_SHARED / 'muscle3-openmpi' + prefix.mkdir() + + run_cmd(remote_term, 600, ( + f'/bin/bash -l -c "' + f'module load openmpi && ' + f'cd {remote_source} && ' + f'make distclean && ' + f'PREFIX={prefix} make install"')) + + return prefix diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py new file mode 100644 index 00000000..d9b1d85f --- /dev/null +++ b/integration_test/cluster_test/test_cluster.py @@ -0,0 +1,206 @@ +import cerulean +import logging +import pytest + +from integration_test.cluster_test.conftest import ( + REMOTE_SHARED, run_cmd, ssh_term, skip_unless_cluster) + + +logger_ = logging.getLogger(__name__) + + +@pytest.fixture(scope='session') +def copy_test_files(repo_root, setup_connection): + remote_term, remote_fs = setup_connection + remote_home = remote_fs / REMOTE_SHARED + + cerulean.copy( + repo_root / 'integration_test' / 'cluster_test', remote_home, + copy_permissions=True) + + return remote_home / 'cluster_test' + + +@pytest.fixture(scope='session') +def build_native_components( + muscle3_native_openmpi, setup_connection, copy_test_files): + remote_term, remote_fs = setup_connection + remote_source = copy_test_files + + run_cmd(remote_term, 30, ( + f"/bin/bash -l -c '" + f"module load openmpi && " + f". 
{muscle3_native_openmpi}/bin/muscle3.env && " + f"make -C {remote_source}'")) + + +@pytest.fixture +def fake_cluster( + fake_cluster_headnode, muscle3_venv, build_native_components, copy_test_files): + term = ssh_term('Connection to virtual cluster container timed out') + with cerulean.SftpFileSystem(term, True) as fs: + local_sched = cerulean.DirectGnuScheduler(term) + slurm_sched = cerulean.SlurmScheduler(term) + yield term, fs, local_sched, slurm_sched + + +@pytest.fixture +def remote_home(fake_cluster): + remote_fs = fake_cluster[1] + return remote_fs / REMOTE_SHARED + + +@pytest.fixture +def remote_test_files(remote_home): + return remote_home / 'cluster_test' + + +@pytest.fixture +def remote_out_dir(remote_home): + return remote_home / 'test_results' + + +def _make_job(name, mode, remote_test_files, remote_out_dir): + job_dir = remote_out_dir / f'test_{name}_{mode}' + job_dir.mkdir(0o755, True, True) + + job = cerulean.JobDescription() + job.name = name + job.working_directory = job_dir + job.command = str(remote_test_files / f'{name}.sh') + job.stdout_file = job_dir / 'stdout.txt' + job.stderr_file = job_dir / 'stderr.txt' + job.queue_name = 'debug' + job.time_reserved = 60 + job.system_out_file = job_dir / 'sysout.txt' + job.system_err_file = job_dir / 'syserr.txt' + + return job + + +def _sched(fake_cluster, mode): + if mode == 'local': + return fake_cluster[2] + else: + return fake_cluster[3] + + +def run_cmd_dir(remote_out_dir, testname, mode): + results_name = f'test_{testname}_{mode}' + + for p in (remote_out_dir / results_name).iterdir(): + if p.name.startswith('run_'): + return p + + +def _get_stdout(remote_out_dir, testname, mode, instance): + run_dir = run_cmd_dir(remote_out_dir, testname, mode) + stdout_file = run_dir / 'instances' / instance / 'stdout.txt' + assert stdout_file.exists() # test output redirection + return stdout_file.read_text() + + +def _get_outfile(remote_out_dir, testname, mode, instance, rank): + run_dir = run_cmd_dir(remote_out_dir, testname, mode) + work_dir = run_dir / 'instances' / instance / 'workdir' + out_file = work_dir / f'out_{rank}.txt' + assert out_file.exists() # test working directory + return out_file.read_text() + + +_SCHED_OVERHEAD = 60 + + +@skip_unless_cluster +@pytest.mark.parametrize('mode', ['local', 'slurm']) +def test_single(fake_cluster, remote_test_files, remote_out_dir, mode): + sched = _sched(fake_cluster, mode) + + job = _make_job('single', mode, remote_test_files, remote_out_dir) + if mode == 'slurm': + job.num_nodes = 1 + job.mpi_processes_per_node = 1 + job.extra_scheduler_options = '--ntasks-per-core=1 --nodelist=muscle3-node-0' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + output = _get_stdout(remote_out_dir, 'single', mode, 'c1') + + if mode == 'local': + assert output == 'muscle3-headnode\n' + else: + assert output == 'muscle3-node-0\n' + + +@skip_unless_cluster +@pytest.mark.parametrize('mode', ['local', 'slurm']) +def test_dispatch(fake_cluster, remote_test_files, remote_out_dir, mode): + sched = _sched(fake_cluster, mode) + + job = _make_job('dispatch', mode, remote_test_files, remote_out_dir) + if mode == 'slurm': + job.num_nodes = 1 + job.mpi_processes_per_node = 1 + job.extra_scheduler_options = '--ntasks-per-core=1 --nodelist=muscle3-node-1' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + c1_out = 
_get_stdout(remote_out_dir, 'dispatch', mode, 'c1') + c2_out = _get_stdout(remote_out_dir, 'dispatch', mode, 'c2') + if mode == 'local': + assert c1_out == 'muscle3-headnode\n' + assert c2_out == 'muscle3-headnode\n' + else: + assert c1_out == 'muscle3-node-1\n' + assert c2_out == 'muscle3-node-1\n' + + +@skip_unless_cluster +@pytest.mark.parametrize('mode', ['local']) +# SLURM mode is not implemented yet +def test_multiple(fake_cluster, remote_test_files, remote_out_dir, mode): + sched = _sched(fake_cluster, mode) + + job = _make_job('multiple', mode, remote_test_files, remote_out_dir) + if mode == 'slurm': + job.num_nodes = 3 + job.extra_scheduler_options = '--nodelist=muscle3-node-[0-2]' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + for i in range(1, 7): + if mode == 'local': + assert _get_stdout( + remote_out_dir, 'multiple', mode, f'c{i}') == 'muscle3-headnode\n' + else: + out = _get_stdout(remote_out_dir, 'multiple', mode, f'c{i}') + assert out == f'muscle3-node-{(i - 1) // 2}\n' + + +@skip_unless_cluster +@pytest.mark.parametrize('mode', ['local', 'slurm']) +def test_double_mpi(fake_cluster, remote_test_files, remote_out_dir, mode): + sched = _sched(fake_cluster, mode) + + job = _make_job('double_mpi', mode, remote_test_files, remote_out_dir) + if mode == 'slurm': + job.num_nodes = 2 + job.extra_scheduler_options = '--nodelist=muscle3-node-[3-4]' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + for i in range(1, 3): + for rank in range(2): + output = _get_outfile(remote_out_dir, 'double_mpi', mode, f'c{i}', rank) + if mode == 'local': + assert output == 'muscle3-headnode\n' + else: + assert output == f'muscle3-node-{i + 2}\n' diff --git a/integration_test/conftest.py b/integration_test/conftest.py index 78ac48e5..18ab5ce4 100644 --- a/integration_test/conftest.py +++ b/integration_test/conftest.py @@ -28,10 +28,6 @@ 'MUSCLE_ENABLE_CPP_MPI' not in os.environ, reason='MPI support was not detected') -skip_unless_cluster = pytest.mark.skipif( - 'MUSCLE_TEST_CLUSTER' not in os.environ, - reason='Cluster tests were not explicitly enabled') - @pytest.fixture def yatiml_log_warning(): diff --git a/integration_test/test_cluster.py b/integration_test/test_cluster.py deleted file mode 100644 index 7cf06112..00000000 --- a/integration_test/test_cluster.py +++ /dev/null @@ -1,207 +0,0 @@ -# This ensures that pytest can import this module in the non-cluster test env -# in which these dependencies don't exist, because these tests won' be run. 
-try: - import cerulean -except ImportError: - pass - -import logging -from pathlib import Path -import pytest -import time - -from .conftest import skip_unless_cluster - - -logger = logging.getLogger(__name__) - - -def _run(term, timeout, command): - exit_code, out, err = term.run(timeout, command, []) - if exit_code != 0: - logger.error(err) - assert exit_code == 0 - return out - - -@pytest.fixture(scope='session') -def local_term(): - return cerulean.LocalTerminal() - - -@pytest.fixture(scope='session') -def local_fs(): - return cerulean.LocalFileSystem() - - -@pytest.fixture(scope='session') -def virtual_cluster_image(local_term): - IMAGE_NAME = 'muscle3_test_cluster' - _run(local_term, 180, ( - f'docker buildx build -t {IMAGE_NAME}' - ' -f integration_test/test_cluster.Dockerfile .')) - return IMAGE_NAME - - -def _ssh_term(timeout_msg): - cred = cerulean.PasswordCredential('cerulean', 'kingfisher') - ready = False - start = time.monotonic() - while not ready: - if (time.monotonic() - start) > 60.0: - raise Exception(timeout_msg) - - try: - term = cerulean.SshTerminal('localhost', 10022, cred) - ready = True - except Exception: - time.sleep(3.0) - - return term - - -@pytest.fixture(scope='session') -def virtual_cluster_container(local_term, virtual_cluster_image): - # clean up stray container from previous run, if any - _run(local_term, 60, 'docker rm -f muscle3_test_slurm') - - _run(local_term, 60, ( - 'docker run -d --name muscle3_test_slurm -p 10022:22' - f' {virtual_cluster_image}')) - - _ssh_term('Virtual cluster container start timed out') - yield None - - # _run(local_term, 60, 'docker rm -f muscle3_test_slurm') - - -@pytest.fixture(scope='session') -def setup_connection(virtual_cluster_container): - # Session-wide connection used for container setup actions only - # Tests each have their own connection, see virtual_cluster() below - term = _ssh_term('Connection to virtual cluster container timed out') - with cerulean.SftpFileSystem(term, True) as fs: - yield term, fs - - -@pytest.fixture(scope='session') -def repo_root(local_fs): - root_dir = Path(__file__).parents[1] - return local_fs / str(root_dir) - - -@pytest.fixture(scope='session') -def muscle3_venv(repo_root, setup_connection): - remote_term, remote_fs = setup_connection - - _run(remote_term, 10, 'python3 -m venv /home/cerulean/venv') - in_venv = 'source /home/cerulean/venv/bin/activate && ' - _run(remote_term, 30, ( - f'/bin/bash -c "{in_venv} python3 -m pip install pip wheel setuptools"')) - - muscle3_tgt = remote_fs / 'home/cerulean/muscle3' - muscle3_tgt.mkdir() - (muscle3_tgt / 'libmuscle').mkdir() - - for f in ( - 'muscle3', 'libmuscle/python', 'setup.py', 'MANIFEST.in', 'LICENSE', - 'NOTICE', 'VERSION', 'README.rst'): - cerulean.copy(repo_root / f, muscle3_tgt / f) - - _run(remote_term, 60, f'/bin/bash -c "{in_venv} pip install ./muscle3"') - return in_venv - - -@pytest.fixture(scope='session') -def create_remote_test_files(repo_root, setup_connection): - remote_term, remote_fs = setup_connection - - remote_home = remote_fs / 'home' / 'cerulean' - - cerulean.copy( - repo_root / 'integration_test' / 'cluster_test', remote_home, - copy_permissions=True) - - -@pytest.fixture -def virtual_cluster(virtual_cluster_container, muscle3_venv, create_remote_test_files): - term = _ssh_term('Connection to vitrual cluster container timed out') - with cerulean.SftpFileSystem(term, True) as fs: - sched = cerulean.SlurmScheduler(term) - yield term, fs, sched - - -@pytest.fixture -def remote_home(virtual_cluster): - _, remote_fs, 
_ = virtual_cluster - return remote_fs / 'home' / 'cerulean' - - -@pytest.fixture -def remote_test_files(remote_home): - return remote_home / 'cluster_test' - - -@pytest.fixture -def remote_out_dir(remote_home): - return remote_home / 'test_results' - - -def _make_job(name, remote_test_files, remote_out_dir): - job_dir = remote_out_dir / f'test_{name}' - - job = cerulean.JobDescription() - job.name = name - job.working_directory = job_dir - job.command = remote_test_files / f'{name}.sh' - job.stdout_file = job_dir / 'stdout.txt' - job.stderr_file = job_dir / 'stderr.txt' - job.queue_name = 'debug' - job.time_reserved = 60 - job.system_out_file = job_dir / 'sysout.txt' - job.system_err_file = job_dir / 'syserr.txt' - - return job - - -_SCHED_OVERHEAD = 60 - - -@skip_unless_cluster -def test_single(virtual_cluster, remote_test_files, remote_out_dir): - remote_term, remote_fs, sched = virtual_cluster - - job = _make_job('single', remote_test_files, remote_out_dir) - job.num_nodes = 1 - job.mpi_processes_per_node = 1 - job.extra_scheduler_options = '--ntasks-per-core=1' - - job_id = sched.submit(job) - assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None - assert sched.get_exit_code(job_id) == 0 - - -@skip_unless_cluster -def test_dispatch(virtual_cluster, remote_test_files, remote_out_dir): - remote_term, remote_fs, sched = virtual_cluster - - job = _make_job('dispatch', remote_test_files, remote_out_dir) - job.num_nodes = 2 - job.mpi_processes_per_node = 1 - job.extra_scheduler_options = '--ntasks-per-core=1' - - job_id = sched.submit(job) - assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None - assert sched.get_exit_code(job_id) == 0 - - -@skip_unless_cluster -def test_multiple(virtual_cluster, remote_test_files, remote_out_dir): - remote_term, remote_fs, sched = virtual_cluster - - job = _make_job('multiple', remote_test_files, remote_out_dir) - job.num_nodes = 3 - - job_id = sched.submit(job) - assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None - assert sched.get_exit_code(job_id) == 0 From 0e2a94aff9bc8c8ca4eaf7f42a9433d0180b135e Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:48:52 +0200 Subject: [PATCH 10/49] Add MPI C++ component for cluster test --- integration_test/cluster_test/Makefile | 12 +++ integration_test/cluster_test/component.cpp | 73 +++++++++++++++++++ integration_test/cluster_test/double_mpi.sh | 12 +++ .../cluster_test/double_mpi.ymmsl | 25 +++++++ .../cluster_test/implementations.ymmsl | 9 ++- integration_test/cluster_test/single.ymmsl | 2 +- 6 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 integration_test/cluster_test/Makefile create mode 100644 integration_test/cluster_test/component.cpp create mode 100755 integration_test/cluster_test/double_mpi.sh create mode 100644 integration_test/cluster_test/double_mpi.ymmsl diff --git a/integration_test/cluster_test/Makefile b/integration_test/cluster_test/Makefile new file mode 100644 index 00000000..4ef1fd9e --- /dev/null +++ b/integration_test/cluster_test/Makefile @@ -0,0 +1,12 @@ +.PHONY: all +all: component_openmpi + + +CXXFLAGS += $(shell pkg-config --cflags libmuscle_mpi ymmsl) +LDLIBS += $(shell pkg-config --libs libmuscle_mpi ymmsl) + +CXXFLAGS += -g + +component_openmpi: component.cpp + mpic++ -o $@ $(CXXFLAGS) $^ $(LDLIBS) + diff --git a/integration_test/cluster_test/component.cpp b/integration_test/cluster_test/component.cpp new file mode 100644 index 00000000..42b0cb48 --- /dev/null +++ 
b/integration_test/cluster_test/component.cpp @@ -0,0 +1,73 @@ +#include +#include +#include + +#include + +#include "mpi.h" + +#include "libmuscle/libmuscle.hpp" +#include "ymmsl/ymmsl.hpp" + +using std::ofstream; +using std::to_string; + +using libmuscle::Instance; +using libmuscle::Message; +using ymmsl::Operator; + + +/** A simple dummy component. */ +void component(int argc, char * argv[]) { + const int root_rank = 0; + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + char nodeid[1024]; + gethostname(nodeid, sizeof(nodeid)); + + { + ofstream outfile("out_" + to_string(rank) + ".txt"); + outfile << nodeid << std::endl; + } + + Instance instance(argc, argv, { + {Operator::F_INIT, {"init_in"}}, + {Operator::O_I, {"inter_out"}}, + {Operator::S, {"inter_in"}}, + {Operator::O_F, {"final_out"}}}, + MPI_COMM_WORLD, root_rank); + + // outfile << "Starting reuse loop" << std::endl; + while (instance.reuse_instance()) { + // F_INIT + + int64_t steps = instance.get_setting_as("steps"); + + instance.receive("init_in", Message(0.0)); + + for (int step = 0; step < steps; ++step) { + // O_I + if (rank == root_rank) { + instance.send("inter_out", Message(step)); + } + + // S + instance.receive("inter_in", Message(0.0)); + } + + // O_F + if (rank == root_rank) { + instance.send("final_out", Message(steps)); + } + } +} + + +int main(int argc, char * argv[]) { + MPI_Init(&argc, &argv); + component(argc, argv); + MPI_Finalize(); + return EXIT_SUCCESS; +} + diff --git a/integration_test/cluster_test/double_mpi.sh b/integration_test/cluster_test/double_mpi.sh new file mode 100755 index 00000000..1357283b --- /dev/null +++ b/integration_test/cluster_test/double_mpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/double_mpi.ymmsl $CT/settings.ymmsl $CT/implementations.ymmsl + diff --git a/integration_test/cluster_test/double_mpi.ymmsl b/integration_test/cluster_test/double_mpi.ymmsl new file mode 100644 index 00000000..9d04b238 --- /dev/null +++ b/integration_test/cluster_test/double_mpi.ymmsl @@ -0,0 +1,25 @@ +ymmsl_version: v0.1 + +model: + name: double + components: + c1: + ports: + o_i: inter_out + s: inter_in + implementation: component_cpp_openmpi + c2: + ports: + o_i: inter_out + s: inter_in + implementation: component_cpp_openmpi + + conduits: + c1.inter_out: c2.inter_in + c2.inter_out: c1.inter_in + +resources: + c1: + mpi_processes: 2 + c2: + mpi_processes: 2 diff --git a/integration_test/cluster_test/implementations.ymmsl b/integration_test/cluster_test/implementations.ymmsl index c90db7f9..6dab9d57 100644 --- a/integration_test/cluster_test/implementations.ymmsl +++ b/integration_test/cluster_test/implementations.ymmsl @@ -1,8 +1,15 @@ ymmsl_version: v0.1 implementations: - component: + component_python: virtual_env: /home/cerulean/shared/venv executable: python args: - /home/cerulean/shared/cluster_test/component.py + + component_cpp_openmpi: + modules: openmpi + env: + +LD_LIBRARY_PATH: /home/cerulean/shared/muscle3-openmpi/lib + execution_model: openmpi + executable: /home/cerulean/shared/cluster_test/component_openmpi diff --git a/integration_test/cluster_test/single.ymmsl b/integration_test/cluster_test/single.ymmsl index 304579fc..957023f2 100644 --- a/integration_test/cluster_test/single.ymmsl +++ b/integration_test/cluster_test/single.ymmsl @@ -3,7 +3,7 @@ ymmsl_version: v0.1 model: name: single components: - c1: component + c1: 
component_python resources: c1: From f0b676cb37e7930cf5b1c6d02fe00c2c2d339f07 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:51:57 +0200 Subject: [PATCH 11/49] Print where we are running so we can test that --- integration_test/cluster_test/component.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration_test/cluster_test/component.py b/integration_test/cluster_test/component.py index e14d0523..aa8dd260 100644 --- a/integration_test/cluster_test/component.py +++ b/integration_test/cluster_test/component.py @@ -1,4 +1,5 @@ import logging +import socket from libmuscle import Instance, Message from ymmsl import Operator @@ -10,6 +11,8 @@ def component() -> None: This sends and receives on all operators, allowing different coupling patterns with a single program. """ + print(socket.gethostname()) + instance = Instance({ Operator.F_INIT: ['init_in'], Operator.O_I: ['inter_out'], From 2c72c9e61ab4701523aec867c3e1e602e666c51a Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 20 Sep 2024 20:52:18 +0200 Subject: [PATCH 12/49] Fix dispatch clustert test case --- integration_test/cluster_test/dispatch.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/integration_test/cluster_test/dispatch.sh b/integration_test/cluster_test/dispatch.sh index 10fb1fb9..aef00e66 100755 --- a/integration_test/cluster_test/dispatch.sh +++ b/integration_test/cluster_test/dispatch.sh @@ -1,12 +1,10 @@ #!/bin/bash -#SBATCH --time=0:1:00 -#SBATCH --ntasks=1 -#SBATCH --cpus-per-task=2 - set -e -source /home/cerulean/venv/bin/activate +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test -muscle_manager --log-level=DEBUG --start-all /home/cerulean/cluster_test/dispatch.ymmsl /home/cerulean/cluster_test/settings.ymmsl /home/cerulean/cluster_test/implementations.ymmsl +muscle_manager --log-level=DEBUG --start-all $CT/dispatch.ymmsl $CT/settings.ymmsl $CT/implementations.ymmsl From 8b66edc6f8a979f02ea2b11c8d6274d787ba3779 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Mon, 23 Sep 2024 13:40:24 +0200 Subject: [PATCH 13/49] Remove leftover RequestHandler interface from PostOffice --- libmuscle/python/libmuscle/post_office.py | 25 +---------------------- 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/libmuscle/python/libmuscle/post_office.py b/libmuscle/python/libmuscle/post_office.py index e15057fc..59f6c90a 100644 --- a/libmuscle/python/libmuscle/post_office.py +++ b/libmuscle/python/libmuscle/post_office.py @@ -2,15 +2,12 @@ import time from typing import Dict -import msgpack from ymmsl import Reference -from libmuscle.mcp.protocol import RequestType -from libmuscle.mcp.transport_server import RequestHandler from libmuscle.outbox import Outbox -class PostOffice(RequestHandler): +class PostOffice: """A PostOffice is an object that holds messages to be retrieved. A PostOffice holds outboxes with messages for receivers. It also @@ -23,26 +20,6 @@ def __init__(self) -> None: self._outbox_lock = Lock() - def handle_request(self, request: bytes) -> bytes: - """Handle a request. - - This receives an MCP request and handles it by blocking until - the requested message is available, then returning it. - - Args: - request: A received request - - Returns: - An encoded response - """ - req = msgpack.unpackb(request, raw=False) - if len(req) != 2 or req[0] != RequestType.GET_NEXT_MESSAGE.value: - raise RuntimeError( - 'Invalid request type. 
Did the streams get crossed?') - recv_port = Reference(req[1]) - self._ensure_outbox_exists(recv_port) - return self._outboxes[recv_port].retrieve() - def get_message(self, receiver: Reference) -> bytes: """Get a message from a receiver's outbox. From 4d2665a505cf75104d3e301e53a1018c15a44ede Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 17 Oct 2024 09:46:55 +0200 Subject: [PATCH 14/49] Add MUSCLE Agent Protocol --- libmuscle/python/libmuscle/mcp/protocol.py | 24 ++- .../agent/agent_commands.py | 25 +++ .../native_instantiator/agent/map_client.py | 102 +++++++++++ .../native_instantiator/map_server.py | 172 ++++++++++++++++++ libmuscle/python/libmuscle/post_office.py | 9 + 5 files changed, 328 insertions(+), 4 deletions(-) create mode 100644 libmuscle/python/libmuscle/native_instantiator/agent/agent_commands.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/agent/map_client.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/map_server.py diff --git a/libmuscle/python/libmuscle/mcp/protocol.py b/libmuscle/python/libmuscle/mcp/protocol.py index 5d1217ed..b6f662a2 100644 --- a/libmuscle/python/libmuscle/mcp/protocol.py +++ b/libmuscle/python/libmuscle/mcp/protocol.py @@ -8,10 +8,10 @@ class RequestType(Enum): Call protocol in which a request is sent to the server and a response is sent back to the calling client. In MCP, both of these are chunks of bytes. - The MUSCLE Manager Protocol and MUSCLE Peer Protocol define the encoded - messages sent in those chunks, using MsgPack encoding. To distinguish - different kinds of requests, a request type identifier is used, as - represented by this class. + The MUSCLE Manager Protocol, MUSCLE Peer Protocol and MUSCLE Agent Protocol + define the encoded messages sent in those chunks, using MsgPack encoding. + To distinguish different kinds of requests, a request type identifier is + used, as represented by this class. """ # MUSCLE Manager Protocol REGISTER_INSTANCE = 1 @@ -26,6 +26,11 @@ class RequestType(Enum): # MUSCLE Peer Protocol GET_NEXT_MESSAGE = 21 + # MUSCLE Agent Protocol + REPORT_RESOURCES = 41 + GET_COMMAND = 42 + REPORT_RESULT = 43 + class ResponseType(Enum): """Identifier for different types of response @@ -37,3 +42,14 @@ class ResponseType(Enum): SUCCESS = 0 ERROR = 1 PENDING = 2 + + +class AgentCommandType(Enum): + """Identifier for different types of commands + + These are requested from the manager by the agent, and tell it what to do. Part + of the MUSCLE Agent Protocol, used in the response to RequestType.GET_COMMAND. 
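As an illustration of the wire format this implies (a sketch based on the client and server code added in this patch; every message is a MsgPack-encoded list): an agent polling for work sends a GET_COMMAND request and receives either a pending marker or an encoded command.

    from libmuscle.mcp.protocol import AgentCommandType, RequestType, ResponseType

    request = [RequestType.GET_COMMAND.value, 'muscle3-node-0']   # [42, 'muscle3-node-0']
    # Possible responses:
    #   [ResponseType.PENDING.value]                  i.e. [2], nothing to do yet
    #   [ResponseType.SUCCESS.value, packed_command]  where packed_command unpacks to,
    #       for example, [AgentCommandType.SHUTDOWN.value], i.e. [3]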
+ """ + START = 1 + CANCEL_ALL = 2 + SHUTDOWN = 3 diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/agent_commands.py b/libmuscle/python/libmuscle/native_instantiator/agent/agent_commands.py new file mode 100644 index 00000000..56a830d1 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/agent/agent_commands.py @@ -0,0 +1,25 @@ +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, List + + +class AgentCommand: + pass + + +@dataclass +class StartCommand(AgentCommand): + name: str + work_dir: Path + args: List[str] + env: Dict[str, str] + stdout: Path + stderr: Path + + +class CancelAllCommand(AgentCommand): + pass + + +class ShutdownCommand(AgentCommand): + pass diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py b/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py new file mode 100644 index 00000000..d360b0a5 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py @@ -0,0 +1,102 @@ +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import msgpack + +from libmuscle.mcp.protocol import AgentCommandType, RequestType, ResponseType +from libmuscle.mcp.tcp_transport_client import TcpTransportClient +from libmuscle.native_instantiator.agent.agent_commands import ( + AgentCommand, StartCommand, CancelAllCommand, ShutdownCommand) + + +class MAPClient: + """The client for the MUSCLE Agent Protocol. + + This class connects to the AgentManager and communicates with it. + """ + def __init__(self, node_id: str, location: str) -> None: + """Create a MAPClient + + Args: + node_id: Id of the local node + location: A connection string of the form hostname:port + """ + self._node_id = node_id + self._transport_client = TcpTransportClient(location) + + def close(self) -> None: + """Close the connection + + This closes the connection. After this no other member functions can be called. + """ + self._transport_client.close() + + def report_resources(self, resources: Dict[str, Any]) -> None: + """Report local resources + + The only key in the dict is currently 'cpu', and it maps to a list of frozensets + of hwthread ids that we can bind to with taskset or in a rankfile. + + Args: + resources: Available resource ids by type + """ + enc_cpu_resources = [ + list(hwthreads) for hwthreads in resources['cpu']] + request = [ + RequestType.REPORT_RESOURCES.value, + self._node_id, {'cpu': enc_cpu_resources}] + self._call_agent_manager(request) + + def get_command(self) -> Optional[AgentCommand]: + """Get a command from the agent manager. + + Returns: + A command, or None if there are no commands pending. + """ + request = [RequestType.GET_COMMAND.value, self._node_id] + response = self._call_agent_manager(request) + + if response[0] == ResponseType.PENDING.value: + return None + else: + command = msgpack.unpackb(response[1], raw=False) + + if command[0] == AgentCommandType.START.value: + name = command[1] + workdir = Path(command[2]) + args = command[3] + env = command[4] + stdout = Path(command[5]) + stderr = Path(command[6]) + + return StartCommand(name, workdir, args, env, stdout, stderr) + + elif command[0] == AgentCommandType.CANCEL_ALL.value: + return CancelAllCommand() + + elif command[0] == AgentCommandType.SHUTDOWN.value: + return ShutdownCommand() + + raise Exception('Unknown AgentCommand') + + def report_result(self, names_exit_codes: List[Tuple[str, int]]) -> None: + """Report results of finished processes. 
+ + Args: + names_exit_codes: A list of names and exit codes of finished processes. + """ + request = [RequestType.REPORT_RESULT.value, names_exit_codes] + self._call_agent_manager(request) + + def _call_agent_manager(self, request: Any) -> Any: + """Call the manager and do en/decoding. + + Args: + request: The request to encode and send + + Returns: + The decoded response + """ + encoded_request = msgpack.packb(request, use_bin_type=True) + response, _ = self._transport_client.call(encoded_request) + return msgpack.unpackb(response, raw=False) diff --git a/libmuscle/python/libmuscle/native_instantiator/map_server.py b/libmuscle/python/libmuscle/native_instantiator/map_server.py new file mode 100644 index 00000000..6ab847c0 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/map_server.py @@ -0,0 +1,172 @@ +import errno +import logging +from typing import Any, Dict, cast, List, Optional + +import msgpack + +from libmuscle.mcp.protocol import AgentCommandType, RequestType, ResponseType +from libmuscle.mcp.tcp_transport_server import TcpTransportServer +from libmuscle.mcp.transport_server import RequestHandler +from libmuscle.native_instantiator.agent.agent_commands import ( + AgentCommand, CancelAllCommand, ShutdownCommand, StartCommand) +from libmuscle.native_instantiator.iagent_manager import IAgentManager +from libmuscle.post_office import PostOffice + +from ymmsl import Reference + + +_logger = logging.getLogger(__name__) + + +class MAPRequestHandler(RequestHandler): + """Handles Agent requests.""" + def __init__(self, agent_manager: IAgentManager, post_office: PostOffice) -> None: + """Create a MAPRequestHandler. + + Args: + agent_manager: The AgentManager to forward reports to + post_office: The PostOffice to get commands from + """ + self._agent_manager = agent_manager + self._post_office = post_office + + def handle_request(self, request: bytes) -> bytes: + """Handles an agent request. + + Args: + request: The encoded request + + Returns: + response: An encoded response + """ + req_list = msgpack.unpackb(request, raw=False) + req_type = req_list[0] + req_args = req_list[1:] + if req_type == RequestType.REPORT_RESOURCES.value: + response = self._report_resources(*req_args) + elif req_type == RequestType.GET_COMMAND.value: + response = self._get_command(*req_args) + elif req_type == RequestType.REPORT_RESULT.value: + response = self._report_result(*req_args) + + return cast(bytes, msgpack.packb(response, use_bin_type=True)) + + def _report_resources( + self, node_id: str, resources: Dict[str, Any]) -> Any: + """Handle a report resources request. + + This is used by the agent to report available resources on its node when + it starts up. + + Args: + node_id: Hostname (id) of the node + resources: Resource dictionary, containing a single key 'cpu' which + maps to a list of lists of hwthread ids representing cores. + """ + dec_cpu_resources = [frozenset(hwthreads) for hwthreads in resources['cpu']] + self._agent_manager.report_resources(node_id, {'cpu': dec_cpu_resources}) + return [ResponseType.SUCCESS.value] + + def _get_command(self, node_id: str) -> Any: + """Handle a get command request. + + This is used by the agent to ask if there's anything we would like it to do. + Command sounds a bit brusque, but we already have the agent sending requests + to this handler, so I needed a different word to distinguish them. 
Requests + are sent by the agent to the manager (because it's the client in an RPC setup), + commands are returned by the manager to the agent (because it tells it what to + do). + + Args: + node_id: Hostname (id) of the agent's node + """ + node_ref = Reference(node_id.replace('-', '_')) + next_request: Optional[bytes] = None + if self._post_office.have_message(node_ref): + next_request = self._post_office.get_message(node_ref) + + if next_request is not None: + return [ResponseType.SUCCESS.value, next_request] + + return [ResponseType.PENDING.value] + + def _report_result(self, instances: List[List[Any]]) -> Any: + """Handle a report result rquest. + + This is sent by the agent if an instance it launched exited. + + Args: + instances: List of instance descriptions, comprising an id str and exit + code int. Really a List[Tuple[str, int]] but msgpack doesn't know + about tuples. + """ + self._agent_manager.report_result(list(map(tuple, instances))) + return [ResponseType.SUCCESS.value] + + +class MAPServer: + """The MUSCLE Agent Protocol server. + + This class accepts connections from the agents and services them using a + MAPRequestHandler. + """ + def __init__(self, agent_manager: IAgentManager) -> None: + """Create a MAPServer. + + This starts a TCP Transport server and connects it to a MAPRequestHandler, + which uses the given agent manager to service the requests. By default, we + listen on port 9009, unless it's not available in which case we use a random + other one. + + Args: + agent_manager: AgentManager to forward requests to + """ + self._post_office = PostOffice() + self._handler = MAPRequestHandler(agent_manager, self._post_office) + try: + self._server = TcpTransportServer(self._handler, 9009) + except OSError as e: + if e.errno != errno.EADDRINUSE: + raise + self._server = TcpTransportServer(self._handler) + + def get_location(self) -> str: + """Return this server's network location. + + This is a string of the form tcp::. + """ + return self._server.get_location() + + def stop(self) -> None: + """Stop the server. + + This makes the server stop serving requests, and shuts down its + background threads. + """ + self._server.close() + + def deposit_command(self, node_id: str, command: AgentCommand) -> None: + """Deposit a command for the given agent. + + This takes the given command and queues it for the given agent to pick up next + time it asks us for one. + + Args: + node_id: Id of the node whose agent should execute the command + command: The command to send + """ + agent = Reference(node_id.replace('-', '_')) + + if isinstance(command, StartCommand): + command_obj = [ + AgentCommandType.START.value, command.name, str(command.work_dir), + command.args, command.env, str(command.stdout), str(command.stderr) + ] + elif isinstance(command, CancelAllCommand): + command_obj = [AgentCommandType.CANCEL_ALL.value] + elif isinstance(command, ShutdownCommand): + command_obj = [AgentCommandType.SHUTDOWN.value] + + encoded_command = cast(bytes, msgpack.packb(command_obj, use_bin_type=True)) + + self._post_office.deposit(agent, encoded_command) diff --git a/libmuscle/python/libmuscle/post_office.py b/libmuscle/python/libmuscle/post_office.py index 59f6c90a..2ec2056c 100644 --- a/libmuscle/python/libmuscle/post_office.py +++ b/libmuscle/python/libmuscle/post_office.py @@ -20,6 +20,15 @@ def __init__(self) -> None: self._outbox_lock = Lock() + def have_message(self, receiver: Reference) -> bool: + """Return whether there's a message for the given receiver. 
+ + Args: + receiver: The receiver of the message. + """ + self._ensure_outbox_exists(receiver) + return not self._outboxes[receiver].is_empty() + def get_message(self, receiver: Reference) -> bytes: """Get a message from a receiver's outbox. From b10948f66dafab89bc4fcfa232214dc2f1b43827 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 17 Oct 2024 09:59:23 +0200 Subject: [PATCH 15/49] Use sets of hwthreads to designate CPU resources This commit does not pass tests, you need the next one as well. I split them up because it was getting very big, and I didn't clean it up because this has taken enough time already. --- integration_test/fake_cluster/slurm.conf | 20 +- .../python/libmuscle/manager/profile_store.py | 4 +- .../libmuscle/manager/qcgpj_instantiator.py | 3 +- .../manager/test/test_profile_database.py | 10 +- .../native_instantiator.py | 281 ++++++++++++++++-- .../native_instantiator/run_script.py | 76 +++-- libmuscle/python/libmuscle/planner/planner.py | 27 +- .../libmuscle/planner/test/test_planner.py | 128 ++++---- libmuscle/python/libmuscle/test/conftest.py | 9 + muscle3/muscle3.py | 4 +- 10 files changed, 430 insertions(+), 132 deletions(-) diff --git a/integration_test/fake_cluster/slurm.conf b/integration_test/fake_cluster/slurm.conf index 1959f614..647b5315 100644 --- a/integration_test/fake_cluster/slurm.conf +++ b/integration_test/fake_cluster/slurm.conf @@ -60,7 +60,7 @@ SlurmdUser=root StateSaveLocation=/var/spool/slurmctld/state SwitchType=switch/none #TaskEpilog= -TaskPlugin=task/none +TaskPlugin=task/cgroup #TaskPluginParam= #TaskProlog= #TopologyPlugin=topology/tree @@ -98,8 +98,8 @@ Waittime=0 SchedulerTimeSlice=5 SchedulerType=sched/backfill SchedulerParameters=bf_interval=1,bf_resolution=1,sched_interval=1 -SelectType=select/linear -#SelectTypeParameters= +SelectType=select/cons_tres +SelectTypeParameters=CR_Core # # # JOB PRIORITY @@ -133,9 +133,9 @@ JobCompType=jobcomp/filetxt #JobCompUser=root JobAcctGatherFrequency=2 JobAcctGatherType=jobacct_gather/linux -SlurmctldDebug=3 +SlurmctldDebug=debug5 #SlurmctldLogFile= -SlurmdDebug=3 +SlurmdDebug=debug3 SlurmdLogFile=/var/log/slurm/slurmd.%n.log #SlurmSchedLogFile= #SlurmSchedLogLevel= @@ -154,10 +154,10 @@ SlurmdLogFile=/var/log/slurm/slurmd.%n.log # # # COMPUTE NODES -NodeName=muscle3-node-0 Procs=2 NodeAddr=muscle3-node-0 Port=17001 State=UNKNOWN -NodeName=muscle3-node-1 Procs=2 NodeAddr=muscle3-node-1 Port=17002 State=UNKNOWN -NodeName=muscle3-node-2 Procs=2 NodeAddr=muscle3-node-2 Port=17003 State=UNKNOWN -NodeName=muscle3-node-3 Procs=2 NodeAddr=muscle3-node-3 Port=17004 State=UNKNOWN -NodeName=muscle3-node-4 Procs=2 NodeAddr=muscle3-node-4 Port=17005 State=UNKNOWN +NodeName=muscle3-node-0 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-0 Port=17001 State=UNKNOWN +NodeName=muscle3-node-1 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-1 Port=17002 State=UNKNOWN +NodeName=muscle3-node-2 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-2 Port=17003 State=UNKNOWN +NodeName=muscle3-node-3 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-3 Port=17004 State=UNKNOWN +NodeName=muscle3-node-4 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-4 Port=17005 State=UNKNOWN PartitionName=debug Nodes=muscle3-node-[0-4] Default=YES MaxTime=INFINITE State=UP PartitionName=batch Nodes=muscle3-node-[0-2] Default=NO MaxTime=INFINITE State=UP diff --git a/libmuscle/python/libmuscle/manager/profile_store.py 
b/libmuscle/python/libmuscle/manager/profile_store.py index 036dea85..0fba694e 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -90,9 +90,9 @@ def store_resources(self, resources: Dict[Reference, Resources]) -> None: instance_oid = self._get_instance_oid(cur, instance_id) tuples = [ - (instance_oid, node, core) + (instance_oid, node, hwthread) for node, cores in res.cores.items() - for core in cores] + for core in cores for hwthread in core] cur.executemany( "INSERT INTO assigned_cores (instance_oid, node, core)" diff --git a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py index ae58089b..9130779f 100644 --- a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py +++ b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py @@ -200,7 +200,8 @@ def _send_resources(self) -> None: """Converts and sends QCG available resources.""" resources = Resources() for node in self._qcg_resources.nodes: - resources.cores[node.name] = {int(n.split(',')[0]) for n in node.free_ids} + resources.cores[node.name] = { + frozenset(n.split(',')) for n in node.free_ids} self._resources_out.put(resources) diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_database.py b/libmuscle/python/libmuscle/manager/test/test_profile_database.py index 2d6d472c..33bbb9dd 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_database.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_database.py @@ -8,6 +8,8 @@ from ymmsl import Operator, Port, Reference +from libmuscle.test.conftest import frozenset_of as s + import pytest from pathlib import Path @@ -22,12 +24,12 @@ def db_file(tmp_path) -> Path: store.store_instances([Reference('instance1'), Reference('instance2')]) resources1 = Resources({ - 'node001': {0, 1}, - 'node002': {0, 1}}) + 'node001': {s(0), s(1)}, + 'node002': {s(0), s(1)}}) resources2 = Resources({ - 'node001': {0}, - 'node002': {0, 1, 2}}) + 'node001': {s(0)}, + 'node002': {s(0), s(1), s(2)}}) store.store_resources({ Reference('instance1'): resources1, diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index d34d5482..391d89fe 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -1,3 +1,191 @@ +"""Module for examining resources and instantiating instances on them + +There's a huge comment here because there's a big mess here that took me forever to +figure out, so now I'm going to document it for the future. + + +Identifying hardware resources + +Today's computers all contain multi-core CPUs, often with symmetric multithreading +(SMT), also known as hyperthreading. This means that we have hardware threads +(hwthreads) and also cores, and then there's caches and memory as well but we're not +going into NUMA here. + +Cores and hwthreads are identified by number, but they have multiple different numbers +that are referred to by different names in different contexts, making everything very +confusing. So here are some definitions to disambiguate things. Note that this is still +a rather simplified representation, but it's enough for what we're doing here in +MUSCLE3. + + +Hardware threads + +A *hardware thread (hwthread)* is, at the hardware level, an instruction decoder. 
It +points to wherever in the code we are currently executing, and it can read the next +couple of instructions and figure out how to execute them. It can't actually execute +anything however, because it doesn't have the hardware that does that. + +Intel refers to hwthreads as "logical processors" and so does Linux, hwloc calls them +"processing units" or PUs and so does OpenMPI unless it uses the term hwthread just to +confuse things a bit more. + +Cores + +A *core* contains at least one hwthread, and at least one functional unit, which is a +hardware component that actually does calculations and other data processing. Within a +core, the hwthread(s) read instructions and pass them to the functional units to be +executed. If a core has more than one hwthread, then the CPU supports SMT. + +Intel refers to cores as "physical processors", hwloc calls them cores and so do most +other sources. We'll use cores here. + +Since a hwthread cannot do anything on its own, it's always part of a core. + +CPUs + +The term CPU is used in many ways by various bits of documentation, sometimes referring +to a hwthread or a core, but here we'll take it to mean a collection of cores in a +plastic box. Similar terms are *package* (referring to that plastic box with very many +metal pins) and *socket* (the thing the package mounts into), or *processor*, which was +originally used to refer to all of the above when CPUs still had only one core with only +one hwthread, and has now become ambiguous. + +Weird things can happen here, I've seen CPUs that as far as I can tell are a single +package, but nevertheless claim to have two sockets. I suspect that that's two physical +chips in a single plastic box, but I don't know for sure. + +Here, we're concerned with hwthreads and cores and how to identify them and assign +instances to them. + + +Linux + +On modern operating systems, hardware access is mediated by the operating system, and +we're mainly concerned with Linux here because that is what all the clusters are running +(see the note on macOS below). Information about the CPU(s) can be obtained on Linux +from the /proc/cpuinfo file, or equivalently but more modernly, from the files in +/sys/devices/system/cpu/cpu/topology/. + +Linux collects information about processors because it needs to run processes (programs, +software threads) on them on behalf of the user. Processes are assigned to hwthreads, so +that is what Linux considers a *processor*. /proc/cpuinfo lists all these processors, +and they each have their own directory /sys/devices/system/cpu/cpu. + +On Linux, processors have an id, which is that number in the directory, and is +listed under "processor" in /proc/cpuinfo. Since this number identifies a hwthread and +is assigned by Linux rather than being baked into the hardware, I'm calling it a +"logical hwthread id", this being a logical id of a hwthread, not an id of a logical +hwthread. It's also the id of a logical processor in Intel-speak. + +Hwthreads actually have a second number associated with them, which does come from the +hardware. In /proc/cpuinfo, that's listed under "apicid"; it doesn't seem to be +available from sysfs. Hwloc call this the "physical PU (its name for a hwthread) id", +and OpenMPI's mpirun manpage also refers to it as a "physical processor location". + +There's great potential for confusion here: the "physical PU id" and "physical processor +location" both identify a hardware-specified number (a physical id or a physical +location) for a hwthread. 
This is something completely different than what Intel calls a +"physical processor", which they use to refer to a core. + +MUSCLE3 uses logical hwthread ids everywhere, it does not use physical ids. + +Linux knows about how hwthreads are grouped into bigger things of course. Cores are +identified in Linux using the "core id", which is listed in /proc/cpuinfo and in +/sys/devices/system/cpu/cpu/topology/core_id. So for each hwthread, identified by its +logical id, we can look up which core it is a part of. The core id is a logical id, +assigned by Linux, not by the hardware. While logical hwthread ids seem to always be +consecutive at least on the hardware I've seen so far, core ids may have gaps. + +MUSCLE3 does not use core ids, although it uses groups of hwthread ids that contain all +the hwthreads for a given core. + + +Resource binding + +Running processes need something to run on, a hwthread. The assignment of process to +hwthread is done by the operating system's scheduler: when a process is ready to run, +the scheduler will try to find it a free hwthread to run on. + +The scheduler can be constrained in which hwthreads it considers for a given process, +which is known as binding the process. This may have performance benefits, because +moving a process from one hwthread to another takes time. In MUSCLE3, when running on a +cluster, each process is assigned its own specific set of hwthreads to run on, and we +try to bind the instance to the assigned hwthreads. + +Taskset + +How this is done depends on how the instance is started. For non-MPI instances, we use a +Linux utility named 'taskset' that starts another program with a giving binding. The +binding is expressed as an *affinity mask*, a string of bits that say whether a given +processor (hwthread) can be used by the process or not. Each position in the string of +bits corresponds to the hwthread with that logical id. + +OpenMPI + +OpenMPI can bind cores in various ways, we use a rankfile and the --use-hwthread-cpus +option to specify the logical hwthread ids we want to bind each MPI process (rank) to. +Note that OpenMPI by default binds to cores, and can also bind to various other things +including sockets. + +MPICH + +MPICH doesn't support binding, as far as I can see. + +Intel MPI + +Intel MPI uses logical hwthread ids-based masks, specified in an environment variable, +to go with a machinefile that lists the nodes to put each process on. + +Slurm srun + +Slurm's srun has a CPU_BIND environment variable that likewise contains logical hwthread +ids-based masks, and a hostfile that lists the nodes to put each process on. 
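+
+To make the binding mechanics concrete, here is a rough sketch, not the actual
+implementation (see run_script.py for that), of how a set of logical hwthread ids
+assigned to an instance turns into a taskset affinity mask and an OpenMPI rankfile
+line. The ids and node name below are made up:
+
+```
+# say hwthreads 2 and 3 on node001 were assigned to the instance
+hwthreads = [2, 3]
+
+# taskset wants a hexadecimal affinity mask with bit i set for hwthread i
+mask = sum(1 << i for i in hwthreads)     # 0b1100
+print(format(mask, 'X'))                  # 'C', as in: taskset 0xC <command>
+
+# OpenMPI with --use-hwthread-cpus takes a rankfile with hwthread ids per rank
+print('rank 0=node001 slot=' + ','.join(map(str, hwthreads)))
+# prints: rank 0=node001 slot=2,3
+```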
+ +Here are some disambiguation tables to help with the confusion: + + +``` +MUSCLE3 hwthread logical hwthread id physical hwthread id + +Linux processor processor apicid + (/proc/cpuinfo only) + +cgroups always uses these + +taskset always uses these + +hwloc PU PU L# PU P# + +OpenMPI hwthread used in rankfile if used in rankfile if + --use-hwthread-cpus rmaps_rank_file_physical + is specified MCA param set + +Intel logical logical processor + processor number + +srun used by --bind-to + +psutil logical returned by Process.cpu_affinity() + core counted by psutil.cpu_count(logical=True) +``` + + +``` +MUSCLE3 core (uses list of hwthread ids) + +Linux core core id + +Hwloc core core L# + +OpenMPI core used in rankfile if + --use-hwthread-cpus not + specified + +psutil physical counted by psutil.cpu_count(logical=False) + core +``` + +""" import logging import multiprocessing as mp from os import chdir @@ -11,8 +199,8 @@ from libmuscle.manager.instantiator import ( CancelAllRequest, CrashedResult, create_instance_env, InstantiationRequest, Process, ProcessStatus, reconfigure_logging, ShutdownRequest) -from libmuscle.native_instantiator.process_manager import ProcessManager -from libmuscle.native_instantiator.resource_detector import ResourceDetector +from libmuscle.native_instantiator.agent_manager import AgentManager +from libmuscle.native_instantiator.global_resources import global_resources from libmuscle.native_instantiator.run_script import make_script, prep_resources from libmuscle.planner.planner import Resources from ymmsl import MPICoresResReq, MPINodesResReq, ResourceRequirements, ThreadedResReq @@ -42,8 +230,6 @@ def __init__( self._log_records_out = log_records self._run_dir = run_dir - self._resource_detector = ResourceDetector() - self._process_manager = ProcessManager() self._processes: Dict[str, Process] = dict() def run(self) -> None: @@ -53,6 +239,8 @@ def run(self) -> None: m3_dir.mkdir(exist_ok=True) chdir(m3_dir) + self._agent_manager = AgentManager(m3_dir) + reconfigure_logging(self._log_records_out) self._send_resources() self._main() @@ -81,10 +269,10 @@ def _main(self) -> None: elif isinstance(request, CancelAllRequest): _logger.debug('Got CancelAllRequest') - self._process_manager.cancel_all() - _logger.debug('Done CancelAllRequest') + self._agent_manager.cancel_all() elif isinstance(request, InstantiationRequest): + _logger.debug('Got InstantiationRequest') if not shutting_down: self._instantiate(request) @@ -95,19 +283,65 @@ def _main(self) -> None: self._report_finished_processes() if shutting_down: - _logger.debug(f'Done: {self._processes}') + _logger.debug(f'Remaining processes: {self._processes}') done = not self._processes if not done: sleep(0.1) + self._agent_manager.shutdown() + def _send_resources(self) -> None: - """Detect resources and report them to the manager.""" + """Detect resources and report them to the manager. + + We have potentially two sources of truth here: the Slurm environment variables + and what the agents report based on what they're bound to. These should be + consistent, but we check that and then try to be conservative to try to not + step outside our bounds even if the cluster doesn't constrain processes to their + assigned processors. 
+ """ resources = Resources() - res = zip(self._resource_detector.nodes, self._resource_detector.cores_per_node) - for node, num_cores in res: - resources.cores[node] = set(range(num_cores)) + agent_cores = self._agent_manager.get_resources() + + env_ncores = dict( + zip(global_resources.nodes, global_resources.cores_per_node) + ) + + for node in env_ncores: + if node not in agent_cores: + _logger.warning( + f'The environment suggests we should have node {node},' + ' but no agent reported running on it. We won''t be able' + ' to use this node.') + else: + resources.cores[node] = set(agent_cores[node]) + + env_nncores = env_ncores[node] + ag_nncores = len(agent_cores[node]) + if ag_nncores < env_nncores: + _logger.warning( + f'Node {node} should have {env_nncores} cores available,' + f' but the agent reports only {ag_nncores} available to it.' + f' We\'ll use the {ag_nncores} we seem to have.') + + resources.cores[node] = set(agent_cores[node]) + + elif env_nncores < ag_nncores: + _logger.warning( + f'Node {node} should have {env_nncores} cores available,' + f' but the agent reports {ag_nncores} available to it.' + ' Maybe the cluster does not constrain resources? We\'ll' + f' use the {env_nncores} that we should have got.') + resources.cores[node] = set(agent_cores[node][:env_nncores]) + + for node in agent_cores: + if node not in env_ncores: + _logger.warning( + f'An agent is running on node {node} but the environment' + ' does not list it as ours. It seems that the node\'s' + ' hostname does not match what SLURM calls it. We will not use' + ' this node, because we\'re not sure it\'s really ours.') self._resources_out.put(resources) @@ -119,37 +353,31 @@ def _instantiate(self, request: InstantiationRequest) -> None: self._add_resources(env, request.res_req) rankfile: Optional[Path] = None - if self._resource_detector.on_cluster(): - _logger.debug('On cluster...') + if global_resources.on_cluster(): rankfile_contents, resource_env = prep_resources( request.implementation.execution_model, request.resources) - _logger.debug(f'Rankfile: {rankfile_contents}') - _logger.debug(f'Resource env: {resource_env}') - if rankfile_contents: rankfile = self._write_rankfile(request, rankfile_contents) + env['MUSCLE_RANKFILE'] = str(rankfile) - if resource_env: - env.update(resource_env) - - # env['MUSCLE_THREADS_PER_MPI_PROCESS'] = str( - # request.res_req.threads_per_mpi_process) - # env['MUSCLE_OPENMPI_RANK_FILE'] = str(rank_file) - # env['MUSCLE_INTELMPI_RESOURCES'] = ' '.join(mpi_res_args) + env.update(resource_env) run_script_file = self._write_run_script(request, rankfile) args = [str(run_script_file)] self._processes[name] = Process(request.instance, request.resources) + _logger.debug(f'Instantiating {name} on {request.resources}') try: - self._process_manager.start( + self._agent_manager.start( + next(iter(request.resources.cores.keys())), name, request.work_dir, args, env, request.stdout_path, request.stderr_path) self._processes[name].status = ProcessStatus.RUNNING except Exception as e: + _logger.warning(f'Instance {name} failed to start: {e}') self._processes[name].status = ProcessStatus.ERROR self._processes[name].error_msg = f'Instance failed to start: {e}' @@ -168,12 +396,13 @@ def _write_rankfile(self, request: InstantiationRequest, rankfile: str) -> Path: def _write_run_script( self, request: InstantiationRequest, rankfile: Optional[Path]) -> Path: """Create and write out the run script and return its location.""" + # TODO: Only write out once for each implementation if 
request.implementation.script: run_script = request.implementation.script else: run_script = make_script( request.implementation, request.res_req, - not self._resource_detector.on_cluster(), rankfile) + not global_resources.on_cluster(), rankfile) run_script_file = request.instance_dir / 'run_script.sh' @@ -217,7 +446,7 @@ def _report_failed_processes(self) -> None: def _report_finished_processes(self) -> None: """Get finished processes and report back their status.""" - for name, exit_code in self._process_manager.get_finished(): + for name, exit_code in self._agent_manager.get_finished(): process = self._processes[name] if process.status == ProcessStatus.RUNNING: if exit_code == 0: diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index 62aa7f77..1c615823 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -7,6 +7,32 @@ ResourceRequirements, ThreadedResReq) +def direct_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: + """Create resources for a non-MPI program with taskset. + + Taskset expects a set of cores on the command line, which we put into a + MUSCLE_CORES environment variable here. + + Args: + resources: The resources to describe + + Return: + No rank file, and a set of environment variables. + """ + env: Dict[str, str] = dict() + only_node_hwthreads_list = [ + hwthread + for core in next(iter(resources.cores.values())) + for hwthread in core] + + env['MUSCLE_BIND_LIST'] = ','.join(map(str, only_node_hwthreads_list)) + + mask_int = sum((1 << c for c in only_node_hwthreads_list)) + env['MUSCLE_BIND_MASK'] = format(mask_int, 'X') + + return '', env + + def openmpi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: """Create resource description for OpenMPI mpirun @@ -18,10 +44,12 @@ def openmpi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: """ ranklines: List[str] = list() all_cores = ( - (node, core) for node, cores in resources.cores.items() for core in cores) + (node, ','.join(sorted(map(str, hwthreads)))) + for node, cores in resources.cores.items() + for hwthreads in cores) - for i, (node, core) in enumerate(all_cores): - ranklines.append(f'rank {i}={node} slot={core}') + for i, (node, hwthreads) in enumerate(all_cores): + ranklines.append(f'rank {i}={node} slot={hwthreads}') rankfile = '\n'.join(ranklines) + '\n' @@ -83,7 +111,7 @@ def prep_resources( The contents of the rank/machine/hostfile, and a set of environment variables. """ if model == ExecutionModel.DIRECT: - return '', dict() + return direct_prep_resources(resources) elif model == ExecutionModel.OPENMPI: return openmpi_prep_resources(resources) elif model == ExecutionModel.INTELMPI: @@ -131,11 +159,11 @@ def local_command(implementation: Implementation) -> str: elif implementation.execution_model == ExecutionModel.OPENMPI: # Native name is orterun for older and prterun for newer OpenMPI. # So we go with mpirun, which works for either. 
- fstr = 'mpirun -np {{ntasks}} --oversubscribe {command} {args}' + fstr = 'mpirun -np $MUSCLE_MPI_PROCESSES --oversubscribe {command} {args}' elif implementation.execution_model == ExecutionModel.INTELMPI: - fstr = 'mpirun -n {{ntasks}} {command} {args}' + fstr = 'mpirun -n $MUSCLE_MPI_PROCESSES {command} {args}' elif implementation.execution_model == ExecutionModel.SRUNMPI: - fstr = 'srun -n {{ntasks}} -m arbitrary {command} {args}' + fstr = 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary {command} {args}' # elif implementation.execution_model == ExecutionModel.MPICH # fstr = 'mpiexec -n {{ntasks}} {command} {args}' @@ -163,26 +191,31 @@ def cluster_command(implementation: Implementation) -> str: implementation: The implementation to start. Return: - A format string with embedded {ntasks} and {rankfile}. + A string with the command to use to start the implementation. """ + # TODO: enable debug options iff the manager log level is set to DEBUG + # TODO: don't use taskset if it's not available if implementation.execution_model == ExecutionModel.DIRECT: - fstr = '{command} {args}' + fstr = 'taskset $MUSCLE_BIND_MASK {command} {args}' elif implementation.execution_model == ExecutionModel.OPENMPI: # Native name is orterun for older and prterun for newer OpenMPI. # So we go with mpirun, which works for either. fstr = ( - 'mpirun -v -np {{ntasks}}' + 'mpirun -v -np $MUSCLE_MPI_PROCESSES' ' -d --debug-daemons' - ' --rankfile {{rankfile}} --oversubscribe' - # ' --map-by rankfile:file={{rankfile}}:oversubscribe' - ' --display-map --display-allocation {command} {args}') - # ' --bind-to core --display-map --display-allocation {command} {args}') + ' --rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --oversubscribe' + # ' --map-by rankfile:file=$MUSCLE_RANKFILE:oversubscribe' + # ' --display-map --display-allocation {command} {args}' + ' --bind-to core --display-map --display-allocation {command} {args}' + ) elif implementation.execution_model == ExecutionModel.INTELMPI: - fstr = 'mpirun -n {{ntasks}} -machinefile {{rankfile}} {command} {args}' + fstr = ( + 'mpirun -n $MUSCLE_MPI_PROCESSES -machinefile $MUSCLE_RANKFILE' + ' {command} {args}') elif implementation.execution_model == ExecutionModel.SRUNMPI: - fstr = 'srun -n {{ntasks}} -m arbitrary {command} {args}' + fstr = 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary {command} {args}' # elif implementation.execution_model == ExecutionModel.MPICH - # fstr = 'mpiexec -n {{ntasks}} -f {{rankfile}} {command} {args}' + # fstr = 'mpiexec -n $MUSCLE_MPI_PROCESSES -f $MUSCLE_RANKFILE {command} {args}' if implementation.args is None: args = '' @@ -200,7 +233,7 @@ def cluster_command(implementation: Implementation) -> str: def make_script( implementation: Implementation, res_req: ResourceRequirements, local: bool, rankfile: Optional[Path] = None) -> str: - """Make a launch script for a given implementation. + """Make a run script for a given implementation. 
Args: implementation: The implementation to launch @@ -232,12 +265,9 @@ def make_script( lines.append('') if local: - cmd = local_command(implementation) + lines.append(local_command(implementation)) else: - cmd = cluster_command(implementation) - - ntasks = num_mpi_tasks(res_req) - lines.append(cmd.format(ntasks=ntasks, rankfile=rankfile)) + lines.append(cluster_command(implementation)) lines.append('') diff --git a/libmuscle/python/libmuscle/planner/planner.py b/libmuscle/python/libmuscle/planner/planner.py index 47d4b903..2d63828e 100644 --- a/libmuscle/python/libmuscle/planner/planner.py +++ b/libmuscle/python/libmuscle/planner/planner.py @@ -1,6 +1,6 @@ from copy import copy, deepcopy import logging -from typing import Dict, Iterable, List, Mapping, Optional, Set, Tuple +from typing import Dict, Iterable, FrozenSet, List, Mapping, Optional, Set, Tuple from ymmsl import ( Component, Configuration, Model, MPICoresResReq, MPINodesResReq, @@ -391,17 +391,17 @@ class Resources: resources we're talking about. Attributes: - cores: A dictionary mapping designated nodes to designated - cores on them. + cores: A dictionary mapping designated nodes to designated cores on them. Cores + are represented by sets of hwthreads they have. """ - def __init__(self, cores: Optional[Dict[str, Set[int]]] = None) -> None: + def __init__(self, cores: Optional[Dict[str, Set[FrozenSet[int]]]] = None) -> None: """Create a Resources object with the given cores. Args: cores: Cores to be designated by this object. """ if cores is None: - self.cores: Dict[str, Set[int]] = {} + self.cores: Dict[str, Set[FrozenSet[int]]] = {} else: self.cores = cores @@ -444,22 +444,22 @@ def __isub__(self, other: 'Resources') -> 'Resources': def __str__(self) -> str: """Return a human-readable string representation.""" - def collapse_ranges(cores: Set[int]) -> str: + def collapse_ranges(cores: Set[FrozenSet[int]]) -> str: if len(cores) == 0: return '' result = list() - scores = sorted(cores) + hwthreads = sorted((hwthread for core in cores for hwthread in core)) start = 0 i = 1 - while i <= len(scores): - if (i == len(scores)) or (scores[i-1] != scores[i] - 1): + while i <= len(hwthreads): + if (i == len(hwthreads)) or (hwthreads[i-1] != hwthreads[i] - 1): if start == i - 1: # run of one - result.append(str(scores[i-1])) + result.append(str(hwthreads[i-1])) else: # run of at least two - result.append(f'{scores[start]}-{scores[i-1]}') + result.append(f'{hwthreads[start]}-{hwthreads[i-1]}') start = i i += 1 return ','.join(result) @@ -477,7 +477,7 @@ def nodes(self) -> Iterable[str]: return self.cores.keys() def total_cores(self) -> int: - """Returns the total number of cores designated.""" + """Returns the total number of cores (not hwthreads) designated.""" return sum([len(cs) for cs in self.cores.values()]) def isdisjoint(self, other: 'Resources') -> bool: @@ -701,7 +701,8 @@ def _expand_resources( f' {req.threads_per_mpi_process} threads per process,' f' which is impossible with {num_cores} cores per' ' node.') - self._all_resources.cores[new_node] = set(range(num_cores)) + self._all_resources.cores[new_node] = { + frozenset([i]) for i in range(num_cores)} def _allocate_instance( self, instance: Reference, component: Component, diff --git a/libmuscle/python/libmuscle/planner/test/test_planner.py b/libmuscle/python/libmuscle/planner/test/test_planner.py index 95e8e7fb..25883aab 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner.py @@ -9,13 +9,15 @@ 
Component, Conduit, Configuration, Implementation, Model, MPICoresResReq, Ports, Reference, ResourceRequirements, ThreadedResReq) +from libmuscle.test.conftest import frozenset_of as s + @pytest.fixture def all_resources() -> Resources: return Resources({ - 'node001': {1, 2, 3, 4}, - 'node002': {1, 2, 3, 4}, - 'node003': {1, 2, 3, 4}}) + 'node001': {s(1), s(2), s(3), s(4)}, + 'node002': {s(1), s(2), s(3), s(4)}, + 'node003': {s(1), s(2), s(3), s(4)}}) @pytest.fixture @@ -96,42 +98,48 @@ def test_model_graph( def test_resources(all_resources: Resources) -> None: res1 = all_resources assert res1.cores == { - 'node001': {1, 2, 3, 4}, - 'node002': {1, 2, 3, 4}, - 'node003': {1, 2, 3, 4}} + 'node001': {s(1), s(2), s(3), s(4)}, + 'node002': {s(1), s(2), s(3), s(4)}, + 'node003': {s(1), s(2), s(3), s(4)}} assert set(res1.nodes()) == {'node001', 'node002', 'node003'} res2 = Resources({ - 'node004': {1, 2, 3, 4, 5, 6}, 'node005': {1, 2, 3, 4, 5, 6}}) + 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, + 'node005': {s(1), s(2), s(3), s(4), s(5), s(6)}}) res1 += res2 assert res1.cores == { - 'node001': {1, 2, 3, 4}, 'node002': {1, 2, 3, 4}, - 'node003': {1, 2, 3, 4}, 'node004': {1, 2, 3, 4, 5, 6}, - 'node005': {1, 2, 3, 4, 5, 6}} - - res3 = Resources({'node003': {1, 2, 3, 4}, 'node005': {4, 5, 6}}) + 'node001': {s(1), s(2), s(3), s(4)}, + 'node002': {s(1), s(2), s(3), s(4)}, + 'node003': {s(1), s(2), s(3), s(4)}, + 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, + 'node005': {s(1), s(2), s(3), s(4), s(5), s(6)}} + + res3 = Resources({ + 'node003': {s(1), s(2), s(3), s(4)}, 'node005': {s(4), s(5), s(6)}}) res1 -= res3 assert res1.cores == { - 'node001': {1, 2, 3, 4}, 'node002': {1, 2, 3, 4}, - 'node004': {1, 2, 3, 4, 5, 6}, 'node005': {1, 2, 3}} + 'node001': {s(1), s(2), s(3), s(4)}, + 'node002': {s(1), s(2), s(3), s(4)}, + 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, + 'node005': {s(1), s(2), s(3)}} assert res1.nodes() == { 'node001', 'node002', 'node004', 'node005'} res4 = copy(res3) - res4.cores['node003'] = {8} + res4.cores['node003'] = {s(8)} - assert res3.cores['node003'] == {1, 2, 3, 4} - assert res4.cores['node003'] == {8} + assert res3.cores['node003'] == {s(1), s(2), s(3), s(4)} + assert res4.cores['node003'] == {s(8)} all_resources = Resources.union([res1, res2, res3, res4]) - assert all_resources.cores['node001'] == {1, 2, 3, 4} - assert all_resources.cores['node002'] == {1, 2, 3, 4} - assert all_resources.cores['node003'] == {1, 2, 3, 4, 8} - assert all_resources.cores['node004'] == {1, 2, 3, 4, 5, 6} - assert all_resources.cores['node005'] == {1, 2, 3, 4, 5, 6} + assert all_resources.cores['node001'] == {s(1), s(2), s(3), s(4)} + assert all_resources.cores['node002'] == {s(1), s(2), s(3), s(4)} + assert all_resources.cores['node003'] == {s(1), s(2), s(3), s(4), s(8)} + assert all_resources.cores['node004'] == {s(1), s(2), s(3), s(4), s(5), s(6)} + assert all_resources.cores['node005'] == {s(1), s(2), s(3), s(4), s(5), s(6)} def test_planner( @@ -139,9 +147,12 @@ def test_planner( planner = Planner(all_resources) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('macro')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('micro')].cores == {'node001': {1, 2, 3, 4}} + assert allocations[Reference('init')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('macro')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert 
allocations[Reference('micro')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} def test_planner_exclusive_macro( @@ -151,9 +162,12 @@ def test_planner_exclusive_macro( False) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('macro')].cores == {'node002': {1, 2, 3, 4}} - assert allocations[Reference('micro')].cores == {'node001': {1, 2, 3, 4}} + assert allocations[Reference('init')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('macro')].cores == { + 'node002': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('micro')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} def test_planner_exclusive_predecessor( @@ -163,9 +177,12 @@ def test_planner_exclusive_predecessor( False) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('macro')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('micro')].cores == {'node001': {1, 2, 3, 4}} + assert allocations[Reference('init')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('macro')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('micro')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} def test_oversubscribe( @@ -177,33 +194,38 @@ def test_oversubscribe( planner = Planner(all_resources) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init[0]')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('init[1]')].cores == {'node002': {1, 2, 3, 4}} - assert allocations[Reference('init[2]')].cores == {'node003': {1, 2, 3, 4}} - assert allocations[Reference('init[3]')].cores == {'node001': {1, 2, 3, 4}} - assert allocations[Reference('init[4]')].cores == {'node002': {1, 2, 3, 4}} + assert allocations[Reference('init[0]')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('init[1]')].cores == { + 'node002': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('init[2]')].cores == { + 'node003': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('init[3]')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Reference('init[4]')].cores == { + 'node002': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('macro[0]')].cores == { - 'node001': {1, 2, 3, 4}} + 'node001': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('macro[1]')].cores == { - 'node002': {1, 2, 3, 4}} + 'node002': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('macro[2]')].cores == { - 'node003': {1, 2, 3, 4}} + 'node003': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('macro[3]')].cores == { - 'node001': {1, 2, 3, 4}} + 'node001': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('macro[4]')].cores == { - 'node002': {1, 2, 3, 4}} + 'node002': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('micro[0]')].cores == { - 'node001': {1, 2, 3, 4}} + 'node001': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('micro[1]')].cores == { - 'node002': {1, 2, 3, 4}} + 'node002': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('micro[2]')].cores == { - 'node003': {1, 2, 3, 4}} + 'node003': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('micro[3]')].cores == { - 'node001': {1, 2, 3, 4}} + 'node001': {s(1), s(2), s(3), s(4)}} assert allocations[Reference('micro[4]')].cores == { - 'node002': {1, 2, 3, 4}} + 
'node002': {s(1), s(2), s(3), s(4)}} def test_oversubscribe_single_instance_threaded() -> None: @@ -213,12 +235,13 @@ def test_oversubscribe_single_instance_threaded() -> None: Reference('x'): ThreadedResReq(Reference('x'), 24)} config = Configuration(model, None, impl, reqs) - res = Resources({'node001': {1, 2, 3, 4}}) + res = Resources({'node001': {s(1), s(2), s(3), s(4)}}) planner = Planner(res) allocations = planner.allocate_all(config) - assert allocations[Reference('x')].cores == {'node001': {1, 2, 3, 4}} + assert allocations[Reference('x')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} def test_oversubscribe_single_instance_mpi() -> None: @@ -228,12 +251,13 @@ def test_oversubscribe_single_instance_mpi() -> None: Reference('x'): MPICoresResReq(Reference('x'), 24)} config = Configuration(model, None, impl, reqs) - res = Resources({'node001': {1, 2, 3, 4}}) + res = Resources({'node001': {s(1), s(2), s(3), s(4)}}) planner = Planner(res) allocations = planner.allocate_all(config) - assert allocations[Reference('x')].cores == {'node001': {1, 2, 3, 4}} + assert allocations[Reference('x')].cores == { + 'node001': {s(1), s(2), s(3), s(4)}} def test_virtual_allocation() -> None: @@ -243,7 +267,7 @@ def test_virtual_allocation() -> None: Reference('x'): MPICoresResReq(Reference('x'), 13)} config = Configuration(model, None, impl, reqs) - res = Resources({'node000001': {1, 2, 3, 4}}) + res = Resources({'node000001': {s(1), s(2), s(3), s(4)}}) planner = Planner(res) allocations = planner.allocate_all(config, virtual=True) @@ -260,7 +284,7 @@ def test_impossible_virtual_allocation() -> None: Reference('x'): ThreadedResReq(Reference('x'), 13)} config = Configuration(model, None, impl, reqs) - res = Resources({'node000001': {1, 2, 3, 4}}) + res = Resources({'node000001': {s(1), s(2), s(3), s(4)}}) planner = Planner(res) with pytest.raises(InsufficientResourcesAvailable): diff --git a/libmuscle/python/libmuscle/test/conftest.py b/libmuscle/python/libmuscle/test/conftest.py index 2802061d..3215517f 100644 --- a/libmuscle/python/libmuscle/test/conftest.py +++ b/libmuscle/python/libmuscle/test/conftest.py @@ -99,3 +99,12 @@ def port_exists(name): port_manager.list_ports.return_value = declared_ports port_manager.port_exists = port_exists return port_manager + + +def frozenset_of(*args): + """Create a frozenset containing the arguments. + + This is a helper to shorten notation used in some of the planning and + launching-related tests. 
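+
+    For example, frozenset_of(1) returns frozenset({1}), which the planner tests use
+    to represent a core holding only hwthread 1; frozenset_of(2, 3) would stand for
+    a core with two hwthreads.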
+ """ + return frozenset(args) diff --git a/muscle3/muscle3.py b/muscle3/muscle3.py index 04a8c3a8..c4f39af1 100644 --- a/muscle3/muscle3.py +++ b/muscle3/muscle3.py @@ -138,7 +138,9 @@ def resources( click.echo(_RESOURCES_INCOMPLETE_MODEL, err=True) sys.exit(1) - resources = Resources({'node000001': set(range(cores_per_node))}) + resources = Resources({ + 'node000001': {frozenset([r]) for r in range(cores_per_node)}}) + planner = Planner(resources) try: allocations = planner.allocate_all(config, True) From 68ea7da41b84ce5b6aa8c0a061e7577d3bbad4b5 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 17 Oct 2024 10:07:13 +0200 Subject: [PATCH 16/49] Add agents for the native instantiator --- .../native_instantiator/agent/__init__.py | 0 .../native_instantiator/agent/__main__.py | 164 ++++++++++++++ .../native_instantiator/agent_manager.py | 205 ++++++++++++++++++ .../native_instantiator/global_resources.py | 72 ++++++ .../native_instantiator/iagent_manager.py | 29 +++ .../native_instantiator/resource_detector.py | 45 ---- .../libmuscle/native_instantiator/slurm.py | 11 + 7 files changed, 481 insertions(+), 45 deletions(-) create mode 100644 libmuscle/python/libmuscle/native_instantiator/agent/__init__.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/agent/__main__.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/agent_manager.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/global_resources.py create mode 100644 libmuscle/python/libmuscle/native_instantiator/iagent_manager.py delete mode 100644 libmuscle/python/libmuscle/native_instantiator/resource_detector.py diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/__init__.py b/libmuscle/python/libmuscle/native_instantiator/agent/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py new file mode 100644 index 00000000..712da253 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py @@ -0,0 +1,164 @@ +from itertools import groupby +import logging +import os +import psutil +from socket import gethostname +import sys +from time import sleep +from typing import Any, Dict, Set + +from libmuscle.native_instantiator.process_manager import ProcessManager +from libmuscle.native_instantiator.agent.map_client import MAPClient +from libmuscle.native_instantiator.agent.agent_commands import ( + CancelAllCommand, ShutdownCommand, StartCommand) + + +_logger = logging.getLogger(__name__) + + +class Agent: + """Runs on a compute node and starts processes there.""" + def __init__(self, node_id: str, server_location: str) -> None: + """Create an Agent. 
+ + Args: + node_id: Id (hostname) of this node + server_location: MAP server of the manager to connect to + """ + _logger.info(f'Agent at {node_id} starting') + + self._process_manager = ProcessManager() + + self._node_id = node_id + + _logger.info(f'Connecting to manager at {server_location}') + self._server = MAPClient(self._node_id, server_location) + _logger.info('Connected to manager') + + def run(self) -> None: + """Execute commands and monitor processes.""" + _logger.info('Reporting resources') + self._server.report_resources(self._inspect_resources()) + + shutting_down = False + while not shutting_down: + command = self._server.get_command() + if isinstance(command, StartCommand): + _logger.info(f'Starting process {command.name}') + _logger.debug(f'Args: {command.args}') + _logger.debug(f'Env: {command.env}') + + self._process_manager.start( + command.name, command.work_dir, command.args, command.env, + command.stdout, command.stderr) + elif isinstance(command, CancelAllCommand): + _logger.info('Cancelling all instances') + self._process_manager.cancel_all() + + elif isinstance(command, ShutdownCommand): + # check that nothing is running + shutting_down = True + _logger.info('Agent shutting down') + + finished = self._process_manager.get_finished() + if finished: + for name, exit_code in finished: + _logger.info(f'Process {name} finished with exit code {exit_code}') + self._server.report_result(finished) + + sleep(0.1) + + def _inspect_resources(self) -> Dict[str, Any]: + """Inspect the node to find resources and report on them. + + The only resource type for now is 'cpu'. The returned dict will have that key + mapping to a list of sets of logical hwthread ids, with each set designating + a set of hwthreads that share a core. + + The terminology for identifying processors gets very convoluted, with Linux, + Slurm, OpenMPI and IntelMPI all using different terms, or sometimes the same + terms for different things. See the comment in native_instantiator.py for what + is what and how we use it. + + Returns: + A dict mapping resource types to resource descriptions. + """ + if hasattr(os, 'sched_getaffinity'): + hwthreads_by_core: Dict[int, Set[int]] = dict() + + # these are the logical hwthread ids that we can use + hwthread_ids = list(os.sched_getaffinity(0)) + + for i in hwthread_ids: + with open(f'/sys/devices/system/cpu/cpu{i}/topology/core_id', 'r') as f: + # this gets the logical core id for the hwthread + core_id = int(f.read()) + hwthreads_by_core.setdefault(core_id, set()).add(i) + + cpu_resources = sorted( + map(frozenset, hwthreads_by_core.values()), key=sorted) + + else: + # MacOS doesn't support thread affinity, but older Macs with Intel + # processors do have SMT. Getting the hwthread to core mapping is not so + # easy, and if we're running on macOS then we're not on a cluster and don't + # do binding anyway. So we're going to get the number of hwthreads and the + # number of cores here, and synthesise a mapping that may be wrong, but will + # at least represent the number of cores and threads per core correctly. + nhwthreads = psutil.cpu_count(logical=True) + ncores = psutil.cpu_count(logical=False) + + hwthreads_per_core = nhwthreads // ncores + + if ncores * hwthreads_per_core != nhwthreads: + # As far as I know, there are no Macs with heterogeneous SMT, like in + # the latest Intel CPUs. + _logger.warning( + 'Only some cores seem to have SMT, core ids are probably' + ' wrong. 
If this is a cluster then this will cause problems,' + ' please report an issue on GitHub and report the machine and' + ' what kind of OS and hardware it has. If we\'re running on a' + ' local machine, then this won\'t affect the run, but I\'d' + ' still appreciate an issue, because it is unexpected for sure.' + ) + + hwthread_ids = list(range(nhwthreads)) + cpu_resources = [ + frozenset(g) + for _, g in groupby( + hwthread_ids, lambda i: i // hwthreads_per_core)] + + _logger.info(f'Found CPU resources: {cpu_resources}') + return {'cpu': cpu_resources} + + +def configure_logging(node_id: str, log_level: int) -> None: + """Make us output logs to a custom log file.""" + fmt = '%(asctime)s %(levelname)s %(message)s' + formatter = logging.Formatter(fmt) + + handler = logging.FileHandler(f'muscle3_agent_{node_id}.log', mode='w') + handler.setFormatter(formatter) + + # Find and remove default handler to disable automatic console output + # Testing for 'stderr' in the stringified version is not nice, but + # seems reliable, and doesn't mess up pytest's caplog mechanism while + # it also doesn't introduce a runtime dependency on pytest. + logging.getLogger().handlers = [ + h for h in logging.getLogger().handlers + if 'stderr' not in str(h)] + + logging.getLogger().addHandler(handler) + + logging.getLogger().setLevel(log_level) + + +if __name__ == '__main__': + node_id = gethostname() + server_location = sys.argv[1] + log_level = int(sys.argv[2]) + + configure_logging(node_id, log_level) + + agent = Agent(node_id, server_location) + agent.run() diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py new file mode 100644 index 00000000..2e5aa361 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -0,0 +1,205 @@ +import logging +from pathlib import Path +from subprocess import Popen, TimeoutExpired +import sys +from threading import Lock +from time import sleep +from typing import Any, Dict, FrozenSet, List, Tuple + +from libmuscle.native_instantiator.agent.agent_commands import ( + CancelAllCommand, StartCommand, ShutdownCommand) +from libmuscle.native_instantiator.iagent_manager import IAgentManager +from libmuscle.native_instantiator.map_server import MAPServer +from libmuscle.native_instantiator.global_resources import global_resources + + +_logger = logging.getLogger(__name__) + + +class AgentManager(IAgentManager): + """Manage the node agents. + + Each node of our allocated resources gets an agent, which launches and monitors + processes or that node. This class launches those agents across the nodes, + and communicates with them. + + The AgentManager sits in between the NativeInstantiator and the MAPServer. It gets + called by NativeInstantiator with requests for resources and commands to start and + cancel processes on nodes, and it gets called by MAPServer with requests from the + agents. + """ + def __init__(self, agent_dir: Path) -> None: + """Create an AgentManager. + + Create the object, then launch the agents and wait for them to connect and send + information about the available resources. + + Args: + agent_dir: Directory in which agents can write log files. 
+ """ + self._nodes: List[str] = list() + self._resources: Dict[str, Dict[str, Any]] = dict() + self._resources_lock = Lock() # protects _nodes and _resources + + self._finished_processes: List[Tuple[str, int]] = list() + self._finished_processes_lock = Lock() + + self._server = MAPServer(self) + + _logger.info('Launching MUSCLE agents...') + self._agents_process = self._launch_agents( + agent_dir, self._server.get_location()) + + expected_nodes = global_resources.nodes + + resources_complete = False + while not resources_complete: + sleep(0.1) + with self._resources_lock: + resources_complete = len(self._nodes) == len(expected_nodes) + _logger.debug(f'{len(self._resources)} agents up') + + if self._agents_process.poll() is not None: + msg = ( + 'Agents unexpectedly stopped running. This is not supposed' + ' to happen. Please see the agent log for more information,' + ' and please file an issue on GitHub.') + _logger.error(msg) + raise RuntimeError(msg) + + _logger.info(f'All agents running on {self._nodes}') + + if sorted(expected_nodes) != sorted(self._nodes): + _logger.error( + 'Agent-reported node hostnames do not match what we got from the' + ' resource manager.') + _logger.error( + 'According to the resource manager, we have' + f' {sorted(expected_nodes)}') + _logger.error( + f'The agents are reporting {sorted(self._nodes)}') + + def get_resources(self) -> Dict[str, List[FrozenSet[int]]]: + """Return detected resources. + + This returns a list of tuples of logical hwthread ids for each core per node. + + Called by NativeInstantiator. + """ + # no need to lock, _resources is already in its final state + return {node_id: res['cpu'] for node_id, res in self._resources.items()} + + def start( + self, node_id: str, name: str, work_dir: Path, args: List[str], + env: Dict[str, str], stdout: Path, stderr: Path) -> None: + """Start a process on a node. + + The files that the output is directed to will be overwritten if they already + exist. + + Args: + node_id: Id of the node to run the process on + name: Name under which this process will be known + work_dir: Working directory in which to start + args: Executable and arguments to run + env: Environment variables to set + stdout: File to redirect stdout to + stderr: File to redirect stderr to + """ + command = StartCommand(name, work_dir, args, env, stdout, stderr) + self._server.deposit_command(node_id, command) + + def cancel_all(self) -> None: + """Cancel all processes. + + This tells the agents to stop all running processes they've started. + + Called by NativeInstantiator. + """ + for node_id in self._nodes: + self._server.deposit_command(node_id, CancelAllCommand()) + + def get_finished(self) -> List[Tuple[str, int]]: + """Returns names and exit codes of finished processes. + + This returns all processes that have finished running since the previous call; + each started process will be returned exactly once. The names are the ones + passed to start(). + + Called by NativeInstantiator. 
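+
+        For example, a return value of [('macro', 0), ('micro', 1)] would mean that
+        the instance started under the name 'macro' exited successfully, while
+        'micro' failed with exit code 1.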
+ """ + with self._finished_processes_lock: + next_batch = self._finished_processes + self._finished_processes = list() + + return next_batch + + def shutdown(self) -> None: + """Shut down the manager and its agents.""" + command = ShutdownCommand() + for node_id in self._nodes: + self._server.deposit_command(node_id, command) + + try: + self._agents_process.wait(60) + except TimeoutExpired: + _logger.warning( + 'Agents did not shut down within one minute, sending signal...') + self._agents_process.kill() + + try: + self._agents_process.wait(10) + except TimeoutExpired: + _logger.warning('Agents still not down, continuing shutdown anyway.') + + self._server.stop() + + def report_resources(self, node_id: str, resources: Dict[str, Any]) -> None: + """Report resources found on a node. + + Called by MAPServer from a server thread. + + Args: + node_id: Id of the node these resources are on + resources: Dict mapping resource type to resource ids + """ + with self._resources_lock: + self._nodes.append(node_id) + self._resources[node_id] = resources + + def report_result(self, names_exit_codes: List[Tuple[str, int]]) -> None: + """Report results of finished processes. + + Called by MAPServer from a server thread. + + Args: + names_exit_codes: A list of names and exit codes of finished processes. + """ + with self._finished_processes_lock: + self._finished_processes.extend(names_exit_codes) + + def _launch_agents(self, agent_dir: Path, server_location: str) -> Popen: + """Actually launch the agents. + + This runs a local process, either to start a single agent locally, or on a + cluster to start all of them in one go. + + Args: + agent_dir: Working directory for the agents + server_location: MAPServer network location string for the agents to + connect to + """ + python = sys.executable + if not python: + raise RuntimeError( + 'Could not launch agents because sys.executable is not set.') + + log_level = logging.getLogger('libmuscle').getEffectiveLevel() + + args = [ + sys.executable, '-m', 'libmuscle.native_instantiator.agent', + server_location, str(log_level)] + + args = global_resources.agent_launch_command(args) + + return Popen(args, cwd=agent_dir) diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py new file mode 100644 index 00000000..08d294a3 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -0,0 +1,72 @@ +from enum import Enum +import logging +from socket import gethostname +from typing import List + +import psutil + +from libmuscle.native_instantiator import slurm + + +_logger = logging.getLogger(__name__) + + +class Scheduler(Enum): + NONE = 0 + SLURM = 1 + + +class GlobalResources: + """Detects available compute resources. + + This detects whether we're running locally or in a SLURM allocation, and returns + available resources on request. This class describes all the available resources, + not the ones local to a node. + + Attributes: + scheduler: The HPC scheduler we're running under, if any. + nodes: List of hostnames of available nodes to run on. + cores_per_node: Number of cores available on each node. List alongside nodes. + """ + def __init__(self) -> None: + """Create a GlobalResources. + + Detects available resources and initialises the object, which can then be + queried. 
+ """ + if slurm.in_slurm_allocation(): + _logger.info('Detected a SLURM allocation') + self.scheduler = Scheduler.SLURM + self.nodes = slurm.get_nodes() + self.cores_per_node = slurm.get_cores_per_node() + _logger.info( + f'We have {len(self.nodes)} nodes and a total of' + f' {sum(self.cores_per_node)} cores available') + else: + _logger.info('Running locally without a cluster scheduler') + self.scheduler = Scheduler.NONE + self.nodes = [gethostname()] + self.cores_per_node = [psutil.cpu_count(logical=False)] + _logger.info(f'We have {self.cores_per_node[0]} cores available') + + def on_cluster(self) -> bool: + """Return whether we're running on a cluster.""" + return self.scheduler != Scheduler.NONE + + def agent_launch_command(self, agent_cmd: List[str]) -> List[str]: + """Return a command for launching one agent on each node. + + Args: + agent_cmd: A command that will start the agent. + """ + if self.scheduler == Scheduler.SLURM: + return slurm.agent_launch_command(agent_cmd) + return agent_cmd + + +global_resources = GlobalResources() +"""Global resources object. + +This is a singleton, and that's fine because it's created once and then read-only. Also, +it's used in two places, and making two objects logs everything twice which is annoying. +""" diff --git a/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py b/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py new file mode 100644 index 00000000..93d063f8 --- /dev/null +++ b/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, List, Tuple + + +class IAgentManager: + """Interface for Agent Managers. + + Only implemented by AgentManager, and only exists to avoid a circular dependency + between AgentManager, MAPServer, and MAPRequestHandler. Ugh. + """ + def report_resources(self, node_id: str, resources: Dict[str, Any]) -> None: + """Report resources found on a node. + + Called by MAPServer from a server thread. + + Args: + node_id: Id of the node these resources are on + resources: Dict mapping resource type to resource ids + """ + raise NotImplementedError() + + def report_result(self, names_exit_codes: List[Tuple[str, int]]) -> None: + """Report results of finished processes. + + Called by MAPServer from a server thread. + + Args: + names_exit_codes: A list of names and exit codes of finished processes. + """ + raise NotImplementedError() diff --git a/libmuscle/python/libmuscle/native_instantiator/resource_detector.py b/libmuscle/python/libmuscle/native_instantiator/resource_detector.py deleted file mode 100644 index 8ff22db9..00000000 --- a/libmuscle/python/libmuscle/native_instantiator/resource_detector.py +++ /dev/null @@ -1,45 +0,0 @@ -from enum import Enum -import logging -from os import sched_getaffinity - -from libmuscle.native_instantiator import slurm - - -_logger = logging.getLogger(__name__) - - -class Scheduler(Enum): - NONE = 0 - SLURM = 1 - - -class ResourceDetector: - """Detects available compute resources. - - This detects whether we're running locally or in a SLURM allocation, and returns - available resources on request. - """ - def __init__(self) -> None: - """Create a ResourceDetector. - - Detects available resources and initialises the object, which can then be - queried. 
- """ - if slurm.in_slurm_allocation(): - _logger.info('Detected a SLURM allocation') - self.scheduler = Scheduler.SLURM - self.nodes = slurm.get_nodes() - self.cores_per_node = slurm.get_cores_per_node() - _logger.info( - f'We have {len(self.nodes)} nodes and a total of' - f' {sum(self.cores_per_node)} cores available') - else: - _logger.info('Running locally without a cluster scheduler') - self.scheduler = Scheduler.NONE - self.nodes = ['localhost'] - self.cores_per_node = [len(sched_getaffinity(0))] - _logger.info(f'We have {sum(self.cores_per_node)} cores available') - - def on_cluster(self) -> bool: - _logger.debug(f'On cluster: {self.scheduler}') - return self.scheduler != Scheduler.NONE diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py index 59258cc9..d9685687 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -278,3 +278,14 @@ def get_cores_per_node() -> List[int]: ' SLURM_CPUS_ON_NODE is missing or neither SLURM_JOB_NUM_NODES nor' ' SLURM_NNODES is set. Please create an issue on GitHub with the output' ' of "sbatch --version" on this cluster.') + + +def agent_launch_command(agent_cmd: List[str]) -> List[str]: + """Return a command for launching one agent on each node. + + Args: + agent_cmd: A command that will start the agent. + """ + # TODO: On the latest Slurm, there's a special command for this that we should use + # if we have that. + return ['srun', '--ntasks-per-node', '1'] + agent_cmd From af4ab522c3fca0d3b484940e6000c562a42df06f Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 17 Oct 2024 10:07:44 +0200 Subject: [PATCH 17/49] Add affinity checks to test --- integration_test/cluster_test/component.cpp | 32 ++++++++++-- integration_test/cluster_test/component.py | 11 +++- integration_test/cluster_test/conftest.py | 34 ++++++++++++- integration_test/cluster_test/test_cluster.py | 51 ++++++++++++------- 4 files changed, 103 insertions(+), 25 deletions(-) diff --git a/integration_test/cluster_test/component.cpp b/integration_test/cluster_test/component.cpp index 42b0cb48..0cc9726f 100644 --- a/integration_test/cluster_test/component.cpp +++ b/integration_test/cluster_test/component.cpp @@ -2,6 +2,9 @@ #include #include +// This is a Linux-specific API, but this test always runs on Linux so that's okay. +#define _GNU_SOURCE +#include #include #include "mpi.h" @@ -17,19 +20,41 @@ using libmuscle::Message; using ymmsl::Operator; -/** A simple dummy component. */ -void component(int argc, char * argv[]) { - const int root_rank = 0; +/** Log where we are running so that the test can check for it. */ +void log_location() { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); char nodeid[1024]; gethostname(nodeid, sizeof(nodeid)); + cpu_set_t cpu_set; + CPU_ZERO(&cpu_set); + sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set); + { ofstream outfile("out_" + to_string(rank) + ".txt"); outfile << nodeid << std::endl; + + bool first = true; + for (int i = 0; i < CPU_SETSIZE; ++i) { + if (CPU_ISSET(i, &cpu_set)) { + if (!first) + outfile << ","; + outfile << i; + first = false; + } + } + outfile << std::endl; } +} + + +/** A simple dummy component. 
*/ +void component(int argc, char * argv[]) { + const int root_rank = 0; + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); Instance instance(argc, argv, { {Operator::F_INIT, {"init_in"}}, @@ -66,6 +91,7 @@ void component(int argc, char * argv[]) { int main(int argc, char * argv[]) { MPI_Init(&argc, &argv); + log_location(); component(argc, argv); MPI_Finalize(); return EXIT_SUCCESS; diff --git a/integration_test/cluster_test/component.py b/integration_test/cluster_test/component.py index aa8dd260..a22c7d96 100644 --- a/integration_test/cluster_test/component.py +++ b/integration_test/cluster_test/component.py @@ -1,18 +1,23 @@ import logging +import os import socket from libmuscle import Instance, Message from ymmsl import Operator +def log_location() -> None: + """Log where we are running so that the test can check for it.""" + print(socket.gethostname()) + print(','.join(map(str, sorted(os.sched_getaffinity(0))))) + + def component() -> None: """A simple dummy component. This sends and receives on all operators, allowing different coupling patterns with a single program. """ - print(socket.gethostname()) - instance = Instance({ Operator.F_INIT: ['init_in'], Operator.O_I: ['inter_out'], @@ -39,4 +44,6 @@ def component() -> None: if __name__ == '__main__': logging.basicConfig() logging.getLogger().setLevel(logging.INFO) + + log_location() component() diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index a4f5cba4..ec066556 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -142,7 +142,6 @@ def setup_connection(fake_cluster_headnode): # Because it's been made inside of the container, it has a different owner # than what we're running with on the host, and the host user cannot remove # the files. - run_cmd(term, 60, f'rm -rf {REMOTE_SHARED}/*') @@ -198,3 +197,36 @@ def muscle3_native_openmpi(remote_source, setup_connection): f'PREFIX={prefix} make install"')) return prefix + + +@pytest.fixture(scope='session') +def hwthread_to_core(): + """Translates hwthreads to core ids. + + In our tests, we use sched_getaffinity to check which cores we're bound to. This + returns numbers identifying hwthreads, but our planner binds swthreads and processes + to entire cores. So we get a comma-separated list of hwthread ids and want to + compare that to a list of core ids. + + This reads /proc/cpuinfo to get the mapping between hwthreads and cores, and returns + a function that takes a comma-separated list of hwthread ids and returns a list of + corresponding core ids. 
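+
+    For example (illustrative values only), on a node where /proc/cpuinfo lists
+    processor 0 and processor 4 both with core id 0, the returned function maps
+    the string '0,4' to [0].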
+ """ + with open('/proc/cpuinfo', 'r') as f: + cpuinfo = f.readlines() + + def get_values(cpuinfo, field): + return [ + int(line.split(':')[1].strip()) + for line in cpuinfo if line.startswith(field)] + + hwthread_ids = get_values(cpuinfo, 'processor') + core_ids = get_values(cpuinfo, 'core id') + + table = dict(zip(hwthread_ids, core_ids)) + + def convert(aff_ids): + cores = {table[i] for i in map(int, aff_ids.split(','))} + return sorted(cores) + + return convert diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index d9b1d85f..b25ccb5a 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -113,7 +113,8 @@ def _get_outfile(remote_out_dir, testname, mode, instance, rank): @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -def test_single(fake_cluster, remote_test_files, remote_out_dir, mode): +def test_single( + fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): sched = _sched(fake_cluster, mode) job = _make_job('single', mode, remote_test_files, remote_out_dir) @@ -129,14 +130,17 @@ def test_single(fake_cluster, remote_test_files, remote_out_dir, mode): output = _get_stdout(remote_out_dir, 'single', mode, 'c1') if mode == 'local': - assert output == 'muscle3-headnode\n' + assert output.split('\n')[0] == 'muscle3-headnode' else: - assert output == 'muscle3-node-0\n' + node, hwthreads, _ = output.split('\n') + assert node == 'muscle3-node-0' + assert hwthread_to_core(hwthreads) == [0] @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -def test_dispatch(fake_cluster, remote_test_files, remote_out_dir, mode): +def test_dispatch( + fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): sched = _sched(fake_cluster, mode) job = _make_job('dispatch', mode, remote_test_files, remote_out_dir) @@ -152,17 +156,22 @@ def test_dispatch(fake_cluster, remote_test_files, remote_out_dir, mode): c1_out = _get_stdout(remote_out_dir, 'dispatch', mode, 'c1') c2_out = _get_stdout(remote_out_dir, 'dispatch', mode, 'c2') if mode == 'local': - assert c1_out == 'muscle3-headnode\n' - assert c2_out == 'muscle3-headnode\n' + assert c1_out.split('\n')[0] == 'muscle3-headnode' + assert c2_out.split('\n')[0] == 'muscle3-headnode' else: - assert c1_out == 'muscle3-node-1\n' - assert c2_out == 'muscle3-node-1\n' + node, hwthreads, _ = c1_out.split('\n') + assert node == 'muscle3-node-1' + assert hwthread_to_core(hwthreads) == [0] + + node, hwthreads, _ = c2_out.split('\n') + assert node == 'muscle3-node-1' + assert hwthread_to_core(hwthreads) == [0] @skip_unless_cluster -@pytest.mark.parametrize('mode', ['local']) -# SLURM mode is not implemented yet -def test_multiple(fake_cluster, remote_test_files, remote_out_dir, mode): +@pytest.mark.parametrize('mode', ['local', 'slurm']) +def test_multiple( + fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): sched = _sched(fake_cluster, mode) job = _make_job('multiple', mode, remote_test_files, remote_out_dir) @@ -175,17 +184,19 @@ def test_multiple(fake_cluster, remote_test_files, remote_out_dir, mode): assert sched.get_exit_code(job_id) == 0 for i in range(1, 7): + out = _get_stdout(remote_out_dir, 'multiple', mode, f'c{i}') if mode == 'local': - assert _get_stdout( - remote_out_dir, 'multiple', mode, f'c{i}') == 'muscle3-headnode\n' + assert out.split('\n')[0] == 'muscle3-headnode' else: - out = _get_stdout(remote_out_dir, 'multiple', mode, f'c{i}') - 
assert out == f'muscle3-node-{(i - 1) // 2}\n' + node, hwthreads, _ = out.split('\n') + assert node == f'muscle3-node-{(i - 1) // 2}' + assert hwthread_to_core(hwthreads) == [(i - 1) % 2] @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -def test_double_mpi(fake_cluster, remote_test_files, remote_out_dir, mode): +def test_double_mpi( + fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): sched = _sched(fake_cluster, mode) job = _make_job('double_mpi', mode, remote_test_files, remote_out_dir) @@ -199,8 +210,10 @@ def test_double_mpi(fake_cluster, remote_test_files, remote_out_dir, mode): for i in range(1, 3): for rank in range(2): - output = _get_outfile(remote_out_dir, 'double_mpi', mode, f'c{i}', rank) + out = _get_outfile(remote_out_dir, 'double_mpi', mode, f'c{i}', rank) if mode == 'local': - assert output == 'muscle3-headnode\n' + assert out.split('\n')[0] == 'muscle3-headnode' else: - assert output == f'muscle3-node-{i + 2}\n' + node, hwthreads, _ = out.split('\n') + assert node == f'muscle3-node-{i + 2}' + assert hwthread_to_core(hwthreads) == [rank] From 33b65fff97ba92dd741fbecda78dd7ee558a31d4 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Thu, 17 Oct 2024 10:19:47 +0200 Subject: [PATCH 18/49] Improve handling of crashing agents --- .../python/libmuscle/manager/instance_manager.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index bc6e8edd..d29356b0 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -94,7 +94,17 @@ def __init__( self._log_handler.start() self._allocations: Optional[Dict[Reference, Resources]] = None - self._planner = Planner(self._resources_in.get()) + + resources = self._resources_in.get() + _logger.debug(f'Got resources {resources}') + if isinstance(resources, CrashedResult): + msg = ( + 'Instantiator crashed. 
This should not happen, please file a bug' + ' report.') + _logger.error(msg) + raise RuntimeError(msg) + + self._planner = Planner(resources) self._num_running = 0 def set_manager_location(self, location: str) -> None: From 7dc20b5b319e174bd9aa275ea5711b3cb611f35a Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 18 Oct 2024 11:38:41 +0200 Subject: [PATCH 19/49] Comment out temporarily unused import to make CI pass --- libmuscle/python/libmuscle/manager/instance_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index d29356b0..31fc5edb 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -13,7 +13,7 @@ CancelAllRequest, CrashedResult, InstantiatorRequest, InstantiationRequest, Process, ProcessStatus, ShutdownRequest) from libmuscle.manager.logger import last_lines -from libmuscle.manager.qcgpj_instantiator import QCGPJInstantiator +# from libmuscle.manager.qcgpj_instantiator import QCGPJInstantiator from libmuscle.manager.run_dir import RunDir from libmuscle.native_instantiator.native_instantiator import NativeInstantiator from libmuscle.planner.planner import Planner, Resources From 3c04f03618110663f2c8047056ec4354c7bc76d4 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 22 Nov 2024 14:28:06 +0100 Subject: [PATCH 20/49] Switch to new Cerulean fake cluster Docker images --- integration_test/cluster_test/conftest.py | 26 ++- integration_test/cluster_test/test_cluster.py | 30 ++-- integration_test/fake_cluster/Dockerfile | 46 ++--- integration_test/fake_cluster/cgroup.conf | 6 + integration_test/fake_cluster/slurm.conf | 163 ------------------ .../fake_cluster/start-services.sh | 70 -------- .../native_instantiator/global_resources.py | 2 +- .../libmuscle/native_instantiator/slurm.py | 6 +- 8 files changed, 58 insertions(+), 291 deletions(-) create mode 100644 integration_test/fake_cluster/cgroup.conf delete mode 100644 integration_test/fake_cluster/slurm.conf delete mode 100644 integration_test/fake_cluster/start-services.sh diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index ec066556..f6d6e6d4 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -14,6 +14,10 @@ REMOTE_SHARED = '/home/cerulean/shared' +# Shut down the containers after running the tests. Set to False to debug. 
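+# When this is False, the headnode and node containers and the docker network are
+# left running after the test session, so the headnode can be reached over ssh as
+# user cerulean on the forwarded port to inspect logs and Slurm state. Clean up
+# manually afterwards with 'docker rm -f' and 'docker network rm'.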
+CLEAN_UP_CONTAINERS = True + + skip_unless_cluster = pytest.mark.skipif( 'MUSCLE_TEST_CLUSTER' not in os.environ, reason='Cluster tests were not explicitly enabled') @@ -75,10 +79,10 @@ def shared_dir(): @pytest.fixture(scope='session') def cleanup_docker(local_term): for i in range(5): - node_name = f'muscle3-node-{i}' + node_name = f'node-{i}' run_cmd(local_term, 60, f'docker rm -f {node_name}') - run_cmd(local_term, 60, 'docker rm -f muscle3-headnode') + run_cmd(local_term, 60, 'docker rm -f headnode') run_cmd(local_term, 60, 'docker network rm -f muscle3-net') @@ -87,7 +91,9 @@ def fake_cluster_network(local_term, cleanup_docker): name = 'muscle3-net' run_cmd(local_term, 60, f'docker network create {name}') yield name - run_cmd(local_term, 60, 'docker network rm -f muscle3-net') + + if CLEAN_UP_CONTAINERS: + run_cmd(local_term, 60, 'docker network rm -f muscle3-net') @pytest.fixture(scope='session') @@ -97,12 +103,13 @@ def fake_cluster_nodes( node_names = list() for i in range(5): - node_name = f'muscle3-node-{i}' + node_name = f'node-{i}' ssh_port = 10030 + i run_cmd(local_term, 60, ( f'docker run -d --name={node_name} --hostname={node_name}' f' --network={fake_cluster_network} -p {ssh_port}:22' + f' --cap-add=CAP_SYS_NICE' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' f' {fake_cluster_image}')) @@ -110,7 +117,8 @@ def fake_cluster_nodes( yield None - run_cmd(local_term, 60, f'docker rm -f {" ".join(node_names)}') + if CLEAN_UP_CONTAINERS: + run_cmd(local_term, 60, f'docker rm -f {" ".join(node_names)}') @pytest.fixture(scope='session') @@ -119,7 +127,7 @@ def fake_cluster_headnode( shared_dir): run_cmd(local_term, 60, ( - 'docker run -d --name=muscle3-headnode --hostname=muscle3-headnode' + 'docker run -d --name=headnode --hostname=headnode' f' --network={fake_cluster_network} -p 10022:22' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' f' {fake_cluster_image}')) @@ -127,7 +135,8 @@ def fake_cluster_headnode( ssh_term('Virtual cluster container start timed out') yield None - run_cmd(local_term, 60, 'docker rm -f muscle3-headnode') + if CLEAN_UP_CONTAINERS: + run_cmd(local_term, 60, 'docker rm -f headnode') @pytest.fixture(scope='session') @@ -142,7 +151,8 @@ def setup_connection(fake_cluster_headnode): # Because it's been made inside of the container, it has a different owner # than what we're running with on the host, and the host user cannot remove # the files. 
- run_cmd(term, 60, f'rm -rf {REMOTE_SHARED}/*') + if CLEAN_UP_CONTAINERS: + run_cmd(term, 60, f'rm -rf {REMOTE_SHARED}/*') @pytest.fixture(scope='session') diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index b25ccb5a..c65717dc 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -74,6 +74,7 @@ def _make_job(name, mode, remote_test_files, remote_out_dir): job.time_reserved = 60 job.system_out_file = job_dir / 'sysout.txt' job.system_err_file = job_dir / 'syserr.txt' + job.extra_scheduler_options = '--ntasks-per-node=4' return job @@ -121,7 +122,7 @@ def test_single( if mode == 'slurm': job.num_nodes = 1 job.mpi_processes_per_node = 1 - job.extra_scheduler_options = '--ntasks-per-core=1 --nodelist=muscle3-node-0' + job.extra_scheduler_options += ' --nodelist=node-0' job_id = sched.submit(job) assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None @@ -130,10 +131,10 @@ def test_single( output = _get_stdout(remote_out_dir, 'single', mode, 'c1') if mode == 'local': - assert output.split('\n')[0] == 'muscle3-headnode' + assert output.split('\n')[0] == 'headnode' else: node, hwthreads, _ = output.split('\n') - assert node == 'muscle3-node-0' + assert node == 'node-0' assert hwthread_to_core(hwthreads) == [0] @@ -147,7 +148,7 @@ def test_dispatch( if mode == 'slurm': job.num_nodes = 1 job.mpi_processes_per_node = 1 - job.extra_scheduler_options = '--ntasks-per-core=1 --nodelist=muscle3-node-1' + job.extra_scheduler_options += ' --nodelist=node-1' job_id = sched.submit(job) assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None @@ -156,15 +157,15 @@ def test_dispatch( c1_out = _get_stdout(remote_out_dir, 'dispatch', mode, 'c1') c2_out = _get_stdout(remote_out_dir, 'dispatch', mode, 'c2') if mode == 'local': - assert c1_out.split('\n')[0] == 'muscle3-headnode' - assert c2_out.split('\n')[0] == 'muscle3-headnode' + assert c1_out.split('\n')[0] == 'headnode' + assert c2_out.split('\n')[0] == 'headnode' else: node, hwthreads, _ = c1_out.split('\n') - assert node == 'muscle3-node-1' + assert node == 'node-1' assert hwthread_to_core(hwthreads) == [0] node, hwthreads, _ = c2_out.split('\n') - assert node == 'muscle3-node-1' + assert node == 'node-1' assert hwthread_to_core(hwthreads) == [0] @@ -177,7 +178,7 @@ def test_multiple( job = _make_job('multiple', mode, remote_test_files, remote_out_dir) if mode == 'slurm': job.num_nodes = 3 - job.extra_scheduler_options = '--nodelist=muscle3-node-[0-2]' + job.extra_scheduler_options += ' --nodelist=node-[0-2]' job_id = sched.submit(job) assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None @@ -186,10 +187,10 @@ def test_multiple( for i in range(1, 7): out = _get_stdout(remote_out_dir, 'multiple', mode, f'c{i}') if mode == 'local': - assert out.split('\n')[0] == 'muscle3-headnode' + assert out.split('\n')[0] == 'headnode' else: node, hwthreads, _ = out.split('\n') - assert node == f'muscle3-node-{(i - 1) // 2}' + assert node == f'node-{(i - 1) // 2}' assert hwthread_to_core(hwthreads) == [(i - 1) % 2] @@ -197,12 +198,13 @@ def test_multiple( @pytest.mark.parametrize('mode', ['local', 'slurm']) def test_double_mpi( fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): + sched = _sched(fake_cluster, mode) job = _make_job('double_mpi', mode, remote_test_files, remote_out_dir) if mode == 'slurm': job.num_nodes = 2 - job.extra_scheduler_options = 
'--nodelist=muscle3-node-[3-4]' + job.extra_scheduler_options += ' --nodelist=node-[3-4]' job_id = sched.submit(job) assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None @@ -212,8 +214,8 @@ def test_double_mpi( for rank in range(2): out = _get_outfile(remote_out_dir, 'double_mpi', mode, f'c{i}', rank) if mode == 'local': - assert out.split('\n')[0] == 'muscle3-headnode' + assert out.split('\n')[0] == 'headnode' else: node, hwthreads, _ = out.split('\n') - assert node == f'muscle3-node-{i + 2}' + assert node == f'node-{i + 2}' assert hwthread_to_core(hwthreads) == [rank] diff --git a/integration_test/fake_cluster/Dockerfile b/integration_test/fake_cluster/Dockerfile index 523b137e..bc1db68d 100644 --- a/integration_test/fake_cluster/Dockerfile +++ b/integration_test/fake_cluster/Dockerfile @@ -1,39 +1,19 @@ -FROM ghcr.io/naturalhpc/cerulean-test-docker-images/cerulean-fake-slurm-23-11:latest +FROM ghcr.io/naturalhpc/cerulean-fake-slurm-23-11:latest +# FROM naturalhpc/cerulean-fake-slurm-23-11:latest -RUN apt-get update && \ - apt-get remove -y openmpi-bin && \ - apt-get install -y python3-venv gcc g++ gfortran git build-essential xz-utils \ - bzip2 cmake +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi+legacylaunchers+pmi schedulers=slurm ^pmix@3.2.3 ^slurm/dckfty -RUN cd /opt && \ - git clone --depth=100 --branch=releases/v0.22 https://github.com/spack/spack.git +# RUN . /opt/spack/share/spack/setup-env.sh && \ +# . $(spack location -i lmod)/lmod/lmod/init/bash && \ +# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 -RUN . /opt/spack/share/spack/setup-env.sh && \ - spack config add "modules:default:enable:[tcl]" && \ - spack install lmod && \ - echo >>/etc/profile && \ - echo ". $(spack location -i lmod)/lmod/lmod/init/bash" >>/etc/profile && \ - echo ". /opt/spack/share/spack/setup-env.sh" >>/etc/profile - -# OpenMPI uses libmunge from munge, which needs to look for the munge unix socket -# in /run because that's where the apt-get installed munge we're actually running -# puts it. Munge doesn't have a configuration file, but it does have a compiled-in -# constant that can be set when building. So that's what we do here. -RUN bash -l -c 'spack install munge localstatedir=/' -RUN bash -l -c 'spack install openmpi+legacylaunchers+pmi schedulers=slurm' -RUN bash -l -c 'spack install mpich+slurm' -RUN bash -l -c 'spack install intel-oneapi-mpi' - -# Enable Spack when running ssh -c -RUN echo >>/etc/ssh/sshd_config && \ - echo 'SetEnv BASH_ENV=/etc/profile' >>/etc/ssh/sshd_config - -# Point workers to muscle3-headnode -COPY integration_test/fake_cluster/slurm.conf /usr/local/etc/slurm/slurm.conf - -# Replace start-up scripts so we can run nodes separately -COPY integration_test/fake_cluster/start-services.sh /etc/start-services.sh -RUN chmod +x /etc/start-services.sh +# RUN . /opt/spack/share/spack/setup-env.sh && \ +# . 
$(spack location -i lmod)/lmod/lmod/init/bash && \ +# spack install intel-oneapi-mpi ^pmix@3.2.3 + +COPY integration_test/fake_cluster/cgroup.conf /etc/slurm/cgroup.conf # Disable ssh debug output RUN sed -i -e 's/^LogLevel DEBUG3$//' /etc/ssh/sshd_config diff --git a/integration_test/fake_cluster/cgroup.conf b/integration_test/fake_cluster/cgroup.conf new file mode 100644 index 00000000..4c11eb00 --- /dev/null +++ b/integration_test/fake_cluster/cgroup.conf @@ -0,0 +1,6 @@ +IgnoreSystemd=yes +CgroupPlugin=cgroup/v1 +ConstrainSwapSpace=no +ConstrainCores=yes +# ConstrainDevices=yes + diff --git a/integration_test/fake_cluster/slurm.conf b/integration_test/fake_cluster/slurm.conf deleted file mode 100644 index 647b5315..00000000 --- a/integration_test/fake_cluster/slurm.conf +++ /dev/null @@ -1,163 +0,0 @@ -# slurm.conf file generated by configurator.html. -# Put this file on all nodes of your cluster. -# See the slurm.conf man page for more information. -# -ControlMachine=muscle3-headnode -#ControlAddr= -#BackupController= -#BackupAddr= -# -AuthType=auth/munge -#CheckpointType=checkpoint/none -CredType=cred/none -CryptoType=crypto/openssl -JobCredentialPrivateKey=/usr/local/etc/slurm/slurm.key -JobCredentialPublicCertificate=/usr/local/etc/slurm/slurm.cert -#DisableRootJobs=NO -#EnforcePartLimits=NO -#Epilog= -#EpilogSlurmctld= -#FirstJobId=1 -#MaxJobId=999999 -#GresTypes= -#GroupUpdateForce=0 -GroupUpdateTime=2 -#JobCheckpointDir=/var/slurm/checkpoint -#JobCredentialPrivateKey= -#JobCredentialPublicCertificate= -#JobFileAppend=0 -#JobRequeue=1 -#JobSubmitPlugins=1 -#KillOnBadExit=0 -#Licenses=foo*4,bar -# don't send any emails: -MailProg=/bin/true -#MaxJobCount=5000 -#MaxStepCount=40000 -#MaxTasksPerNode=128 -MpiDefault=none -#MpiParams=ports=#-# -#PluginDir= -#PlugStackConfig= -#PrivateData=jobs -ProctrackType=proctrack/linuxproc -#Prolog= -#PrologSlurmctld= -#PropagatePrioProcess=0 -#PropagateResourceLimits= -#PropagateResourceLimitsExcept= -ReturnToService=1 -#SallocDefaultCommand= -#SlurmctldPidFile=/var/run/slurmctld.pid -SlurmctldPort=6817 -SlurmdPidFile=/var/run/slurmd.%n.pid -SlurmdPort=6818 -SlurmdSpoolDir=/var/spool/slurmd.%n -SlurmUser=root -SlurmdUser=root -#SrunEpilog= -#SrunProlog= -StateSaveLocation=/var/spool/slurmctld/state -SwitchType=switch/none -#TaskEpilog= -TaskPlugin=task/cgroup -#TaskPluginParam= -#TaskProlog= -#TopologyPlugin=topology/tree -#TmpFs=/tmp -#TrackWCKey=no -#TreeWidth= -#UnkillableStepProgram= -#UsePAM=0 -# -# -# TIMERS -BatchStartTimeout=2 -#CompleteWait=0 -EpilogMsgTime=1 -#GetEnvTimeout=2 -#HealthCheckInterval=0 -#HealthCheckProgram= -InactiveLimit=0 -KillWait=2 -MessageTimeout=2 -#ResvOverRun=0 -MinJobAge=2 -#OverTimeLimit=0 -SlurmctldTimeout=2 -SlurmdTimeout=2 -#UnkillableStepTimeout=60 -#VSizeFactor=0 -Waittime=0 -# -# -# SCHEDULING -#DefMemPerCPU=0 -#MaxMemPerCPU=0 -#SchedulerRootFilter=1 -SchedulerTimeSlice=5 -SchedulerType=sched/backfill -SchedulerParameters=bf_interval=1,bf_resolution=1,sched_interval=1 -SelectType=select/cons_tres -SelectTypeParameters=CR_Core -# -# -# JOB PRIORITY -#PriorityType=priority/basic -#PriorityDecayHalfLife= -#PriorityCalcPeriod= -#PriorityFavorSmall= -#PriorityMaxAge= -#PriorityUsageResetPeriod= -#PriorityWeightAge= -#PriorityWeightFairshare= -#PriorityWeightJobSize= -#PriorityWeightPartition= -#PriorityWeightQOS= -# -# -# LOGGING AND ACCOUNTING -#AccountingStorageEnforce=0 -AccountingStorageType=accounting_storage/slurmdbd -AccountingStoragePort=6819 -AccountingStorageUser=root -AccountingStoreFlags=job_comment 
-ClusterName=mycluster -#DebugFlags= -#JobCompHost=localhost -#JobCompLoc=slurm_acct_db -JobCompLoc=/var/log/slurm/job_completions -JobCompType=jobcomp/filetxt -#JobCompPass=xenon-slurm-pw -#JobCompPort= -#JobCompUser=root -JobAcctGatherFrequency=2 -JobAcctGatherType=jobacct_gather/linux -SlurmctldDebug=debug5 -#SlurmctldLogFile= -SlurmdDebug=debug3 -SlurmdLogFile=/var/log/slurm/slurmd.%n.log -#SlurmSchedLogFile= -#SlurmSchedLogLevel= -# -# -# POWER SAVE SUPPORT FOR IDLE NODES (optional) -#SuspendProgram= -#ResumeProgram= -#SuspendTimeout= -#ResumeTimeout= -#ResumeRate= -#SuspendExcNodes= -#SuspendExcParts= -#SuspendRate= -#SuspendTime= -# -# -# COMPUTE NODES -NodeName=muscle3-node-0 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-0 Port=17001 State=UNKNOWN -NodeName=muscle3-node-1 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-1 Port=17002 State=UNKNOWN -NodeName=muscle3-node-2 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-2 Port=17003 State=UNKNOWN -NodeName=muscle3-node-3 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-3 Port=17004 State=UNKNOWN -NodeName=muscle3-node-4 Sockets=1 CoresPerSocket=2 ThreadsPerCore=2 NodeAddr=muscle3-node-4 Port=17005 State=UNKNOWN -PartitionName=debug Nodes=muscle3-node-[0-4] Default=YES MaxTime=INFINITE State=UP -PartitionName=batch Nodes=muscle3-node-[0-2] Default=NO MaxTime=INFINITE State=UP diff --git a/integration_test/fake_cluster/start-services.sh b/integration_test/fake_cluster/start-services.sh deleted file mode 100644 index 4f131964..00000000 --- a/integration_test/fake_cluster/start-services.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -echo -e "\nstarting syslog-ng..." -syslog-ng - - -echo -e "\nstarting munged..." -setuser munge /usr/sbin/munged --foreground > /var/log/munged.out.log 2> /var/log/munged.err.log & - -echo -n -e "\nwaiting for munged to start..." -while [ ! -e /run/munge/munge.socket.2 ] ; do - sleep 1 - echo '.' -done -echo - - -NODENAME=$(hostname) - -if [ "a${NODENAME}" == "amuscle3-headnode" ] ; then - # Run as a headnode - echo -e "\nstarting mariadb..." - setuser mysql /usr/bin/mariadbd-safe >/var/log/mariadb.out.log 2>/var/log/mariadb.err.log & - - echo -n -e "\nwaiting for mariadb to start..." - while ! nc -z localhost 3306 ; do - sleep 1 - echo '.' - done - echo - - - echo -e "\nstarting slurmdbd..." - /usr/local/sbin/slurmdbd -D >/var/log/slurmdbd.out.log 2>/var/log/slurmdbd.err.log & - - echo -n -e "\nwaiting for slurmdbd to start..." - while ! nc -z localhost 6819 ; do - sleep 1 - echo '.' - done - echo - - - echo -e "\nstarting slurmctld..." - /usr/local/sbin/slurmctld -D -c -vvvv > /var/log/slurmctld.out.log 2> /var/log/slurmctld.err.log & - - echo -n -e "\nwaiting for slurmctld to start..." - while ! nc -z localhost 6817 ; do - sleep 1 - echo '.' - done - echo - - - echo -e "\nmaking accounting readable to users..." - /bin/chmod -R og+rX /var/log/slurm - -else - # Run as a compute node - - echo -e "\nstarting compute node..." - /usr/local/sbin/slurmd -D -N ${NODENAME} > /var/log/slurmd.out.log 2> /var/log/slurmd.err.log & -fi - -echo -e "\nstarting sshd..." 
-/usr/sbin/sshd -De > /var/log/sshd.out.log 2> /var/log/sshd.err.log & - -echo -e "\nStartup complete" - -sleep infinity - diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py index 08d294a3..aea612e1 100644 --- a/libmuscle/python/libmuscle/native_instantiator/global_resources.py +++ b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -60,7 +60,7 @@ def agent_launch_command(self, agent_cmd: List[str]) -> List[str]: agent_cmd: A command that will start the agent. """ if self.scheduler == Scheduler.SLURM: - return slurm.agent_launch_command(agent_cmd) + return slurm.agent_launch_command(agent_cmd, len(self.nodes)) return agent_cmd diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py index d9685687..f11a0cba 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -280,7 +280,7 @@ def get_cores_per_node() -> List[int]: ' of "sbatch --version" on this cluster.') -def agent_launch_command(agent_cmd: List[str]) -> List[str]: +def agent_launch_command(agent_cmd: List[str], nnodes: int) -> List[str]: """Return a command for launching one agent on each node. Args: @@ -288,4 +288,6 @@ def agent_launch_command(agent_cmd: List[str]) -> List[str]: """ # TODO: On the latest Slurm, there's a special command for this that we should use # if we have that. - return ['srun', '--ntasks-per-node', '1'] + agent_cmd + return [ + 'srun', f'--ntasks={nnodes}', '--ntasks-per-node=1', '--cpu-bind=none' + ] + agent_cmd From a482b3e646ce200604d3ca2a5a9edf538f3476c6 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 22 Nov 2024 15:57:22 +0100 Subject: [PATCH 21/49] Add srun test --- .../{double_mpi.ymmsl => double.ymmsl} | 4 +- integration_test/cluster_test/double_mpi.sh | 12 ----- .../cluster_test/double_openmpi.sh | 12 +++++ .../cluster_test/double_srunmpi.sh | 12 +++++ .../cluster_test/implementations.ymmsl | 7 --- .../implementations_openmpi.ymmsl | 9 ++++ .../implementations_srunmpi.ymmsl | 9 ++++ integration_test/cluster_test/test_cluster.py | 42 ++++++++++++----- libmuscle/python/libmuscle/errors.py | 2 + .../libmuscle/manager/instance_manager.py | 13 ++++-- .../python/libmuscle/manager/instantiator.py | 3 +- .../native_instantiator.py | 31 ++++++------- .../native_instantiator/run_script.py | 46 +++++++++++++++---- 13 files changed, 137 insertions(+), 65 deletions(-) rename integration_test/cluster_test/{double_mpi.ymmsl => double.ymmsl} (80%) delete mode 100755 integration_test/cluster_test/double_mpi.sh create mode 100755 integration_test/cluster_test/double_openmpi.sh create mode 100755 integration_test/cluster_test/double_srunmpi.sh create mode 100644 integration_test/cluster_test/implementations_openmpi.ymmsl create mode 100644 integration_test/cluster_test/implementations_srunmpi.ymmsl create mode 100644 libmuscle/python/libmuscle/errors.py diff --git a/integration_test/cluster_test/double_mpi.ymmsl b/integration_test/cluster_test/double.ymmsl similarity index 80% rename from integration_test/cluster_test/double_mpi.ymmsl rename to integration_test/cluster_test/double.ymmsl index 9d04b238..16f9094f 100644 --- a/integration_test/cluster_test/double_mpi.ymmsl +++ b/integration_test/cluster_test/double.ymmsl @@ -7,12 +7,12 @@ model: ports: o_i: inter_out s: inter_in - implementation: component_cpp_openmpi + implementation: component_cpp c2: 
ports: o_i: inter_out s: inter_in - implementation: component_cpp_openmpi + implementation: component_cpp conduits: c1.inter_out: c2.inter_in diff --git a/integration_test/cluster_test/double_mpi.sh b/integration_test/cluster_test/double_mpi.sh deleted file mode 100755 index 1357283b..00000000 --- a/integration_test/cluster_test/double_mpi.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -set -e - -env - -source /home/cerulean/shared/venv/bin/activate - -CT=/home/cerulean/shared/cluster_test - -muscle_manager --log-level=DEBUG --start-all $CT/double_mpi.ymmsl $CT/settings.ymmsl $CT/implementations.ymmsl - diff --git a/integration_test/cluster_test/double_openmpi.sh b/integration_test/cluster_test/double_openmpi.sh new file mode 100755 index 00000000..12e117b8 --- /dev/null +++ b/integration_test/cluster_test/double_openmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/double.ymmsl $CT/settings.ymmsl $CT/implementations_openmpi.ymmsl + diff --git a/integration_test/cluster_test/double_srunmpi.sh b/integration_test/cluster_test/double_srunmpi.sh new file mode 100755 index 00000000..2e7dbbf4 --- /dev/null +++ b/integration_test/cluster_test/double_srunmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/double.ymmsl $CT/settings.ymmsl $CT/implementations_srunmpi.ymmsl + diff --git a/integration_test/cluster_test/implementations.ymmsl b/integration_test/cluster_test/implementations.ymmsl index 6dab9d57..df88e24d 100644 --- a/integration_test/cluster_test/implementations.ymmsl +++ b/integration_test/cluster_test/implementations.ymmsl @@ -6,10 +6,3 @@ implementations: executable: python args: - /home/cerulean/shared/cluster_test/component.py - - component_cpp_openmpi: - modules: openmpi - env: - +LD_LIBRARY_PATH: /home/cerulean/shared/muscle3-openmpi/lib - execution_model: openmpi - executable: /home/cerulean/shared/cluster_test/component_openmpi diff --git a/integration_test/cluster_test/implementations_openmpi.ymmsl b/integration_test/cluster_test/implementations_openmpi.ymmsl new file mode 100644 index 00000000..4a0d1dab --- /dev/null +++ b/integration_test/cluster_test/implementations_openmpi.ymmsl @@ -0,0 +1,9 @@ +ymmsl_version: v0.1 + +implementations: + component_cpp: + modules: openmpi + env: + +LD_LIBRARY_PATH: /home/cerulean/shared/muscle3-openmpi/lib + execution_model: openmpi + executable: /home/cerulean/shared/cluster_test/component_openmpi diff --git a/integration_test/cluster_test/implementations_srunmpi.ymmsl b/integration_test/cluster_test/implementations_srunmpi.ymmsl new file mode 100644 index 00000000..0ccf1265 --- /dev/null +++ b/integration_test/cluster_test/implementations_srunmpi.ymmsl @@ -0,0 +1,9 @@ +ymmsl_version: v0.1 + +implementations: + component_cpp: + modules: openmpi + env: + +LD_LIBRARY_PATH: /home/cerulean/shared/muscle3-openmpi/lib + execution_model: srunmpi + executable: /home/cerulean/shared/cluster_test/component_openmpi diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index c65717dc..b350edbc 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -60,14 +60,13 @@ def remote_out_dir(remote_home): return remote_home / 'test_results' -def _make_job(name, 
mode, remote_test_files, remote_out_dir): - job_dir = remote_out_dir / f'test_{name}_{mode}' +def _make_base_job(name, remote_out_dir, dir_name): + job_dir = remote_out_dir / dir_name job_dir.mkdir(0o755, True, True) job = cerulean.JobDescription() job.name = name job.working_directory = job_dir - job.command = str(remote_test_files / f'{name}.sh') job.stdout_file = job_dir / 'stdout.txt' job.stderr_file = job_dir / 'stderr.txt' job.queue_name = 'debug' @@ -79,6 +78,18 @@ def _make_job(name, mode, remote_test_files, remote_out_dir): return job +def _make_job(name, mode, remote_test_files, remote_out_dir): + job = _make_base_job(name, remote_out_dir, f'test_{name}_{mode}') + job.command = str(remote_test_files / f'{name}.sh') + return job + + +def _make_mpi_job(name, mode, execution_model, remote_test_files, remote_out_dir): + job = _make_base_job(name, remote_out_dir, f'test_{name}_{mode}_{execution_model}') + job.command = str(remote_test_files / f'{name}_{execution_model}.sh') + return job + + def _sched(fake_cluster, mode): if mode == 'local': return fake_cluster[2] @@ -86,8 +97,10 @@ def _sched(fake_cluster, mode): return fake_cluster[3] -def run_cmd_dir(remote_out_dir, testname, mode): +def _run_cmd_dir(remote_out_dir, testname, mode, execution_model=None): results_name = f'test_{testname}_{mode}' + if execution_model is not None: + results_name += f'_{execution_model}' for p in (remote_out_dir / results_name).iterdir(): if p.name.startswith('run_'): @@ -95,14 +108,14 @@ def run_cmd_dir(remote_out_dir, testname, mode): def _get_stdout(remote_out_dir, testname, mode, instance): - run_dir = run_cmd_dir(remote_out_dir, testname, mode) + run_dir = _run_cmd_dir(remote_out_dir, testname, mode) stdout_file = run_dir / 'instances' / instance / 'stdout.txt' assert stdout_file.exists() # test output redirection return stdout_file.read_text() -def _get_outfile(remote_out_dir, testname, mode, instance, rank): - run_dir = run_cmd_dir(remote_out_dir, testname, mode) +def _get_outfile(remote_out_dir, testname, mode, execution_model, instance, rank): + run_dir = _run_cmd_dir(remote_out_dir, testname, mode, execution_model) work_dir = run_dir / 'instances' / instance / 'workdir' out_file = work_dir / f'out_{rank}.txt' assert out_file.exists() # test working directory @@ -196,12 +209,18 @@ def test_multiple( @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -def test_double_mpi( - fake_cluster, remote_test_files, remote_out_dir, mode, hwthread_to_core): +@pytest.mark.parametrize('execution_model', ['openmpi', 'srunmpi']) +def test_double( + fake_cluster, remote_test_files, remote_out_dir, hwthread_to_core, + mode, execution_model): + + if mode == 'local' and execution_model == 'srunmpi': + pytest.skip('srun does not work without slurm') sched = _sched(fake_cluster, mode) - job = _make_job('double_mpi', mode, remote_test_files, remote_out_dir) + job = _make_mpi_job( + 'double', mode, execution_model, remote_test_files, remote_out_dir) if mode == 'slurm': job.num_nodes = 2 job.extra_scheduler_options += ' --nodelist=node-[3-4]' @@ -212,7 +231,8 @@ def test_double_mpi( for i in range(1, 3): for rank in range(2): - out = _get_outfile(remote_out_dir, 'double_mpi', mode, f'c{i}', rank) + out = _get_outfile( + remote_out_dir, 'double', mode, execution_model, f'c{i}', rank) if mode == 'local': assert out.split('\n')[0] == 'headnode' else: diff --git a/libmuscle/python/libmuscle/errors.py b/libmuscle/python/libmuscle/errors.py new file mode 100644 index 00000000..9e819602 --- /dev/null 
+++ b/libmuscle/python/libmuscle/errors.py @@ -0,0 +1,2 @@ +class ConfigurationError(Exception): + """Signals an issue with the user's configuration.""" diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 31fc5edb..9d7cf90d 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -8,6 +8,7 @@ from ymmsl import Configuration, Reference +from libmuscle.errors import ConfigurationError from libmuscle.manager.instance_registry import InstanceRegistry from libmuscle.manager.instantiator import ( CancelAllRequest, CrashedResult, InstantiatorRequest, @@ -160,8 +161,7 @@ def get_resources(self) -> Dict[Reference, Resources]: """ if self._allocations is None: raise RuntimeError( - 'Tried to get resources but we are running without' - ' --start-all') + 'Tried to get resources but we are running without --start-all') return self._allocations @@ -182,9 +182,12 @@ def cancel_all() -> None: result = self._results_in.get() if isinstance(result, CrashedResult): - _logger.error( - 'Instantiator crashed. This should not happen, please file' - ' a bug report.') + if isinstance(result.exception, ConfigurationError): + _logger.error(str(result.exception)) + else: + _logger.error( + 'Instantiator crashed. This should not happen, please file' + ' a bug report.') return False results.append(result) diff --git a/libmuscle/python/libmuscle/manager/instantiator.py b/libmuscle/python/libmuscle/manager/instantiator.py index db83a52d..798482e0 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -113,7 +113,8 @@ class CancelAllRequest(InstantiatorRequest): class CrashedResult: """Signals that the instantiator process crashed.""" - pass + def __init__(self, exception: Optional[Exception] = None) -> None: + self.exception = exception class QueueingLogHandler(logging.Handler): diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index 391d89fe..0de23936 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -196,6 +196,7 @@ import traceback from typing import Dict, List, Optional +from libmuscle.errors import ConfigurationError from libmuscle.manager.instantiator import ( CancelAllRequest, CrashedResult, create_instance_env, InstantiationRequest, Process, ProcessStatus, reconfigure_logging, ShutdownRequest) @@ -245,11 +246,16 @@ def run(self) -> None: self._send_resources() self._main() + except ConfigurationError as e: + self._results_out.put(CrashedResult(e)) + except: # noqa for line in traceback.format_exception(*sys.exc_info()): _logger.error(line) - self._resources_out.put(CrashedResult()) - self._results_out.put(CrashedResult()) + + result = CrashResult(sys.exc_info()[1]) + self._resources_out.put(result) + self._results_out.put(result) def _main(self) -> None: """Main function for the background process. 
@@ -352,13 +358,16 @@ def _instantiate(self, request: InstantiationRequest) -> None: env = create_instance_env(request.instance, request.implementation.env) self._add_resources(env, request.res_req) - rankfile: Optional[Path] = None + rankfile = request.instance_dir / 'rankfile' + if global_resources.on_cluster(): rankfile_contents, resource_env = prep_resources( - request.implementation.execution_model, request.resources) + request.implementation.execution_model, request.resources, + rankfile) if rankfile_contents: - rankfile = self._write_rankfile(request, rankfile_contents) + with rankfile.open('w') as f: + f.write(rankfile_contents) env['MUSCLE_RANKFILE'] = str(rankfile) env.update(resource_env) @@ -381,18 +390,6 @@ def _instantiate(self, request: InstantiationRequest) -> None: self._processes[name].status = ProcessStatus.ERROR self._processes[name].error_msg = f'Instance failed to start: {e}' - def _write_rankfile(self, request: InstantiationRequest, rankfile: str) -> Path: - """Create and write out the rankfile and return its location. - - Also known as a machinefile or hostfile depending on the MPI implementation. - """ - rankfile_file = request.instance_dir / 'rankfile' - - with rankfile_file.open('w') as f: - f.write(rankfile) - - return rankfile_file - def _write_run_script( self, request: InstantiationRequest, rankfile: Optional[Path]) -> Path: """Create and write out the run script and return its location.""" diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index 1c615823..cb8c002f 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -1,6 +1,7 @@ from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Dict, FrozenSet, List, Optional, Tuple +from libmuscle.errors import ConfigurationError from libmuscle.planner.planner import Resources from ymmsl import ( ExecutionModel, Implementation, MPICoresResReq, MPINodesResReq, @@ -83,29 +84,45 @@ def mpich_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: raise NotImplementedError() -def srun_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: +def srun_prep_resources( + resources: Resources, rankfile_location: Path) -> Tuple[str, Dict[str, str]]: """Create resource description for srun Args: resources: The resources to describe + rankfile_location: Location where the rankfile will be written Return: The contents of the hostfile, and a set of environment variables """ - # SLURM_HOSTFILE to point to the rankfile - # CPU_BIND=verbose,mask_cpu=0x01,0x02,0x04,0x01 to specify cores 0,1,2,0 for ranks - # 0-3 - raise NotImplementedError() + hostfile = '\n'.join(( + node for node, cores in resources.cores.items() for _ in cores)) + + env = {'SLURM_HOSTFILE': str(rankfile_location)} + + bind_list = [ + core for _, cores in resources.cores.items() for core in cores] + + def core_mask(core: FrozenSet[int]) -> str: + mask = sum((1 << hwthread) for hwthread in core) + return format(mask, '#x') + + bind_str = ','.join(map(core_mask, bind_list)) + + env['SLURM_CPU_BIND'] = f'verbose,mask_cpu:{bind_str}' + + return hostfile, env def prep_resources( - model: ExecutionModel, resources: Resources + model: ExecutionModel, resources: Resources, rankfile_location: Path ) -> Tuple[str, Dict[str, str]]: """Create resource description for the given execution model. 
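+
+    For the SRUNMPI model, for example (illustrative values only), a component
+    whose two processes are pinned to cores 0 and 1 of node-1 gets a hostfile
+    listing node-1 twice and SLURM_CPU_BIND masks 0x1,0x2.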
Args: model: The execution model to generate a description for resources: The resources to describe + rankfile_location: Path to where the rankfile will be written Return: The contents of the rank/machine/hostfile, and a set of environment variables. @@ -117,7 +134,7 @@ def prep_resources( elif model == ExecutionModel.INTELMPI: return impi_prep_resources(resources) elif model == ExecutionModel.SRUNMPI: - return srun_prep_resources(resources) + return srun_prep_resources(resources, rankfile_location) # elif model == ExecutionModel.MPICH: # return mpich_prep_resources(resources) raise RuntimeError( @@ -163,7 +180,13 @@ def local_command(implementation: Implementation) -> str: elif implementation.execution_model == ExecutionModel.INTELMPI: fstr = 'mpirun -n $MUSCLE_MPI_PROCESSES {command} {args}' elif implementation.execution_model == ExecutionModel.SRUNMPI: - fstr = 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary {command} {args}' + raise ConfigurationError( + f'Could not start {implementation.name} because the SRUNMPI execution' + ' method only works in a SLURM allocation, and we are running locally.' + ' Please switch this implementation to a different execution method' + ' in the configuration file. You will probably want OPENMPI or' + ' INTELMPI depending on which MPI implementation this code was' + ' compiled with.') # elif implementation.execution_model == ExecutionModel.MPICH # fstr = 'mpiexec -n {{ntasks}} {command} {args}' @@ -213,7 +236,10 @@ def cluster_command(implementation: Implementation) -> str: 'mpirun -n $MUSCLE_MPI_PROCESSES -machinefile $MUSCLE_RANKFILE' ' {command} {args}') elif implementation.execution_model == ExecutionModel.SRUNMPI: - fstr = 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary {command} {args}' + # TODO: set SLURM_CPU_BIND_VERBOSE for verbose output + fstr = ( + 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary --overlap' + ' --cpu-bind=$SLURM_CPU_BIND {command} {args}') # elif implementation.execution_model == ExecutionModel.MPICH # fstr = 'mpiexec -n $MUSCLE_MPI_PROCESSES -f $MUSCLE_RANKFILE {command} {args}' From cbee20270410e50e313d5b8d91edabf0cd7e0afd Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 22 Nov 2024 15:57:47 +0100 Subject: [PATCH 22/49] Improve agent manager debug output --- .../python/libmuscle/native_instantiator/agent_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py index 2e5aa361..c3a29fcc 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -57,7 +57,7 @@ def __init__(self, agent_dir: Path) -> None: sleep(0.1) with self._resources_lock: resources_complete = len(self._nodes) == len(expected_nodes) - _logger.debug(f'{len(self._resources)} agents up') + _logger.debug(f'{len(self._nodes)} agents up of {len(expected_nodes)}') if self._agents_process.poll() is not None: msg = ( @@ -163,6 +163,7 @@ def report_resources(self, node_id: str, resources: Dict[str, Any]) -> None: node_id: Id of the node these resources are on resources: Dict mapping resource type to resource ids """ + _logger.debug(f'Agent on {node_id} reported {resources}') with self._resources_lock: self._nodes.append(node_id) self._resources[node_id] = resources @@ -202,4 +203,5 @@ def _launch_agents(self, agent_dir: Path, server_location: str) -> Popen: args = global_resources.agent_launch_command(args) + _logger.debug(f'Launching 
agents using {args}') return Popen(args, cwd=agent_dir) From edb63ac6ac4c8d52571d2cf49a1d01695f24cc87 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Fri, 29 Nov 2024 13:20:17 +0100 Subject: [PATCH 23/49] Rearrange tests to enable multiple SLURM and MPI versions --- integration_test/cluster_test/conftest.py | 217 +++++++++++------- integration_test/cluster_test/test_cluster.py | 31 +-- integration_test/fake_cluster/Dockerfile | 32 ++- 3 files changed, 171 insertions(+), 109 deletions(-) diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index f6d6e6d4..19eb7ebe 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -11,8 +11,11 @@ logger_ = logging.getLogger(__name__) +IMAGE_NAME = 'muscle3_test_cluster' + REMOTE_SHARED = '/home/cerulean/shared' +IDX_SLURM_VERSIONS = list(enumerate(['23-11'])) # Shut down the containers after running the tests. Set to False to debug. CLEAN_UP_CONTAINERS = True @@ -41,16 +44,20 @@ def local_fs(): return cerulean.LocalFileSystem() +@pytest.fixture(scope='session') +def repo_root(local_fs): + root_dir = Path(__file__).parents[2] + return local_fs / str(root_dir) + + @pytest.fixture(scope='session') def fake_cluster_image(local_term): - IMAGE_NAME = 'muscle3_test_cluster' run_cmd(local_term, 5400, ( f'docker buildx build -t {IMAGE_NAME}' ' -f integration_test/fake_cluster/Dockerfile .')) - return IMAGE_NAME -def ssh_term(timeout_msg): +def ssh_term(port, timeout_msg): cred = cerulean.PasswordCredential('cerulean', 'kingfisher') ready = False start = time.monotonic() @@ -59,7 +66,7 @@ def ssh_term(timeout_msg): raise Exception(timeout_msg) try: - term = cerulean.SshTerminal('localhost', 10022, cred) + term = cerulean.SshTerminal('localhost', port, cred) ready = True except Exception: time.sleep(3.0) @@ -78,93 +85,55 @@ def shared_dir(): @pytest.fixture(scope='session') def cleanup_docker(local_term): - for i in range(5): - node_name = f'node-{i}' - run_cmd(local_term, 60, f'docker rm -f {node_name}') + for _, slurm_version in IDX_SLURM_VERSIONS: + _clean_up_base_cluster(local_term, slurm_version) - run_cmd(local_term, 60, 'docker rm -f headnode') - run_cmd(local_term, 60, 'docker network rm -f muscle3-net') - -@pytest.fixture(scope='session') -def fake_cluster_network(local_term, cleanup_docker): - name = 'muscle3-net' +def _create_network(local_term, slurm_version): + name = f'muscle3-net-{slurm_version}' run_cmd(local_term, 60, f'docker network create {name}') - yield name - - if CLEAN_UP_CONTAINERS: - run_cmd(local_term, 60, 'docker network rm -f muscle3-net') + return name -@pytest.fixture(scope='session') -def fake_cluster_nodes( - local_term, fake_cluster_image, fake_cluster_network, shared_dir): - - node_names = list() - +def _start_nodes(local_term, slurm_version, net_name, shared_dir): for i in range(5): node_name = f'node-{i}' - ssh_port = 10030 + i run_cmd(local_term, 60, ( - f'docker run -d --name={node_name} --hostname={node_name}' - f' --network={fake_cluster_network} -p {ssh_port}:22' - f' --cap-add=CAP_SYS_NICE' + f'docker run -d --name={node_name}-{slurm_version} --hostname={node_name}' + f' --network={net_name} --cap-add=CAP_SYS_NICE' + f' --env SLURM_VERSION={slurm_version}' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' - f' {fake_cluster_image}')) - - node_names.append(node_name) + f' {IMAGE_NAME}')) - yield None - - if CLEAN_UP_CONTAINERS: - run_cmd(local_term, 60, f'docker rm -f {" ".join(node_names)}') - - 
-@pytest.fixture(scope='session') -def fake_cluster_headnode( - local_term, fake_cluster_image, fake_cluster_network, fake_cluster_nodes, - shared_dir): +def _start_headnode(local_term, slurm_version, net_name, shared_dir, headnode_port): run_cmd(local_term, 60, ( - 'docker run -d --name=headnode --hostname=headnode' - f' --network={fake_cluster_network} -p 10022:22' + f'docker run -d --name=headnode-{slurm_version} --hostname=headnode' + f' --network={net_name} -p {headnode_port}:22' + f' --env SLURM_VERSION={slurm_version}' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' - f' {fake_cluster_image}')) + f' {IMAGE_NAME}')) - ssh_term('Virtual cluster container start timed out') - yield None + ssh_term(headnode_port, 'Virtual cluster container start timed out') - if CLEAN_UP_CONTAINERS: - run_cmd(local_term, 60, 'docker rm -f headnode') +def _start_base_cluster(local_term, idx_slurm_version, shared_dir): + slurm_index, slurm_version = idx_slurm_version -@pytest.fixture(scope='session') -def setup_connection(fake_cluster_headnode): - # Session-wide connection used for container setup actions only - # Tests each have their own connection, see fake_cluster() below - term = ssh_term('Connection to virtual cluster container timed out') - with cerulean.SftpFileSystem(term, True) as fs: - yield term, fs - - # We abuse this to clean up the contents of the shared directory. - # Because it's been made inside of the container, it has a different owner - # than what we're running with on the host, and the host user cannot remove - # the files. - if CLEAN_UP_CONTAINERS: - run_cmd(term, 60, f'rm -rf {REMOTE_SHARED}/*') + headnode_port = 10022 + slurm_index + net_name = _create_network(local_term, slurm_version) + _start_nodes(local_term, slurm_version, net_name, shared_dir) + _start_headnode(local_term, slurm_version, net_name, shared_dir, headnode_port) -@pytest.fixture(scope='session') -def repo_root(local_fs): - root_dir = Path(__file__).parents[2] - return local_fs / str(root_dir) + term = ssh_term(headnode_port, 'Connection to virtual cluster container timed out') + fs = cerulean.SftpFileSystem(term, False) + return term, fs, headnode_port -@pytest.fixture(scope='session') -def remote_source(repo_root, setup_connection): - remote_term, remote_fs = setup_connection +def _install_remote_source(repo_root, remote_term, remote_fs): muscle3_tgt = remote_fs / 'home' / 'cerulean' / 'muscle3' muscle3_tgt.mkdir() (muscle3_tgt / 'libmuscle').mkdir() @@ -178,10 +147,7 @@ def remote_source(repo_root, setup_connection): return muscle3_tgt -@pytest.fixture(scope='session') -def muscle3_venv(repo_root, remote_source, setup_connection): - remote_term, remote_fs = setup_connection - +def _create_muscle3_venv(remote_term, remote_source): run_cmd(remote_term, 10, f'python3 -m venv {REMOTE_SHARED}/venv') in_venv = f'source {REMOTE_SHARED}/venv/bin/activate && ' @@ -192,21 +158,116 @@ def muscle3_venv(repo_root, remote_source, setup_connection): return in_venv -@pytest.fixture(scope='session') -def muscle3_native_openmpi(remote_source, setup_connection): - remote_term, remote_fs = setup_connection - +def _install_muscle3_native_openmpi( + remote_source, remote_term, remote_fs, slurm_version): prefix = remote_fs / REMOTE_SHARED / 'muscle3-openmpi' prefix.mkdir() + openmpi_hash = run_cmd(remote_term, 600, ( + '/bin/bash -c "' + 'for phash in $(/opt/spack/bin/spack find --format \\"{hash}\\" openmpi' + ' | tr \'\\n\' \' \') ; do' + ' if /opt/spack/bin/spack find --deps /\\${phash} |' + f' grep -q 
slurm@{slurm_version} ; then' + ' echo \\${phash} ;' + ' fi ;' + 'done' + '"')) + + openmpi_version = run_cmd(remote_term, 600, ( + '/bin/bash -c "' + f'/opt/spack/bin/spack find --format \\"{{version}}\\" /{openmpi_hash}' + '"')).strip() + + module_name = f'openmpi/{openmpi_version}-gcc-11.4.0-{openmpi_hash[:7]}' + + logger_.info(f'Slurm {slurm_version} and module {module_name}') + run_cmd(remote_term, 600, ( f'/bin/bash -l -c "' - f'module load openmpi && ' + f'module load {module_name} && ' f'cd {remote_source} && ' f'make distclean && ' f'PREFIX={prefix} make install"')) - return prefix + return prefix, module_name + + +def _install_muscle3(repo_root, remote_term, remote_fs, slurm_version): + remote_source = _install_remote_source(repo_root, remote_term, remote_fs) + in_venv = _create_muscle3_venv(remote_term, remote_source) + return _install_muscle3_native_openmpi( + remote_source, remote_term, remote_fs, slurm_version) + + +def _install_tests(repo_root, remote_term, remote_fs, remote_m3_openmpi): + remote_home = remote_fs / REMOTE_SHARED + remote_m3, openmpi_module = remote_m3_openmpi + + cerulean.copy( + repo_root / 'integration_test' / 'cluster_test', remote_home, + copy_permissions=True) + + remote_source = remote_home / 'cluster_test' + + run_cmd(remote_term, 30, ( + '/bin/bash -c "' + f'sed -i \\"s^modules: openmpi^modules: {openmpi_module}^\\"' + f' {remote_source}/implementations_openmpi.ymmsl' + '"')) + + run_cmd(remote_term, 30, ( + '/bin/bash -c "' + f'sed -i \\"s^modules: openmpi^modules: {openmpi_module}^\\"' + f' {remote_source}/implementations_srunmpi.ymmsl' + '"')) + + run_cmd(remote_term, 30, ( + f'/bin/bash -l -c "' + f'module load {openmpi_module} && ' + f'. {remote_m3}/bin/muscle3.env && ' + f'make -C {remote_source}"')) + + +def _clean_up_base_cluster(local_term, slurm_version): + node_names = [f'node-{i}-{slurm_version}' for i in range(5)] + run_cmd(local_term, 60, f'docker rm -f {" ".join(node_names)}') + + run_cmd(local_term, 60, f'docker rm -f headnode-{slurm_version}') + + net_name = f'muscle3-net-{slurm_version}' + run_cmd(local_term, 60, f'docker network rm -f {net_name}') + + +@pytest.fixture(scope='session', params=IDX_SLURM_VERSIONS) +def installed_cluster( + request, cleanup_docker, fake_cluster_image, shared_dir, + repo_root, local_term): + + slurm_version = request.param[1] + local_shared_dir = shared_dir / slurm_version + local_shared_dir.mkdir() + local_shared_dir.chmod(0o1777) + + remote_term, remote_fs, headnode_port = _start_base_cluster( + local_term, request.param, local_shared_dir) + remote_m3_openmpi = _install_muscle3( + repo_root, remote_term, remote_fs, slurm_version) + _install_tests(repo_root, remote_term, remote_fs, remote_m3_openmpi) + + yield headnode_port + + # Because it's been made inside of the container, the shared directory has a + # different owner than what we're running with on the host, and the host user cannot + # remove the files. 
So we do it here from inside the container + if CLEAN_UP_CONTAINERS: + run_cmd(remote_term, 60, f'rm -rf {REMOTE_SHARED}/*') + + remote_fs.close() + remote_term.close() + + if CLEAN_UP_CONTAINERS: + _clean_up_base_cluster(local_term, slurm_version) @pytest.fixture(scope='session') diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index b350edbc..b9b08b6e 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -9,35 +9,10 @@ logger_ = logging.getLogger(__name__) -@pytest.fixture(scope='session') -def copy_test_files(repo_root, setup_connection): - remote_term, remote_fs = setup_connection - remote_home = remote_fs / REMOTE_SHARED - - cerulean.copy( - repo_root / 'integration_test' / 'cluster_test', remote_home, - copy_permissions=True) - - return remote_home / 'cluster_test' - - -@pytest.fixture(scope='session') -def build_native_components( - muscle3_native_openmpi, setup_connection, copy_test_files): - remote_term, remote_fs = setup_connection - remote_source = copy_test_files - - run_cmd(remote_term, 30, ( - f"/bin/bash -l -c '" - f"module load openmpi && " - f". {muscle3_native_openmpi}/bin/muscle3.env && " - f"make -C {remote_source}'")) - - @pytest.fixture -def fake_cluster( - fake_cluster_headnode, muscle3_venv, build_native_components, copy_test_files): - term = ssh_term('Connection to virtual cluster container timed out') +def fake_cluster(installed_cluster): + headnode_port = installed_cluster + term = ssh_term(headnode_port, 'Connection to virtual cluster container timed out') with cerulean.SftpFileSystem(term, True) as fs: local_sched = cerulean.DirectGnuScheduler(term) slurm_sched = cerulean.SlurmScheduler(term) diff --git a/integration_test/fake_cluster/Dockerfile b/integration_test/fake_cluster/Dockerfile index bc1db68d..16561062 100644 --- a/integration_test/fake_cluster/Dockerfile +++ b/integration_test/fake_cluster/Dockerfile @@ -1,9 +1,35 @@ -FROM ghcr.io/naturalhpc/cerulean-fake-slurm-23-11:latest -# FROM naturalhpc/cerulean-fake-slurm-23-11:latest +FROM ghcr.io/naturalhpc/cerulean-fake-slurm-base:latest +# FROM naturalhpc/cerulean-fake-slurm-base:latest RUN . /opt/spack/share/spack/setup-env.sh && \ . $(spack location -i lmod)/lmod/lmod/init/bash && \ - spack install openmpi+legacylaunchers+pmi schedulers=slurm ^pmix@3.2.3 ^slurm/dckfty + spack install openmpi@3.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@20-11 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@20-11) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@3.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@21-08 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@21-08) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@3.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@22-05 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@22-05) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@3.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@23-02 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@23-02) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . 
$(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@4.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@23-11 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@23-11) # RUN . /opt/spack/share/spack/setup-env.sh && \ # . $(spack location -i lmod)/lmod/lmod/init/bash && \ From 75909083299f5f9ba15704ad7335a09c58430b5e Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 19:27:55 +0100 Subject: [PATCH 24/49] Fix mpirun/srun/agents resource collision on SLURM <= 21-08 --- .../python/libmuscle/native_instantiator/run_script.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index cb8c002f..042f897d 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -229,6 +229,13 @@ def cluster_command(implementation: Implementation) -> str: ' --rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --oversubscribe' # ' --map-by rankfile:file=$MUSCLE_RANKFILE:oversubscribe' # ' --display-map --display-allocation {command} {args}' + + # This adds the given option to the srun command used by mpirun to + # launch its daemons. mpirun specifies --exclusive, which on SLURM <= + # 21-08 causes SLURM to wait for our agents to quit, as it considers + # them to be occupying the cores, causing a deadlock. Fortunately, it + # seems that adding --overlap overrides the --exclusive and it works. + ' -mca plm_slurm_args "--overlap"' ' --bind-to core --display-map --display-allocation {command} {args}' ) elif implementation.execution_model == ExecutionModel.INTELMPI: From cda6c6501107ccb091dac59ee8923d9d2f4158a0 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 19:44:32 +0100 Subject: [PATCH 25/49] Fix number of agents launched on SLURM <= 23-02 --- .../python/libmuscle/native_instantiator/slurm.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py index f11a0cba..0c726cd2 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -287,7 +287,12 @@ def agent_launch_command(agent_cmd: List[str], nnodes: int) -> List[str]: agent_cmd: A command that will start the agent. """ # TODO: On the latest Slurm, there's a special command for this that we should use - # if we have that. + # if we have that, --external-launcher. Poorly documented though, so will require + # some experimentation. + + # On SLURM <= 23-02, the number of tasks is inherited by srun from sbatch rather + # than calculated anew from --nodes and --ntasks-per-node, so we specify it + # explicitly to avoid getting an agent per logical cpu rather than per node. 
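    # Illustrative usage (not part of this patch), with placeholder values for the
    # agent command and its arguments, of the list assembled just below:
    #
    #     agent_cmd = ['python3', '-m', 'libmuscle.native_instantiator.agent',
    #                  '<server_location>', '<log_level>']
    #     ' '.join(agent_launch_command(agent_cmd, 4))
    #     # 'srun --nodes=4 --ntasks=4 --ntasks-per-node=1 --overlap python3 -m ...'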
return [ - 'srun', f'--ntasks={nnodes}', '--ntasks-per-node=1', '--cpu-bind=none' - ] + agent_cmd + 'srun', f'--nodes={nnodes}', f'--ntasks={nnodes}', '--ntasks-per-node=1', + '--overlap'] + agent_cmd From 12dedf60d7995122066de180bd13cab311e24812 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 20:02:27 +0100 Subject: [PATCH 26/49] Improve agent launch logging and error handling --- .../native_instantiator/agent_manager.py | 87 +++++++++++-------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py index c3a29fcc..c42b96c0 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -45,39 +45,7 @@ def __init__(self, agent_dir: Path) -> None: self._finished_processes_lock = Lock() self._server = MAPServer(self) - - _logger.info('Launching MUSCLE agents...') - self._agents_process = self._launch_agents( - agent_dir, self._server.get_location()) - - expected_nodes = global_resources.nodes - - resources_complete = False - while not resources_complete: - sleep(0.1) - with self._resources_lock: - resources_complete = len(self._nodes) == len(expected_nodes) - _logger.debug(f'{len(self._nodes)} agents up of {len(expected_nodes)}') - - if self._agents_process.poll() is not None: - msg = ( - 'Agents unexpectedly stopped running. This is not supposed' - ' to happen. Please see the agent log for more information,' - ' and please file an issue on GitHub.') - _logger.error(msg) - raise RuntimeError(msg) - - _logger.info(f'All agents running on {self._nodes}') - - if sorted(expected_nodes) != sorted(self._nodes): - _logger.error( - 'Agent-reported node hostnames do not match what we got from the' - ' resource manager.') - _logger.error( - 'According to the resource manager, we have' - f' {sorted(expected_nodes)}') - _logger.error( - f'The agents are reporting {sorted(self._nodes)}') + self._launch_agents(agent_dir, self._server.get_location()) def get_resources(self) -> Dict[str, List[FrozenSet[int]]]: """Return detected resources. @@ -149,6 +117,8 @@ def shutdown(self) -> None: try: self._agents_process.wait(10) + self._agents_stdout.close() + self._agents_stderr.close() except TimeoutExpired: _logger.warning('Agents still not down, continuing shutdown anyway.') @@ -179,7 +149,7 @@ def report_result(self, names_exit_codes: List[Tuple[str, int]]) -> None: with self._finished_processes_lock: self._finished_processes.extend(names_exit_codes) - def _launch_agents(self, agent_dir: Path, server_location: str) -> Popen: + def _launch_agents(self, agent_dir: Path, server_location: str) -> None: """Actually launch the agents. 
This runs a local process, either to start a single agent locally, or on a @@ -190,6 +160,8 @@ def _launch_agents(self, agent_dir: Path, server_location: str) -> Popen: server_location: MAPServer network location string for the agents to connect to """ + _logger.info('Launching MUSCLE agents...') + python = sys.executable if not python: raise RuntimeError( @@ -203,5 +175,50 @@ def _launch_agents(self, agent_dir: Path, server_location: str) -> Popen: args = global_resources.agent_launch_command(args) + self._agents_stdout = (agent_dir / 'agent_launch.out').open('a') + self._agents_stderr = (agent_dir / 'agent_launch.err').open('a') + _logger.debug(f'Launching agents using {args}') - return Popen(args, cwd=agent_dir) + self._agents_process = Popen( + args, cwd=agent_dir, stdout=self._agents_stdout, + stderr=self._agents_stderr) + + expected_nodes = global_resources().nodes + + resources_complete = False + while not resources_complete: + sleep(0.1) + with self._resources_lock: + resources_complete = len(self._nodes) == len(expected_nodes) + too_many_agents = len(self._nodes) > len(expected_nodes) + + _logger.debug(f'{len(self._nodes)} agents up of {len(expected_nodes)}') + + if self._agents_process.poll() is not None: + msg = ( + 'Agents unexpectedly stopped running. This is not supposed' + ' to happen. Please see the agent log for more information,' + ' and please file an issue on GitHub.') + _logger.error(msg) + raise RuntimeError(msg) + + if too_many_agents: + msg = ( + 'More agents were started than MUSCLE3 asked for. This is not' + ' supposed to happen. Please file an issue on GitHub, with the' + ' SLURM version (use "sbatch -v") and the sbatch command used' + ' to submit the job.') + _logger.error(msg) + raise RuntimeError(msg) + + _logger.info(f'All agents running on {self._nodes}') + + if sorted(expected_nodes) != sorted(self._nodes): + _logger.error( + 'Agent-reported node hostnames do not match what we got from the' + ' resource manager.') + _logger.error( + 'According to the resource manager, we have' + f' {sorted(expected_nodes)}') + _logger.error( + f'The agents are reporting {sorted(self._nodes)}') From 9a94ed1fc54fdf20d132db9ab385df70bc1fb2dd Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 20:03:48 +0100 Subject: [PATCH 27/49] Fix global resources log output --- .../native_instantiator/agent_manager.py | 2 +- .../native_instantiator/global_resources.py | 21 +++++++++++++++++-- .../native_instantiator.py | 6 +++--- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py index c42b96c0..a6e249a6 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -173,7 +173,7 @@ def _launch_agents(self, agent_dir: Path, server_location: str) -> None: sys.executable, '-m', 'libmuscle.native_instantiator.agent', server_location, str(log_level)] - args = global_resources.agent_launch_command(args) + args = global_resources().agent_launch_command(args) self._agents_stdout = (agent_dir / 'agent_launch.out').open('a') self._agents_stderr = (agent_dir / 'agent_launch.err').open('a') diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py index aea612e1..e3c12e02 100644 --- a/libmuscle/python/libmuscle/native_instantiator/global_resources.py +++ 
b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -1,7 +1,7 @@ from enum import Enum import logging from socket import gethostname -from typing import List +from typing import List, Optional import psutil @@ -64,9 +64,26 @@ def agent_launch_command(self, agent_cmd: List[str]) -> List[str]: return agent_cmd -global_resources = GlobalResources() +_global_resources: Optional[GlobalResources] = None """Global resources object. This is a singleton, and that's fine because it's created once and then read-only. Also, it's used in two places, and making two objects logs everything twice which is annoying. """ + + +def global_resources() -> GlobalResources: + """Wrapper for _global_resources. + + This is here to ensure that the object gets created after we've configured logging, + so that the log output it generates actually ends up in the manager log. + + The users are all in the main thread of the NativeInstantiator background process, + so there's no need for a lock right now. + """ + global _global_resources + + if _global_resources is None: + _global_resources = GlobalResources() + + return _global_resources diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index 0de23936..bc90cb3b 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -311,7 +311,7 @@ def _send_resources(self) -> None: agent_cores = self._agent_manager.get_resources() env_ncores = dict( - zip(global_resources.nodes, global_resources.cores_per_node) + zip(global_resources().nodes, global_resources().cores_per_node) ) for node in env_ncores: @@ -360,7 +360,7 @@ def _instantiate(self, request: InstantiationRequest) -> None: rankfile = request.instance_dir / 'rankfile' - if global_resources.on_cluster(): + if global_resources().on_cluster(): rankfile_contents, resource_env = prep_resources( request.implementation.execution_model, request.resources, rankfile) @@ -399,7 +399,7 @@ def _write_run_script( else: run_script = make_script( request.implementation, request.res_req, - not global_resources.on_cluster(), rankfile) + not global_resources().on_cluster(), rankfile) run_script_file = request.instance_dir / 'run_script.sh' From 21e68b4db48cdadf18811ea14f771a9e9e9a8848 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 20:04:28 +0100 Subject: [PATCH 28/49] Fix planner predictability and add some logging --- libmuscle/python/libmuscle/planner/planner.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/planner/planner.py b/libmuscle/python/libmuscle/planner/planner.py index 2d63828e..612a89a2 100644 --- a/libmuscle/python/libmuscle/planner/planner.py +++ b/libmuscle/python/libmuscle/planner/planner.py @@ -550,6 +550,8 @@ def allocate_all( """ result: Dict[Reference, Resources] = {} + _logger.debug(f'Planning on resources {self._all_resources}') + # Analyse model model = ModelGraph(configuration.model) requirements = configuration.resources @@ -570,6 +572,7 @@ def allocate_all( unallocated_instances, requirements) for instance in to_allocate: + _logger.debug(f'Placing {instance}') component = model.component(instance.without_trailing_ints()) conflicting_names = self._conflicting_names( model, exclusive, component, instance) @@ -735,6 +738,7 @@ def _allocate_instance( if other in simultaneous_instances: free_resources -= 
self._allocations[other] + _logger.debug(f'Free resources: {free_resources}') try: if isinstance(requirements, ThreadedResReq): allocation = self._allocate_thread_block( @@ -788,7 +792,9 @@ def _allocate_thread_block( """ for node in free_resources.nodes(): if len(free_resources.cores[node]) >= threads: - available_cores = sorted(free_resources.cores[node]) + available_cores = sorted(free_resources.cores[node], key=sorted) + _logger.debug(f'available cores: {available_cores}') to_reserve = set(available_cores[:threads]) + _logger.debug(f'assigned {to_reserve}') return Resources({node: to_reserve}) raise InsufficientResourcesAvailable() From 8e5e7eb1705af56be29c20bdb783db15f9346d7f Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 20:05:16 +0100 Subject: [PATCH 29/49] Improve assertion precision if it fails --- integration_test/cluster_test/test_cluster.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index b9b08b6e..57ef408c 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -173,13 +173,14 @@ def test_multiple( assert sched.get_exit_code(job_id) == 0 for i in range(1, 7): - out = _get_stdout(remote_out_dir, 'multiple', mode, f'c{i}') + instance = f'c{i}' + out = _get_stdout(remote_out_dir, 'multiple', mode, instance) if mode == 'local': assert out.split('\n')[0] == 'headnode' else: node, hwthreads, _ = out.split('\n') - assert node == f'node-{(i - 1) // 2}' - assert hwthread_to_core(hwthreads) == [(i - 1) % 2] + assert (instance, node) == (instance, f'node-{(i - 1) // 2}') + assert (instance, hwthread_to_core(hwthreads)) == (instance, [(i - 1) % 2]) @skip_unless_cluster From 2601d877ad8c5a2a7f0f4d0db0475e0a13393704 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sat, 30 Nov 2024 22:07:57 +0100 Subject: [PATCH 30/49] Fix global/local core/cpu confusion on SMT systems --- .../native_instantiator/agent_manager.py | 2 +- .../native_instantiator/global_resources.py | 12 ++++--- .../native_instantiator.py | 33 ++++++++++++------- .../libmuscle/native_instantiator/slurm.py | 4 +-- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py index a6e249a6..39d9a648 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -50,7 +50,7 @@ def __init__(self, agent_dir: Path) -> None: def get_resources(self) -> Dict[str, List[FrozenSet[int]]]: """Return detected resources. - This returns a list of tuples of logical hwthread ids for each core per node. + This returns a list of sets of logical hwthread ids per core, per node. Called by NativeInstantiator. """ diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py index e3c12e02..1053a717 100644 --- a/libmuscle/python/libmuscle/native_instantiator/global_resources.py +++ b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -26,7 +26,8 @@ class GlobalResources: Attributes: scheduler: The HPC scheduler we're running under, if any. nodes: List of hostnames of available nodes to run on. - cores_per_node: Number of cores available on each node. List alongside nodes. 
+ logical_cpus_per_node: Number of cores available on each node. + List alongside nodes. """ def __init__(self) -> None: """Create a GlobalResources. @@ -38,16 +39,17 @@ def __init__(self) -> None: _logger.info('Detected a SLURM allocation') self.scheduler = Scheduler.SLURM self.nodes = slurm.get_nodes() - self.cores_per_node = slurm.get_cores_per_node() + self.logical_cpus_per_node = slurm.get_logical_cpus_per_node() _logger.info( f'We have {len(self.nodes)} nodes and a total of' - f' {sum(self.cores_per_node)} cores available') + f' {sum(self.logical_cpus_per_node)} logical CPUs available') else: _logger.info('Running locally without a cluster scheduler') self.scheduler = Scheduler.NONE self.nodes = [gethostname()] - self.cores_per_node = [psutil.cpu_count(logical=False)] - _logger.info(f'We have {self.cores_per_node[0]} cores available') + self.logical_cpus_per_node = [psutil.cpu_count(logical=True)] + _logger.info( + f'We have {self.logical_cpus_per_node[0]} logical CPUS available') def on_cluster(self) -> bool: """Return whether we're running on a cluster.""" diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index bc90cb3b..bccaabbc 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -306,15 +306,16 @@ def _send_resources(self) -> None: step outside our bounds even if the cluster doesn't constrain processes to their assigned processors. """ + already_logged_smt = False resources = Resources() agent_cores = self._agent_manager.get_resources() - env_ncores = dict( - zip(global_resources().nodes, global_resources().cores_per_node) + env_ncpus = dict( + zip(global_resources().nodes, global_resources().logical_cpus_per_node) ) - for node in env_ncores: + for node in env_ncpus: if node not in agent_cores: _logger.warning( f'The environment suggests we should have node {node},' @@ -323,26 +324,36 @@ def _send_resources(self) -> None: else: resources.cores[node] = set(agent_cores[node]) - env_nncores = env_ncores[node] + env_nncpus = env_ncpus[node] ag_nncores = len(agent_cores[node]) - if ag_nncores < env_nncores: + ag_nnthreads = sum((len(ts) for ts in agent_cores[node])) + + if ag_nncores != ag_nnthreads and ag_nnthreads == env_nncpus: + if not already_logged_smt: + _logger.info( + 'Detected SMT (hyperthreading) as available and' + ' enabled. Note that MUSCLE3 will assign whole cores to' + ' each thread or MPI process.') + already_logged_smt = True + + elif ag_nncores < env_nncpus: _logger.warning( - f'Node {node} should have {env_nncores} cores available,' + f'Node {node} should have {env_nncpus} cores available,' f' but the agent reports only {ag_nncores} available to it.' f' We\'ll use the {ag_nncores} we seem to have.') resources.cores[node] = set(agent_cores[node]) - elif env_nncores < ag_nncores: + elif env_nncpus < ag_nncores: _logger.warning( - f'Node {node} should have {env_nncores} cores available,' + f'Node {node} should have {env_nncpus} cores available,' f' but the agent reports {ag_nncores} available to it.' ' Maybe the cluster does not constrain resources? 
We\'ll' - f' use the {env_nncores} that we should have got.') - resources.cores[node] = set(agent_cores[node][:env_nncores]) + f' use the {env_nncpus} that we should have got.') + resources.cores[node] = set(agent_cores[node][:env_nncpus]) for node in agent_cores: - if node not in env_ncores: + if node not in env_ncpus: _logger.warning( f'An agent is running on node {node} but the environment' ' does not list it as ours. It seems that the node\'s' diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py index 0c726cd2..f61a02e8 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -250,8 +250,8 @@ def get_nodes() -> List[str]: return parse_slurm_nodelist(nodelist) -def get_cores_per_node() -> List[int]: - """Return the number of CPU cores per node. +def get_logical_cpus_per_node() -> List[int]: + """Return the number of logical CPU cores per node. This returns a list with the number of cores of each node in the result of get_nodes(), which gets read from SLURM_JOB_CPUS_PER_NODE. From 39a3c4402b5d70501a101991471d773225a6eb9d Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 17:23:24 +0100 Subject: [PATCH 31/49] Improve compatibility with older SLURM versions --- .../native_instantiator/global_resources.py | 2 +- .../native_instantiator/run_script.py | 51 +++--- .../libmuscle/native_instantiator/slurm.py | 168 ++++++++++++------ 3 files changed, 143 insertions(+), 78 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py index 1053a717..4b1e28c7 100644 --- a/libmuscle/python/libmuscle/native_instantiator/global_resources.py +++ b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -5,7 +5,7 @@ import psutil -from libmuscle.native_instantiator import slurm +from libmuscle.native_instantiator.slurm import slurm _logger = logging.getLogger(__name__) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index 042f897d..8be23d3d 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -2,6 +2,7 @@ from typing import Dict, FrozenSet, List, Optional, Tuple from libmuscle.errors import ConfigurationError +from libmuscle.native_instantiator.slurm import slurm from libmuscle.planner.planner import Resources from ymmsl import ( ExecutionModel, Implementation, MPICoresResReq, MPINodesResReq, @@ -221,32 +222,42 @@ def cluster_command(implementation: Implementation) -> str: if implementation.execution_model == ExecutionModel.DIRECT: fstr = 'taskset $MUSCLE_BIND_MASK {command} {args}' elif implementation.execution_model == ExecutionModel.OPENMPI: - # Native name is orterun for older and prterun for newer OpenMPI. - # So we go with mpirun, which works for either. - fstr = ( - 'mpirun -v -np $MUSCLE_MPI_PROCESSES' - ' -d --debug-daemons' - ' --rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --oversubscribe' - # ' --map-by rankfile:file=$MUSCLE_RANKFILE:oversubscribe' - # ' --display-map --display-allocation {command} {args}' - - # This adds the given option to the srun command used by mpirun to - # launch its daemons. 
mpirun specifies --exclusive, which on SLURM <= - # 21-08 causes SLURM to wait for our agents to quit, as it considers - # them to be occupying the cores, causing a deadlock. Fortunately, it - # seems that adding --overlap overrides the --exclusive and it works. - ' -mca plm_slurm_args "--overlap"' - ' --bind-to core --display-map --display-allocation {command} {args}' - ) + fargs = [ + # Native name is orterun for older and prterun for newer OpenMPI. + # So we go with mpirun, which works for either. + 'mpirun -v -np $MUSCLE_MPI_PROCESSES', + '-d --debug-daemons', + '--rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --oversubscribe' + ] + + if slurm.quirks.overlap: + # This adds the given option to the srun command used by mpirun to launch + # its daemons. mpirun specifies --exclusive, which on SLURM <= 21-08 causes + # SLURM to wait for our agents to quit, as it considers them to be occupying + # the cores, causing a deadlock. Fortunately, it seems that adding --overlap + # overrides the --exclusive and it works. + fargs.append('-mca plm_slurm_args "--overlap"') + + fargs.extend([ + '--bind-to core --display-map --display-allocation {command} {args}']) + + fstr = ' '.join(fargs) + elif implementation.execution_model == ExecutionModel.INTELMPI: fstr = ( 'mpirun -n $MUSCLE_MPI_PROCESSES -machinefile $MUSCLE_RANKFILE' ' {command} {args}') elif implementation.execution_model == ExecutionModel.SRUNMPI: # TODO: set SLURM_CPU_BIND_VERBOSE for verbose output - fstr = ( - 'srun -n $MUSCLE_MPI_PROCESSES -m arbitrary --overlap' - ' --cpu-bind=$SLURM_CPU_BIND {command} {args}') + fargs = ['srun -n $MUSCLE_MPI_PROCESSES -m arbitrary'] + + if slurm.quirks.overlap: + fargs.append('--overlap') + + fargs.append(f'{slurm.quirks.cpu_bind}=$SLURM_CPU_BIND {{command}} {{args}}') + + fstr = ' '.join(fargs) + # elif implementation.execution_model == ExecutionModel.MPICH # fstr = 'mpiexec -n $MUSCLE_MPI_PROCESSES -f $MUSCLE_RANKFILE {command} {args}' diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py b/libmuscle/python/libmuscle/native_instantiator/slurm.py index f61a02e8..a6286ee0 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -3,6 +3,7 @@ import os from parsimonious import Grammar, NodeVisitor from parsimonious.nodes import Node +import subprocess from typing import Any, cast, List, Sequence, Tuple @@ -222,77 +223,130 @@ def parse_slurm_nodes_cores(s: str) -> List[int]: return cast(List[int], _nce_visitor.visit(ast)) -def in_slurm_allocation() -> bool: - """Check whether we're in a SLURM allocation. +class SlurmQuirks: + """Collects features of the present SLURM.""" + overlap: bool + """True iff --overlap must be specified for srun.""" + cpu_bind: str + """CPU binding argument, --cpu-bind or --cpu_bind.""" - Returns true iff SLURM was detected. - """ - return 'SLURM_JOB_ID' in os.environ +class SlurmInfo: + """Detects and holds information about the present SLURM scheduler.""" + def __init__(self) -> None: + if self.in_slurm_allocation(): + self.version = self._slurm_version() + self.quirks = SlurmQuirks() -def get_nodes() -> List[str]: - """Get a list of node names from SLURM_JOB_NODELIST. + self.quirks.overlap = self.version > (20, 2) + self.quirks.cpu_bind = ( + '--cpu-bind' if self.version > (17, 2) else '--cpu_bind') - This inspects SLURM_JOB_NODELIST or SLURM_NODELIST and returns an - expanded list of node names. + def in_slurm_allocation(self) -> bool: + """Check whether we're in a SLURM allocation. 
- If SLURM_JOB_NODELIST is "node[020-023]" then this returns - ["node020", "node021", "node022", "node023"]. - """ - nodelist = os.environ.get('SLURM_JOB_NODELIST') - if not nodelist: - nodelist = os.environ.get('SLURM_NODELIST') - if not nodelist: - raise RuntimeError('SLURM_(JOB_)NODELIST not set, are we running locally?') + Returns true iff SLURM was detected. + """ + return 'SLURM_JOB_ID' in os.environ - _logger.debug(f'SLURM node list: {nodelist}') + def get_nodes(self) -> List[str]: + """Get a list of node names from SLURM_JOB_NODELIST. - return parse_slurm_nodelist(nodelist) + This inspects SLURM_JOB_NODELIST or SLURM_NODELIST and returns an + expanded list of node names. + If SLURM_JOB_NODELIST is "node[020-023]" then this returns + ["node020", "node021", "node022", "node023"]. + """ + nodelist = os.environ.get('SLURM_JOB_NODELIST') + if not nodelist: + nodelist = os.environ.get('SLURM_NODELIST') + if not nodelist: + raise RuntimeError('SLURM_(JOB_)NODELIST not set, are we running locally?') -def get_logical_cpus_per_node() -> List[int]: - """Return the number of logical CPU cores per node. + _logger.debug(f'SLURM node list: {nodelist}') - This returns a list with the number of cores of each node in the result of - get_nodes(), which gets read from SLURM_JOB_CPUS_PER_NODE. - """ - sjcpn = os.environ.get('SLURM_JOB_CPUS_PER_NODE') - _logger.debug(f'SLURM_JOB_CPUS_PER_NODE: {sjcpn}') + return parse_slurm_nodelist(nodelist) - if sjcpn: - return parse_slurm_nodes_cores(sjcpn) - else: - scon = os.environ.get('SLURM_CPUS_ON_NODE') - _logger.debug(f'SLURM_CPUS_ON_NODE: {scon}') + def get_logical_cpus_per_node(self) -> List[int]: + """Return the number of logical CPU cores per node. - snn = os.environ.get('SLURM_JOB_NUM_NODES') - if not snn: - snn = os.environ.get('SLURM_NNODES') - _logger.debug(f'SLURM num nodes: {snn}') + This returns a list with the number of cores of each node in the result of + get_nodes(), which gets read from SLURM_JOB_CPUS_PER_NODE. + """ + sjcpn = os.environ.get('SLURM_JOB_CPUS_PER_NODE') + _logger.debug(f'SLURM_JOB_CPUS_PER_NODE: {sjcpn}') - if scon and snn: - return [int(scon)] * int(snn) + if sjcpn: + return parse_slurm_nodes_cores(sjcpn) + else: + scon = os.environ.get('SLURM_CPUS_ON_NODE') + _logger.debug(f'SLURM_CPUS_ON_NODE: {scon}') - raise RuntimeError( - 'SLURM_JOB_CPUS_PER_NODE is not set in the environment, and also' - ' SLURM_CPUS_ON_NODE is missing or neither SLURM_JOB_NUM_NODES nor' - ' SLURM_NNODES is set. Please create an issue on GitHub with the output' - ' of "sbatch --version" on this cluster.') + snn = os.environ.get('SLURM_JOB_NUM_NODES') + if not snn: + snn = os.environ.get('SLURM_NNODES') + _logger.debug(f'SLURM num nodes: {snn}') + if scon and snn: + return [int(scon)] * int(snn) -def agent_launch_command(agent_cmd: List[str], nnodes: int) -> List[str]: - """Return a command for launching one agent on each node. + raise RuntimeError( + 'SLURM_JOB_CPUS_PER_NODE is not set in the environment, and also' + ' SLURM_CPUS_ON_NODE is missing or neither SLURM_JOB_NUM_NODES nor' + ' SLURM_NNODES is set. Please create an issue on GitHub with the output' + ' of "sbatch --version" on this cluster.') - Args: - agent_cmd: A command that will start the agent. - """ - # TODO: On the latest Slurm, there's a special command for this that we should use - # if we have that, --external-launcher. Poorly documented though, so will require - # some experimentation. 
- - # On SLURM <= 23-02, the number of tasks is inherited by srun from sbatch rather - # than calculated anew from --nodes and --ntasks-per-node, so we specify it - # explicitly to avoid getting an agent per logical cpu rather than per node. - return [ - 'srun', f'--nodes={nnodes}', f'--ntasks={nnodes}', '--ntasks-per-node=1', - '--overlap'] + agent_cmd + def agent_launch_command(self, agent_cmd: List[str], nnodes: int) -> List[str]: + """Return a command for launching one agent on each node. + + Args: + agent_cmd: A command that will start the agent. + """ + # TODO: On the latest Slurm, there's a special command for this that we should use + # if we have that, --external-launcher. Poorly documented though, so will require + # some experimentation. + + # On SLURM <= 23-02, the number of tasks is inherited by srun from sbatch rather + # than calculated anew from --nodes and --ntasks-per-node, so we specify it + # explicitly to avoid getting an agent per logical cpu rather than per node. + srun_cmd = [ + 'srun', f'--nodes={nnodes}', f'--ntasks={nnodes}', + '--ntasks-per-node=1' + ] + + if self.quirks.overlap: + srun_cmd.append('--overlap') + + return srun_cmd + agent_cmd + + def _slurm_version(self) -> Tuple[int, int]: + """Obtains current version of SLURM from srun -v. + + This returns only the first two numbers, hopefully there won't be any changes in + behaviour within a release series. + """ + proc = subprocess.run( + ['srun', '--version'], check=True, capture_output=True, text=True, + encoding='utf-8' + ) + + output = proc.stdout.strip().split() + if len(output) < 2: + raise RuntimeError( + f'Unexpected srun version output "{output}". MUSCLE3 does not know' + ' how to run on this version of SLURM. Please file an issue on' + ' GitHub.') + + version_str = output[1] + version = version_str.split('.') + if len(version) < 2: + _logger.error(f'srun produced unexpected version {version_str}') + raise RuntimeError( + f'Unexpected srun version output "{output}". MUSCLE3 does not know' + ' how to run on this version of SLURM. Please file an issue on' + ' GitHub.') + return int(version[0]), int(version[1]) + + +slurm = SlurmInfo() From f27757661927d3a922042ee18b15cc590f68b0ba Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 17:23:49 +0100 Subject: [PATCH 32/49] Add older SLURM versions to test setup --- integration_test/cluster_test/conftest.py | 38 +++++++++++++-- integration_test/fake_cluster/old.Dockerfile | 50 ++++++++++++++++++++ 2 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 integration_test/fake_cluster/old.Dockerfile diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 19eb7ebe..721d12dd 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -15,7 +15,10 @@ REMOTE_SHARED = '/home/cerulean/shared' -IDX_SLURM_VERSIONS = list(enumerate(['23-11'])) +IDX_SLURM_VERSIONS = list(enumerate([ + '17-02', '17-11', '18-08', '19-05', '20-02', '20-11', '21-08', '22-05', '23-02', + '23-11' + ])) # Shut down the containers after running the tests. Set to False to debug. 
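(Illustrative cross-check, not part of this patch.) The version list above feeds the SlurmQuirks thresholds introduced in the previous commit; since SlurmInfo._slurm_version() returns a (major, minor) tuple, the checks are plain element-wise tuple comparisons:

    # Versions as (major, minor) tuples, matching SlurmInfo._slurm_version().
    for version in [(17, 2), (17, 11), (20, 2), (20, 11), (21, 8), (23, 11)]:
        overlap = version > (20, 2)                     # srun gets --overlap
        spelling = '--cpu-bind' if version > (17, 2) else '--cpu_bind'
        print(version, overlap, spelling)
    # (17, 2) False --cpu_bind
    # (17, 11) False --cpu-bind
    # (20, 2) False --cpu-bind
    # (20, 11) True --cpu-bind
    # (21, 8) True --cpu-bind
    # (23, 11) True --cpu-bind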
CLEAN_UP_CONTAINERS = True @@ -57,6 +60,25 @@ def fake_cluster_image(local_term): ' -f integration_test/fake_cluster/Dockerfile .')) +@pytest.fixture(scope='session') +def fake_cluster_image_old(local_term): + run_cmd(local_term, 5400, ( + f'docker buildx build -t {IMAGE_NAME}_old' + ' -f integration_test/fake_cluster/old.Dockerfile .')) + + +def _image_name(slurm_version): + if slurm_version <= '20-02': + return IMAGE_NAME + '_old' + return IMAGE_NAME + + +def _gcc_version(slurm_version): + if slurm_version <= '20-02': + return '7.5.0' + return '11.4.0' + + def ssh_term(port, timeout_msg): cred = cerulean.PasswordCredential('cerulean', 'kingfisher') ready = False @@ -99,21 +121,25 @@ def _start_nodes(local_term, slurm_version, net_name, shared_dir): for i in range(5): node_name = f'node-{i}' + image_name = _image_name(slurm_version) + run_cmd(local_term, 60, ( f'docker run -d --name={node_name}-{slurm_version} --hostname={node_name}' f' --network={net_name} --cap-add=CAP_SYS_NICE' f' --env SLURM_VERSION={slurm_version}' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' - f' {IMAGE_NAME}')) + f' {image_name}')) def _start_headnode(local_term, slurm_version, net_name, shared_dir, headnode_port): + image_name = _image_name(slurm_version) + run_cmd(local_term, 60, ( f'docker run -d --name=headnode-{slurm_version} --hostname=headnode' f' --network={net_name} -p {headnode_port}:22' f' --env SLURM_VERSION={slurm_version}' f' --mount type=bind,source={shared_dir},target={REMOTE_SHARED}' - f' {IMAGE_NAME}')) + f' {image_name}')) ssh_term(headnode_port, 'Virtual cluster container start timed out') @@ -179,7 +205,9 @@ def _install_muscle3_native_openmpi( f'/opt/spack/bin/spack find --format \\"{{version}}\\" /{openmpi_hash}' '"')).strip() - module_name = f'openmpi/{openmpi_version}-gcc-11.4.0-{openmpi_hash[:7]}' + gcc_version = _gcc_version(slurm_version) + + module_name = f'openmpi/{openmpi_version}-gcc-{gcc_version}-{openmpi_hash[:7]}' logger_.info(f'Slurm {slurm_version} and module {module_name}') @@ -241,7 +269,7 @@ def _clean_up_base_cluster(local_term, slurm_version): @pytest.fixture(scope='session', params=IDX_SLURM_VERSIONS) def installed_cluster( - request, cleanup_docker, fake_cluster_image, shared_dir, + request, cleanup_docker, fake_cluster_image, fake_cluster_image_old, shared_dir, repo_root, local_term): slurm_version = request.param[1] diff --git a/integration_test/fake_cluster/old.Dockerfile b/integration_test/fake_cluster/old.Dockerfile new file mode 100644 index 00000000..700075c7 --- /dev/null +++ b/integration_test/fake_cluster/old.Dockerfile @@ -0,0 +1,50 @@ +FROM ghcr.io/naturalhpc/cerulean-fake-slurm-base-old:latest +# FROM naturalhpc/cerulean-fake-slurm-base-old:latest + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@2.0.0 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@17-02 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@17-02) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@2.0.0 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@17-11 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@17-11) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . 
$(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@2.0.0 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@18-08 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@18-08) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@2.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@19-05 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@19-05) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@2.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@20-02 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@20-02) + +# RUN . /opt/spack/share/spack/setup-env.sh && \ +# . $(spack location -i lmod)/lmod/lmod/init/bash && \ +# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 + +# RUN . /opt/spack/share/spack/setup-env.sh && \ +# . $(spack location -i lmod)/lmod/lmod/init/bash && \ +# spack install intel-oneapi-mpi ^pmix@3.2.3 + +# Disable ssh debug output +RUN sed -i -e 's/^LogLevel DEBUG3$//' /etc/ssh/sshd_config +RUN sed -i -e 's^Subsystem sftp /usr/lib/openssh/sftp-server -l DEBUG3^Subsystem sftp /usr/lib/openssh/sftp-server^' /etc/ssh/sshd_config + + +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /home/cerulean + From ceae033537aab2f49d19b69815ab058a7c33c211 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 18:22:12 +0100 Subject: [PATCH 33/49] Fix planner tests after SMT updates (oops!) --- .../planner/test/test_planner_scenarios.py | 369 +++++++++--------- 1 file changed, 194 insertions(+), 175 deletions(-) diff --git a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py index cf6067d4..6a4a2a95 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py @@ -1,7 +1,7 @@ from copy import deepcopy from libmuscle.planner.planner import ModelGraph, Planner, Resources -from typing import Dict, Tuple +from typing import Dict, FrozenSet, Tuple import pytest from ymmsl import ( @@ -9,6 +9,11 @@ MPICoresResReq, Ports, Reference, ResourceRequirements, ThreadedResReq) +def c(hwthread_id: int) -> FrozenSet[int]: + """Helper that defines a core with the given hwthread id.""" + return frozenset({hwthread_id}) + + _ResReqs = Dict[Reference, ResourceRequirements] @@ -38,12 +43,12 @@ s0_model, None, s0_implementations, s0_requirements) -s0_resources = Resources({'node001': {0, 1, 2, 3}}) +s0_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) s0_solution = { - Reference('macro'): Resources({'node001': {0, 1}}), - Reference('micro'): Resources({'node001': {2, 3}})} + Reference('macro'): Resources({'node001': {c(0), c(1)}}), + Reference('micro'): Resources({'node001': {c(2), c(3)}})} s1_model = Model( @@ -83,14 +88,14 @@ s1_model, None, s1_implementations, s1_requirements) -s1_resources = Resources({'node001': {0, 1, 2, 3}}) +s1_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) s1_solution = { - Reference('macro'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro1'): Resources({'node001': {0, 1}}), - Reference('micro2'): Resources({'node001': {0, 1}}), - Reference('micro3'): Resources({'node001': {0}})} + Reference('macro'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + 
Reference('micro1'): Resources({'node001': {c(0), c(1)}}), + Reference('micro2'): Resources({'node001': {c(0), c(1)}}), + Reference('micro3'): Resources({'node001': {c(0)}})} s2_model = Model( @@ -125,13 +130,14 @@ s2_model, None, s2_implementations, s2_requirements) -s2_resources = Resources({'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}}) +s2_resources = Resources( + {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) s2_solution = { - Reference('macro'): Resources({'node001': {0}}), - Reference('micro1'): Resources({'node001': {0, 1, 2}}), - Reference('micro2'): Resources({'node002': {0, 1}})} + Reference('macro'): Resources({'node001': {c(0)}}), + Reference('micro1'): Resources({'node001': {c(0), c(1), c(2)}}), + Reference('micro2'): Resources({'node002': {c(0), c(1)}})} s3_model = Model( @@ -170,14 +176,16 @@ s3_model, None, s3_implementations, s3_requirements) -s3_resources = Resources({'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}}) +s3_resources = Resources( + {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) s3_solution = { - Reference('a'): Resources({'node001': {0}}), - Reference('b1'): Resources({'node001': {2, 3}, 'node002': {0, 1, 2, 3}}), - Reference('b2'): Resources({'node001': {0, 1}}), - Reference('c'): Resources({'node001': {0, 1, 2, 3}})} + Reference('a'): Resources({'node001': {c(0)}}), + Reference('b1'): Resources( + {'node001': {c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}), + Reference('b2'): Resources({'node001': {c(0), c(1)}}), + Reference('c'): Resources({'node001': {c(0), c(1), c(2), c(3)}})} s4_model = Model( @@ -213,13 +221,14 @@ s4_model, None, s4_implementations, s4_requirements) -s4_resources = Resources({'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}}) +s4_resources = Resources( + {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) s4_solution = { - Reference('macro1'): Resources({'node002': {0, 1}}), - Reference('macro2'): Resources({'node001': {0, 1, 2}}), - Reference('micro'): Resources({'node001': {0, 1, 2}})} + Reference('macro1'): Resources({'node002': {c(0), c(1)}}), + Reference('macro2'): Resources({'node001': {c(0), c(1), c(2)}}), + Reference('micro'): Resources({'node001': {c(0), c(1), c(2)}})} s5_model = Model( @@ -262,17 +271,18 @@ s5_resources = Resources({ - 'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}, 'node003': {0, 1}}) + 'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}, + 'node003': {c(0), c(1)}}) # This is inefficient, as the models can all share resources. But repeater # is funny, and the algorithm cannot deal with it yet. It does give a valid # result with no overlap, so we'll accept that for the time being. 
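(Illustration, not part of this patch.) After the SMT changes earlier in this series, a core in these expected solutions is a frozenset of hardware thread ids, and the c() helper at the top of this file wraps a single hwthread. Using the imports at the top of this test module, and assuming a hypothetical 2-way SMT core for the second entry:

    # Two cores on node001: a plain core with one hwthread, and an assumed
    # SMT-2 core carrying two hwthreads. The generated fixtures below only use
    # single-hwthread cores, which is why c() is all they need.
    example = Resources({'node001': {c(0), frozenset({2, 3})}})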
s5_solution = { - Reference('init'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro'): Resources({'node002': {0, 1, 2, 3}}), - Reference('repeater'): Resources({'node003': {0}})} + Reference('init'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('repeater'): Resources({'node003': {c(0)}})} s6_model = Model( @@ -309,21 +319,21 @@ s6_resources = Resources({ - 'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}, - 'node003': {0, 1, 2, 3}, 'node004': {0, 1, 2, 3}, - 'node005': {0, 1, 2, 3}, 'node006': {0, 1, 2, 3} + 'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}, + 'node003': {c(0), c(1), c(2), c(3)}, 'node004': {c(0), c(1), c(2), c(3)}, + 'node005': {c(0), c(1), c(2), c(3)}, 'node006': {c(0), c(1), c(2), c(3)} }) s6_solution = { - Reference('a'): Resources({'node001': {0, 1, 2, 3}}), - Reference('tcf'): Resources({'node002': {0}}), + Reference('a'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('tcf'): Resources({'node002': {c(0)}}), Reference('b'): Resources({ - 'node002': {1, 2, 3}, - 'node003': {0, 1, 2, 3}, - 'node004': {0, 1, 2, 3}, - 'node005': {0, 1, 2, 3}, - 'node006': {0}})} + 'node002': {c(1), c(2), c(3)}, + 'node003': {c(0), c(1), c(2), c(3)}, + 'node004': {c(0), c(1), c(2), c(3)}, + 'node005': {c(0), c(1), c(2), c(3)}, + 'node006': {c(0)}})} s7_model = Model( @@ -365,46 +375,46 @@ s7_resources = Resources({ - 'node001': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node002': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node003': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node004': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node005': {0, 1, 2, 3, 4, 5, 6, 7}, + 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node002': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node003': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node004': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node005': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, }) s7_solution = { - Reference('mc'): Resources({'node001': {0}}), - Reference('init[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('init[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('init[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('init[3]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('init[4]'): Resources({'node003': {0, 1, 2, 3}}), - Reference('init[5]'): Resources({'node003': {4, 5, 6, 7}}), - Reference('init[6]'): Resources({'node004': {0, 1, 2, 3}}), - Reference('init[7]'): Resources({'node004': {4, 5, 6, 7}}), - Reference('init[8]'): Resources({'node005': {0, 1, 2, 3}}), - Reference('init[9]'): Resources({'node005': {4, 5, 6, 7}}), - Reference('macro[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('macro[3]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('macro[4]'): Resources({'node003': {0, 1, 2, 3}}), - Reference('macro[5]'): Resources({'node003': {4, 5, 6, 7}}), - Reference('macro[6]'): Resources({'node004': {0, 1, 2, 3}}), - Reference('macro[7]'): Resources({'node004': {4, 5, 6, 7}}), - Reference('macro[8]'): Resources({'node005': {0, 1, 2, 3}}), - Reference('macro[9]'): Resources({'node005': {4, 5, 6, 7}}), - Reference('micro[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro[1]'): Resources({'node001': {4, 5, 6, 
7}}), - Reference('micro[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('micro[3]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('micro[4]'): Resources({'node003': {0, 1, 2, 3}}), - Reference('micro[5]'): Resources({'node003': {4, 5, 6, 7}}), - Reference('micro[6]'): Resources({'node004': {0, 1, 2, 3}}), - Reference('micro[7]'): Resources({'node004': {4, 5, 6, 7}}), - Reference('micro[8]'): Resources({'node005': {0, 1, 2, 3}}), - Reference('micro[9]'): Resources({'node005': {4, 5, 6, 7}})} + Reference('mc'): Resources({'node001': {c(0)}}), + Reference('init[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('init[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('init[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('init[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('init[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + Reference('init[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), + Reference('init[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), + Reference('init[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), + Reference('init[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), + Reference('init[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), + Reference('macro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('macro[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('macro[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + Reference('macro[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), + Reference('macro[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), + Reference('macro[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), + Reference('macro[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), + Reference('macro[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), + Reference('micro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('micro[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('micro[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('micro[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + Reference('micro[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), + Reference('micro[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), + Reference('micro[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), + Reference('micro[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), + Reference('micro[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}})} s8_model = Model( @@ -441,13 +451,14 @@ s8_model, None, s8_implementations, s8_requirements) -s8_resources = Resources({'node001': {0, 1, 2, 3}, 'node002': {0, 1, 2, 3}}) +s8_resources = Resources( + {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) s8_solution = { - Reference('macro'): Resources({'node001': {3}}), - Reference('micro1'): Resources({'node001': {0, 1, 2}}), - Reference('micro2'): Resources({'node001': {0, 1}})} + Reference('macro'): Resources({'node001': {c(3)}}), + Reference('micro1'): Resources({'node001': {c(0), c(1), c(2)}}), + Reference('micro2'): Resources({'node001': {c(0), c(1)}})} s9_model = Model( @@ -489,15 +500,15 @@ s9_model, None, s9_implementations, s9_requirements) -s9_resources = 
Resources({'node001': {0, 1, 2, 3}}) +s9_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) s9_solution = { - Reference('a'): Resources({'node001': {1}}), - Reference('b'): Resources({'node001': {0}}), - Reference('c'): Resources({'node001': {0}}), - Reference('d'): Resources({'node001': {1}}), - Reference('e'): Resources({'node001': {0}})} + Reference('a'): Resources({'node001': {c(1)}}), + Reference('b'): Resources({'node001': {c(0)}}), + Reference('c'): Resources({'node001': {c(0)}}), + Reference('d'): Resources({'node001': {c(1)}}), + Reference('e'): Resources({'node001': {c(0)}})} s10_model = Model( @@ -542,31 +553,37 @@ s10_resources = Resources({ - 'node001': {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - 'node002': {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - 'node003': {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + 'node001': { + c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), + c(8),c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, + 'node002': { + c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, + 'node003': { + c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, }) s10_solution = { - Reference('mc'): Resources({'node001': {0}}), - Reference('rr'): Resources({'node001': {0}}), - Reference('macro[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro[2]'): Resources({'node001': {8, 9, 10, 11}}), - Reference('macro[3]'): Resources({'node001': {12, 13, 14, 15}}), - Reference('macro[4]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('macro[5]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('macro[6]'): Resources({'node002': {8, 9, 10, 11}}), - Reference('macro[7]'): Resources({'node002': {12, 13, 14, 15}}), - Reference('micro[0]'): Resources({'node001': {0, 1}}), - Reference('micro[1]'): Resources({'node001': {4, 5}}), - Reference('micro[2]'): Resources({'node001': {8, 9}}), - Reference('micro[3]'): Resources({'node001': {12, 13}}), - Reference('micro[4]'): Resources({'node002': {0, 1}}), - Reference('micro[5]'): Resources({'node002': {4, 5}}), - Reference('micro[6]'): Resources({'node002': {8, 9}}), - Reference('micro[7]'): Resources({'node002': {12, 13}})} + Reference('mc'): Resources({'node001': {c(0)}}), + Reference('rr'): Resources({'node001': {c(0)}}), + Reference('macro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro[2]'): Resources({'node001': {c(8), c(9), c(10), c(11)}}), + Reference('macro[3]'): Resources({'node001': {c(12), c(13), c(14), c(15)}}), + Reference('macro[4]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('macro[5]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('macro[6]'): Resources({'node002': {c(8), c(9), c(10), c(11)}}), + Reference('macro[7]'): Resources({'node002': {c(12), c(13), c(14), c(15)}}), + Reference('micro[0]'): Resources({'node001': {c(0), c(1)}}), + Reference('micro[1]'): Resources({'node001': {c(4), c(5)}}), + Reference('micro[2]'): Resources({'node001': {c(8), c(9)}}), + Reference('micro[3]'): Resources({'node001': {c(12), c(13)}}), + Reference('micro[4]'): Resources({'node002': {c(0), c(1)}}), + Reference('micro[5]'): Resources({'node002': {c(4), c(5)}}), + Reference('micro[6]'): Resources({'node002': {c(8), c(9)}}), + Reference('micro[7]'): Resources({'node002': {c(12), 
c(13)}})} s11_model = Model( @@ -606,24 +623,24 @@ s11_resources = Resources({ - 'node001': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node002': {0, 1, 2, 3, 4, 5, 6, 7}, + 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node002': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, }) s11_solution = { - Reference('macro1[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro1[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro1[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('micro1[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro1[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('micro1[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('macro2[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro2[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro2[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('micro2[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro2[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('micro2[2]'): Resources({'node002': {0, 1, 2, 3}}), + Reference('macro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro1[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('micro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro1[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('micro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('macro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), } @@ -646,14 +663,16 @@ s12_solution = { - Reference('macro1'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro1[0]'): Resources({'node001': {0, 1, 2, 3, 4, 5, 6, 7}}), - Reference('micro1[1]'): Resources({'node002': {0, 1, 2, 3, 4, 5, 6, 7}}), - Reference('macro2'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro2[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro2[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('micro2[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('micro2[3]'): Resources({'node002': {4, 5, 6, 7}}), + Reference('macro1'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro1[0]'): Resources({'node001': { + c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}}), + Reference('micro1[1]'): Resources({'node002': { + c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}}), + Reference('macro2'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), } @@ -676,58 +695,58 @@ s13_resources = Resources({ - 'node001': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node002': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node003': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node004': {0, 1, 2, 3, 4, 5, 6, 7}, - 'node005': {0, 1, 2, 3, 4, 5, 6, 7}, + 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node002': {c(0), c(1), c(2), 
c(3), c(4), c(5), c(6), c(7)}, + 'node003': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node004': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, + 'node005': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, }) s13_solution = { - Reference('macro1[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro1[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro1[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('macro1[3]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('macro1[4]'): Resources({'node003': {0, 1, 2, 3}}), - - Reference('micro1[0][0]'): Resources({'node001': {0, 1}}), - Reference('micro1[0][1]'): Resources({'node001': {2, 3}}), - Reference('micro1[0][2]'): Resources({'node003': {4, 5}}), - Reference('micro1[0][3]'): Resources({'node003': {6, 7}}), - Reference('micro1[1][0]'): Resources({'node001': {4, 5}}), - Reference('micro1[1][1]'): Resources({'node001': {6, 7}}), - Reference('micro1[1][2]'): Resources({'node004': {0, 1}}), - Reference('micro1[1][3]'): Resources({'node004': {2, 3}}), - Reference('micro1[2][0]'): Resources({'node002': {0, 1}}), - Reference('micro1[2][1]'): Resources({'node002': {2, 3}}), - Reference('micro1[2][2]'): Resources({'node004': {4, 5}}), - Reference('micro1[2][3]'): Resources({'node004': {6, 7}}), - Reference('micro1[3][0]'): Resources({'node002': {4, 5}}), - Reference('micro1[3][1]'): Resources({'node002': {6, 7}}), - Reference('micro1[3][2]'): Resources({'node005': {0, 1}}), - Reference('micro1[3][3]'): Resources({'node005': {2, 3}}), - Reference('micro1[4][0]'): Resources({'node003': {0, 1}}), - Reference('micro1[4][1]'): Resources({'node003': {2, 3}}), - Reference('micro1[4][2]'): Resources({'node005': {4, 5}}), - Reference('micro1[4][3]'): Resources({'node005': {6, 7}}), - - Reference('macro2[0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('macro2[1]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('macro2[2]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('macro2[3]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('macro2[4]'): Resources({'node003': {0, 1, 2, 3}}), - - Reference('micro2[0][0]'): Resources({'node001': {0, 1, 2, 3}}), - Reference('micro2[0][1]'): Resources({'node003': {4, 5, 6, 7}}), - Reference('micro2[1][0]'): Resources({'node001': {4, 5, 6, 7}}), - Reference('micro2[1][1]'): Resources({'node004': {0, 1, 2, 3}}), - Reference('micro2[2][0]'): Resources({'node002': {0, 1, 2, 3}}), - Reference('micro2[2][1]'): Resources({'node004': {4, 5, 6, 7}}), - Reference('micro2[3][0]'): Resources({'node002': {4, 5, 6, 7}}), - Reference('micro2[3][1]'): Resources({'node005': {0, 1, 2, 3}}), - Reference('micro2[4][0]'): Resources({'node003': {0, 1, 2, 3}}), - Reference('micro2[4][1]'): Resources({'node005': {4, 5, 6, 7}}), + Reference('macro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro1[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('macro1[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('macro1[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + + Reference('micro1[0][0]'): Resources({'node001': {c(0), c(1)}}), + Reference('micro1[0][1]'): Resources({'node001': {c(2), c(3)}}), + Reference('micro1[0][2]'): Resources({'node003': {c(4), c(5)}}), + Reference('micro1[0][3]'): Resources({'node003': {c(6), c(7)}}), + Reference('micro1[1][0]'): Resources({'node001': {c(4), c(5)}}), + Reference('micro1[1][1]'): Resources({'node001': {c(6), 
c(7)}}), + Reference('micro1[1][2]'): Resources({'node004': {c(0), c(1)}}), + Reference('micro1[1][3]'): Resources({'node004': {c(2), c(3)}}), + Reference('micro1[2][0]'): Resources({'node002': {c(0), c(1)}}), + Reference('micro1[2][1]'): Resources({'node002': {c(2), c(3)}}), + Reference('micro1[2][2]'): Resources({'node004': {c(4), c(5)}}), + Reference('micro1[2][3]'): Resources({'node004': {c(6), c(7)}}), + Reference('micro1[3][0]'): Resources({'node002': {c(4), c(5)}}), + Reference('micro1[3][1]'): Resources({'node002': {c(6), c(7)}}), + Reference('micro1[3][2]'): Resources({'node005': {c(0), c(1)}}), + Reference('micro1[3][3]'): Resources({'node005': {c(2), c(3)}}), + Reference('micro1[4][0]'): Resources({'node003': {c(0), c(1)}}), + Reference('micro1[4][1]'): Resources({'node003': {c(2), c(3)}}), + Reference('micro1[4][2]'): Resources({'node005': {c(4), c(5)}}), + Reference('micro1[4][3]'): Resources({'node005': {c(6), c(7)}}), + + Reference('macro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('macro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('macro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('macro2[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('macro2[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + + Reference('micro2[0][0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[0][1]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[1][0]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[1][1]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[2][0]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[2][1]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[3][0]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('micro2[3][1]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[4][0]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), + Reference('micro2[4][1]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), } @@ -763,7 +782,7 @@ s14_model, None, s14_implementations, s14_requirements) -s14_resources = Resources({'node001': {0, 1, 2, 3, 4, 5}}) +s14_resources = Resources({'node001': {c(0), c(1), c(2), c(3), c(4), c(5)}}) s14_solution = RuntimeError From 00a1ccef3fa07df3f0bf0dc65a401c8e96c3f83e Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 18:22:38 +0100 Subject: [PATCH 34/49] Fix type typo --- .../python/libmuscle/native_instantiator/native_instantiator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index bccaabbc..a876a682 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -253,7 +253,7 @@ def run(self) -> None: for line in traceback.format_exception(*sys.exc_info()): _logger.error(line) - result = CrashResult(sys.exc_info()[1]) + result = CrashedResult(sys.exc_info()[1]) self._resources_out.put(result) self._results_out.put(result) From dfd6b2a01add897add48de7c0d5a7b5a673739e0 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 18:23:22 +0100 Subject: [PATCH 35/49] Fix mypy errors --- libmuscle/python/libmuscle/manager/instantiator.py | 2 +- .../native_instantiator/agent/__main__.py | 14 ++++++++++++++ 2 files 
changed, 15 insertions(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/manager/instantiator.py b/libmuscle/python/libmuscle/manager/instantiator.py index 798482e0..b86f7cbf 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -113,7 +113,7 @@ class CancelAllRequest(InstantiatorRequest): class CrashedResult: """Signals that the instantiator process crashed.""" - def __init__(self, exception: Optional[Exception] = None) -> None: + def __init__(self, exception: Optional[BaseException] = None) -> None: self.exception = exception diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py index 712da253..35af9c8a 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py @@ -108,6 +108,20 @@ def _inspect_resources(self) -> Dict[str, Any]: nhwthreads = psutil.cpu_count(logical=True) ncores = psutil.cpu_count(logical=False) + if nhwthreads is None and ncores is not None: + _logger.warning( + 'Could not determine number of hwthreads, assuming no SMT') + nhwthreads = ncores + elif ncores is None and nhwthreads is not None: + _logger.warning( + 'Could not determine number of cores, assuming no SMT') + ncores = nhwthreads + elif ncores is None and nhwthreads is None: + _logger.warning( + 'Could not determine CPU configuration, assuming a single core') + ncores = 1 + nhwthreads = 1 + hwthreads_per_core = nhwthreads // ncores if ncores * hwthreads_per_core != nhwthreads: From 65f20d8cb9ed836566a19bbea72b18669e55d1bb Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 18:44:48 +0100 Subject: [PATCH 36/49] Fix linter warnings --- integration_test/cluster_test/conftest.py | 3 +-- integration_test/cluster_test/test_cluster.py | 2 +- libmuscle/python/libmuscle/native_instantiator/slurm.py | 6 +++--- .../python/libmuscle/planner/test/test_planner_scenarios.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 721d12dd..51f934b7 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -181,7 +181,6 @@ def _create_muscle3_venv(remote_term, remote_source): f'/bin/bash -c "{in_venv} python3 -m pip install pip wheel setuptools"')) run_cmd(remote_term, 60, f'/bin/bash -c "{in_venv} pip install {remote_source}"') - return in_venv def _install_muscle3_native_openmpi( @@ -223,7 +222,7 @@ def _install_muscle3_native_openmpi( def _install_muscle3(repo_root, remote_term, remote_fs, slurm_version): remote_source = _install_remote_source(repo_root, remote_term, remote_fs) - in_venv = _create_muscle3_venv(remote_term, remote_source) + _create_muscle3_venv(remote_term, remote_source) return _install_muscle3_native_openmpi( remote_source, remote_term, remote_fs, slurm_version) diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index 57ef408c..51655584 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -3,7 +3,7 @@ import pytest from integration_test.cluster_test.conftest import ( - REMOTE_SHARED, run_cmd, ssh_term, skip_unless_cluster) + REMOTE_SHARED, ssh_term, skip_unless_cluster) logger_ = logging.getLogger(__name__) diff --git a/libmuscle/python/libmuscle/native_instantiator/slurm.py 
b/libmuscle/python/libmuscle/native_instantiator/slurm.py index a6286ee0..dc22d23d 100644 --- a/libmuscle/python/libmuscle/native_instantiator/slurm.py +++ b/libmuscle/python/libmuscle/native_instantiator/slurm.py @@ -303,9 +303,9 @@ def agent_launch_command(self, agent_cmd: List[str], nnodes: int) -> List[str]: Args: agent_cmd: A command that will start the agent. """ - # TODO: On the latest Slurm, there's a special command for this that we should use - # if we have that, --external-launcher. Poorly documented though, so will require - # some experimentation. + # TODO: On the latest Slurm, there's a special command for this that we should + # use if we have that, --external-launcher. Poorly documented though, so will + # require some experimentation. # On SLURM <= 23-02, the number of tasks is inherited by srun from sbatch rather # than calculated anew from --nodes and --ntasks-per-node, so we specify it diff --git a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py index 6a4a2a95..f1f5b02a 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py @@ -555,7 +555,7 @@ def c(hwthread_id: int) -> FrozenSet[int]: s10_resources = Resources({ 'node001': { c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), - c(8),c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, 'node002': { c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, From 447acd530144697c7eed48ac480a26bde92a6473 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 20:54:05 +0100 Subject: [PATCH 37/49] Use Docker cp to upload to the fake cluster for better speed --- integration_test/cluster_test/conftest.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 51f934b7..3d1df34a 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -159,16 +159,22 @@ def _start_base_cluster(local_term, idx_slurm_version, shared_dir): return term, fs, headnode_port -def _install_remote_source(repo_root, remote_term, remote_fs): +def _install_remote_source(local_term, repo_root, remote_fs, slurm_version): muscle3_tgt = remote_fs / 'home' / 'cerulean' / 'muscle3' muscle3_tgt.mkdir() - (muscle3_tgt / 'libmuscle').mkdir() + + container = f'headnode-{slurm_version}' for f in ( 'muscle3', 'libmuscle', 'scripts', 'docs', 'setup.py', 'Makefile', 'MANIFEST.in', 'LICENSE', 'NOTICE', 'VERSION', 'README.rst'): - cerulean.copy( - repo_root / f, muscle3_tgt / f, overwrite='always', copy_into=False) + run_cmd(local_term, 60, ( + f'docker cp {repo_root / f} {container}:{muscle3_tgt / f}')) + + # needs to run as root, so not run through remote_term + run_cmd(local_term, 60, ( + f'docker exec {container} /bin/bash -c' + f' "chown -R cerulean:cerulean {muscle3_tgt}"')) return muscle3_tgt @@ -220,8 +226,9 @@ def _install_muscle3_native_openmpi( return prefix, module_name -def _install_muscle3(repo_root, remote_term, remote_fs, slurm_version): - remote_source = _install_remote_source(repo_root, remote_term, remote_fs) +def _install_muscle3(local_term, repo_root, remote_term, remote_fs, slurm_version): + remote_source = _install_remote_source( + local_term, repo_root, remote_fs, slurm_version) 
_create_muscle3_venv(remote_term, remote_source) return _install_muscle3_native_openmpi( remote_source, remote_term, remote_fs, slurm_version) @@ -279,7 +286,7 @@ def installed_cluster( remote_term, remote_fs, headnode_port = _start_base_cluster( local_term, request.param, local_shared_dir) remote_m3_openmpi = _install_muscle3( - repo_root, remote_term, remote_fs, slurm_version) + local_term, repo_root, remote_term, remote_fs, slurm_version) _install_tests(repo_root, remote_term, remote_fs, remote_m3_openmpi) yield headnode_port From 5fd98a77a66339f45253cb4324267eaea3cc667e Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 1 Dec 2024 21:02:24 +0100 Subject: [PATCH 38/49] Fix issues raised by latest mypy --- .../native_instantiator/agent/__main__.py | 22 ++++++++++--------- .../native_instantiator/global_resources.py | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py index 35af9c8a..a47dfca6 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py @@ -108,19 +108,21 @@ def _inspect_resources(self) -> Dict[str, Any]: nhwthreads = psutil.cpu_count(logical=True) ncores = psutil.cpu_count(logical=False) - if nhwthreads is None and ncores is not None: - _logger.warning( - 'Could not determine number of hwthreads, assuming no SMT') - nhwthreads = ncores - elif ncores is None and nhwthreads is not None: + if nhwthreads is None: + if ncores is not None: + _logger.warning( + 'Could not determine number of hwthreads, assuming no SMT') + nhwthreads = ncores + else: + _logger.warning( + 'Could not determine CPU configuration, assuming a single' + ' core') + ncores = 1 + nhwthreads = 1 + elif ncores is None: _logger.warning( 'Could not determine number of cores, assuming no SMT') ncores = nhwthreads - elif ncores is None and nhwthreads is None: - _logger.warning( - 'Could not determine CPU configuration, assuming a single core') - ncores = 1 - nhwthreads = 1 hwthreads_per_core = nhwthreads // ncores diff --git a/libmuscle/python/libmuscle/native_instantiator/global_resources.py b/libmuscle/python/libmuscle/native_instantiator/global_resources.py index 4b1e28c7..ce5ab82c 100644 --- a/libmuscle/python/libmuscle/native_instantiator/global_resources.py +++ b/libmuscle/python/libmuscle/native_instantiator/global_resources.py @@ -47,7 +47,7 @@ def __init__(self) -> None: _logger.info('Running locally without a cluster scheduler') self.scheduler = Scheduler.NONE self.nodes = [gethostname()] - self.logical_cpus_per_node = [psutil.cpu_count(logical=True)] + self.logical_cpus_per_node = [psutil.cpu_count(logical=True) or 0] _logger.info( f'We have {self.logical_cpus_per_node[0]} logical CPUS available') From 414a835ec056fd81fe61d145cf7860db83c311e9 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 4 Dec 2024 13:43:09 +0100 Subject: [PATCH 39/49] Add test with two instances on the same cores --- integration_test/cluster_test/conftest.py | 2 +- .../cluster_test/macro_micro.ymmsl | 25 ++++++++++++++ .../cluster_test/macro_micro_openmpi.sh | 12 +++++++ .../cluster_test/macro_micro_srunmpi.sh | 12 +++++++ integration_test/cluster_test/test_cluster.py | 34 +++++++++++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 integration_test/cluster_test/macro_micro.ymmsl create mode 100755 integration_test/cluster_test/macro_micro_openmpi.sh create 
mode 100755 integration_test/cluster_test/macro_micro_srunmpi.sh diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 3d1df34a..2350f38a 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -21,7 +21,7 @@ ])) # Shut down the containers after running the tests. Set to False to debug. -CLEAN_UP_CONTAINERS = True +CLEAN_UP_CONTAINERS = False skip_unless_cluster = pytest.mark.skipif( diff --git a/integration_test/cluster_test/macro_micro.ymmsl b/integration_test/cluster_test/macro_micro.ymmsl new file mode 100644 index 00000000..22cbf8a5 --- /dev/null +++ b/integration_test/cluster_test/macro_micro.ymmsl @@ -0,0 +1,25 @@ +ymmsl_version: v0.1 + +model: + name: macro_micro + components: + c1: + ports: + o_i: inter_out + s: inter_in + implementation: component_cpp + c2: + ports: + f_init: init_in + o_f: final_out + implementation: component_cpp + + conduits: + c1.inter_out: c2.init_in + c2.final_out: c1.inter_in + +resources: + c1: + mpi_processes: 2 + c2: + mpi_processes: 2 diff --git a/integration_test/cluster_test/macro_micro_openmpi.sh b/integration_test/cluster_test/macro_micro_openmpi.sh new file mode 100755 index 00000000..6b7fccb3 --- /dev/null +++ b/integration_test/cluster_test/macro_micro_openmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/macro_micro.ymmsl $CT/settings.ymmsl $CT/implementations_openmpi.ymmsl + diff --git a/integration_test/cluster_test/macro_micro_srunmpi.sh b/integration_test/cluster_test/macro_micro_srunmpi.sh new file mode 100755 index 00000000..a98aca57 --- /dev/null +++ b/integration_test/cluster_test/macro_micro_srunmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/macro_micro.ymmsl $CT/settings.ymmsl $CT/implementations_srunmpi.ymmsl + diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index 51655584..9f0a7156 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -215,3 +215,37 @@ def test_double( node, hwthreads, _ = out.split('\n') assert node == f'node-{i + 2}' assert hwthread_to_core(hwthreads) == [rank] + + +@skip_unless_cluster +@pytest.mark.parametrize('mode', ['local', 'slurm']) +@pytest.mark.parametrize('execution_model', ['openmpi', 'srunmpi']) +def test_macro_micro( + fake_cluster, remote_test_files, remote_out_dir, hwthread_to_core, + mode, execution_model): + + if mode == 'local' and execution_model == 'srunmpi': + pytest.skip('srun does not work without slurm') + + sched = _sched(fake_cluster, mode) + + job = _make_mpi_job( + 'macro_micro', mode, execution_model, remote_test_files, remote_out_dir) + if mode == 'slurm': + job.num_nodes = 1 + job.extra_scheduler_options += ' --nodelist=node-4' + + job_id = sched.submit(job) + assert sched.wait(job_id, job.time_reserved + _SCHED_OVERHEAD) is not None + assert sched.get_exit_code(job_id) == 0 + + for i in range(1, 3): + for rank in range(2): + out = _get_outfile( + remote_out_dir, 'macro_micro', mode, execution_model, f'c{i}', rank) + if mode == 'local': + assert out.split('\n')[0] == 'headnode' + else: + node, hwthreads, _ = out.split('\n') + assert node == f'node-4' + assert 
hwthread_to_core(hwthreads) == [rank] From 88ceb42ba796b9811b6a1966cf0a00a0b8376fac Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 4 Dec 2024 20:31:44 +0100 Subject: [PATCH 40/49] Enable MPI debug output only if manager log level is debug --- .../native_instantiator/run_script.py | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index 8be23d3d..a2c3d9b7 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from typing import Dict, FrozenSet, List, Optional, Tuple @@ -159,7 +160,7 @@ def num_mpi_tasks(res_req: ResourceRequirements) -> int: raise RuntimeError('Invalid ResourceRequirements') -def local_command(implementation: Implementation) -> str: +def local_command(implementation: Implementation, enable_debug: bool) -> str: """Make a format string for the command to run. This interprets the execution_model and produces an appropriate shell command to @@ -168,6 +169,7 @@ def local_command(implementation: Implementation) -> str: Args: implementation: The implementation to start. + enable_debug: Whether to produce extra debug output. Return: A format string with embedded {ntasks} and {rankfile}. @@ -177,7 +179,18 @@ def local_command(implementation: Implementation) -> str: elif implementation.execution_model == ExecutionModel.OPENMPI: # Native name is orterun for older and prterun for newer OpenMPI. # So we go with mpirun, which works for either. - fstr = 'mpirun -np $MUSCLE_MPI_PROCESSES --oversubscribe {command} {args}' + fargs = [ + 'mpirun -np $MUSCLE_MPI_PROCESSES', + '--oversubscribe' + ] + + if enable_debug: + fargs.append('-v --debug-daemons --display-map --display-allocation') + + fargs.append('{command} {args}') + + fstr = ' '.join(fargs) + elif implementation.execution_model == ExecutionModel.INTELMPI: fstr = 'mpirun -n $MUSCLE_MPI_PROCESSES {command} {args}' elif implementation.execution_model == ExecutionModel.SRUNMPI: @@ -204,7 +217,7 @@ def local_command(implementation: Implementation) -> str: ) -def cluster_command(implementation: Implementation) -> str: +def cluster_command(implementation: Implementation, enable_debug: bool) -> str: """Make a format string for the command to run. This interprets the execution_model and produces an appropriate shell command to @@ -213,11 +226,11 @@ def cluster_command(implementation: Implementation) -> str: Args: implementation: The implementation to start. + enable_debug: Whether to produce extra debug output. Return: A string with the command to use to start the implementation. """ - # TODO: enable debug options iff the manager log level is set to DEBUG # TODO: don't use taskset if it's not available if implementation.execution_model == ExecutionModel.DIRECT: fstr = 'taskset $MUSCLE_BIND_MASK {command} {args}' @@ -225,11 +238,14 @@ def cluster_command(implementation: Implementation) -> str: fargs = [ # Native name is orterun for older and prterun for newer OpenMPI. # So we go with mpirun, which works for either. 
- 'mpirun -v -np $MUSCLE_MPI_PROCESSES', - '-d --debug-daemons', - '--rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --oversubscribe' + 'mpirun -np $MUSCLE_MPI_PROCESSES', + '--rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --bind-to core', + '--oversubscribe' ] + if enable_debug: + fargs.append('-v --debug-daemons --display-map --display-allocation') + if slurm.quirks.overlap: # This adds the given option to the srun command used by mpirun to launch # its daemons. mpirun specifies --exclusive, which on SLURM <= 21-08 causes @@ -238,8 +254,7 @@ def cluster_command(implementation: Implementation) -> str: # overrides the --exclusive and it works. fargs.append('-mca plm_slurm_args "--overlap"') - fargs.extend([ - '--bind-to core --display-map --display-allocation {command} {args}']) + fargs.append('{command} {args}') fstr = ' '.join(fargs) @@ -248,13 +263,15 @@ def cluster_command(implementation: Implementation) -> str: 'mpirun -n $MUSCLE_MPI_PROCESSES -machinefile $MUSCLE_RANKFILE' ' {command} {args}') elif implementation.execution_model == ExecutionModel.SRUNMPI: - # TODO: set SLURM_CPU_BIND_VERBOSE for verbose output fargs = ['srun -n $MUSCLE_MPI_PROCESSES -m arbitrary'] if slurm.quirks.overlap: fargs.append('--overlap') - fargs.append(f'{slurm.quirks.cpu_bind}=$SLURM_CPU_BIND {{command}} {{args}}') + verbose = 'verbose,' if enable_debug else '' + + fargs.append(f'{slurm.quirks.cpu_bind}={verbose}$SLURM_CPU_BIND') + fargs.append('{command} {args}') fstr = ' '.join(fargs) @@ -288,6 +305,8 @@ def make_script( Return: A string with embedded newlines containing the shell script. """ + enable_debug = logging.getLogger('libmuscle').getEffectiveLevel() <= logging.DEBUG + lines: List[str] = list() lines.append('#!/bin/bash') @@ -309,9 +328,9 @@ def make_script( lines.append('') if local: - lines.append(local_command(implementation)) + lines.append(local_command(implementation, enable_debug)) else: - lines.append(cluster_command(implementation)) + lines.append(cluster_command(implementation, enable_debug)) lines.append('') From e19e10e2f1910c3645a2015bd071e2e6d2345bcd Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 4 Dec 2024 20:36:27 +0100 Subject: [PATCH 41/49] Fix linter warning --- integration_test/cluster_test/test_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index 9f0a7156..d8a52c67 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -247,5 +247,5 @@ def test_macro_micro( assert out.split('\n')[0] == 'headnode' else: node, hwthreads, _ = out.split('\n') - assert node == f'node-4' + assert node == 'node-4' assert hwthread_to_core(hwthreads) == [rank] From 7d363b02239806aaafa0933c1536380e09a3b2d1 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 4 Dec 2024 22:18:16 +0100 Subject: [PATCH 42/49] Don't bind (but also don't crash) if taskset doesn't exist --- .../libmuscle/native_instantiator/run_script.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index a2c3d9b7..124b0897 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -231,9 +231,16 @@ def cluster_command(implementation: Implementation, enable_debug: bool) -> str: Return: A string with the command to 
use to start the implementation. """ - # TODO: don't use taskset if it's not available if implementation.execution_model == ExecutionModel.DIRECT: - fstr = 'taskset $MUSCLE_BIND_MASK {command} {args}' + fargs = [ + 'if ! taskset -V >/dev/null 2>&1 ; then', + ' {command} {args}', + 'else', + ' taskset $MUSCLE_BIND_MASK {command} {args}', + 'fi' + ] + fstr = '\n'.join(fargs) + elif implementation.execution_model == ExecutionModel.OPENMPI: fargs = [ # Native name is orterun for older and prterun for newer OpenMPI. From 17c51d14e8eb236c70c441f767295fe193004730 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 8 Dec 2024 23:23:29 +0100 Subject: [PATCH 43/49] Add SLURM 24-05 and 24-11 --- integration_test/cluster_test/conftest.py | 4 ++-- integration_test/fake_cluster/Dockerfile | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 2350f38a..97b7b255 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -17,11 +17,11 @@ IDX_SLURM_VERSIONS = list(enumerate([ '17-02', '17-11', '18-08', '19-05', '20-02', '20-11', '21-08', '22-05', '23-02', - '23-11' + '23-11', '24-05', '24-11' ])) # Shut down the containers after running the tests. Set to False to debug. -CLEAN_UP_CONTAINERS = False +CLEAN_UP_CONTAINERS = True skip_unless_cluster = pytest.mark.skipif( diff --git a/integration_test/fake_cluster/Dockerfile b/integration_test/fake_cluster/Dockerfile index 16561062..25a85ebe 100644 --- a/integration_test/fake_cluster/Dockerfile +++ b/integration_test/fake_cluster/Dockerfile @@ -31,6 +31,18 @@ RUN . /opt/spack/share/spack/setup-env.sh && \ ^$(spack find --deps slurm@23-11 | grep pmix | tr -d ' ') \ ^$(spack find --format "slurm/{hash}" slurm@23-11) +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@4.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@24-05 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@24-05) + +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install openmpi@4.1.6 +legacylaunchers +pmi schedulers=slurm \ + ^$(spack find --deps slurm@24-11 | grep pmix | tr -d ' ') \ + ^$(spack find --format "slurm/{hash}" slurm@24-11) + # RUN . /opt/spack/share/spack/setup-env.sh && \ # . $(spack location -i lmod)/lmod/lmod/init/bash && \ # spack install mpich+slurm pmi=pmix ^pmix@3.2.3 From c27239eb28371ae96d1bfbbafaf30099b96e58fc Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 8 Dec 2024 23:23:49 +0100 Subject: [PATCH 44/49] Improve mpirun command --- libmuscle/python/libmuscle/native_instantiator/run_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index 124b0897..c3aa2bfc 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -246,12 +246,12 @@ def cluster_command(implementation: Implementation, enable_debug: bool) -> str: # Native name is orterun for older and prterun for newer OpenMPI. # So we go with mpirun, which works for either. 
'mpirun -np $MUSCLE_MPI_PROCESSES', - '--rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --bind-to core', + '--rankfile $MUSCLE_RANKFILE --use-hwthread-cpus --bind-to hwthread', '--oversubscribe' ] if enable_debug: - fargs.append('-v --debug-daemons --display-map --display-allocation') + fargs.append('-v --display-allocation --display-map --report-bindings') if slurm.quirks.overlap: # This adds the given option to the srun command used by mpirun to launch From 0a67cc614fe66e9059412e6f7f047ab92cfcd424 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Sun, 29 Dec 2024 20:48:41 +0100 Subject: [PATCH 45/49] Switch to class-based resource representation --- .../libmuscle/manager/instance_manager.py | 7 +- .../python/libmuscle/manager/instantiator.py | 8 +- .../python/libmuscle/manager/profile_store.py | 10 +- .../libmuscle/manager/qcgpj_instantiator.py | 38 +- .../manager/test/test_profile_database.py | 16 +- .../native_instantiator/agent/__main__.py | 57 +- .../native_instantiator/agent/map_client.py | 23 +- .../native_instantiator/agent_manager.py | 34 +- .../native_instantiator/iagent_manager.py | 8 +- .../native_instantiator/map_server.py | 28 +- .../native_instantiator.py | 252 +------ .../native_instantiator/run_script.py | 59 +- libmuscle/python/libmuscle/planner/planner.py | 271 ++++---- .../python/libmuscle/planner/resources.py | 647 ++++++++++++++++++ .../libmuscle/planner/test/test_planner.py | 238 +++---- .../planner/test/test_planner_scenarios.py | 437 ++++++------ .../libmuscle/planner/test/test_resources.py | 435 ++++++++++++ libmuscle/python/libmuscle/test/conftest.py | 24 +- muscle3/muscle3.py | 9 +- 19 files changed, 1759 insertions(+), 842 deletions(-) create mode 100644 libmuscle/python/libmuscle/planner/resources.py create mode 100644 libmuscle/python/libmuscle/planner/test/test_resources.py diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 9d7cf90d..23980903 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -17,7 +17,8 @@ # from libmuscle.manager.qcgpj_instantiator import QCGPJInstantiator from libmuscle.manager.run_dir import RunDir from libmuscle.native_instantiator.native_instantiator import NativeInstantiator -from libmuscle.planner.planner import Planner, Resources +from libmuscle.planner.planner import Planner, ResourceAssignment +from libmuscle.planner.resources import Resources _logger = logging.getLogger(__name__) @@ -94,7 +95,7 @@ def __init__( self._log_handler = LogHandlingThread(self._log_records_in) self._log_handler.start() - self._allocations: Optional[Dict[Reference, Resources]] = None + self._allocations: Optional[Dict[Reference, ResourceAssignment]] = None resources = self._resources_in.get() _logger.debug(f'Got resources {resources}') @@ -150,7 +151,7 @@ def start_all(self) -> None: self._requests_out.put(request) self._num_running += 1 - def get_resources(self) -> Dict[Reference, Resources]: + def get_resources(self) -> Dict[Reference, ResourceAssignment]: """Returns the resources allocated to each instance. 
Only call this after start_all() has been called, or it will raise diff --git a/libmuscle/python/libmuscle/manager/instantiator.py b/libmuscle/python/libmuscle/manager/instantiator.py index b86f7cbf..e29e48c2 100644 --- a/libmuscle/python/libmuscle/manager/instantiator.py +++ b/libmuscle/python/libmuscle/manager/instantiator.py @@ -8,7 +8,7 @@ from ymmsl import Implementation, Reference, ResourceRequirements -from libmuscle.planner.planner import Resources +from libmuscle.planner.planner import ResourceAssignment class ProcessStatus(enum.Enum): @@ -40,7 +40,7 @@ class Process: exit_code: Exit code, if status is ERROR error_msg: Error message, if status is ERROR """ - def __init__(self, instance: Reference, resources: Resources) -> None: + def __init__(self, instance: Reference, resources: ResourceAssignment) -> None: """Create a Process object. Args: @@ -81,8 +81,8 @@ class InstantiationRequest(InstantiatorRequest): """ def __init__( self, instance: Reference, implementation: Implementation, - res_req: ResourceRequirements, resources: Resources, instance_dir: - Path, work_dir: Path, stdout_path: Path, stderr_path: Path + res_req: ResourceRequirements, resources: ResourceAssignment, + instance_dir: Path, work_dir: Path, stdout_path: Path, stderr_path: Path ) -> None: """Create an InstantiationRequest. diff --git a/libmuscle/python/libmuscle/manager/profile_store.py b/libmuscle/python/libmuscle/manager/profile_store.py index 0fba694e..3ee262b8 100644 --- a/libmuscle/python/libmuscle/manager/profile_store.py +++ b/libmuscle/python/libmuscle/manager/profile_store.py @@ -5,7 +5,7 @@ from threading import Thread from typing import cast, Dict, Iterable, List, Optional, Tuple -from libmuscle.planner.planner import Resources +from libmuscle.planner.planner import ResourceAssignment from libmuscle.profiling import ProfileEvent, ProfileEventType from libmuscle.manager.profile_database import ProfileDatabase from ymmsl import Operator, Reference @@ -77,7 +77,7 @@ def store_instances( cur.execute("COMMIT") cur.close() - def store_resources(self, resources: Dict[Reference, Resources]) -> None: + def store_resources(self, resources: Dict[Reference, ResourceAssignment]) -> None: """Store resource assignments into the database. 
Args: @@ -90,9 +90,9 @@ def store_resources(self, resources: Dict[Reference, Resources]) -> None: instance_oid = self._get_instance_oid(cur, instance_id) tuples = [ - (instance_oid, node, hwthread) - for node, cores in res.cores.items() - for core in cores for hwthread in core] + (instance_oid, node.node_name, core.cid) + for node in res.as_resources() + for core in node.cpu_cores] cur.executemany( "INSERT INTO assigned_cores (instance_oid, node, core)" diff --git a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py index 9130779f..f54e96e2 100644 --- a/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py +++ b/libmuscle/python/libmuscle/manager/qcgpj_instantiator.py @@ -28,7 +28,7 @@ from libmuscle.manager.instantiator import ( CancelAllRequest, CrashedResult, create_instance_env, InstantiationRequest, Process, ProcessStatus, reconfigure_logging, ShutdownRequest) -from libmuscle.planner.planner import Resources +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources, Resources _logger = logging.getLogger(__name__) @@ -198,10 +198,13 @@ async def _main(self) -> None: def _send_resources(self) -> None: """Converts and sends QCG available resources.""" - resources = Resources() + resources = Resources([]) for node in self._qcg_resources.nodes: - resources.cores[node.name] = { - frozenset(n.split(',')) for n in node.free_ids} + cs = CoreSet([ + Core(cid, set(map(int, hwthreads_str.split(',')))) + for cid, hwthreads_str in enumerate(node.free_ids)]) + nr = OnNodeResources(node.name, cs) + resources.add_node(nr) self._resources_out.put(resources) @@ -237,7 +240,8 @@ def _create_job( qcg_resources_type: qcg_ResourcesType ) -> Tuple[qcg_Allocation, qcg_SchedulingIteration]: """Creates a QCG allocation and job for a request.""" - total_cores = sum(map(len, request.resources.cores.values())) + total_cores = sum([ + nres.total_cores() for nres in request.resources.by_rank]) env = create_instance_env(request.instance, request.implementation.env) @@ -255,10 +259,13 @@ def _create_job( resources=resources) qcg_allocation = qcg_Allocation() - for node_name, cores in request.resources.cores.items(): - qcg_cores = [str(i) for i in cores] + res = request.resources.as_resources() + for node in res: + qcg_cores = [ + ','.join(map(str, core.hwthreads)) + for core in node.cpu_cores] qcg_allocation.add_node( - qcg_NodeAllocation(qcg_Node(node_name), qcg_cores, {})) + qcg_NodeAllocation(qcg_Node(node.node_name), qcg_cores, {})) sjob = qcg_SchedulingJob(self._state_tracker, qcg_job) qcg_iteration = qcg_SchedulingIteration(sjob, None, None, resources, []) @@ -284,16 +291,19 @@ def _qcg_job_execution_with_script( rank_file = request.instance_dir / 'rankfile' with rank_file.open('w') as f: i = 0 - for node, cores in request.resources.cores.items(): - for c in sorted(cores): - f.write(f'rank {i}={node} slot={c}\n') + res = request.resources.as_resources() + for node in res: + for cid in sorted([c.cid for c in node.cpu_cores]): + f.write(f'rank {i}={node.node_name} slot={cid}\n') i += 1 env['MUSCLE_OPENMPI_RANK_FILE'] = str(rank_file) # IntelMPI support mpi_res_args = list() - for node, cores in request.resources.cores.items(): - mpi_res_args.extend(['-host', node, '-n', str(len(cores))]) + res = request.resources.as_resources() + for node in res: + mpi_res_args.extend([ + '-host', node.node_name, '-n', str(node.total_cores())]) env['MUSCLE_INTELMPI_RESOURCES'] = ' '.join(mpi_res_args) # General environment @@ -315,7 +325,7 @@ def 
_qcg_job_execution_normal( qcg_resources_type: qcg_ResourcesType) -> qcg_JobExecution: """Create a JobExecution for a normal description.""" impl = request.implementation - total_cores = sum(map(len, request.resources.cores.values())) + total_cores = request.resources.as_resources().total_cores() if impl.execution_model == ExecutionModel.DIRECT: env['OMP_NUM_THREADS'] = str(total_cores) diff --git a/libmuscle/python/libmuscle/manager/test/test_profile_database.py b/libmuscle/python/libmuscle/manager/test/test_profile_database.py index 33bbb9dd..b72c964a 100644 --- a/libmuscle/python/libmuscle/manager/test/test_profile_database.py +++ b/libmuscle/python/libmuscle/manager/test/test_profile_database.py @@ -2,13 +2,13 @@ from libmuscle.manager.profile_database import ProfileDatabase from libmuscle.manager.profile_store import ProfileStore -from libmuscle.planner.planner import Resources +from libmuscle.planner.planner import ResourceAssignment from libmuscle.profiling import ( ProfileEvent, ProfileEventType, ProfileTimestamp) -from ymmsl import Operator, Port, Reference +from libmuscle.test.conftest import on_node_resources as onr -from libmuscle.test.conftest import frozenset_of as s +from ymmsl import Operator, Port, Reference import pytest @@ -23,13 +23,11 @@ def db_file(tmp_path) -> Path: store.store_instances([Reference('instance1'), Reference('instance2')]) - resources1 = Resources({ - 'node001': {s(0), s(1)}, - 'node002': {s(0), s(1)}}) + resources1 = ResourceAssignment([ + onr('node001', {0, 1}), onr('node002', {0, 1})]) - resources2 = Resources({ - 'node001': {s(0)}, - 'node002': {s(0), s(1), s(2)}}) + resources2 = ResourceAssignment([ + onr('node001', {0}), onr('node002', {0, 1, 2})]) store.store_resources({ Reference('instance1'): resources1, diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py index a47dfca6..a85f2096 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent/__main__.py @@ -1,16 +1,16 @@ -from itertools import groupby import logging import os import psutil from socket import gethostname import sys from time import sleep -from typing import Any, Dict, Set +from typing import Dict, Set from libmuscle.native_instantiator.process_manager import ProcessManager from libmuscle.native_instantiator.agent.map_client import MAPClient from libmuscle.native_instantiator.agent.agent_commands import ( CancelAllCommand, ShutdownCommand, StartCommand) +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources _logger = logging.getLogger(__name__) @@ -18,21 +18,21 @@ class Agent: """Runs on a compute node and starts processes there.""" - def __init__(self, node_id: str, server_location: str) -> None: + def __init__(self, node_name: str, server_location: str) -> None: """Create an Agent. 
Args: - node_id: Id (hostname) of this node + node_name: Name (hostname) of this node server_location: MAP server of the manager to connect to """ - _logger.info(f'Agent at {node_id} starting') + _logger.info(f'Agent at {node_name} starting') self._process_manager = ProcessManager() - self._node_id = node_id + self._node_name = node_name _logger.info(f'Connecting to manager at {server_location}') - self._server = MAPClient(self._node_id, server_location) + self._server = MAPClient(self._node_name, server_location) _logger.info('Connected to manager') def run(self) -> None: @@ -68,17 +68,13 @@ def run(self) -> None: sleep(0.1) - def _inspect_resources(self) -> Dict[str, Any]: + def _inspect_resources(self) -> OnNodeResources: """Inspect the node to find resources and report on them. - The only resource type for now is 'cpu'. The returned dict will have that key - mapping to a list of sets of logical hwthread ids, with each set designating - a set of hwthreads that share a core. - The terminology for identifying processors gets very convoluted, with Linux, Slurm, OpenMPI and IntelMPI all using different terms, or sometimes the same - terms for different things. See the comment in native_instantiator.py for what - is what and how we use it. + terms for different things. See the comment in planner/resources.py for what is + what and how we use it. Returns: A dict mapping resource types to resource descriptions. @@ -95,8 +91,9 @@ def _inspect_resources(self) -> Dict[str, Any]: core_id = int(f.read()) hwthreads_by_core.setdefault(core_id, set()).add(i) - cpu_resources = sorted( - map(frozenset, hwthreads_by_core.values()), key=sorted) + cores = CoreSet(( + Core(core_id, hwthreads) + for core_id, hwthreads in hwthreads_by_core.items())) else: # MacOS doesn't support thread affinity, but older Macs with Intel @@ -138,22 +135,26 @@ def _inspect_resources(self) -> Dict[str, Any]: ' still appreciate an issue, because it is unexpected for sure.' 
) - hwthread_ids = list(range(nhwthreads)) - cpu_resources = [ - frozenset(g) - for _, g in groupby( - hwthread_ids, lambda i: i // hwthreads_per_core)] + cores = CoreSet(( + Core( + cid, + set(range( + cid * hwthreads_per_core, (cid + 1) * hwthreads_per_core)) + ) + for cid in range(ncores) + )) - _logger.info(f'Found CPU resources: {cpu_resources}') - return {'cpu': cpu_resources} + resources = OnNodeResources(self._node_name, cores) + _logger.info(f'Found resources: {resources}') + return resources -def configure_logging(node_id: str, log_level: int) -> None: +def configure_logging(node_name: str, log_level: int) -> None: """Make us output logs to a custom log file.""" fmt = '%(asctime)s %(levelname)s %(message)s' formatter = logging.Formatter(fmt) - handler = logging.FileHandler(f'muscle3_agent_{node_id}.log', mode='w') + handler = logging.FileHandler(f'muscle3_agent_{node_name}.log', mode='w') handler.setFormatter(formatter) # Find and remove default handler to disable automatic console output @@ -170,11 +171,11 @@ def configure_logging(node_id: str, log_level: int) -> None: if __name__ == '__main__': - node_id = gethostname() + node_name = gethostname() server_location = sys.argv[1] log_level = int(sys.argv[2]) - configure_logging(node_id, log_level) + configure_logging(node_name, log_level) - agent = Agent(node_id, server_location) + agent = Agent(node_name, server_location) agent.run() diff --git a/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py b/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py index d360b0a5..e402b29f 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent/map_client.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, List, Optional, Tuple import msgpack @@ -7,6 +7,7 @@ from libmuscle.mcp.tcp_transport_client import TcpTransportClient from libmuscle.native_instantiator.agent.agent_commands import ( AgentCommand, StartCommand, CancelAllCommand, ShutdownCommand) +from libmuscle.planner.resources import OnNodeResources class MAPClient: @@ -14,14 +15,14 @@ class MAPClient: This class connects to the AgentManager and communicates with it. """ - def __init__(self, node_id: str, location: str) -> None: + def __init__(self, node_name: str, location: str) -> None: """Create a MAPClient Args: - node_id: Id of the local node + node_name: Name (hostname) of the local node location: A connection string of the form hostname:port """ - self._node_id = node_id + self._node_name = node_name self._transport_client = TcpTransportClient(location) def close(self) -> None: @@ -31,20 +32,16 @@ def close(self) -> None: """ self._transport_client.close() - def report_resources(self, resources: Dict[str, Any]) -> None: + def report_resources(self, resources: OnNodeResources) -> None: """Report local resources - The only key in the dict is currently 'cpu', and it maps to a list of frozensets - of hwthread ids that we can bind to with taskset or in a rankfile. 
- Args: - resources: Available resource ids by type + resources: Description of the resources on this node """ - enc_cpu_resources = [ - list(hwthreads) for hwthreads in resources['cpu']] + enc_cpu_resources = [[c.cid] + list(c.hwthreads) for c in resources.cpu_cores] request = [ RequestType.REPORT_RESOURCES.value, - self._node_id, {'cpu': enc_cpu_resources}] + resources.node_name, {'cpu': enc_cpu_resources}] self._call_agent_manager(request) def get_command(self) -> Optional[AgentCommand]: @@ -53,7 +50,7 @@ def get_command(self) -> Optional[AgentCommand]: Returns: A command, or None if there are no commands pending. """ - request = [RequestType.GET_COMMAND.value, self._node_id] + request = [RequestType.GET_COMMAND.value, self._node_name] response = self._call_agent_manager(request) if response[0] == ResponseType.PENDING.value: diff --git a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py index 39d9a648..37883749 100644 --- a/libmuscle/python/libmuscle/native_instantiator/agent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/agent_manager.py @@ -4,13 +4,14 @@ import sys from threading import Lock from time import sleep -from typing import Any, Dict, FrozenSet, List, Tuple +from typing import Dict, List, Tuple from libmuscle.native_instantiator.agent.agent_commands import ( CancelAllCommand, StartCommand, ShutdownCommand) from libmuscle.native_instantiator.iagent_manager import IAgentManager from libmuscle.native_instantiator.map_server import MAPServer from libmuscle.native_instantiator.global_resources import global_resources +from libmuscle.planner.resources import OnNodeResources, Resources _logger = logging.getLogger(__name__) @@ -38,7 +39,7 @@ def __init__(self, agent_dir: Path) -> None: agent_dir: Directory in which agents can write log files. """ self._nodes: List[str] = list() - self._resources: Dict[str, Dict[str, Any]] = dict() + self._resources: Resources = Resources([]) self._resources_lock = Lock() # protects _nodes and _resources self._finished_processes: List[Tuple[str, int]] = list() @@ -47,7 +48,7 @@ def __init__(self, agent_dir: Path) -> None: self._server = MAPServer(self) self._launch_agents(agent_dir, self._server.get_location()) - def get_resources(self) -> Dict[str, List[FrozenSet[int]]]: + def get_resources(self) -> Resources: """Return detected resources. This returns a list of sets of logical hwthread ids per core, per node. @@ -55,10 +56,10 @@ def get_resources(self) -> Dict[str, List[FrozenSet[int]]]: Called by NativeInstantiator. """ # no need to lock, _resources is already in its final state - return {node_id: res['cpu'] for node_id, res in self._resources.items()} + return self._resources def start( - self, node_id: str, name: str, work_dir: Path, args: List[str], + self, node_name: str, name: str, work_dir: Path, args: List[str], env: Dict[str, str], stdout: Path, stderr: Path) -> None: """Start a process on a node. @@ -66,7 +67,7 @@ def start( exist. 
Args: - node_id: Id of the node to run the process on + node_name: Name of the node to run the process on name: Name under which this process will be known work_dir: Working directory in which to start args: Executable and arguments to run @@ -75,7 +76,7 @@ def start( stderr: File to redirect stderr to """ command = StartCommand(name, work_dir, args, env, stdout, stderr) - self._server.deposit_command(node_id, command) + self._server.deposit_command(node_name, command) def cancel_all(self) -> None: """Cancel all processes. @@ -84,8 +85,8 @@ def cancel_all(self) -> None: Called by NativeInstantiator. """ - for node_id in self._nodes: - self._server.deposit_command(node_id, CancelAllCommand()) + for node_name in self._nodes: + self._server.deposit_command(node_name, CancelAllCommand()) def get_finished(self) -> List[Tuple[str, int]]: """Returns names and exit codes of finished processes. @@ -105,8 +106,8 @@ def get_finished(self) -> List[Tuple[str, int]]: def shutdown(self) -> None: """Shut down the manager and its agents.""" command = ShutdownCommand() - for node_id in self._nodes: - self._server.deposit_command(node_id, command) + for node_name in self._nodes: + self._server.deposit_command(node_name, command) try: self._agents_process.wait(60) @@ -124,19 +125,18 @@ def shutdown(self) -> None: self._server.stop() - def report_resources(self, node_id: str, resources: Dict[str, Any]) -> None: + def report_resources(self, resources: OnNodeResources) -> None: """Report resources found on a node. Called by MAPServer from a server thread. Args: - node_id: Id of the node these resources are on - resources: Dict mapping resource type to resource ids + resources: Description of a node's resources """ - _logger.debug(f'Agent on {node_id} reported {resources}') + _logger.debug(f'Agent reported {resources}') with self._resources_lock: - self._nodes.append(node_id) - self._resources[node_id] = resources + self._nodes.append(resources.node_name) + self._resources.add_node(resources) def report_result(self, names_exit_codes: List[Tuple[str, int]]) -> None: """Report results of finished processes. diff --git a/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py b/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py index 93d063f8..badf6a46 100644 --- a/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py +++ b/libmuscle/python/libmuscle/native_instantiator/iagent_manager.py @@ -1,4 +1,6 @@ -from typing import Any, Dict, List, Tuple +from typing import List, Tuple + +from libmuscle.planner.resources import OnNodeResources class IAgentManager: @@ -7,13 +9,13 @@ class IAgentManager: Only implemented by AgentManager, and only exists to avoid a circular dependency between AgentManager, MAPServer, and MAPRequestHandler. Ugh. """ - def report_resources(self, node_id: str, resources: Dict[str, Any]) -> None: + def report_resources(self, resources: OnNodeResources) -> None: """Report resources found on a node. Called by MAPServer from a server thread. 
Args: - node_id: Id of the node these resources are on + node_name: Id of the node these resources are on resources: Dict mapping resource type to resource ids """ raise NotImplementedError() diff --git a/libmuscle/python/libmuscle/native_instantiator/map_server.py b/libmuscle/python/libmuscle/native_instantiator/map_server.py index 6ab847c0..87c3f5ca 100644 --- a/libmuscle/python/libmuscle/native_instantiator/map_server.py +++ b/libmuscle/python/libmuscle/native_instantiator/map_server.py @@ -10,6 +10,7 @@ from libmuscle.native_instantiator.agent.agent_commands import ( AgentCommand, CancelAllCommand, ShutdownCommand, StartCommand) from libmuscle.native_instantiator.iagent_manager import IAgentManager +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources from libmuscle.post_office import PostOffice from ymmsl import Reference @@ -52,22 +53,25 @@ def handle_request(self, request: bytes) -> bytes: return cast(bytes, msgpack.packb(response, use_bin_type=True)) def _report_resources( - self, node_id: str, resources: Dict[str, Any]) -> Any: + self, node_name: str, data: Dict[str, Any]) -> Any: """Handle a report resources request. This is used by the agent to report available resources on its node when it starts up. Args: - node_id: Hostname (id) of the node - resources: Resource dictionary, containing a single key 'cpu' which - maps to a list of lists of hwthread ids representing cores. + node_name: Name (hostname) of the node + data: Resource dictionary, containing a single key 'cpu' which maps to a + list of cores, where each core is a list of ints, starting with the core + id at index [0] followed by the hwthread ids of all hwthreads in this + core. """ - dec_cpu_resources = [frozenset(hwthreads) for hwthreads in resources['cpu']] - self._agent_manager.report_resources(node_id, {'cpu': dec_cpu_resources}) + cores = CoreSet((Core(ids[0], set(ids[1:])) for ids in data['cpu'])) + node_resources = OnNodeResources(node_name, cores) + self._agent_manager.report_resources(node_resources) return [ResponseType.SUCCESS.value] - def _get_command(self, node_id: str) -> Any: + def _get_command(self, node_name: str) -> Any: """Handle a get command request. This is used by the agent to ask if there's anything we would like it to do. @@ -78,9 +82,9 @@ def _get_command(self, node_id: str) -> Any: do). Args: - node_id: Hostname (id) of the agent's node + node_name: Hostname (name) of the agent's node """ - node_ref = Reference(node_id.replace('-', '_')) + node_ref = Reference(node_name.replace('-', '_')) next_request: Optional[bytes] = None if self._post_office.have_message(node_ref): next_request = self._post_office.get_message(node_ref) @@ -145,17 +149,17 @@ def stop(self) -> None: """ self._server.close() - def deposit_command(self, node_id: str, command: AgentCommand) -> None: + def deposit_command(self, node_name: str, command: AgentCommand) -> None: """Deposit a command for the given agent. This takes the given command and queues it for the given agent to pick up next time it asks us for one. 
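The 'cpu' entry of the report-resources message encodes each core as a flat list starting with the core id and followed by its hwthread ids. A minimal round-trip sketch of that encoding (illustrative only, with made-up ids), mirroring the agent-side encoding and the MAPServer._report_resources decoding shown above:

```
from libmuscle.planner.resources import Core, CoreSet, OnNodeResources

node_res = OnNodeResources('node001', CoreSet([Core(0, {0, 4}), Core(1, {1, 5})]))

# Agent-side encoding: [core id, hwthread id, hwthread id, ...] per core
enc_cpu = [[c.cid] + list(c.hwthreads) for c in node_res.cpu_cores]
# e.g. [[0, 0, 4], [1, 1, 5]]

# Manager-side decoding, as in MAPServer._report_resources
decoded = OnNodeResources('node001', CoreSet(
        Core(ids[0], set(ids[1:])) for ids in enc_cpu))

assert decoded == node_res
```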
Args: - node_id: Id of the node whose agent should execute the command + node_name: Name of the node whose agent should execute the command command: The command to send """ - agent = Reference(node_id.replace('-', '_')) + agent = Reference(node_name.replace('-', '_')) if isinstance(command, StartCommand): command_obj = [ diff --git a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py index a876a682..5c3ecd95 100644 --- a/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py +++ b/libmuscle/python/libmuscle/native_instantiator/native_instantiator.py @@ -1,191 +1,3 @@ -"""Module for examining resources and instantiating instances on them - -There's a huge comment here because there's a big mess here that took me forever to -figure out, so now I'm going to document it for the future. - - -Identifying hardware resources - -Today's computers all contain multi-core CPUs, often with symmetric multithreading -(SMT), also known as hyperthreading. This means that we have hardware threads -(hwthreads) and also cores, and then there's caches and memory as well but we're not -going into NUMA here. - -Cores and hwthreads are identified by number, but they have multiple different numbers -that are referred to by different names in different contexts, making everything very -confusing. So here are some definitions to disambiguate things. Note that this is still -a rather simplified representation, but it's enough for what we're doing here in -MUSCLE3. - - -Hardware threads - -A *hardware thread (hwthread)* is, at the hardware level, an instruction decoder. It -points to wherever in the code we are currently executing, and it can read the next -couple of instructions and figure out how to execute them. It can't actually execute -anything however, because it doesn't have the hardware that does that. - -Intel refers to hwthreads as "logical processors" and so does Linux, hwloc calls them -"processing units" or PUs and so does OpenMPI unless it uses the term hwthread just to -confuse things a bit more. - -Cores - -A *core* contains at least one hwthread, and at least one functional unit, which is a -hardware component that actually does calculations and other data processing. Within a -core, the hwthread(s) read instructions and pass them to the functional units to be -executed. If a core has more than one hwthread, then the CPU supports SMT. - -Intel refers to cores as "physical processors", hwloc calls them cores and so do most -other sources. We'll use cores here. - -Since a hwthread cannot do anything on its own, it's always part of a core. - -CPUs - -The term CPU is used in many ways by various bits of documentation, sometimes referring -to a hwthread or a core, but here we'll take it to mean a collection of cores in a -plastic box. Similar terms are *package* (referring to that plastic box with very many -metal pins) and *socket* (the thing the package mounts into), or *processor*, which was -originally used to refer to all of the above when CPUs still had only one core with only -one hwthread, and has now become ambiguous. - -Weird things can happen here, I've seen CPUs that as far as I can tell are a single -package, but nevertheless claim to have two sockets. I suspect that that's two physical -chips in a single plastic box, but I don't know for sure. - -Here, we're concerned with hwthreads and cores and how to identify them and assign -instances to them. 
- - -Linux - -On modern operating systems, hardware access is mediated by the operating system, and -we're mainly concerned with Linux here because that is what all the clusters are running -(see the note on macOS below). Information about the CPU(s) can be obtained on Linux -from the /proc/cpuinfo file, or equivalently but more modernly, from the files in -/sys/devices/system/cpu/cpu/topology/. - -Linux collects information about processors because it needs to run processes (programs, -software threads) on them on behalf of the user. Processes are assigned to hwthreads, so -that is what Linux considers a *processor*. /proc/cpuinfo lists all these processors, -and they each have their own directory /sys/devices/system/cpu/cpu. - -On Linux, processors have an id, which is that number in the directory, and is -listed under "processor" in /proc/cpuinfo. Since this number identifies a hwthread and -is assigned by Linux rather than being baked into the hardware, I'm calling it a -"logical hwthread id", this being a logical id of a hwthread, not an id of a logical -hwthread. It's also the id of a logical processor in Intel-speak. - -Hwthreads actually have a second number associated with them, which does come from the -hardware. In /proc/cpuinfo, that's listed under "apicid"; it doesn't seem to be -available from sysfs. Hwloc call this the "physical PU (its name for a hwthread) id", -and OpenMPI's mpirun manpage also refers to it as a "physical processor location". - -There's great potential for confusion here: the "physical PU id" and "physical processor -location" both identify a hardware-specified number (a physical id or a physical -location) for a hwthread. This is something completely different than what Intel calls a -"physical processor", which they use to refer to a core. - -MUSCLE3 uses logical hwthread ids everywhere, it does not use physical ids. - -Linux knows about how hwthreads are grouped into bigger things of course. Cores are -identified in Linux using the "core id", which is listed in /proc/cpuinfo and in -/sys/devices/system/cpu/cpu/topology/core_id. So for each hwthread, identified by its -logical id, we can look up which core it is a part of. The core id is a logical id, -assigned by Linux, not by the hardware. While logical hwthread ids seem to always be -consecutive at least on the hardware I've seen so far, core ids may have gaps. - -MUSCLE3 does not use core ids, although it uses groups of hwthread ids that contain all -the hwthreads for a given core. - - -Resource binding - -Running processes need something to run on, a hwthread. The assignment of process to -hwthread is done by the operating system's scheduler: when a process is ready to run, -the scheduler will try to find it a free hwthread to run on. - -The scheduler can be constrained in which hwthreads it considers for a given process, -which is known as binding the process. This may have performance benefits, because -moving a process from one hwthread to another takes time. In MUSCLE3, when running on a -cluster, each process is assigned its own specific set of hwthreads to run on, and we -try to bind the instance to the assigned hwthreads. - -Taskset - -How this is done depends on how the instance is started. For non-MPI instances, we use a -Linux utility named 'taskset' that starts another program with a giving binding. The -binding is expressed as an *affinity mask*, a string of bits that say whether a given -processor (hwthread) can be used by the process or not. 
Each position in the string of -bits corresponds to the hwthread with that logical id. - -OpenMPI - -OpenMPI can bind cores in various ways, we use a rankfile and the --use-hwthread-cpus -option to specify the logical hwthread ids we want to bind each MPI process (rank) to. -Note that OpenMPI by default binds to cores, and can also bind to various other things -including sockets. - -MPICH - -MPICH doesn't support binding, as far as I can see. - -Intel MPI - -Intel MPI uses logical hwthread ids-based masks, specified in an environment variable, -to go with a machinefile that lists the nodes to put each process on. - -Slurm srun - -Slurm's srun has a CPU_BIND environment variable that likewise contains logical hwthread -ids-based masks, and a hostfile that lists the nodes to put each process on. - -Here are some disambiguation tables to help with the confusion: - - -``` -MUSCLE3 hwthread logical hwthread id physical hwthread id - -Linux processor processor apicid - (/proc/cpuinfo only) - -cgroups always uses these - -taskset always uses these - -hwloc PU PU L# PU P# - -OpenMPI hwthread used in rankfile if used in rankfile if - --use-hwthread-cpus rmaps_rank_file_physical - is specified MCA param set - -Intel logical logical processor - processor number - -srun used by --bind-to - -psutil logical returned by Process.cpu_affinity() - core counted by psutil.cpu_count(logical=True) -``` - - -``` -MUSCLE3 core (uses list of hwthread ids) - -Linux core core id - -Hwloc core core L# - -OpenMPI core used in rankfile if - --use-hwthread-cpus not - specified - -psutil physical counted by psutil.cpu_count(logical=False) - core -``` - -""" import logging import multiprocessing as mp from os import chdir @@ -203,7 +15,7 @@ from libmuscle.native_instantiator.agent_manager import AgentManager from libmuscle.native_instantiator.global_resources import global_resources from libmuscle.native_instantiator.run_script import make_script, prep_resources -from libmuscle.planner.planner import Resources +from libmuscle.planner.resources import OnNodeResources, Resources from ymmsl import MPICoresResReq, MPINodesResReq, ResourceRequirements, ThreadedResReq @@ -309,24 +121,22 @@ def _send_resources(self) -> None: already_logged_smt = False resources = Resources() - agent_cores = self._agent_manager.get_resources() + agent_res = self._agent_manager.get_resources() env_ncpus = dict( zip(global_resources().nodes, global_resources().logical_cpus_per_node) ) - for node in env_ncpus: - if node not in agent_cores: + for node_name in env_ncpus: + if node_name not in agent_res.nodes(): _logger.warning( - f'The environment suggests we should have node {node},' + f'The environment suggests we should have node {node_name},' ' but no agent reported running on it. 
We won''t be able' ' to use this node.') else: - resources.cores[node] = set(agent_cores[node]) - - env_nncpus = env_ncpus[node] - ag_nncores = len(agent_cores[node]) - ag_nnthreads = sum((len(ts) for ts in agent_cores[node])) + env_nncpus = env_ncpus[node_name] + ag_nncores = len(agent_res[node_name].cpu_cores) + ag_nnthreads = len(list(agent_res[node_name].hwthreads())) if ag_nncores != ag_nnthreads and ag_nnthreads == env_nncpus: if not already_logged_smt: @@ -336,29 +146,41 @@ def _send_resources(self) -> None: ' each thread or MPI process.') already_logged_smt = True + resources.add_node(agent_res[node_name]) + elif ag_nncores < env_nncpus: _logger.warning( - f'Node {node} should have {env_nncpus} cores available,' - f' but the agent reports only {ag_nncores} available to it.' - f' We\'ll use the {ag_nncores} we seem to have.') + f'Node {node_name} should have {env_nncpus} cores' + f' available, but the agent reports only {ag_nncores}' + f' available to it. We\'ll use the {ag_nncores} we seem to' + ' have.') - resources.cores[node] = set(agent_cores[node]) + resources.add_node(agent_res[node_name]) elif env_nncpus < ag_nncores: _logger.warning( - f'Node {node} should have {env_nncpus} cores available,' - f' but the agent reports {ag_nncores} available to it.' - ' Maybe the cluster does not constrain resources? We\'ll' - f' use the {env_nncpus} that we should have got.') - resources.cores[node] = set(agent_cores[node][:env_nncpus]) - - for node in agent_cores: - if node not in env_ncpus: + f'Node {node_name} should have {env_nncpus} cores' + f' available, but the agent reports {ag_nncores} available' + ' to it. Maybe the cluster does not constrain resources?' + f' We\'ll use the {env_nncpus} that we should have got.') + resources.add_node( + OnNodeResources( + node_name, + agent_res[node_name].cpu_cores.get_first_cores( + env_nncpus))) + + else: + # no SMT, agent matches environment + resources.add_node(agent_res[node_name]) + + for node in agent_res: + if node.node_name not in env_ncpus: _logger.warning( - f'An agent is running on node {node} but the environment' - ' does not list it as ours. It seems that the node\'s' - ' hostname does not match what SLURM calls it. We will not use' - ' this node, because we\'re not sure it\'s really ours.') + f'An agent is running on node {node.node_name} but the' + ' environment does not list it as ours. It seems that the' + ' node\'s hostname does not match what SLURM calls it. We will' + ' not use this node, because we\'re not sure it\'s really ours.' 
+ ) self._resources_out.put(resources) @@ -391,7 +213,7 @@ def _instantiate(self, request: InstantiationRequest) -> None: _logger.debug(f'Instantiating {name} on {request.resources}') try: self._agent_manager.start( - next(iter(request.resources.cores.keys())), + request.resources.by_rank[0].node_name, name, request.work_dir, args, env, request.stdout_path, request.stderr_path) self._processes[name].status = ProcessStatus.RUNNING diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index c3aa2bfc..faa14a68 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -1,32 +1,30 @@ import logging from pathlib import Path -from typing import Dict, FrozenSet, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional, Tuple from libmuscle.errors import ConfigurationError from libmuscle.native_instantiator.slurm import slurm -from libmuscle.planner.planner import Resources +from libmuscle.planner.planner import ResourceAssignment from ymmsl import ( ExecutionModel, Implementation, MPICoresResReq, MPINodesResReq, ResourceRequirements, ThreadedResReq) -def direct_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: +def direct_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: """Create resources for a non-MPI program with taskset. - Taskset expects a set of cores on the command line, which we put into a - MUSCLE_CORES environment variable here. + Taskset expects a set of hwthreads on the command line, either as a comma-separated + list or as a hexadecimal mask. We generate both here and set two environment + variables. Args: - resources: The resources to describe + resources: The resource assignment to describe Return: No rank file, and a set of environment variables. 
""" env: Dict[str, str] = dict() - only_node_hwthreads_list = [ - hwthread - for core in next(iter(resources.cores.values())) - for hwthread in core] + only_node_hwthreads_list = list(resources.by_rank[0].hwthreads()) env['MUSCLE_BIND_LIST'] = ','.join(map(str, only_node_hwthreads_list)) @@ -36,34 +34,33 @@ def direct_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: return '', env -def openmpi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: +def openmpi_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: """Create resource description for OpenMPI mpirun Args: - resources: The resources to describe + resources: The resource assignment to describe Return: The contents of the rankfile, and a set of environment variables """ ranklines: List[str] = list() all_cores = ( - (node, ','.join(sorted(map(str, hwthreads)))) - for node, cores in resources.cores.items() - for hwthreads in cores) + (node_res, ','.join(map(str, sorted(node_res.hwthreads())))) + for node_res in resources.by_rank) - for i, (node, hwthreads) in enumerate(all_cores): - ranklines.append(f'rank {i}={node} slot={hwthreads}') + for i, (node_res, hwthreads) in enumerate(all_cores): + ranklines.append(f'rank {i}={node_res.node_name} slot={hwthreads}') rankfile = '\n'.join(ranklines) + '\n' return rankfile, dict() -def impi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: +def impi_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: """Create resource description for Intel MPI mpirun Args: - resources: The resources to describe + resources: The resource assignment to describe Return: The contents of the machinefile, and a set of environment variables @@ -73,11 +70,11 @@ def impi_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: raise NotImplementedError() -def mpich_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: +def mpich_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: """Create resource description for MPICH mpirun Args: - resources: The resources to describe + resources: The resource assignment to describe Return: The contents of the machinefile, and a set of environment variables @@ -87,7 +84,8 @@ def mpich_prep_resources(resources: Resources) -> Tuple[str, Dict[str, str]]: def srun_prep_resources( - resources: Resources, rankfile_location: Path) -> Tuple[str, Dict[str, str]]: + resources: ResourceAssignment, rankfile_location: Path + ) -> Tuple[str, Dict[str, str]]: """Create resource description for srun Args: @@ -98,18 +96,17 @@ def srun_prep_resources( The contents of the hostfile, and a set of environment variables """ hostfile = '\n'.join(( - node for node, cores in resources.cores.items() for _ in cores)) + node_res.node_name for node_res in resources.by_rank + for _ in node_res.hwthreads())) env = {'SLURM_HOSTFILE': str(rankfile_location)} - bind_list = [ - core for _, cores in resources.cores.items() for core in cores] - - def core_mask(core: FrozenSet[int]) -> str: - mask = sum((1 << hwthread) for hwthread in core) + def core_mask(hwthreads: Iterable[int]) -> str: + mask = sum((1 << hwthread) for hwthread in hwthreads) return format(mask, '#x') - bind_str = ','.join(map(core_mask, bind_list)) + bind_str = ','.join([ + core_mask(node_res.hwthreads()) for node_res in resources.by_rank]) env['SLURM_CPU_BIND'] = f'verbose,mask_cpu:{bind_str}' @@ -117,13 +114,13 @@ def core_mask(core: FrozenSet[int]) -> str: def prep_resources( - model: ExecutionModel, 
resources: Resources, rankfile_location: Path + model: ExecutionModel, resources: ResourceAssignment, rankfile_location: Path ) -> Tuple[str, Dict[str, str]]: """Create resource description for the given execution model. Args: model: The execution model to generate a description for - resources: The resources to describe + resources: The resource assignment to describe rankfile_location: Path to where the rankfile will be written Return: diff --git a/libmuscle/python/libmuscle/planner/planner.py b/libmuscle/python/libmuscle/planner/planner.py index 612a89a2..5a443a68 100644 --- a/libmuscle/python/libmuscle/planner/planner.py +++ b/libmuscle/python/libmuscle/planner/planner.py @@ -1,11 +1,12 @@ -from copy import copy, deepcopy +from copy import copy import logging -from typing import Dict, Iterable, FrozenSet, List, Mapping, Optional, Set, Tuple +from typing import Dict, Iterable, List, Mapping, Set, Tuple from ymmsl import ( Component, Configuration, Model, MPICoresResReq, MPINodesResReq, Operator, Reference, ResourceRequirements, ThreadedResReq) +from libmuscle.planner.resources import OnNodeResources, Resources from libmuscle.util import instance_indices @@ -383,125 +384,54 @@ def _calc_direct_succs_preds(self) -> None: self._direct_supersuccs[sender].add((receiver, shared_dims)) -class Resources: - """Designates a (sub)set of resources. +class ResourceAssignment: + """Assigned resources for each process of an instance. - Whether these resources are free or allocated in general or by - something specific depends on the context, this just says which - resources we're talking about. + Note that we use the classes from libmuscle.planner.resources to generically refer + to collections of resources, either to describe the available hardware or to + designate a subset of it that is occupied by a particular instance, or a subset that + isn't currently occupied. + + This class has more detailed information, because it knows for each process (MPI + rank) in the instance which subset of the overall resources for the instance it + should be on, which we need to launch it in the right place. Attributes: - cores: A dictionary mapping designated nodes to designated cores on them. Cores - are represented by sets of hwthreads they have. + by_rank: List of OnNodeResources objects containing assigned resources, + indexed by rank. """ - def __init__(self, cores: Optional[Dict[str, Set[FrozenSet[int]]]] = None) -> None: - """Create a Resources object with the given cores. + def __init__(self, by_rank: List[OnNodeResources]) -> None: + """Create a ResourceAssignment. Args: - cores: Cores to be designated by this object. + by_rank: List of OnNodeResources objects containing assigned resources, + indexed by rank. 
""" - if cores is None: - self.cores: Dict[str, Set[FrozenSet[int]]] = {} - else: - self.cores = cores - - def __copy__(self) -> 'Resources': - """Copy the object.""" - return Resources(deepcopy(self.cores)) + self.by_rank = by_rank def __eq__(self, other: object) -> bool: - """Check for equality.""" - if not isinstance(other, Resources): + if not isinstance(other, ResourceAssignment): return NotImplemented - if len(self.cores) != len(other.cores): - return False - - for node, cores in self.cores.items(): - if node not in other.cores: - return False - if other.cores[node] != cores: - return False - return True - - def __iadd__(self, other: 'Resources') -> 'Resources': - """Add the resources in the argument to this object.""" - for node in other.cores: - if node in self.cores: - self.cores[node] |= other.cores[node] - else: - self.cores[node] = set(other.cores[node]) - return self - - def __isub__(self, other: 'Resources') -> 'Resources': - """Remove the resources in the argument from this object.""" - for node in other.cores: - if node in self.cores: - self.cores[node] -= other.cores[node] - if not self.cores[node]: - del self.cores[node] - return self + return ( + len(self.by_rank) == len(other.by_rank) and + all([ + snr == onr + for snr, onr in zip(self.by_rank, other.by_rank)])) def __str__(self) -> str: - """Return a human-readable string representation.""" - def collapse_ranges(cores: Set[FrozenSet[int]]) -> str: - if len(cores) == 0: - return '' - - result = list() - hwthreads = sorted((hwthread for core in cores for hwthread in core)) - start = 0 - i = 1 - while i <= len(hwthreads): - if (i == len(hwthreads)) or (hwthreads[i-1] != hwthreads[i] - 1): - if start == i - 1: - # run of one - result.append(str(hwthreads[i-1])) - else: - # run of at least two - result.append(f'{hwthreads[start]}-{hwthreads[i-1]}') - start = i - i += 1 - return ','.join(result) - - return 'Resources(' + '; '.join([ - n + ': ' + collapse_ranges(cs) - for n, cs in self.cores.items()]) + ')' + # str(list()) uses repr() on the elements, we want str() + str_rbr = ', '.join([str(nr) for nr in self.by_rank]) + return f'[{str_rbr}]' def __repr__(self) -> str: - """Return a string representation.""" - return f'Resources({self.cores})' - - def nodes(self) -> Iterable[str]: - """Returns the nodes on which we designate resources.""" - return self.cores.keys() - - def total_cores(self) -> int: - """Returns the total number of cores (not hwthreads) designated.""" - return sum([len(cs) for cs in self.cores.values()]) - - def isdisjoint(self, other: 'Resources') -> bool: - """Returns whether we share resources with other.""" - for node, cores in self.cores.items(): - if node in other.cores: - if not cores.isdisjoint(other.cores[node]): - return False - return True - - @staticmethod - def union(resources: Iterable['Resources']) -> 'Resources': - """Combines the resources into one. + return f'ResourceAssignment({repr(self.by_rank)})' - Args: - resources: A collection of resources to merge. - - Return: - A Resources object referring to all the resources in the - input. 
- """ + def as_resources(self) -> Resources: + """Return a Resources representing the combined assigned resources.""" result = Resources() - for cur_resources in resources: - result += cur_resources + for node_res in self.by_rank: + result.merge_node(node_res) return result @@ -511,12 +441,12 @@ class InsufficientResourcesAvailable(RuntimeError): class Planner: """Allocates resources and keeps track of allocations.""" - def __init__(self, all_resources: Resources): - """Create a ResourceManager. + def __init__(self, all_resources: Resources) -> None: + """Create a Planner. Args: all_resources: An object describing the available resources - to be managed by this ResourceManager. + for the planner to use. """ self._all_resources = all_resources self._allocations: Dict[Reference, Resources] = {} @@ -525,7 +455,7 @@ def __init__(self, all_resources: Resources): def allocate_all( self, configuration: Configuration, virtual: bool = False - ) -> Dict[Reference, Resources]: + ) -> Dict[Reference, ResourceAssignment]: """Allocates resources for the given components. Allocation can occur either on a fixed set of available @@ -546,9 +476,9 @@ def allocate_all( virtual: Allocate on virtual resources or not, see above Returns: - Resources for each instance required by the model. + Assigned resources for each instance required by the model. """ - result: Dict[Reference, Resources] = {} + result: Dict[Reference, ResourceAssignment] = {} _logger.debug(f'Planning on resources {self._all_resources}') @@ -580,7 +510,7 @@ def allocate_all( done = False while not done: try: - result[instance] = self._allocate_instance( + result[instance] = self._assign_instance( instance, component, requirements[component.name], conflicting_names, virtual) @@ -686,11 +616,14 @@ def _expand_resources( """Adds an extra virtual node to the available resources.""" taken = True while taken: - new_node = 'node{:06d}'.format(self._next_virtual_node) - taken = new_node in self._all_resources.cores + new_node_name = 'node{:06d}'.format(self._next_virtual_node) + taken = new_node_name in self._all_resources.nodes() self._next_virtual_node += 1 - num_cores = len(next(iter(self._all_resources.cores.values()))) + new_node = copy(next(iter(self._all_resources))) + new_node.node_name = new_node_name + + num_cores = len(new_node.cpu_cores) if isinstance(req, ThreadedResReq): if req.threads > num_cores: raise InsufficientResourcesAvailable( @@ -704,14 +637,14 @@ def _expand_resources( f' {req.threads_per_mpi_process} threads per process,' f' which is impossible with {num_cores} cores per' ' node.') - self._all_resources.cores[new_node] = { - frozenset([i]) for i in range(num_cores)} - def _allocate_instance( + self._all_resources.add_node(new_node) + + def _assign_instance( self, instance: Reference, component: Component, requirements: ResourceRequirements, simultaneous_instances: Set[Reference], virtual: bool - ) -> Resources: + ) -> ResourceAssignment: """Allocates resources for the given instance. If we are on real resources, and the instance requires more @@ -720,7 +653,7 @@ def _allocate_instance( resources, this will raise InsufficientResourcesAvailable. 
Args: - instance: The instance to allocate for + instance: The instance to assign resources to component: The component it is an instance of requirements: Its resource requirements simultaneous_instances: Instances which may execute @@ -729,9 +662,9 @@ def _allocate_instance( virtual: Whether we are on virtual resources Returns: - A Resources object describing the resources allocated + The resources assigned to each process in the instance """ - allocation = Resources({}) + assignment = ResourceAssignment([]) free_resources = copy(self._all_resources) for other in self._allocations: @@ -741,8 +674,8 @@ def _allocate_instance( _logger.debug(f'Free resources: {free_resources}') try: if isinstance(requirements, ThreadedResReq): - allocation = self._allocate_thread_block( - free_resources, requirements.threads) + assignment.by_rank.append(self._assign_thread_block( + free_resources, requirements.threads)) elif isinstance(requirements, MPICoresResReq): if requirements.threads_per_mpi_process != 1: @@ -750,10 +683,10 @@ def _allocate_instance( 'Multiple threads per MPI process is not supported' ' yet. Please make an issue on GitHub.') for proc in range(requirements.mpi_processes): - allocation += self._allocate_thread_block( - free_resources, - requirements.threads_per_mpi_process) - free_resources -= allocation + block = self._assign_thread_block( + free_resources, requirements.threads_per_mpi_process) + assignment.by_rank.append(block) + free_resources -= Resources([block]) elif isinstance(requirements, MPINodesResReq): raise RuntimeError( @@ -764,37 +697,81 @@ def _allocate_instance( if not self._allocations and not virtual: # There are no other allocations and it's still not # enough. Just give it all and hope for the best. - _logger.warning(( - 'Instance {} requires more resources than are' - ' available in total. Oversubscribing this' - ' instance.').format(instance)) - allocation = copy(self._all_resources) + assignment = self._oversubscribe_instance(instance, requirements) else: raise - self._allocations[instance] = allocation - return allocation + self._allocations[instance] = assignment.as_resources() + return assignment - def _allocate_thread_block( - self, free_resources: Resources, threads: int) -> Resources: - """Allocate resources for a group of threads. + def _assign_thread_block( + self, free_resources: Resources, num_threads: int) -> OnNodeResources: + """Assign resources for a group of threads. - This chooses a set of cores on the same node. It - returns the allocated resources; it doesn't update - self._allocations or free_resources. + This chooses a set of cores on the same node. It returns the + assigned resources; it doesn't update self._allocations or free_resources. 
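A small sketch of what assigning a thread block to a single node amounts to (invented node name and core ids; get_first_cores is the CoreSet method added further down in this patch):

```
from libmuscle.planner.resources import Core, CoreSet, OnNodeResources

# A free node with four single-hwthread cores...
free_node = OnNodeResources('node001', CoreSet(
        [Core(1, {1}), Core(2, {2}), Core(3, {3}), Core(4, {4})]))

# ...from which a two-thread block takes the first two free cores.
block = OnNodeResources('node001', free_node.cpu_cores.get_first_cores(2))
print(block)    # OnNodeResources(node001, c: 1-2(1-2))
```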
Args: - threads: Number of cores + num_threads: Number of threads to allocate for free_resources: Available resources to allocate from Returns: - The allocated resources + The assigned resources """ - for node in free_resources.nodes(): - if len(free_resources.cores[node]) >= threads: - available_cores = sorted(free_resources.cores[node], key=sorted) + for node in free_resources: + if len(node.cpu_cores) >= num_threads: + available_cores = node.cpu_cores _logger.debug(f'available cores: {available_cores}') - to_reserve = set(available_cores[:threads]) + to_reserve = available_cores.get_first_cores(num_threads) _logger.debug(f'assigned {to_reserve}') - return Resources({node: to_reserve}) + return OnNodeResources(node.node_name, to_reserve) raise InsufficientResourcesAvailable() + + def _oversubscribe_instance( + self, instance: Reference, requirements: ResourceRequirements + ) -> ResourceAssignment: + """Oversubscribe an instance. + + This is called when all resources are available and we still cannot fit an + instance, i.e. that single instance requires more resources than we have + available in total. In that case, we're just going to map it onto the resources + we have and hope for the best, which is what this function does. + + There's a lot of repetition between this and the code above. There's probably a + cleaner way to do this, but it'll do for now. Eventually we'll have an optimiser + and all this goes away anyway. + + Args: + instance: The instance we're oversubscribing + requirements: The required resources + + Returns: + An oversubscribed resource assignment + """ + _logger.warning( + f'Instance {instance} requires more resources than are available in' + ' total. Oversubscribing this instance.') + + res_by_rank: List[OnNodeResources] = list() + + if isinstance(requirements, ThreadedResReq): + res_by_rank.append(copy(next(iter(self._all_resources)))) + + elif isinstance(requirements, MPICoresResReq): + if requirements.threads_per_mpi_process != 1: + raise RuntimeError( + 'Multiple threads per MPI process is not supported yet. Please' + ' make an issue on GitHub.') + + free_resources = copy(self._all_resources) + for proc in range(requirements.mpi_processes): + if free_resources.total_cores() < requirements.threads_per_mpi_process: + free_resources = copy(self._all_resources) + + block = self._assign_thread_block( + free_resources, requirements.threads_per_mpi_process) + + res_by_rank.append(block) + free_resources -= Resources([block]) + + return ResourceAssignment(res_by_rank) diff --git a/libmuscle/python/libmuscle/planner/resources.py b/libmuscle/python/libmuscle/planner/resources.py new file mode 100644 index 00000000..0e1dd41a --- /dev/null +++ b/libmuscle/python/libmuscle/planner/resources.py @@ -0,0 +1,647 @@ +"""Module for describing compute resources + +There's a huge comment here because there's a big mess here that took me forever to +figure out, so now I'm going to document it for the future. + + +Identifying hardware resources + +Today's computers all contain multi-core CPUs, often with symmetric multithreading +(SMT), also known as hyperthreading. This means that we have hardware threads +(hwthreads) and also cores, and then there's caches and memory as well but we're not +going into NUMA here. + +Cores and hwthreads are identified by number, but they have multiple different numbers +that are referred to by different names in different contexts, making everything very +confusing. So here are some definitions to disambiguate things. 
Note that this is still +a rather simplified representation, but it's enough for what we're doing here in +MUSCLE3. + + +Hardware threads + +A *hardware thread (hwthread)* is, at the hardware level, an instruction decoder. It +points to wherever in the code we are currently executing, and it can read the next +couple of instructions and figure out how to execute them. It can't actually execute +anything however, because it doesn't have the hardware that does that. + +Intel refers to hwthreads as "logical processors" and so does Linux, hwloc calls them +"processing units" or PUs and so does OpenMPI unless it uses the term hwthread just to +confuse things a bit more. + +Cores + +A *core* contains at least one hwthread, and at least one functional unit, which is a +hardware component that actually does calculations and other data processing. Within a +core, the hwthread(s) read instructions and pass them to the functional units to be +executed. If a core has more than one hwthread, then the CPU supports SMT. + +Intel refers to cores as "physical processors", hwloc calls them cores and so do most +other sources. We'll use cores here. + +Since a hwthread cannot do anything on its own, it's always part of a core. + +CPUs + +The term CPU is used in many ways by various bits of documentation, sometimes referring +to a hwthread or a core, but here we'll take it to mean a collection of cores in a +plastic box. Similar terms are *package* (referring to that plastic box with very many +metal pins) and *socket* (the thing the package mounts into), or *processor*, which was +originally used to refer to all of the above when CPUs still had only one core with only +one hwthread, and has now become ambiguous. + +Weird things can happen here, I've seen CPUs that as far as I can tell are a single +package, but nevertheless claim to have two sockets. I suspect that that's two physical +chips in a single plastic box, but I don't know for sure. + +Here, we're concerned with hwthreads and cores and how to identify them and assign +instances to them. + + +Linux + +On modern operating systems, hardware access is mediated by the operating system, and +we're mainly concerned with Linux here because that is what all the clusters are running +(see the note on macOS below). Information about the CPU(s) can be obtained on Linux +from the /proc/cpuinfo file, or equivalently but more modernly, from the files in +/sys/devices/system/cpu/cpu/topology/. + +Linux collects information about processors because it needs to run processes (programs, +software threads) on them on behalf of the user. Processes are assigned to hwthreads, so +that is what Linux considers a *processor*. /proc/cpuinfo lists all these processors, +and they each have their own directory /sys/devices/system/cpu/cpu. + +On Linux, processors have an id, which is that number in the directory, and is +listed under "processor" in /proc/cpuinfo. Since this number identifies a hwthread and +is assigned by Linux rather than being baked into the hardware, I'm calling it a +"logical hwthread id", this being a logical id of a hwthread, not an id of a logical +hwthread. It's also the id of a logical processor in Intel-speak. + +Hwthreads actually have a second number associated with them, which does come from the +hardware. In /proc/cpuinfo, that's listed under "apicid"; it doesn't seem to be +available from sysfs. Hwloc call this the "physical PU (its name for a hwthread) id", +and OpenMPI's mpirun manpage also refers to it as a "physical processor location". 
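For concreteness, here is a small sketch (illustrative only, and not necessarily what the MUSCLE3 agent does) that builds the logical-hwthread-id to core-id mapping described above from sysfs on a Linux machine with a single CPU package:

```
from collections import defaultdict
from glob import glob
import re

# core id -> set of logical hwthread ids (assumes a single CPU package,
# since core ids are only unique within a package)
cores = defaultdict(set)
for path in glob('/sys/devices/system/cpu/cpu[0-9]*/topology/core_id'):
    hwthread_id = int(re.search(r'cpu(\d+)', path).group(1))
    with open(path) as f:
        core_id = int(f.read())
    cores[core_id].add(hwthread_id)

print(dict(cores))    # e.g. {0: {0, 4}, 1: {1, 5}, ...} with SMT enabled
```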
+ +There's great potential for confusion here: the "physical PU id" and "physical processor +location" both identify a hardware-specified number (a physical id or a physical +location) for a hwthread. This is something completely different than what Intel calls a +"physical processor", which they use to refer to a core. + +MUSCLE3 uses logical hwthread ids everywhere, it does not use physical ids. + +Linux knows about how hwthreads are grouped into bigger things of course. Cores are +identified in Linux using the "core id", which is listed in /proc/cpuinfo and in +/sys/devices/system/cpu/cpu/topology/core_id. So for each hwthread, identified by its +logical id, we can look up which core it is a part of. The core id is a logical id, +assigned by Linux, not by the hardware. While logical hwthread ids seem to always be +consecutive at least on the hardware I've seen so far, core ids may have gaps. + +MUSCLE3 does not use core ids, although it uses groups of hwthread ids that contain all +the hwthreads for a given core. + + +Resource binding + +Running processes need something to run on, a hwthread. The assignment of process to +hwthread is done by the operating system's scheduler: when a process is ready to run, +the scheduler will try to find it a free hwthread to run on. + +The scheduler can be constrained in which hwthreads it considers for a given process, +which is known as binding the process. This may have performance benefits, because +moving a process from one hwthread to another takes time. In MUSCLE3, when running on a +cluster, each process is assigned its own specific set of hwthreads to run on, and we +try to bind the instance to the assigned hwthreads. + +Taskset + +How this is done depends on how the instance is started. For non-MPI instances, we use a +Linux utility named 'taskset' that starts another program with a giving binding. The +binding is expressed as an *affinity mask*, a string of bits that say whether a given +processor (hwthread) can be used by the process or not. Each position in the string of +bits corresponds to the hwthread with that logical id. + +OpenMPI + +OpenMPI can bind cores in various ways, we use a rankfile and the --use-hwthread-cpus +option to specify the logical hwthread ids we want to bind each MPI process (rank) to. +Note that OpenMPI by default binds to cores, and can also bind to various other things +including sockets. + +MPICH + +MPICH doesn't support binding, as far as I can see. + +Intel MPI + +Intel MPI uses logical hwthread ids-based masks, specified in an environment variable, +to go with a machinefile that lists the nodes to put each process on. + +Slurm srun + +Slurm's srun has a CPU_BIND environment variable that likewise contains logical hwthread +ids-based masks, and a hostfile that lists the nodes to put each process on. 
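A tiny sketch of the hwthread-id based masks mentioned for taskset and srun above, matching the mask construction used for SLURM_CPU_BIND elsewhere in this patch (the hwthread ids are made up):

```
def core_mask(hwthreads):
    # bit i of the mask is set iff hwthread i may be used
    mask = sum(1 << hwthread for hwthread in hwthreads)
    return format(mask, '#x')

print(core_mask({0, 1}))        # 0x3
print(core_mask({4, 5}))        # 0x30
print(core_mask({0, 1, 8, 9}))  # 0x303
```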
+ +Here are some disambiguation tables to help with the confusion: + + +``` +MUSCLE3 hwthread logical hwthread id physical hwthread id + +Linux processor processor apicid + (/proc/cpuinfo only) + +cgroups always uses these + +taskset always uses these + +hwloc PU PU L# PU P# + +OpenMPI hwthread used in rankfile if used in rankfile if + --use-hwthread-cpus rmaps_rank_file_physical + is specified MCA param set + +Intel logical logical processor + processor number + +srun used by --bind-to + +psutil logical returned by Process.cpu_affinity() + core counted by psutil.cpu_count(logical=True) +``` + + +``` +MUSCLE3 core core id + +Linux core core id + +Hwloc core core L# + +OpenMPI core used in rankfile if + --use-hwthread-cpus not + specified + +psutil physical counted by psutil.cpu_count(logical=False) + core +``` + +""" +from copy import copy, deepcopy +from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple + + +class Core: + """Describes a CPU core or designates a core or one or more hwthreads. + + A core is a group of functional units with one or more instruction decoders. If the + core supports symmetric multithreading (SMT, aka hyperthreading) then there will be + more than one instruction decoder or hardware thread in the core. + + Note that the term "logical CPU" refers to an instruction decoder/hwthread. If the + processor does not support SMT, then each core has a single decoder and so a logical + CPU is also a core. + + This class can be used in different ways with slighly different interpretations. + When describing hardware resources, it describes a core and all of its hwthreads. In + this case, cid is the core id, and hwthreads contains the hwthread ids of all + hwthreads on this core. If no SMT is supported, then there will be only one + hwthread id. + + When designating a whole core (e.g. for use by a process), cid is set to the id of + the core, and hwthreads contains all of the hwthreads on that core. When designating + a hwthread on a particular core, cid is set to the id of the core and hwthreads + contains the designated (single) hwthread. + + MUSCLE3 never assigns swthreads to subsets of hwthreads on a core, it assigns them + to either a single hwthread or a single whole core. So if more than one hwthread is + given, then we can assume that those are all the hwthreads on that core. + + Objects of this class automatically deepcopy when copied. This means that you can + make a copy using copy.copy() and modify that copy anywhere without changing the + original. 
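As a quick illustration of the two ways of using Core described above (a sketch with made-up ids, not part of the patch):

```
from copy import copy
from libmuscle.planner.resources import Core

# Describing core 2 of a CPU with SMT: it has hwthreads 2 and 10.
whole_core = Core(2, {2, 10})

# Designating only hwthread 10 on that core for some process:
one_hwthread = Core(2, {10})

# Subtracting the designation leaves hwthread 2 available on this core.
remaining = copy(whole_core)
remaining -= one_hwthread
print(remaining)    # 2(2)
```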
+ + Args: + cid: ID of this core, to be used to refer to it + hwthreads: Ids of hwthreads (logical CPUs) belonging to this core + """ + def __init__(self, cid: int, hwthreads: Set[int]) -> None: + """Create a Core""" + self.cid = cid + self.hwthreads = copy(hwthreads) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Core): + return NotImplemented + + return self.cid == other.cid and self.hwthreads == other.hwthreads + + def __len__(self) -> int: + return len(self.hwthreads) + + def __copy__(self) -> 'Core': + return Core(self.cid, self.hwthreads) + + def __or__(self, other: object) -> 'Core': + if not isinstance(other, Core): + return NotImplemented + + if other.cid != self.cid: + raise ValueError('Cannot merge hwthreads on different cores') + + return Core(self.cid, self.hwthreads | other.hwthreads) + + def __ior__(self, other: object) -> 'Core': + if not isinstance(other, Core): + return NotImplemented + + if other.cid != self.cid: + raise ValueError('Cannot merge hwthreads on different cores') + + self.hwthreads |= other.hwthreads + return self + + def __isub__(self, other: object) -> 'Core': + if not isinstance(other, Core): + return NotImplemented + + if other.cid != self.cid: + raise ValueError('Cannot merge hwthreads on different cores') + + self.hwthreads -= other.hwthreads + return self + + def __str__(self) -> str: + hwthreads = ','.join(map(str, sorted(self.hwthreads))) + return f'{self.cid}({hwthreads})' + + def __repr__(self) -> str: + hwthreads = ','.join(map(str, sorted(self.hwthreads))) + return f'Core({self.cid}, {{{hwthreads}}})' + + def isdisjoint(self, other: 'Core') -> bool: + """Returns whether we share resources with other.""" + if self.cid != other.cid: + raise ValueError('Cannot compare hwthreads on different cores') + + return self.hwthreads.isdisjoint(other.hwthreads) + + +class CoreSet: + """A set of cores on a single node. + + This exists to make it a bit easier to operate on sets of cores, merging and + subtracting them. + + Objects of this class automatically deepcopy when copied. This means that you can + make a copy using copy.copy() and modify that copy anywhere without changing the + original. + """ + def __init__(self, cores: Iterable[Core]) -> None: + """Create a CoreSet + + Args: + cores: A set of cores to contain. 
+ """ + self._cores = {c.cid: c for c in cores} + + def __eq__(self, other: object) -> bool: + if not isinstance(other, CoreSet): + return NotImplemented + + if len(self._cores) != len(other._cores): + return False + + for cid, core in self._cores.items(): + if cid not in other._cores: + return False + if core.hwthreads != other._cores[cid].hwthreads: + return False + + return True + + def __len__(self) -> int: + return len(self._cores) + + def __iter__(self) -> Iterator[Core]: + return iter(self._cores.values()) + + def __copy__(self) -> 'CoreSet': + return CoreSet(deepcopy(list(self._cores.values()))) + + def __ior__(self, other: object) -> 'CoreSet': + if not isinstance(other, CoreSet): + return NotImplemented + + for cid, core in other._cores.items(): + if cid in self._cores: + self._cores[cid] |= core + else: + self._cores[cid] = copy(core) + + return self + + def __isub__(self, other: object) -> 'CoreSet': + if not isinstance(other, CoreSet): + return NotImplemented + + for cid, core in other._cores.items(): + if cid in self._cores: + self._cores[cid] -= core + if not self._cores[cid].hwthreads: + del self._cores[cid] + + return self + + def __str__(self) -> str: + def collapse_ranges(ids: List[int]) -> str: + if len(ids) == 0: + return '' + + result = list() + start = 0 + i = 1 + while i <= len(ids): + if (i == len(ids)) or (ids[i-1] != ids[i] - 1): + if start == i - 1: + # run of one + result.append(str(ids[i-1])) + else: + # run of at least two + result.append(f'{ids[start]}-{ids[i-1]}') + start = i + i += 1 + return ','.join(result) + + cores = sorted((c.cid for c in self._cores.values())) + hwthreads = sorted((t for c in self._cores.values() for t in c.hwthreads)) + + return f'{collapse_ranges(cores)}({collapse_ranges(hwthreads)})' + + def __repr__(self) -> str: + cores = ', '.join(map(repr, sorted(self._cores.values(), key=lambda c: c.cid))) + return f'CoreSet({{{cores}}})' + + def isdisjoint(self, other: 'CoreSet') -> bool: + """Returns whether we share resources with other.""" + for cid, core in self._cores.items(): + if cid in other._cores: + if not core.isdisjoint(other._cores[cid]): + return False + return True + + def get_first_cores(self, num_cores: int) -> 'CoreSet': + """Returns the first num_cores cores in this set. + + Args: + The number of cores to select. + """ + result = copy(self) + cids = list(self._cores.keys()) + selected = cids[:num_cores] + if len(selected) < num_cores: + raise RuntimeError('Tried to get more cores than available') + + result._cores = {c.cid: c for c in result._cores.values() if c.cid in selected} + return result + + +class OnNodeResources: + """Resources on a single node, currently only CPU cores. + + This represents a set of resources on a single node, either all of the resources + available or some subset of interest. + + Objects of this class automatically deepcopy when copied. This means that you can + make a copy using copy.copy() and modify that copy anywhere without changing the + original. + """ + def __init__(self, node_name: str, cpu_cores: CoreSet) -> None: + """Create an OnNodeResources. + + Args: + name: (Host)name of the node. + cpu_cores: A set of cores for this node. 
+ """ + self.node_name = node_name + self.cpu_cores = cpu_cores + + def __eq__(self, other: object) -> bool: + if not isinstance(other, OnNodeResources): + return NotImplemented + + return ( + isinstance(other, OnNodeResources) and + self.node_name == other.node_name and + self.cpu_cores == other.cpu_cores) + + def __copy__(self) -> 'OnNodeResources': + return OnNodeResources(self.node_name, copy(self.cpu_cores)) + + def __ior__(self, other: object) -> 'OnNodeResources': + if not isinstance(other, OnNodeResources): + return NotImplemented + + if self.node_name != other.node_name: + raise ValueError('Cannot merge resources on different nodes') + + self.cpu_cores |= other.cpu_cores + return self + + def __isub__(self, other: object) -> 'OnNodeResources': + if not isinstance(other, OnNodeResources): + return NotImplemented + + if self.node_name != other.node_name: + raise ValueError('Cannot remove resources on different nodes') + + self.cpu_cores -= other.cpu_cores + return self + + def __str__(self) -> str: + return f'OnNodeResources({self.node_name}, c: {str(self.cpu_cores)})' + + def __repr__(self) -> str: + return f'OnNodeResources("{self.node_name}", {repr(self.cpu_cores)})' + + def hwthreads(self) -> Iterable[int]: + """Return the hwthreads in this node.""" + return (thread for core in self.cpu_cores for thread in core.hwthreads) + + def total_cores(self) -> int: + """Return the number of CPU cores in this node.""" + return len(self.cpu_cores) + + def isdisjoint(self, other: 'OnNodeResources') -> bool: + """Returns whether we share resources with other.""" + return ( + self.node_name != other.node_name or + self.cpu_cores.isdisjoint(other.cpu_cores)) + + +class Resources: + """Designates a (sub)set of resources. + + Whether these resources are free or allocated in general or by something specific + depends on the context, this just says which resources we're talking about. + + Objects of this class automatically deepcopy when copied. This means that you can + make a copy using copy.copy() and modify that copy anywhere without changing the + original. + + Attributes: + nodes: A collection of nodes to include in this resource set + """ + def __init__(self, nodes: Optional[Iterable[OnNodeResources]] = None) -> None: + """Create a Resources object with the given nodes. + + Args: + nodes: OnNodeResourcess to be designated by this object. 
+ """ + if nodes is None: + self._nodes: Dict[str, OnNodeResources] = {} + else: + self._nodes = {n.node_name: n for n in nodes} + + def __len__(self) -> int: + return len(self._nodes) + + def __iter__(self) -> Iterator[OnNodeResources]: + return iter(self._nodes.values()) + + def __getitem__(self, node_name: str) -> OnNodeResources: + return self._nodes[node_name] + + def __eq__(self, other: object) -> bool: + """Check for equality.""" + if not isinstance(other, Resources): + return NotImplemented + + if len(self._nodes) != len(other._nodes): + return False + + for node_name, node in self._nodes.items(): + if node_name not in other._nodes: + return False + if other._nodes[node_name] != node: + return False + + return True + + def __copy__(self) -> 'Resources': + """Copy the object.""" + return Resources((copy(n) for n in self._nodes.values())) + + def __ior__(self, other: object) -> 'Resources': + """Add the resources in the argument to this object.""" + if not isinstance(other, Resources): + return NotImplemented + + for node_name, other_node in other._nodes.items(): + if node_name in self._nodes: + self._nodes[node_name] |= other_node + else: + self._nodes[node_name] = copy(other_node) + + return self + + def __isub__(self, other: object) -> 'Resources': + """Remove the resources in the argument from this object.""" + if not isinstance(other, Resources): + return NotImplemented + + for node_name, other_node in other._nodes.items(): + if node_name in self._nodes: + self._nodes[node_name] -= other_node + if not self._nodes[node_name]: + del self._nodes[node_name] + + return self + + def __str__(self) -> str: + """Return a human-readable string representation.""" + nodes = ','.join( + map(str, sorted(self._nodes.values(), key=lambda n: n.node_name))) + return f'Resources({nodes})' + + def __repr__(self) -> str: + """Return a string representation.""" + nodes = sorted(self._nodes.values(), key=lambda n: n.node_name) + return f'Resources({nodes})' + + def nodes(self) -> Iterable[str]: + """Return the names of the nodes on which we designate resources.""" + return self._nodes.keys() + + def total_cores(self) -> int: + """Return the total number of cores (not hwthreads) designated.""" + return sum((len(n.cpu_cores) for n in self._nodes.values())) + + def cores(self) -> Iterable[Tuple[str, int]]: + """Return this resources as a list of node, core.""" + return ( + (node.node_name, core.cid) + for node in self._nodes.values() for core in node.cpu_cores) + + def hwthreads(self) -> Iterable[Tuple[str, int]]: + """Return this resources as a list of node, hwthread.""" + return ( + (node.node_name, hwthread) + for node in self._nodes.values() for hwthread in node.hwthreads()) + + def isdisjoint(self, other: 'Resources') -> bool: + """Return whether we share resources with other.""" + for node_name, node in self._nodes.items(): + if node_name in other._nodes: + if not node.isdisjoint(other._nodes[node_name]): + return False + return True + + def add_node(self, node_res: OnNodeResources) -> None: + """Add a node's resources. + + This absorbs node_res into this Resources object, so if you change node_res + after adding it, the changes will be reflected in this Resources. + + Args: + node_res: Resources on a node not yet included in this Resources. + + Raises: + RuntimeError: if we already have a node with this node name. + """ + if node_res.node_name in self._nodes: + raise RuntimeError( + 'Tried to add a OnNodeResources to a Resources for a node that is' + ' already present. 
This is a bug in MUSCLE3, please report it on' + ' GitHub.') + + self._nodes[node_res.node_name] = node_res + + def merge_node(self, node_res: OnNodeResources) -> None: + """Merges a node's resources + + This always copies the object. + + Args: + node_res: Resources on a node that may already be included in this + Resources. + """ + if node_res.node_name in self._nodes: + self._nodes[node_res.node_name] |= node_res + else: + self._nodes[node_res.node_name] = copy(node_res) + + @staticmethod + def union(resources: Iterable['Resources']) -> 'Resources': + """Combines the resources into one. + + Args: + resources: A collection of resources to merge. + + Return: + A Resources object referring to all the resources in the + input. + """ + result = Resources() + for cur_resources in resources: + result |= cur_resources + return result diff --git a/libmuscle/python/libmuscle/planner/test/test_planner.py b/libmuscle/python/libmuscle/planner/test/test_planner.py index 25883aab..273b0c7f 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner.py @@ -1,7 +1,3 @@ -from libmuscle.planner.planner import ( - InsufficientResourcesAvailable, ModelGraph, Planner, Resources) - -from copy import copy import pytest from typing import Dict, List @@ -9,15 +5,22 @@ Component, Conduit, Configuration, Implementation, Model, MPICoresResReq, Ports, Reference, ResourceRequirements, ThreadedResReq) -from libmuscle.test.conftest import frozenset_of as s +from libmuscle.planner.planner import ( + InsufficientResourcesAvailable, ModelGraph, Planner, ResourceAssignment) +from libmuscle.planner.resources import Resources + +from libmuscle.test.conftest import core as c, on_node_resources as onr, resources + + +Ref = Reference @pytest.fixture def all_resources() -> Resources: - return Resources({ - 'node001': {s(1), s(2), s(3), s(4)}, - 'node002': {s(1), s(2), s(3), s(4)}, - 'node003': {s(1), s(2), s(3), s(4)}}) + return resources({ + 'node001': [c(1), c(2), c(3), c(4)], + 'node002': [c(1), c(2), c(3), c(4)], + 'node003': [c(1), c(2), c(3), c(4)]}) @pytest.fixture @@ -51,17 +54,17 @@ def model(init: Component, macro: Component, micro: Component) -> Model: @pytest.fixture def implementations() -> List[Implementation]: return [ - Implementation(Reference('init'), script='init'), - Implementation(Reference('macro'), script='macro'), - Implementation(Reference('micro'), script='micro')] + Implementation(Ref('init'), script='init'), + Implementation(Ref('macro'), script='macro'), + Implementation(Ref('micro'), script='micro')] @pytest.fixture def requirements() -> Dict[Reference, ResourceRequirements]: res_list = [ - ThreadedResReq(Reference('init'), 4), - ThreadedResReq(Reference('macro'), 4), - ThreadedResReq(Reference('micro'), 4)] + ThreadedResReq(Ref('init'), 4), + ThreadedResReq(Ref('macro'), 4), + ThreadedResReq(Ref('micro'), 4)] return {r.name: r for r in res_list} @@ -72,6 +75,13 @@ def configuration( return Configuration(model, None, implementations, requirements) +@pytest.fixture +def assignment() -> ResourceAssignment: + return ResourceAssignment([ + onr('node001', {0, 1}), + onr('node002', {2, 3})]) + + def test_model_graph( init: Component, macro: Component, micro: Component, model: Model ) -> None: @@ -95,51 +105,51 @@ def test_model_graph( assert not graph.successors(micro) -def test_resources(all_resources: Resources) -> None: - res1 = all_resources - assert res1.cores == { - 'node001': {s(1), s(2), s(3), s(4)}, - 'node002': {s(1), s(2), s(3), 
s(4)}, - 'node003': {s(1), s(2), s(3), s(4)}} - assert set(res1.nodes()) == {'node001', 'node002', 'node003'} +def test_resource_assignment_eq() -> None: + asm1 = ResourceAssignment([]) + asm2 = ResourceAssignment([]) + + assert asm1 == asm2 - res2 = Resources({ - 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, - 'node005': {s(1), s(2), s(3), s(4), s(5), s(6)}}) - res1 += res2 + asm1.by_rank.append(onr('node001', {0, 1})) + assert asm1 != asm2 - assert res1.cores == { - 'node001': {s(1), s(2), s(3), s(4)}, - 'node002': {s(1), s(2), s(3), s(4)}, - 'node003': {s(1), s(2), s(3), s(4)}, - 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, - 'node005': {s(1), s(2), s(3), s(4), s(5), s(6)}} + asm2.by_rank.append(onr('node001', {0, 2})) + assert asm1 != asm2 - res3 = Resources({ - 'node003': {s(1), s(2), s(3), s(4)}, 'node005': {s(4), s(5), s(6)}}) - res1 -= res3 + asm2.by_rank[0] = onr('node001', {0, 1}) + assert asm1 == asm2 - assert res1.cores == { - 'node001': {s(1), s(2), s(3), s(4)}, - 'node002': {s(1), s(2), s(3), s(4)}, - 'node004': {s(1), s(2), s(3), s(4), s(5), s(6)}, - 'node005': {s(1), s(2), s(3)}} - assert res1.nodes() == { - 'node001', 'node002', 'node004', 'node005'} - res4 = copy(res3) - res4.cores['node003'] = {s(8)} +def test_resource_assignment_str(assignment: ResourceAssignment) -> None: + assert str(assignment) == ( + '[OnNodeResources(node001, c: 0-1(0-1)),' + ' OnNodeResources(node002, c: 2-3(2-3))]') - assert res3.cores['node003'] == {s(1), s(2), s(3), s(4)} - assert res4.cores['node003'] == {s(8)} - all_resources = Resources.union([res1, res2, res3, res4]) +def test_resource_assignment_repr(assignment: ResourceAssignment) -> None: + assert repr(assignment) == ( + 'ResourceAssignment([' + 'OnNodeResources("node001", CoreSet({Core(0, {0}), Core(1, {1})})),' + ' OnNodeResources("node002", CoreSet({Core(2, {2}), Core(3, {3})}))])') - assert all_resources.cores['node001'] == {s(1), s(2), s(3), s(4)} - assert all_resources.cores['node002'] == {s(1), s(2), s(3), s(4)} - assert all_resources.cores['node003'] == {s(1), s(2), s(3), s(4), s(8)} - assert all_resources.cores['node004'] == {s(1), s(2), s(3), s(4), s(5), s(6)} - assert all_resources.cores['node005'] == {s(1), s(2), s(3), s(4), s(5), s(6)} + +def test_resource_assignment_as_resources(assignment) -> None: + res = assignment.as_resources() + + assert res._nodes == { + 'node001': onr('node001', {0, 1}), + 'node002': onr('node002', {2, 3})} + + asm2 = ResourceAssignment([ + onr('node001', {0, 1}), onr('node001', {2, 3}), onr('node001', {2, 3}), + onr('node003', {4, 5})]) + + res = asm2.as_resources() + + assert res._nodes == { + 'node001': onr('node001', {0, 1, 2, 3}), + 'node003': onr('node003', {4, 5})} def test_planner( @@ -147,42 +157,31 @@ def test_planner( planner = Planner(all_resources) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Ref('init')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('macro')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('micro')].by_rank == [onr('node001', {1, 2, 3, 4})] def test_planner_exclusive_macro( all_resources: Resources, configuration: Configuration) -> None: planner = Planner(all_resources) - 
configuration.implementations[Reference('macro')].can_share_resources = ( - False) + configuration.implementations[Ref('macro')].can_share_resources = False allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Ref('init')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('macro')].by_rank == [onr('node002', {1, 2, 3, 4})] + assert allocations[Ref('micro')].by_rank == [onr('node001', {1, 2, 3, 4})] def test_planner_exclusive_predecessor( all_resources: Resources, configuration: Configuration) -> None: planner = Planner(all_resources) - configuration.implementations[Reference('init')].can_share_resources = ( - False) + configuration.implementations[Ref('init')].can_share_resources = False allocations = planner.allocate_all(configuration) - assert allocations[Reference('init')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Ref('init')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('macro')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('micro')].by_rank == [onr('node001', {1, 2, 3, 4})] def test_oversubscribe( @@ -194,97 +193,84 @@ def test_oversubscribe( planner = Planner(all_resources) allocations = planner.allocate_all(configuration) - assert allocations[Reference('init[0]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('init[1]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('init[2]')].cores == { - 'node003': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('init[3]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('init[4]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - - assert allocations[Reference('macro[0]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro[1]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro[2]')].cores == { - 'node003': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro[3]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('macro[4]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - - assert allocations[Reference('micro[0]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro[1]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro[2]')].cores == { - 'node003': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro[3]')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} - assert allocations[Reference('micro[4]')].cores == { - 'node002': {s(1), s(2), s(3), s(4)}} + assert allocations[Ref('init[0]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('init[1]')].by_rank == [onr('node002', {1, 2, 3, 4})] + assert allocations[Ref('init[2]')].by_rank == [onr('node003', {1, 2, 3, 4})] + assert allocations[Ref('init[3]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('init[4]')].by_rank == [onr('node002', {1, 2, 3, 4})] + + assert 
allocations[Ref('macro[0]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('macro[1]')].by_rank == [onr('node002', {1, 2, 3, 4})] + assert allocations[Ref('macro[2]')].by_rank == [onr('node003', {1, 2, 3, 4})] + assert allocations[Ref('macro[3]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('macro[4]')].by_rank == [onr('node002', {1, 2, 3, 4})] + + assert allocations[Ref('micro[0]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('micro[1]')].by_rank == [onr('node002', {1, 2, 3, 4})] + assert allocations[Ref('micro[2]')].by_rank == [onr('node003', {1, 2, 3, 4})] + assert allocations[Ref('micro[3]')].by_rank == [onr('node001', {1, 2, 3, 4})] + assert allocations[Ref('micro[4]')].by_rank == [onr('node002', {1, 2, 3, 4})] def test_oversubscribe_single_instance_threaded() -> None: model = Model('single_instance', [Component('x', 'x', ports=Ports())]) - impl = [Implementation(Reference('x'), script='x')] + impl = [Implementation(Ref('x'), script='x')] reqs: Dict[Reference, ResourceRequirements] = { - Reference('x'): ThreadedResReq(Reference('x'), 24)} + Ref('x'): ThreadedResReq(Ref('x'), 24)} config = Configuration(model, None, impl, reqs) - res = Resources({'node001': {s(1), s(2), s(3), s(4)}}) + res = resources({'node001': [c(1), c(2), c(3), c(4)]}) planner = Planner(res) allocations = planner.allocate_all(config) - assert allocations[Reference('x')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} + assert allocations[Ref('x')].by_rank == [onr('node001', {1, 2, 3, 4})] def test_oversubscribe_single_instance_mpi() -> None: model = Model('single_instance', [Component('x', 'x', ports=Ports())]) - impl = [Implementation(Reference('x'), script='x')] + impl = [Implementation(Ref('x'), script='x')] reqs: Dict[Reference, ResourceRequirements] = { - Reference('x'): MPICoresResReq(Reference('x'), 24)} + Ref('x'): MPICoresResReq(Ref('x'), 24)} config = Configuration(model, None, impl, reqs) - res = Resources({'node001': {s(1), s(2), s(3), s(4)}}) + res = resources({'node001': [c(1), c(2), c(3), c(4)]}) planner = Planner(res) allocations = planner.allocate_all(config) - assert allocations[Reference('x')].cores == { - 'node001': {s(1), s(2), s(3), s(4)}} + assert len(allocations[Ref('x')].by_rank) == 24 + for r in range(24): + assert allocations[Ref('x')].by_rank[r] == onr('node001', {r % 4 + 1}) def test_virtual_allocation() -> None: model = Model('ensemble', [Component('x', 'x', 9, ports=Ports())]) - impl = [Implementation(Reference('x'), script='x')] - reqs: Dict[Reference, ResourceRequirements] = { - Reference('x'): MPICoresResReq(Reference('x'), 13)} + impl = [Implementation(Ref('x'), script='x')] + reqs: Dict[Ref, ResourceRequirements] = { + Ref('x'): MPICoresResReq(Ref('x'), 13)} config = Configuration(model, None, impl, reqs) - res = Resources({'node000001': {s(1), s(2), s(3), s(4)}}) + res = resources({'node000001': [c(1), c(2), c(3), c(4)]}) planner = Planner(res) allocations = planner.allocate_all(config, virtual=True) assert res.total_cores() == 120 - assert allocations[Reference('x[0]')].total_cores() == 13 - assert allocations[Reference('x[8]')].total_cores() == 13 + for i in range(9): + for r in range(13): + assert len(allocations[Ref(f'x[{i}]')].by_rank) == 13 + assert allocations[Ref(f'x[{i}]')].by_rank[r].total_cores() == 1 def test_impossible_virtual_allocation() -> None: model = Model('ensemble', [Component('x', 'x', 9, ports=Ports())]) - impl = [Implementation(Reference('x'), script='x')] - reqs: Dict[Reference, 
ResourceRequirements] = { - Reference('x'): ThreadedResReq(Reference('x'), 13)} + impl = [Implementation(Ref('x'), script='x')] + reqs: Dict[Ref, ResourceRequirements] = { + Ref('x'): ThreadedResReq(Ref('x'), 13)} config = Configuration(model, None, impl, reqs) - res = Resources({'node000001': {s(1), s(2), s(3), s(4)}}) + res = resources({'node000001': [c(1), c(2), c(3), c(4)]}) planner = Planner(res) with pytest.raises(InsufficientResourcesAvailable): diff --git a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py index f1f5b02a..13ec5ce3 100644 --- a/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py +++ b/libmuscle/python/libmuscle/planner/test/test_planner_scenarios.py @@ -1,17 +1,15 @@ from copy import deepcopy -from libmuscle.planner.planner import ModelGraph, Planner, Resources - -from typing import Dict, FrozenSet, Tuple +from typing import Dict, Tuple import pytest from ymmsl import ( Component, Conduit, Configuration, Implementation, Model, MPICoresResReq, Ports, Reference, ResourceRequirements, ThreadedResReq) +from libmuscle.planner.planner import ModelGraph, Planner, ResourceAssignment +from libmuscle.planner.resources import Resources -def c(hwthread_id: int) -> FrozenSet[int]: - """Helper that defines a core with the given hwthread id.""" - return frozenset({hwthread_id}) +from libmuscle.test.conftest import core as c, on_node_resources as onr, resources _ResReqs = Dict[Reference, ResourceRequirements] @@ -43,12 +41,12 @@ def c(hwthread_id: int) -> FrozenSet[int]: s0_model, None, s0_implementations, s0_requirements) -s0_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) +s0_resources = resources({'node001': [c(0), c(1), c(2), c(3)]}) s0_solution = { - Reference('macro'): Resources({'node001': {c(0), c(1)}}), - Reference('micro'): Resources({'node001': {c(2), c(3)}})} + Reference('macro'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('micro'): ResourceAssignment([onr('node001', {2, 3})])} s1_model = Model( @@ -88,14 +86,14 @@ def c(hwthread_id: int) -> FrozenSet[int]: s1_model, None, s1_implementations, s1_requirements) -s1_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) +s1_resources = resources({'node001': [c(0), c(1), c(2), c(3)]}) s1_solution = { - Reference('macro'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro1'): Resources({'node001': {c(0), c(1)}}), - Reference('micro2'): Resources({'node001': {c(0), c(1)}}), - Reference('micro3'): Resources({'node001': {c(0)}})} + Reference('macro'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro1'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('micro2'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('micro3'): ResourceAssignment([onr('node001', 0)])} s2_model = Model( @@ -130,14 +128,14 @@ def c(hwthread_id: int) -> FrozenSet[int]: s2_model, None, s2_implementations, s2_requirements) -s2_resources = Resources( - {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) +s2_resources = resources( + {'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)]}) s2_solution = { - Reference('macro'): Resources({'node001': {c(0)}}), - Reference('micro1'): Resources({'node001': {c(0), c(1), c(2)}}), - Reference('micro2'): Resources({'node002': {c(0), c(1)}})} + Reference('macro'): ResourceAssignment([onr('node001', 0)]), + Reference('micro1'): ResourceAssignment([onr('node001', {0, 1, 2})]), + 
Reference('micro2'): ResourceAssignment([onr('node002', {0, 1})])} s3_model = Model( @@ -176,16 +174,17 @@ def c(hwthread_id: int) -> FrozenSet[int]: s3_model, None, s3_implementations, s3_requirements) -s3_resources = Resources( - {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) +s3_resources = resources( + {'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)]}) s3_solution = { - Reference('a'): Resources({'node001': {c(0)}}), - Reference('b1'): Resources( - {'node001': {c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}), - Reference('b2'): Resources({'node001': {c(0), c(1)}}), - Reference('c'): Resources({'node001': {c(0), c(1), c(2), c(3)}})} + Reference('a'): ResourceAssignment([onr('node001', 0)]), + Reference('b1'): ResourceAssignment([ + onr('node001', 2), onr('node001', 3), onr('node002', 0), onr('node002', 1), + onr('node002', 2), onr('node002', 3)]), + Reference('b2'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('c'): ResourceAssignment([onr('node001', {0, 1, 2, 3})])} s4_model = Model( @@ -221,14 +220,14 @@ def c(hwthread_id: int) -> FrozenSet[int]: s4_model, None, s4_implementations, s4_requirements) -s4_resources = Resources( - {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) +s4_resources = resources( + {'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)]}) s4_solution = { - Reference('macro1'): Resources({'node002': {c(0), c(1)}}), - Reference('macro2'): Resources({'node001': {c(0), c(1), c(2)}}), - Reference('micro'): Resources({'node001': {c(0), c(1), c(2)}})} + Reference('macro1'): ResourceAssignment([onr('node002', {0, 1})]), + Reference('macro2'): ResourceAssignment([onr('node001', {0, 1, 2})]), + Reference('micro'): ResourceAssignment([onr('node001', {0, 1, 2})])} s5_model = Model( @@ -270,19 +269,19 @@ def c(hwthread_id: int) -> FrozenSet[int]: s5_model, None, s5_implementations, s5_requirements) -s5_resources = Resources({ - 'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}, - 'node003': {c(0), c(1)}}) +s5_resources = resources({ + 'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)], + 'node003': [c(0), c(1)]}) # This is inefficient, as the models can all share resources. But repeater # is funny, and the algorithm cannot deal with it yet. It does give a valid # result with no overlap, so we'll accept that for the time being. 
s5_solution = { - Reference('init'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('repeater'): Resources({'node003': {c(0)}})} + Reference('init'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('repeater'): ResourceAssignment([onr('node003', 0)])} s6_model = Model( @@ -318,22 +317,22 @@ def c(hwthread_id: int) -> FrozenSet[int]: s6_model, None, s6_implementations, s6_requirements) -s6_resources = Resources({ - 'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}, - 'node003': {c(0), c(1), c(2), c(3)}, 'node004': {c(0), c(1), c(2), c(3)}, - 'node005': {c(0), c(1), c(2), c(3)}, 'node006': {c(0), c(1), c(2), c(3)} +s6_resources = resources({ + 'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)], + 'node003': [c(0), c(1), c(2), c(3)], 'node004': [c(0), c(1), c(2), c(3)], + 'node005': [c(0), c(1), c(2), c(3)], 'node006': [c(0), c(1), c(2), c(3)] }) s6_solution = { - Reference('a'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('tcf'): Resources({'node002': {c(0)}}), - Reference('b'): Resources({ - 'node002': {c(1), c(2), c(3)}, - 'node003': {c(0), c(1), c(2), c(3)}, - 'node004': {c(0), c(1), c(2), c(3)}, - 'node005': {c(0), c(1), c(2), c(3)}, - 'node006': {c(0)}})} + Reference('a'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('tcf'): ResourceAssignment([onr('node002', 0)]), + Reference('b'): ResourceAssignment([ + onr('node002', 1), onr('node002', 2), onr('node002', 3), onr('node003', 0), + onr('node003', 1), onr('node003', 2), onr('node003', 3), onr('node004', 0), + onr('node004', 1), onr('node004', 2), onr('node004', 3), onr('node005', 0), + onr('node005', 1), onr('node005', 2), onr('node005', 3), onr('node006', 0)]) + } s7_model = Model( @@ -374,47 +373,70 @@ def c(hwthread_id: int) -> FrozenSet[int]: s7_model, None, s7_implementations, s7_requirements) -s7_resources = Resources({ - 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node002': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node003': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node004': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node005': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, +s7_resources = resources({ + 'node001': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node002': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node003': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node004': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node005': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], }) s7_solution = { - Reference('mc'): Resources({'node001': {c(0)}}), - Reference('init[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('init[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('init[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('init[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('init[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - Reference('init[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), - Reference('init[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), - Reference('init[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), - 
Reference('init[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), - Reference('init[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), - Reference('macro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('macro[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('macro[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('macro[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - Reference('macro[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), - Reference('macro[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), - Reference('macro[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), - Reference('macro[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), - Reference('macro[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), - Reference('micro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('micro[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('micro[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('micro[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - Reference('micro[5]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), - Reference('micro[6]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), - Reference('micro[7]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), - Reference('micro[8]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), - Reference('micro[9]'): Resources({'node005': {c(4), c(5), c(6), c(7)}})} + Reference('mc'): ResourceAssignment([onr('node001', 0)]), + + Reference('init[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('init[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('init[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('init[3]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('init[4]'): ResourceAssignment([onr('node003', {0, 1, 2, 3})]), + Reference('init[5]'): ResourceAssignment([onr('node003', {4, 5, 6, 7})]), + Reference('init[6]'): ResourceAssignment([onr('node004', {0, 1, 2, 3})]), + Reference('init[7]'): ResourceAssignment([onr('node004', {4, 5, 6, 7})]), + Reference('init[8]'): ResourceAssignment([onr('node005', {0, 1, 2, 3})]), + Reference('init[9]'): ResourceAssignment([onr('node005', {4, 5, 6, 7})]), + + Reference('macro[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('macro[3]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('macro[4]'): ResourceAssignment([onr('node003', {0, 1, 2, 3})]), + Reference('macro[5]'): ResourceAssignment([onr('node003', {4, 5, 6, 7})]), + Reference('macro[6]'): ResourceAssignment([onr('node004', {0, 1, 2, 3})]), + Reference('macro[7]'): ResourceAssignment([onr('node004', {4, 5, 6, 7})]), + Reference('macro[8]'): ResourceAssignment([onr('node005', {0, 1, 2, 3})]), + Reference('macro[9]'): ResourceAssignment([onr('node005', {4, 5, 6, 7})]), + + Reference('micro[0]'): ResourceAssignment([ + onr('node001', 0), onr('node001', 1), onr('node001', 2), + onr('node001', 3)]), + Reference('micro[1]'): ResourceAssignment([ + onr('node001', 4), onr('node001', 5), onr('node001', 6), + onr('node001', 7)]), + Reference('micro[2]'): ResourceAssignment([ + 
onr('node002', 0), onr('node002', 1), onr('node002', 2), + onr('node002', 3)]), + Reference('micro[3]'): ResourceAssignment([ + onr('node002', 4), onr('node002', 5), onr('node002', 6), + onr('node002', 7)]), + Reference('micro[4]'): ResourceAssignment([ + onr('node003', 0), onr('node003', 1), onr('node003', 2), + onr('node003', 3)]), + Reference('micro[5]'): ResourceAssignment([ + onr('node003', 4), onr('node003', 5), onr('node003', 6), + onr('node003', 7)]), + Reference('micro[6]'): ResourceAssignment([ + onr('node004', 0), onr('node004', 1), onr('node004', 2), + onr('node004', 3)]), + Reference('micro[7]'): ResourceAssignment([ + onr('node004', 4), onr('node004', 5), onr('node004', 6), + onr('node004', 7)]), + Reference('micro[8]'): ResourceAssignment([ + onr('node005', 0), onr('node005', 1), onr('node005', 2), + onr('node005', 3)]), + Reference('micro[9]'): ResourceAssignment([ + onr('node005', 4), onr('node005', 5), onr('node005', 6), + onr('node005', 7)])} s8_model = Model( @@ -451,14 +473,14 @@ def c(hwthread_id: int) -> FrozenSet[int]: s8_model, None, s8_implementations, s8_requirements) -s8_resources = Resources( - {'node001': {c(0), c(1), c(2), c(3)}, 'node002': {c(0), c(1), c(2), c(3)}}) +s8_resources = resources( + {'node001': [c(0), c(1), c(2), c(3)], 'node002': [c(0), c(1), c(2), c(3)]}) s8_solution = { - Reference('macro'): Resources({'node001': {c(3)}}), - Reference('micro1'): Resources({'node001': {c(0), c(1), c(2)}}), - Reference('micro2'): Resources({'node001': {c(0), c(1)}})} + Reference('macro'): ResourceAssignment([onr('node001', 3)]), + Reference('micro1'): ResourceAssignment([onr('node001', {0, 1, 2})]), + Reference('micro2'): ResourceAssignment([onr('node001', {0, 1})])} s9_model = Model( @@ -500,15 +522,15 @@ def c(hwthread_id: int) -> FrozenSet[int]: s9_model, None, s9_implementations, s9_requirements) -s9_resources = Resources({'node001': {c(0), c(1), c(2), c(3)}}) +s9_resources = resources({'node001': [c(0), c(1), c(2), c(3)]}) s9_solution = { - Reference('a'): Resources({'node001': {c(1)}}), - Reference('b'): Resources({'node001': {c(0)}}), - Reference('c'): Resources({'node001': {c(0)}}), - Reference('d'): Resources({'node001': {c(1)}}), - Reference('e'): Resources({'node001': {c(0)}})} + Reference('a'): ResourceAssignment([onr('node001', 1)]), + Reference('b'): ResourceAssignment([onr('node001', 0)]), + Reference('c'): ResourceAssignment([onr('node001', 0)]), + Reference('d'): ResourceAssignment([onr('node001', 1)]), + Reference('e'): ResourceAssignment([onr('node001', 0)])} s10_model = Model( @@ -552,38 +574,40 @@ def c(hwthread_id: int) -> FrozenSet[int]: s10_model, None, s10_implementations, s10_requirements) -s10_resources = Resources({ - 'node001': { +s10_resources = resources({ + 'node001': [ c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), - c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, - 'node002': { + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)], + 'node002': [ c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), - c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, - 'node003': { + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)], + 'node003': [ c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7), - c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)}, + c(8), c(9), c(10), c(11), c(12), c(13), c(14), c(15)], }) s10_solution = { - Reference('mc'): Resources({'node001': {c(0)}}), - Reference('rr'): Resources({'node001': {c(0)}}), - Reference('macro[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro[1]'): 
Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('macro[2]'): Resources({'node001': {c(8), c(9), c(10), c(11)}}), - Reference('macro[3]'): Resources({'node001': {c(12), c(13), c(14), c(15)}}), - Reference('macro[4]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('macro[5]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('macro[6]'): Resources({'node002': {c(8), c(9), c(10), c(11)}}), - Reference('macro[7]'): Resources({'node002': {c(12), c(13), c(14), c(15)}}), - Reference('micro[0]'): Resources({'node001': {c(0), c(1)}}), - Reference('micro[1]'): Resources({'node001': {c(4), c(5)}}), - Reference('micro[2]'): Resources({'node001': {c(8), c(9)}}), - Reference('micro[3]'): Resources({'node001': {c(12), c(13)}}), - Reference('micro[4]'): Resources({'node002': {c(0), c(1)}}), - Reference('micro[5]'): Resources({'node002': {c(4), c(5)}}), - Reference('micro[6]'): Resources({'node002': {c(8), c(9)}}), - Reference('micro[7]'): Resources({'node002': {c(12), c(13)}})} + Reference('mc'): ResourceAssignment([onr('node001', 0)]), + Reference('rr'): ResourceAssignment([onr('node001', 0)]), + + Reference('macro[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro[2]'): ResourceAssignment([onr('node001', {8, 9, 10, 11})]), + Reference('macro[3]'): ResourceAssignment([onr('node001', {12, 13, 14, 15})]), + Reference('macro[4]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('macro[5]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('macro[6]'): ResourceAssignment([onr('node002', {8, 9, 10, 11})]), + Reference('macro[7]'): ResourceAssignment([onr('node002', {12, 13, 14, 15})]), + + Reference('micro[0]'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('micro[1]'): ResourceAssignment([onr('node001', {4, 5})]), + Reference('micro[2]'): ResourceAssignment([onr('node001', {8, 9})]), + Reference('micro[3]'): ResourceAssignment([onr('node001', {12, 13})]), + Reference('micro[4]'): ResourceAssignment([onr('node002', {0, 1})]), + Reference('micro[5]'): ResourceAssignment([onr('node002', {4, 5})]), + Reference('micro[6]'): ResourceAssignment([onr('node002', {8, 9})]), + Reference('micro[7]'): ResourceAssignment([onr('node002', {12, 13})])} s11_model = Model( @@ -622,26 +646,25 @@ def c(hwthread_id: int) -> FrozenSet[int]: s11_config = Configuration(s11_model, None, s11_implementations, s11_requirements) -s11_resources = Resources({ - 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node002': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, +s11_resources = resources({ + 'node001': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node002': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], }) s11_solution = { - Reference('macro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro1[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('macro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('micro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro1[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('micro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('macro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('macro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[0]'): 
Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - } + Reference('macro1[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro1[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro1[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('micro1[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro1[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('micro1[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('macro2[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro2[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro2[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('micro2[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro2[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('micro2[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})])} s12_model = deepcopy(s11_model) @@ -663,16 +686,16 @@ def c(hwthread_id: int) -> FrozenSet[int]: s12_solution = { - Reference('macro1'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro1[0]'): Resources({'node001': { - c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}}), - Reference('micro1[1]'): Resources({'node002': { - c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}}), - Reference('macro2'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), + Reference('macro1'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro1[0]'): ResourceAssignment([ + onr('node001', {0, 1, 2, 3, 4, 5, 6, 7})]), + Reference('micro1[1]'): ResourceAssignment([ + onr('node002', {0, 1, 2, 3, 4, 5, 6, 7})]), + Reference('macro2'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro2[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro2[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('micro2[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('micro2[3]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), } @@ -694,59 +717,59 @@ def c(hwthread_id: int) -> FrozenSet[int]: s13_config = Configuration(s13_model, None, s11_implementations, s13_requirements) -s13_resources = Resources({ - 'node001': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node002': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node003': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node004': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, - 'node005': {c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)}, +s13_resources = resources({ + 'node001': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node002': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node003': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node004': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], + 'node005': [c(0), c(1), c(2), c(3), c(4), c(5), c(6), c(7)], }) s13_solution = { - Reference('macro1[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro1[1]'): Resources({'node001': {c(4), c(5), 
c(6), c(7)}}), - Reference('macro1[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('macro1[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('macro1[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - - Reference('micro1[0][0]'): Resources({'node001': {c(0), c(1)}}), - Reference('micro1[0][1]'): Resources({'node001': {c(2), c(3)}}), - Reference('micro1[0][2]'): Resources({'node003': {c(4), c(5)}}), - Reference('micro1[0][3]'): Resources({'node003': {c(6), c(7)}}), - Reference('micro1[1][0]'): Resources({'node001': {c(4), c(5)}}), - Reference('micro1[1][1]'): Resources({'node001': {c(6), c(7)}}), - Reference('micro1[1][2]'): Resources({'node004': {c(0), c(1)}}), - Reference('micro1[1][3]'): Resources({'node004': {c(2), c(3)}}), - Reference('micro1[2][0]'): Resources({'node002': {c(0), c(1)}}), - Reference('micro1[2][1]'): Resources({'node002': {c(2), c(3)}}), - Reference('micro1[2][2]'): Resources({'node004': {c(4), c(5)}}), - Reference('micro1[2][3]'): Resources({'node004': {c(6), c(7)}}), - Reference('micro1[3][0]'): Resources({'node002': {c(4), c(5)}}), - Reference('micro1[3][1]'): Resources({'node002': {c(6), c(7)}}), - Reference('micro1[3][2]'): Resources({'node005': {c(0), c(1)}}), - Reference('micro1[3][3]'): Resources({'node005': {c(2), c(3)}}), - Reference('micro1[4][0]'): Resources({'node003': {c(0), c(1)}}), - Reference('micro1[4][1]'): Resources({'node003': {c(2), c(3)}}), - Reference('micro1[4][2]'): Resources({'node005': {c(4), c(5)}}), - Reference('micro1[4][3]'): Resources({'node005': {c(6), c(7)}}), - - Reference('macro2[0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('macro2[1]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('macro2[2]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('macro2[3]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('macro2[4]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - - Reference('micro2[0][0]'): Resources({'node001': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[0][1]'): Resources({'node003': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[1][0]'): Resources({'node001': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[1][1]'): Resources({'node004': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[2][0]'): Resources({'node002': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[2][1]'): Resources({'node004': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[3][0]'): Resources({'node002': {c(4), c(5), c(6), c(7)}}), - Reference('micro2[3][1]'): Resources({'node005': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[4][0]'): Resources({'node003': {c(0), c(1), c(2), c(3)}}), - Reference('micro2[4][1]'): Resources({'node005': {c(4), c(5), c(6), c(7)}}), + Reference('macro1[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro1[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro1[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('macro1[3]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('macro1[4]'): ResourceAssignment([onr('node003', {0, 1, 2, 3})]), + + Reference('micro1[0][0]'): ResourceAssignment([onr('node001', {0, 1})]), + Reference('micro1[0][1]'): ResourceAssignment([onr('node001', {2, 3})]), + Reference('micro1[0][2]'): ResourceAssignment([onr('node003', {4, 5})]), + Reference('micro1[0][3]'): ResourceAssignment([onr('node003', {6, 7})]), + Reference('micro1[1][0]'): ResourceAssignment([onr('node001', {4, 5})]), + Reference('micro1[1][1]'): 
ResourceAssignment([onr('node001', {6, 7})]), + Reference('micro1[1][2]'): ResourceAssignment([onr('node004', {0, 1})]), + Reference('micro1[1][3]'): ResourceAssignment([onr('node004', {2, 3})]), + Reference('micro1[2][0]'): ResourceAssignment([onr('node002', {0, 1})]), + Reference('micro1[2][1]'): ResourceAssignment([onr('node002', {2, 3})]), + Reference('micro1[2][2]'): ResourceAssignment([onr('node004', {4, 5})]), + Reference('micro1[2][3]'): ResourceAssignment([onr('node004', {6, 7})]), + Reference('micro1[3][0]'): ResourceAssignment([onr('node002', {4, 5})]), + Reference('micro1[3][1]'): ResourceAssignment([onr('node002', {6, 7})]), + Reference('micro1[3][2]'): ResourceAssignment([onr('node005', {0, 1})]), + Reference('micro1[3][3]'): ResourceAssignment([onr('node005', {2, 3})]), + Reference('micro1[4][0]'): ResourceAssignment([onr('node003', {0, 1})]), + Reference('micro1[4][1]'): ResourceAssignment([onr('node003', {2, 3})]), + Reference('micro1[4][2]'): ResourceAssignment([onr('node005', {4, 5})]), + Reference('micro1[4][3]'): ResourceAssignment([onr('node005', {6, 7})]), + + Reference('macro2[0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('macro2[1]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('macro2[2]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('macro2[3]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('macro2[4]'): ResourceAssignment([onr('node003', {0, 1, 2, 3})]), + + Reference('micro2[0][0]'): ResourceAssignment([onr('node001', {0, 1, 2, 3})]), + Reference('micro2[0][1]'): ResourceAssignment([onr('node003', {4, 5, 6, 7})]), + Reference('micro2[1][0]'): ResourceAssignment([onr('node001', {4, 5, 6, 7})]), + Reference('micro2[1][1]'): ResourceAssignment([onr('node004', {0, 1, 2, 3})]), + Reference('micro2[2][0]'): ResourceAssignment([onr('node002', {0, 1, 2, 3})]), + Reference('micro2[2][1]'): ResourceAssignment([onr('node004', {4, 5, 6, 7})]), + Reference('micro2[3][0]'): ResourceAssignment([onr('node002', {4, 5, 6, 7})]), + Reference('micro2[3][1]'): ResourceAssignment([onr('node005', {0, 1, 2, 3})]), + Reference('micro2[4][0]'): ResourceAssignment([onr('node003', {0, 1, 2, 3})]), + Reference('micro2[4][1]'): ResourceAssignment([onr('node005', {4, 5, 6, 7})]), } @@ -782,7 +805,7 @@ def c(hwthread_id: int) -> FrozenSet[int]: s14_model, None, s14_implementations, s14_requirements) -s14_resources = Resources({'node001': {c(0), c(1), c(2), c(3), c(4), c(5)}}) +s14_resources = resources({'node001': [c(0), c(1), c(2), c(3), c(4), c(5)]}) s14_solution = RuntimeError @@ -829,16 +852,20 @@ def test_scenarios(scenario: _Scenario) -> None: if isinstance(req, ThreadedResReq): for instance in component.instances(): - assert len(list(allocations[instance].nodes())) == 1 - assert allocations[instance].total_cores() == req.threads + assert len(allocations[instance].by_rank) == 1 + assert allocations[instance].by_rank[0].total_cores() == req.threads elif isinstance(req, MPICoresResReq): for instance in component.instances(): - tcores = allocations[instance].total_cores() - assert tcores == req.mpi_processes + nranks = len(allocations[instance].by_rank) + assert nranks == req.mpi_processes + for r in range(nranks): + assert allocations[instance].by_rank[r].total_cores() == 1 # check for any overlapping instances - for instance1, res1 in allocations.items(): - for instance2, res2 in allocations.items(): + for instance1, res_asm1 in allocations.items(): + for instance2, res_asm2 in allocations.items(): + res1 
= res_asm1.as_resources() + res2 = res_asm2.as_resources() cname1 = instance1.without_trailing_ints() cname2 = instance2.without_trailing_ints() if cname1 != cname2: diff --git a/libmuscle/python/libmuscle/planner/test/test_resources.py b/libmuscle/python/libmuscle/planner/test/test_resources.py new file mode 100644 index 00000000..f0158850 --- /dev/null +++ b/libmuscle/python/libmuscle/planner/test/test_resources.py @@ -0,0 +1,435 @@ +from copy import copy + +import pytest + +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources, Resources + + +@pytest.fixture +def c1(): + return Core(0, {0, 1}) + + +def test_core_equals(c1): + c2 = Core(0, {0, 1}) + c3 = Core(1, {0, 1}) + c4 = Core(0, {2, 3}) + + assert c1 == c2 + assert not c1 != c2 + assert c1 != c3 + assert c1 != c4 + assert c3 != c4 + + +def test_core_length(c1): + assert len(c1) == 2 + + c2 = Core(1, {4, 5, 6, 7}) + assert len(c2) == 4 + + +def test_core_copy(c1): + c2 = copy(c1) + assert c2.cid == 0 + assert c2.hwthreads == {0, 1} + + c2.hwthreads.add(2) + assert c1.hwthreads == {0, 1} + assert c2.hwthreads == {0, 1, 2} + + +def test_core_union(): + c1 = Core(3, {3}) + c2 = Core(3, {4}) + + assert c1 | c2 == Core(3, {3, 4}) + + c3 = Core(2, {2}) + with pytest.raises(ValueError): + c1 | c3 + + +def test_core_union_onto(c1): + c2 = Core(0, {2, 3}) + + c1 |= c2 + assert c1.hwthreads == {0, 1, 2, 3} + assert c2.hwthreads == {2, 3} + + c3 = Core(3, {6, 7}) + with pytest.raises(ValueError): + c1 |= c3 + + +def test_core_subtract(): + c1 = Core(0, {0, 1, 2, 3}) + c2 = Core(0, {0, 3}) + + c1 -= c2 + assert c1.cid == 0 + assert c1.hwthreads == {1, 2} + + c3 = Core(0, {2, 3}) + c1 -= c3 + assert c1.cid == 0 + assert c1.hwthreads == {1} + + c4 = Core(1, {1, 2}) + with pytest.raises(ValueError): + c1 -= c4 + + +def test_core_isdisjoint(c1): + c2 = Core(0, {0}) + c3 = Core(0, {2, 3}) + c4 = Core(1, {0, 1}) + + assert not c1.isdisjoint(c2) + assert not c2.isdisjoint(c1) + assert c1.isdisjoint(c3) + + with pytest.raises(ValueError): + c1.isdisjoint(c4) + + +def test_core_str(c1): + assert str(c1) == '0(0,1)' + + +def test_core_repr(c1): + assert repr(c1) == 'Core(0, {0,1})' + + +@pytest.fixture +def cs1(): + return CoreSet([Core(0, {0, 1}), Core(1, {2, 3})]) + + +def test_core_set_equals(cs1): + cs2 = CoreSet([Core(0, {0, 1}), Core(1, {2, 3})]) + cs3 = CoreSet([Core(1, {2, 3})]) + cs4 = CoreSet([]) + cs5 = CoreSet([Core(0, {0, 1}), Core(1, {2, 3}), Core(2, {4, 5})]) + cs6 = CoreSet([Core(3, {6, 7})]) + + assert cs1 == cs2 + assert not cs1 != cs2 + assert cs1 != cs3 + assert cs1 != cs4 + assert cs1 != cs5 + assert cs1 != cs6 + assert not cs3 == cs4 + assert cs4 != cs5 + + +def test_core_set_length(cs1): + cs2 = CoreSet([]) + cs3 = CoreSet([Core(3, {6, 7})]) + + assert len(cs1) == 2 + assert len(cs2) == 0 + assert len(cs3) == 1 + + +def test_core_set_iter(cs1): + for i, core in enumerate(cs1): + assert i == core.cid + assert core.hwthreads == {i * 2, i * 2 + 1} + + assert i == 1 + + +def test_core_set_copy(cs1): + cs2 = copy(cs1) + assert cs1 == cs2 + + cs2._cores[2] = Core(2, {4, 5}) + assert len(cs1._cores) == 2 + + cs2._cores[0].hwthreads.add(2) + assert 2 not in cs1._cores[0].hwthreads + + +def test_core_set_union_onto(cs1): + cs2 = CoreSet([Core(3, {6, 7})]) + cs1 |= cs2 + + assert len(cs1) == 3 + assert 0 in cs1._cores + assert cs1._cores[0].cid == 0 + assert cs1._cores[0].hwthreads == {0, 1} + assert 1 in cs1._cores + assert cs1._cores[1].cid == 1 + assert cs1._cores[1].hwthreads == {2, 3} + assert 3 in cs1._cores + assert 
cs1._cores[3].cid == 3 + assert cs1._cores[3].hwthreads == {6, 7} + + assert id(cs1._cores[3]) != id(cs2._cores[3]) + assert id(cs1._cores[3].hwthreads) != id(cs2._cores[3].hwthreads) + + +def test_core_set_subtract_disjunct(cs1): + cs2 = CoreSet([Core(3, {6, 7})]) + cs1 -= cs2 + + assert len(cs1) == 2 + assert 0 in cs1._cores + assert 1 in cs1._cores + + assert len(cs2) == 1 + assert 3 in cs2._cores + + +def test_core_set_subtract_whole_core(cs1): + cs2 = CoreSet([Core(0, {0, 1})]) + cs1 -= cs2 + + assert len(cs1) == 1 + assert 0 not in cs1._cores + assert 1 in cs1._cores + + assert len(cs2) == 1 + assert 0 in cs2._cores + + +def test_core_set_subtract_threads(cs1): + cs2 = CoreSet([Core(1, {2})]) + i1 = id(cs1._cores[1]) + + cs1 -= cs2 + + assert len(cs1) == 2 + assert 0 in cs1._cores + assert 1 in cs1._cores + assert id(cs1._cores[1]) == i1 + assert len(cs1._cores[1]) == 1 + assert cs1._cores[1].hwthreads == {3} + assert cs1._cores[0].hwthreads == {0, 1} + + +def test_core_set_str(cs1): + assert str(cs1) == '0-1(0-3)' + + +def test_core_set_repr(cs1): + assert repr(cs1) == 'CoreSet({Core(0, {0,1}), Core(1, {2,3})})' + + +def test_core_set_get_first_cores(cs1): + assert cs1.get_first_cores(0)._cores == {} + assert cs1.get_first_cores(1)._cores == {0: Core(0, {0, 1})} + assert cs1.get_first_cores(2)._cores == { + 0: Core(0, {0, 1}), + 1: Core(1, {2, 3})} + with pytest.raises(RuntimeError): + cs1.get_first_cores(3) + + +@pytest.fixture +def n1(cs1): + return OnNodeResources('node001', cs1) + + +def test_node_resources_equals(n1): + n2 = OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})])) + n3 = OnNodeResources('node002', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})])) + n4 = OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(1, {4, 3})])) + + assert n1 == n2 + assert n1 != n3 + assert n1 != n4 + + +def test_node_resources_copy(n1): + n2 = copy(n1) + + assert n1 == n2 + assert id(n1.cpu_cores) != id(n2.cpu_cores) + assert id(n1.cpu_cores._cores[0]) != id(n2.cpu_cores._cores[0]) + assert id(n1.cpu_cores._cores[1].hwthreads) != id(n2.cpu_cores._cores[1].hwthreads) + + +def test_node_resources_union_onto(n1): + n2 = OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(4, {8, 9, 10, 11})])) + n3 = OnNodeResources('node001', CoreSet([Core(0, {3})])) + n4 = OnNodeResources('node002', CoreSet([Core(3, {3})])) + + n1 |= n2 + + assert len(n1.cpu_cores) == 3 + assert id(n1.cpu_cores._cores[4]) != id(n2.cpu_cores._cores[4]) + + n1 |= n3 + + assert len(n1.cpu_cores) == 3 + assert n1.cpu_cores._cores[0].hwthreads == {0, 1, 3} + + with pytest.raises(ValueError): + n1 |= n4 + + +def test_node_resources_hwthreads(n1): + assert list(n1.hwthreads()) == [0, 1, 2, 3] + + +def test_node_resources_subtract(n1): + n2 = OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(4, {8, 9, 10, 11})])) + n3 = OnNodeResources('node001', CoreSet([Core(1, {3})])) + n4 = OnNodeResources('node002', CoreSet([Core(3, {3})])) + + n1 -= n2 + + assert len(n1.cpu_cores) == 1 + assert len(n1.cpu_cores._cores[1]) == 2 + + n1 -= n3 + + assert len(n1.cpu_cores) == 1 + assert len(n1.cpu_cores._cores[1]) == 1 + + with pytest.raises(ValueError): + n1 -= n4 + + +@pytest.fixture +def r1(n1): + return Resources([n1]) + + +def test_resources_length(r1, n1): + r2 = Resources([n1, OnNodeResources('node002', CoreSet([Core(0, {0, 1})]))]) + + assert len(r1) == 1 + assert len(r2) == 2 + + +def test_resources_iter(cs1, n1): + n2 = OnNodeResources('node004', cs1) + n3 = OnNodeResources('node002', CoreSet([Core(3, 
{3})])) + nodes = [n1, n2, n3] + res = Resources(nodes) + + for i, n in enumerate(res): + assert n == nodes[i] + + +def test_resources_equals(r1): + assert r1 == Resources( + [OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})]))]) + + r2 = Resources( + [OnNodeResources('node002', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})]))]) + assert r1 != r2 + + r3 = Resources( + [OnNodeResources( + 'node001', CoreSet([Core(0, {0, 1}), Core(1, {1, 2, 3})]))]) + assert r1 != r3 + + r4 = Resources([OnNodeResources('node001', CoreSet([Core(1, {1, 2})]))]) + assert r1 != r4 + + r5 = Resources([ + OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})])), + OnNodeResources('node002', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})])) + ]) + assert r1 != r5 + + +def test_resources_copy(r1): + r2 = copy(r1) + assert id(r2._nodes['node001']) != id(r1._nodes['node001']) + assert id(r2._nodes['node001'].cpu_cores) != id(r1._nodes['node001'].cpu_cores) + + +def test_resources_union_onto(r1): + r2 = Resources([]) + r2 |= r1 + assert r2 == r1 + + r3 = Resources([OnNodeResources('node002', CoreSet([Core(0, {0})]))]) + r3 |= r1 + assert len(r3._nodes) == 2 + assert id(r3._nodes['node001']) != id(r1._nodes['node001']) + assert sorted(r3._nodes.keys()) == ['node001', 'node002'] + + +def test_resources_subtract(r1): + r2 = Resources([]) + r2 -= r1 + assert len(r2._nodes) == 0 + + r1 -= r2 + assert len(r1._nodes) == 1 + + r3 = Resources([OnNodeResources('node001', CoreSet([Core(0, {0})]))]) + r1 -= r3 + assert len(r1._nodes) == 1 + assert r1._nodes['node001'].cpu_cores._cores[0].hwthreads == {1} + + +def test_resources_nodes(): + r1 = Resources([ + OnNodeResources('node001', CoreSet([Core(0, {0})])), + OnNodeResources('node003', CoreSet([Core(1, {1})])), + OnNodeResources('node004', CoreSet([Core(2, {2})]))]) + + assert sorted(r1.nodes()) == ['node001', 'node003', 'node004'] + + +def test_resources_total_cores(): + r1 = Resources([ + OnNodeResources('node001', CoreSet([Core(0, {0, 1})])), + OnNodeResources('node003', CoreSet([Core(1, {1}), Core(5, {5})])), + OnNodeResources('node004', CoreSet([Core(2, {2})]))]) + + assert r1.total_cores() == 4 + + +def test_resource_hwthreads(n1, r1): + hwthreads = list(r1.hwthreads()) + assert hwthreads == [('node001', 0), ('node001', 1), ('node001', 2), ('node001', 3)] + + n2 = OnNodeResources('node007', CoreSet([Core(7, {7}), Core(3, {3})])) + res = Resources([n1, n2]) + + hwthreads = list(res.hwthreads()) + assert hwthreads == [ + ('node001', 0), ('node001', 1), ('node001', 2), ('node001', 3), + ('node007', 7), ('node007', 3)] + + +def test_resources_isdisjoint(r1): + r2 = Resources([]) + assert r1.isdisjoint(r2) + + r3 = Resources([OnNodeResources('node001', CoreSet([Core(0, {0})]))]) + assert not r1.isdisjoint(r3) + + r4 = Resources([OnNodeResources('node001', CoreSet([Core(0, {2})]))]) + assert r1.isdisjoint(r4) + + r5 = Resources([OnNodeResources('node002', CoreSet([Core(0, {0})]))]) + assert r1.isdisjoint(r5) + + +def test_resources_union(r1): + r2 = Resources([]) + r3 = Resources([OnNodeResources('node001', CoreSet([Core(0, {0})]))]) + r4 = Resources([OnNodeResources('node001', CoreSet([Core(0, {2})]))]) + r5 = Resources([OnNodeResources('node002', CoreSet([Core(0, {0})]))]) + + assert Resources.union([r1, r2]) == r1 + assert Resources.union([r1, r3]) == r1 + assert Resources.union([r1, r4]) == Resources([ + OnNodeResources('node001', CoreSet([Core(0, {0, 1, 2}), Core(1, {2, 3})]))]) + + assert Resources.union([r1, r5]) == Resources([ + 
OnNodeResources('node001', CoreSet([Core(0, {0, 1}), Core(1, {2, 3})])), + OnNodeResources('node002', CoreSet([Core(0, {0})]))]) + + assert Resources.union([r1, r2, r3, r4, r5]) == Resources([ + OnNodeResources('node001', CoreSet([Core(0, {0, 1, 2}), Core(1, {2, 3})])), + OnNodeResources('node002', CoreSet([Core(0, {0})]))]) diff --git a/libmuscle/python/libmuscle/test/conftest.py b/libmuscle/python/libmuscle/test/conftest.py index 3215517f..b64a51e3 100644 --- a/libmuscle/python/libmuscle/test/conftest.py +++ b/libmuscle/python/libmuscle/test/conftest.py @@ -1,5 +1,6 @@ from copy import copy import pytest +from typing import Dict, List, Set, Union from unittest.mock import patch from ymmsl import Operator, Reference, Settings @@ -8,6 +9,7 @@ from libmuscle.communicator import Message from libmuscle.mcp.transport_client import ProfileData from libmuscle.mmp_client import MMPClient +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources, Resources from libmuscle.port import Port from libmuscle.profiler import Profiler from libmuscle.timestamp import Timestamp @@ -101,10 +103,20 @@ def port_exists(name): return port_manager -def frozenset_of(*args): - """Create a frozenset containing the arguments. +def core(hwthread_id: int) -> Core: + """Helper that defines a core with the given core and hwthread id.""" + return Core(hwthread_id, {hwthread_id}) - This is a helper to shorten notation used in some of the planning and - launching-related tests. - """ - return frozenset(args) + +def on_node_resources(node_name: str, cores: Union[int, Set[int]]) -> OnNodeResources: + """Helper that defines resources on a node from the name and a CPU core.""" + if isinstance(cores, int): + cores = {cores} + return OnNodeResources(node_name, CoreSet([Core(core, {core}) for core in cores])) + + +def resources(node_resources: Dict[str, List[Core]]) -> Resources: + """Helper that defines a Resources from a dict.""" + return Resources([ + OnNodeResources(node_name, CoreSet(cores)) + for node_name, cores in node_resources.items()]) diff --git a/muscle3/muscle3.py b/muscle3/muscle3.py index c4f39af1..b1373bc9 100644 --- a/muscle3/muscle3.py +++ b/muscle3/muscle3.py @@ -8,8 +8,8 @@ from ymmsl import PartialConfiguration -from libmuscle.planner.planner import ( - Planner, Resources, InsufficientResourcesAvailable) +from libmuscle.planner.planner import Planner, InsufficientResourcesAvailable +from libmuscle.planner.resources import Core, CoreSet, OnNodeResources, Resources from libmuscle.snapshot_manager import SnapshotManager from muscle3.profiling import ( plot_instances, plot_resources, plot_timeline, show_plots) @@ -138,8 +138,9 @@ def resources( click.echo(_RESOURCES_INCOMPLETE_MODEL, err=True) sys.exit(1) - resources = Resources({ - 'node000001': {frozenset([r]) for r in range(cores_per_node)}}) + resources = Resources([ + OnNodeResources( + 'node000001', CoreSet([Core(i, {i}) for i in range(cores_per_node)]))]) planner = Planner(resources) try: From c4ef9eba95d96c5fdb77453b133db0b7ea641152 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Mon, 30 Dec 2024 18:29:37 +0100 Subject: [PATCH 46/49] Add support for Intel MPI to the native instantiator --- integration_test/cluster_test/Makefile | 6 +- integration_test/cluster_test/conftest.py | 73 ++++++++++++------- .../cluster_test/double_intelmpi.sh | 12 +++ .../implementations_intelmpi.ymmsl | 9 +++ .../cluster_test/macro_micro_intelmpi.sh | 12 +++ integration_test/cluster_test/test_cluster.py | 4 +- integration_test/fake_cluster/Dockerfile | 8 +- 
integration_test/fake_cluster/old.Dockerfile | 8 +- .../native_instantiator/run_script.py | 45 +++++++++++- scripts/gmake/check_tools.make | 2 + 10 files changed, 139 insertions(+), 40 deletions(-) create mode 100755 integration_test/cluster_test/double_intelmpi.sh create mode 100644 integration_test/cluster_test/implementations_intelmpi.ymmsl create mode 100755 integration_test/cluster_test/macro_micro_intelmpi.sh diff --git a/integration_test/cluster_test/Makefile b/integration_test/cluster_test/Makefile index 4ef1fd9e..44f5e012 100644 --- a/integration_test/cluster_test/Makefile +++ b/integration_test/cluster_test/Makefile @@ -1,5 +1,5 @@ .PHONY: all -all: component_openmpi +all: component_$(MPI_TYPE) CXXFLAGS += $(shell pkg-config --cflags libmuscle_mpi ymmsl) @@ -7,6 +7,6 @@ LDLIBS += $(shell pkg-config --libs libmuscle_mpi ymmsl) CXXFLAGS += -g -component_openmpi: component.cpp - mpic++ -o $@ $(CXXFLAGS) $^ $(LDLIBS) +component_$(MPI_TYPE): component.cpp + mpicxx -o $@ $(CXXFLAGS) $^ $(LDLIBS) diff --git a/integration_test/cluster_test/conftest.py b/integration_test/cluster_test/conftest.py index 97b7b255..c0a65131 100644 --- a/integration_test/cluster_test/conftest.py +++ b/integration_test/cluster_test/conftest.py @@ -21,7 +21,7 @@ ])) # Shut down the containers after running the tests. Set to False to debug. -CLEAN_UP_CONTAINERS = True +CLEAN_UP_CONTAINERS = False skip_unless_cluster = pytest.mark.skipif( @@ -223,44 +223,65 @@ def _install_muscle3_native_openmpi( f'make distclean && ' f'PREFIX={prefix} make install"')) - return prefix, module_name + return 'openmpi', prefix, module_name + + +def _install_muscle3_native_intelmpi( + remote_source, remote_term, remote_fs): + prefix = remote_fs / REMOTE_SHARED / 'muscle3-intelmpi' + prefix.mkdir() + + module_name = 'intel-oneapi-mpi' + + run_cmd(remote_term, 600, ( + f'/bin/bash -l -c "' + f'module load {module_name} && ' + f'cd {remote_source} && ' + f'make distclean && ' + f'PREFIX={prefix} make install"')) + + return 'intelmpi', prefix, module_name def _install_muscle3(local_term, repo_root, remote_term, remote_fs, slurm_version): remote_source = _install_remote_source( local_term, repo_root, remote_fs, slurm_version) _create_muscle3_venv(remote_term, remote_source) - return _install_muscle3_native_openmpi( + openmpi_install = _install_muscle3_native_openmpi( remote_source, remote_term, remote_fs, slurm_version) + intelmpi_install = _install_muscle3_native_intelmpi( + remote_source, remote_term, remote_fs) + return openmpi_install, intelmpi_install -def _install_tests(repo_root, remote_term, remote_fs, remote_m3_openmpi): +def _install_tests(repo_root, remote_term, remote_fs, remote_m3_installs): remote_home = remote_fs / REMOTE_SHARED - remote_m3, openmpi_module = remote_m3_openmpi - cerulean.copy( - repo_root / 'integration_test' / 'cluster_test', remote_home, - copy_permissions=True) + for mpi_type, remote_m3, mpi_module in remote_m3_installs: + cerulean.copy( + repo_root / 'integration_test' / 'cluster_test', remote_home, + copy_permissions=True) - remote_source = remote_home / 'cluster_test' + remote_source = remote_home / 'cluster_test' - run_cmd(remote_term, 30, ( - '/bin/bash -c "' - f'sed -i \\"s^modules: openmpi^modules: {openmpi_module}^\\"' - f' {remote_source}/implementations_openmpi.ymmsl' - '"')) + if mpi_type == 'openmpi': + run_cmd(remote_term, 30, ( + '/bin/bash -c "' + f'sed -i \\"s^modules: openmpi^modules: {mpi_module}^\\"' + f' {remote_source}/implementations_openmpi.ymmsl' + '"')) - run_cmd(remote_term, 30, ( 
- '/bin/bash -c "' - f'sed -i \\"s^modules: openmpi^modules: {openmpi_module}^\\"' - f' {remote_source}/implementations_srunmpi.ymmsl' - '"')) + run_cmd(remote_term, 30, ( + '/bin/bash -c "' + f'sed -i \\"s^modules: openmpi^modules: {mpi_module}^\\"' + f' {remote_source}/implementations_srunmpi.ymmsl' + '"')) - run_cmd(remote_term, 30, ( - f'/bin/bash -l -c "' - f'module load {openmpi_module} && ' - f'. {remote_m3}/bin/muscle3.env && ' - f'make -C {remote_source}"')) + run_cmd(remote_term, 30, ( + f'/bin/bash -l -c "' + f'module load {mpi_module} && ' + f'. {remote_m3}/bin/muscle3.env && ' + f'make -C {remote_source} MPI_TYPE={mpi_type}"')) def _clean_up_base_cluster(local_term, slurm_version): @@ -285,9 +306,9 @@ def installed_cluster( remote_term, remote_fs, headnode_port = _start_base_cluster( local_term, request.param, local_shared_dir) - remote_m3_openmpi = _install_muscle3( + remote_m3_installs = _install_muscle3( local_term, repo_root, remote_term, remote_fs, slurm_version) - _install_tests(repo_root, remote_term, remote_fs, remote_m3_openmpi) + _install_tests(repo_root, remote_term, remote_fs, remote_m3_installs) yield headnode_port diff --git a/integration_test/cluster_test/double_intelmpi.sh b/integration_test/cluster_test/double_intelmpi.sh new file mode 100755 index 00000000..e6e47859 --- /dev/null +++ b/integration_test/cluster_test/double_intelmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/double.ymmsl $CT/settings.ymmsl $CT/implementations_intelmpi.ymmsl + diff --git a/integration_test/cluster_test/implementations_intelmpi.ymmsl b/integration_test/cluster_test/implementations_intelmpi.ymmsl new file mode 100644 index 00000000..b216138d --- /dev/null +++ b/integration_test/cluster_test/implementations_intelmpi.ymmsl @@ -0,0 +1,9 @@ +ymmsl_version: v0.1 + +implementations: + component_cpp: + modules: intel-oneapi-mpi + env: + +LD_LIBRARY_PATH: /home/cerulean/shared/muscle3-intelmpi/lib + execution_model: intelmpi + executable: /home/cerulean/shared/cluster_test/component_intelmpi diff --git a/integration_test/cluster_test/macro_micro_intelmpi.sh b/integration_test/cluster_test/macro_micro_intelmpi.sh new file mode 100755 index 00000000..77bec53a --- /dev/null +++ b/integration_test/cluster_test/macro_micro_intelmpi.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -e + +env + +source /home/cerulean/shared/venv/bin/activate + +CT=/home/cerulean/shared/cluster_test + +muscle_manager --log-level=DEBUG --start-all $CT/macro_micro.ymmsl $CT/settings.ymmsl $CT/implementations_intelmpi.ymmsl + diff --git a/integration_test/cluster_test/test_cluster.py b/integration_test/cluster_test/test_cluster.py index d8a52c67..81e02eaa 100644 --- a/integration_test/cluster_test/test_cluster.py +++ b/integration_test/cluster_test/test_cluster.py @@ -185,7 +185,7 @@ def test_multiple( @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -@pytest.mark.parametrize('execution_model', ['openmpi', 'srunmpi']) +@pytest.mark.parametrize('execution_model', ['openmpi', 'intelmpi', 'srunmpi']) def test_double( fake_cluster, remote_test_files, remote_out_dir, hwthread_to_core, mode, execution_model): @@ -219,7 +219,7 @@ def test_double( @skip_unless_cluster @pytest.mark.parametrize('mode', ['local', 'slurm']) -@pytest.mark.parametrize('execution_model', ['openmpi', 'srunmpi']) +@pytest.mark.parametrize('execution_model', ['openmpi', 'intelmpi', 
'srunmpi']) def test_macro_micro( fake_cluster, remote_test_files, remote_out_dir, hwthread_to_core, mode, execution_model): diff --git a/integration_test/fake_cluster/Dockerfile b/integration_test/fake_cluster/Dockerfile index 25a85ebe..419ec852 100644 --- a/integration_test/fake_cluster/Dockerfile +++ b/integration_test/fake_cluster/Dockerfile @@ -43,13 +43,13 @@ RUN . /opt/spack/share/spack/setup-env.sh && \ ^$(spack find --deps slurm@24-11 | grep pmix | tr -d ' ') \ ^$(spack find --format "slurm/{hash}" slurm@24-11) -# RUN . /opt/spack/share/spack/setup-env.sh && \ -# . $(spack location -i lmod)/lmod/lmod/init/bash && \ -# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install intel-oneapi-mpi@2021.14.0 target=zen2 # RUN . /opt/spack/share/spack/setup-env.sh && \ # . $(spack location -i lmod)/lmod/lmod/init/bash && \ -# spack install intel-oneapi-mpi ^pmix@3.2.3 +# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 COPY integration_test/fake_cluster/cgroup.conf /etc/slurm/cgroup.conf diff --git a/integration_test/fake_cluster/old.Dockerfile b/integration_test/fake_cluster/old.Dockerfile index 700075c7..9da30db9 100644 --- a/integration_test/fake_cluster/old.Dockerfile +++ b/integration_test/fake_cluster/old.Dockerfile @@ -31,13 +31,13 @@ RUN . /opt/spack/share/spack/setup-env.sh && \ ^$(spack find --deps slurm@20-02 | grep pmix | tr -d ' ') \ ^$(spack find --format "slurm/{hash}" slurm@20-02) -# RUN . /opt/spack/share/spack/setup-env.sh && \ -# . $(spack location -i lmod)/lmod/lmod/init/bash && \ -# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 +RUN . /opt/spack/share/spack/setup-env.sh && \ + . $(spack location -i lmod)/lmod/lmod/init/bash && \ + spack install intel-oneapi-mpi@2021.14.0 # RUN . /opt/spack/share/spack/setup-env.sh && \ # . $(spack location -i lmod)/lmod/lmod/init/bash && \ -# spack install intel-oneapi-mpi ^pmix@3.2.3 +# spack install mpich+slurm pmi=pmix ^pmix@3.2.3 # Disable ssh debug output RUN sed -i -e 's/^LogLevel DEBUG3$//' /etc/ssh/sshd_config diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index faa14a68..a2ec2cfd 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -59,15 +59,58 @@ def openmpi_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str def impi_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: """Create resource description for Intel MPI mpirun + Intel MPI mpirun accepts either one core for each MPI process, or one hwthread. It + cannot bind a process to more than one explicitly specified core or hwthread the way + srun and OpenMPI can. At the moment, we bind each process to one core, and that's + what we do here as well, but this will become a problem for MPI+OpenMP codes. Those + can be pinned to sockets, NUMA domains or caches, which does make sense, so we'll + have to figure that out when we add support. 
+ Args: resources: The resource assignment to describe Return: The contents of the machinefile, and a set of environment variables """ + env: Dict[str, str] = dict() + machine_nodes: List[str] = list() + pin_masks: List[int] = list() + + for rank, res in enumerate(resources.by_rank): + machine_nodes.append(res.node_name) + pin_masks.append(sum((1 << c for c in res.hwthreads()))) + + # coalesce machine lines + proc_counts = [1] * len(machine_nodes) + i = 1 + while i < len(machine_nodes): + if machine_nodes[i-1] == machine_nodes[i]: + del machine_nodes[i] + proc_counts[i-1] += proc_counts[i] + del proc_counts[i] + else: + i += 1 + + machinefile = '\n'.join( + (f'{m}:{c}' for m, c in zip(machine_nodes, proc_counts))) + '\n' + + # disable pinning to SLURM-specified resources + # env['I_MPI_PIN_RESPECT_CPUSET'] = '0' + env['I_MPI_JOB_RESPECT_PROCESS_PLACEMENT'] = 'off' + + # which cores to bind each rank to + pin_masks_str = ','.join(format(mask, '#x') for mask in pin_masks) + env['I_MPI_PIN_DOMAIN'] = f'[{pin_masks_str}]' + + # I_MPI_PIN_DOMAIN=[55,aa] + # pins the first rank to 0,2,16,18 and the second to 1,3,17,19 # I_MPI_PIN_PROCESSOR_LIST=0,1,5,6 # pins rank 0 to core 0, rank 1 to core 1, rank 2 to core 5, rank 3 to core 6 - raise NotImplementedError() + # machinefile: + # host1:2 + # host2:4 + # runs two processes on host1 and four on host2 + return machinefile, env def mpich_prep_resources(resources: ResourceAssignment) -> Tuple[str, Dict[str, str]]: diff --git a/scripts/gmake/check_tools.make b/scripts/gmake/check_tools.make index 0adc8ff0..51113dab 100644 --- a/scripts/gmake/check_tools.make +++ b/scripts/gmake/check_tools.make @@ -67,6 +67,8 @@ tool_command := mpi$(CXX) include $(TOOLDIR)/detect_tool.make tool_command := mpic++ include $(TOOLDIR)/detect_tool.make +tool_command := mpicxx +include $(TOOLDIR)/detect_tool.make ifndef MPICXX $(info - No MPI C++ compiler found! Maybe there's no MPI installed?) From 4580b5dd1a733ed0f0be087979429f8386986c89 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Tue, 31 Dec 2024 18:03:51 +0100 Subject: [PATCH 47/49] Improve error reporting on Instantiator crash --- libmuscle/python/libmuscle/manager/instance_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 23980903..6f4021e3 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -104,7 +104,7 @@ def __init__( 'Instantiator crashed. 
This should not happen, please file a bug' ' report.') _logger.error(msg) - raise RuntimeError(msg) + raise RuntimeError(msg) from resources.exception self._planner = Planner(resources) self._num_running = 0 From 4d7525b9966d49e42eef007a7d5bf515e07c59dc Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 1 Jan 2025 10:00:52 +0100 Subject: [PATCH 48/49] Enable IntelMPI debug output at log level debug --- .../libmuscle/native_instantiator/run_script.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/libmuscle/python/libmuscle/native_instantiator/run_script.py b/libmuscle/python/libmuscle/native_instantiator/run_script.py index a2ec2cfd..e566d123 100644 --- a/libmuscle/python/libmuscle/native_instantiator/run_script.py +++ b/libmuscle/python/libmuscle/native_instantiator/run_script.py @@ -306,9 +306,17 @@ def cluster_command(implementation: Implementation, enable_debug: bool) -> str: fstr = ' '.join(fargs) elif implementation.execution_model == ExecutionModel.INTELMPI: - fstr = ( - 'mpirun -n $MUSCLE_MPI_PROCESSES -machinefile $MUSCLE_RANKFILE' - ' {command} {args}') + fargs = [ + 'mpirun -n $MUSCLE_MPI_PROCESSES', + '-machinefile $MUSCLE_RANKFILE'] + + if enable_debug: + fargs.append('-genv I_MPI_DEBUG=4') + + fargs.append('{command} {args}') + + fstr = ' '.join(fargs) + elif implementation.execution_model == ExecutionModel.SRUNMPI: fargs = ['srun -n $MUSCLE_MPI_PROCESSES -m arbitrary'] From 5b0ea5c98e4297729868bc184e2a387d03d85378 Mon Sep 17 00:00:00 2001 From: Lourens Veen Date: Wed, 1 Jan 2025 10:01:54 +0100 Subject: [PATCH 49/49] Log resources more compactly at info level --- libmuscle/python/libmuscle/manager/instance_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libmuscle/python/libmuscle/manager/instance_manager.py b/libmuscle/python/libmuscle/manager/instance_manager.py index 6f4021e3..51a7a67f 100644 --- a/libmuscle/python/libmuscle/manager/instance_manager.py +++ b/libmuscle/python/libmuscle/manager/instance_manager.py @@ -124,7 +124,7 @@ def start_all(self) -> None: """Starts all the instances of the model.""" self._allocations = self._planner.allocate_all(self._configuration) for instance, resources in self._allocations.items(): - _logger.info(f'Planned {instance} on {resources}') + _logger.info(f'Planned {instance} on {resources.as_resources()}') components = {c.name: c for c in self._configuration.model.components} for instance, resources in self._allocations.items(): @@ -147,7 +147,7 @@ def start_all(self) -> None: instance, implementation, self._configuration.resources[component.name], resources, idir, workdir, stdout_path, stderr_path) - _logger.info(f'Instantiating {instance} on {resources}') + _logger.info(f'Instantiating {instance}') self._requests_out.put(request) self._num_running += 1
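
A minimal standalone sketch of the Intel MPI resource description built by impi_prep_resources above: consecutive ranks on the same node are coalesced into host:count machinefile lines, and I_MPI_PIN_DOMAIN gets one hexadecimal mask per rank with a bit set for each assigned hwthread. The helper name, the plain (node_name, hwthreads) tuples standing in for ResourceAssignment, and the node names are illustrative assumptions only, not libmuscle API.

from typing import Dict, List, Set, Tuple


def impi_machinefile_and_pinning(
        ranks: List[Tuple[str, Set[int]]]) -> Tuple[str, Dict[str, str]]:
    """Build a machinefile and pinning env for a list of (node, hwthreads) pairs."""
    machine_nodes = [node for node, _ in ranks]
    pin_masks = [
            sum(1 << hwthread for hwthread in hwthreads)
            for _, hwthreads in ranks]

    # coalesce consecutive ranks on the same node into node:count entries
    proc_counts = [1] * len(machine_nodes)
    i = 1
    while i < len(machine_nodes):
        if machine_nodes[i - 1] == machine_nodes[i]:
            del machine_nodes[i]
            proc_counts[i - 1] += proc_counts[i]
            del proc_counts[i]
        else:
            i += 1

    machinefile = '\n'.join(
            f'{node}:{count}'
            for node, count in zip(machine_nodes, proc_counts)) + '\n'

    # one hex mask per rank, a set bit per assigned hwthread
    pin_masks_str = ','.join(format(mask, '#x') for mask in pin_masks)
    env = {
            'I_MPI_JOB_RESPECT_PROCESS_PLACEMENT': 'off',
            'I_MPI_PIN_DOMAIN': f'[{pin_masks_str}]'}

    return machinefile, env


if __name__ == '__main__':
    # two ranks on node001 (hwthreads 0-1 and 2-3), one rank on node002
    ranks = [('node001', {0, 1}), ('node001', {2, 3}), ('node002', {0, 1})]
    machinefile, env = impi_machinefile_and_pinning(ranks)
    print(machinefile, end='')          # node001:2 and node002:1, one per line
    print(env['I_MPI_PIN_DOMAIN'])      # [0x3,0xc,0x3]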