V13 backports #9862

Merged · 48 commits · Jan 28, 2025

Commits (changes from all commits)
30266c6
Make checking parent alive more portable
eivindjahren Jan 20, 2025
b437827
Add check-all to justfile
eivindjahren Jan 20, 2025
5518481
Add unit tests to pre-commit
oyvindeide Jan 20, 2025
bf7d182
Remove redundant apt download in run-ert-test-data
larsevj Jan 20, 2025
a1a2d59
Add integration test label to potentially conflicting tests
xjules Jan 21, 2025
91f0484
Trust the queue system when a job is finished in tests
berland Jan 20, 2025
fe97d1b
Mark ES_MDA as recommended algorithm in GUI
berland Jan 16, 2025
b4ac65e
Fix rerun button not disabled for ES-MDA
jonathan-eq Jan 21, 2025
70c89e6
zmq: replace server_started Event with Future to handle exceptions
xjules Jan 15, 2025
411bfc5
Add everest to rapid tests
eivindjahren Jan 22, 2025
0dfb678
Adapt Slurm setup to Ubuntu 24.04
berland Jan 21, 2025
e70740d
Skip slow tests in mac ci
eivindjahren Jan 22, 2025
53f78e3
Fix mac select string
eivindjahren Jan 22, 2025
bef4261
Add tests for flow vs OMP_NUM_THREADS
berland Jan 21, 2025
f4eb706
Fix unable to have path object in argslist for localdriver
jonathan-eq Jan 21, 2025
8dfe8ab
Divide instead of multiply with observation errors in misfit preproce…
larsevj Jan 22, 2025
c065695
Add event serialization testing
DanSava Dec 17, 2024
9824ff6
Reduce test matrix for flow and slurm tests
eivindjahren Jan 23, 2025
ceade12
Increase timeout of killed_ert_does_not_leave_storage_server_process
eivindjahren Jan 23, 2025
f0019af
Skip testing lsf/openpbs in slurm specific test
berland Jan 23, 2025
e2b6898
Do not split integration_test in everest
eivindjahren Jan 23, 2025
aad9552
Have parameter config keep parameter types for design matrix
jonathan-eq Jan 22, 2025
0937c6e
Fix bug where only literal error had position info
frode-aarstad Jan 21, 2025
9fde13f
zmq: implement task for heartbeats on clients to detect reconnection
xjules Jan 17, 2025
eb8e652
Make the run dialog snappier by tuning batching interval
berland Jan 21, 2025
f7fcf5a
Remove tests for OMP_NUM_THREADS
berland Jan 23, 2025
516578b
Fix mac failing on main
eivindjahren Jan 23, 2025
4c136de
Install correct dependency and adjust slurm config
berland Jan 23, 2025
7e179f6
Combine running of unit and performance tests
eivindjahren Jan 23, 2025
92ce748
Fix deprecated use of get_event_loop
eivindjahren Jan 23, 2025
993b3d8
Remove unused async_utils.py
eivindjahren Jan 23, 2025
234fc9f
Ensure SETENV is also substituted when writing json
berland Jan 23, 2025
3abeea9
Fix get_priors test
eivindjahren Jan 24, 2025
4c337a0
Raise ConfigValidationError on short parameter file lines
eivindjahren Jan 24, 2025
1d199a0
Make design_matrix parameter_configuration a single GenKw element
xjules Jan 22, 2025
785eb69
zmq: Speed up tests using MockZMQServer
eivindjahren Jan 24, 2025
920efa8
zmq: Add integration_test marker to slow tests
eivindjahren Jan 24, 2025
54e6444
zmq: Speed up test_invalid_server
eivindjahren Jan 24, 2025
b7795d9
Speed up test_default_installed_jobs
eivindjahren Jan 24, 2025
7914ce3
Speed up lsf driver tests
eivindjahren Jan 24, 2025
421971c
Fix everest tests failing when using editable installs
larsevj Jan 24, 2025
9e44c8a
zmq: add test that monitor can't connect to a secured server
xjules Dec 30, 2024
625b256
Use tmp_path for test_everest_to_ert_controls
larsevj Jan 24, 2025
4a2e5e3
Ensure cpu_seconds is always reported increasingly
berland Jan 24, 2025
dce4e05
zmq: make sure client disconnects before MockZMQServer closes
xjules Jan 24, 2025
b01c868
Add no_dealers Event to MockZMQServer
xjules Jan 24, 2025
ed7ce1a
Change port range of dark storage
JHolba Jan 28, 2025
ad04429
zmq: await server started first then create all evaluator tasks
xjules Jan 27, 2025

Files changed
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -12,7 +12,7 @@ _Short description of the approach_
- [ ] Added appropriate release note label
- [ ] Commit history is consistent and clean, in line with the [contribution guidelines](https://github.com/equinor/ert/blob/main/CONTRIBUTING.md).
- [ ] Make sure unit tests pass locally after every commit (`git rebase -i main
--exec 'pytest tests/ert/unit_tests -n auto --hypothesis-profile=fast -m "not integration_test"'`)
--exec 'pytest tests/ert/unit_tests tests/everest -n auto --hypothesis-profile=fast -m "not integration_test"'`)

## When applicable
- [ ] **When there are user facing changes**: Updated documentation
13 changes: 7 additions & 6 deletions .github/workflows/build_and_test.yml
@@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ]
test-type: [ 'performance-and-unit-tests', 'gui-tests', 'cli-tests' ]
python-version: [ '3.11', '3.12' ]
os: [ ubuntu-latest ]
uses: ./.github/workflows/test_ert.yml
@@ -47,7 +47,7 @@
strategy:
fail-fast: false
matrix:
test-type: [ 'test', 'integration-test', 'everest-models-test', 'doc']
test-type: [ 'test', 'everest-models-test', 'doc']
python-version: [ '3.11', '3.12' ]
os: [ ubuntu-latest ]
exclude:
@@ -66,7 +66,7 @@
fail-fast: false
matrix:
os: [ ubuntu-latest ]
python-version: [ '3.11', '3.12' ]
python-version: [ '3.11' ]
uses: ./.github/workflows/test_ert_with_slurm.yml
with:
os: ${{ matrix.os }}
@@ -78,7 +78,7 @@
fail-fast: false
matrix:
os: [ ubuntu-latest ]
python-version: [ '3.11', '3.12' ]
python-version: [ '3.11' ]
uses: ./.github/workflows/test_ert_with_flow.yml
with:
os: ${{ matrix.os }}
@@ -90,7 +90,7 @@
strategy:
fail-fast: false
matrix:
test-type: [ 'test', 'integration-test', 'everest-models-test' ]
test-type: [ 'test', 'everest-models-test' ]
os: [ 'macos-latest' ]
python-version: [ '3.12' ]

@@ -106,7 +106,7 @@
strategy:
fail-fast: false
matrix:
test-type: [ 'performance-tests', 'unit-tests', 'gui-tests', 'cli-tests' ]
test-type: [ 'performance-and-unit-tests', 'gui-tests', 'cli-tests' ]
python-version: [ '3.12' ]
os: [ 'macos-latest']

@@ -115,6 +115,7 @@
os: ${{ matrix.os }}
python-version: ${{ matrix.python-version }}
test-type: ${{ matrix.test-type }}
select-string: '"not skip_mac_ci"'
secrets: inherit

docs-ert:
6 changes: 0 additions & 6 deletions .github/workflows/run_ert_test_data_setups.yml
@@ -37,12 +37,6 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Install Ubuntu dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt-get update
sudo apt-get install xvfb libxcb-image0 libxcb-icccm4 libxcb-keysyms1 libxcb-randr0 libxcb-render0 libxcb-render-util0 libxcb-shape0 libxcb-shm0 libxcb-xfixes0 libxcb-xinerama0 libfontconfig1 libxcb-xkb1 libxkbcommon-x11-0 libdbus-1-3

- name: Install uv
uses: astral-sh/setup-uv@v5

16 changes: 7 additions & 9 deletions .github/workflows/test_ert.yml
@@ -7,6 +7,9 @@ on:
type: string
test-type:
type: string
select-string:
type: string
default: "''"

env:
ERT_SHOW_BACKTRACE: 1
@@ -40,7 +43,7 @@ jobs:
- name: GUI Test
if: inputs.test-type == 'gui-tests'
run: |
pytest --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -v --mpl --benchmark-disable tests/ert/ui_tests/gui --durations=25
pytest -m ${{ inputs.select-string }} --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -v --mpl --benchmark-disable tests/ert/ui_tests/gui --durations=25

- name: Upload artifact images
uses: actions/upload-artifact@v4
@@ -53,19 +56,14 @@
- name: CLI Test
if: inputs.test-type == 'cli-tests'
run: |
pytest --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -v --benchmark-disable --dist loadgroup tests/ert/ui_tests/cli --durations=25
pytest -m ${{ inputs.select-string }} --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -v --benchmark-disable --dist loadgroup tests/ert/ui_tests/cli --durations=25

- name: Unit Test
if: inputs.test-type == 'unit-tests'
if: inputs.test-type == 'performance-and-unit-tests'
run: |
pytest --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -n logical --show-capture=stderr -v --benchmark-disable --mpl --dist loadgroup tests/ert/unit_tests --durations=25
pytest -m ${{ inputs.select-string }} --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -n logical --show-capture=stderr -v --benchmark-disable --mpl --dist loadgroup tests/ert/unit_tests tests/ert/performance_tests --durations=25
pytest --doctest-modules --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov2.xml src/ --ignore src/ert/dark_storage

- name: Performance Test
if: inputs.test-type == 'performance-tests'
run: |
pytest --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -n logical --show-capture=stderr -v --benchmark-disable --dist loadgroup tests/ert/performance_tests --durations=25

- name: Upload coverage to Codecov
id: codecov1
uses: codecov/codecov-action@v5
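The new `select-string` input threads a pytest marker expression through every test step. Its default of `"''"` means the Linux jobs run with an empty `-m` expression (no marker filtering), while the macOS jobs pass `'"not skip_mac_ci"'` to deselect tests that cannot run there. A sketch of how the two cases expand on the runner, with illustrative test paths:

```sh
# Default select-string ("''"): pytest sees an empty -m expression,
# which it treats as "no marker filter" -- all tests are collected.
pytest -m '' tests/ert/unit_tests tests/ert/performance_tests

# macOS select-string ('"not skip_mac_ci"'): tests marked skip_mac_ci
# are deselected.
pytest -m "not skip_mac_ci" tests/ert/unit_tests tests/ert/performance_tests
```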
22 changes: 18 additions & 4 deletions .github/workflows/test_ert_with_slurm.yml
@@ -13,7 +13,7 @@ jobs:
test-ert-on-slurm:
name: Run ert tests
timeout-minutes: 20
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
@@ -34,13 +34,20 @@
run: |
set -e

sudo apt install slurmd slurmctld -y
sudo apt install libpmix-dev slurmd slurmctld -y

sudo mkdir /var/spool/slurm
sudo chown slurm /var/spool/slurm

cat <<EOF > slurm.conf
ClusterName=localcluster
SlurmUser=slurm
SlurmctldHost=localhost
SelectType=select/cons_res # Select nodes based on consumable resources
SchedulerType=sched/builtin # Avoid default backfill scheduler which adds delays
SelectType=select/cons_tres # Select nodes based on consumable resources
SelectTypeParameters=CR_Core # Cores are the consumable resource
StateSaveLocation=/var/spool/slurm
PriorityType=priority/basic # Tests depend on FIFO scheduling
ProctrackType=proctrack/linuxproc # Use /proc to track processes
PartitionName=LocalQ Nodes=ALL Default=YES MaxTime=INFINITE State=UP
PartitionName=AlternativeQ Nodes=ALL Default=YES MaxTime=INFINITE State=UP
@@ -54,9 +61,13 @@
sudo mv slurm.conf /etc/slurm/
sudo systemctl start slurmd # The compute node slurm daemon
sudo systemctl start slurmctld # The slurm controller daemon
sleep 1
systemctl status slurmd
systemctl status slurmctld

# Show partition and node information configured:
sinfo
scontrol show nodes

- name: Verify slurm cluster works
# Timeout is set low to catch a misconfigured cluster where srun will hang.
@@ -69,7 +80,10 @@
run: |
set -e
export _ERT_TESTS_ALTERNATIVE_QUEUE=AlternativeQ
pytest tests/ert/unit_tests/scheduler --slurm
pytest tests/ert/unit_tests/scheduler/test_{generic,slurm}_driver.py -sv --slurm \
-n 8 --durations=10 -k "not (LsfDriver or LocalDriver or OpenPBSDriver)"
scontrol show job

pytest tests/ert/ui_tests/cli/test_missing_runpath.py --slurm

- name: Test poly-example on slurm
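The Slurm configuration above is tuned for deterministic tests: `sched/builtin` avoids the default backfill scheduler's delays, `priority/basic` keeps scheduling FIFO, and `select/cons_tres` with `CR_Core` makes cores the consumable resource. A quick sanity check for such a single-node cluster, using standard Slurm client tools; the timeout guards against `srun` hanging on a misconfigured controller:

```sh
# Show the partitions and node state the controller knows about
sinfo
scontrol show nodes

# Run a trivial job; bound it with a timeout so a misconfigured
# cluster fails fast instead of hanging the CI step
timeout 10 srun -n1 hostname
```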
14 changes: 2 additions & 12 deletions .github/workflows/test_everest.yml
@@ -41,22 +41,12 @@ jobs:
- name: Run Tests Linux
if: ${{ inputs.test-type == 'test' && runner.os != 'macOS'}}
run: |
pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -m "not integration_test" --dist loadgroup -sv
pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy --dist loadgroup -sv

- name: Run Tests macOS
if: ${{ inputs.test-type == 'test' && runner.os == 'macOS'}}
run: |
python -m pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -m "not integration_test and not fails_on_macos_github_workflow" --dist loadgroup -sv

- name: Run Integration Tests Linux
if: ${{inputs.test-type == 'integration-test' && runner.os != 'macOS'}}
run: |
pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -m "integration_test" --dist loadgroup -sv

- name: Run Integration Tests macOS
if: ${{inputs.test-type == 'integration-test' && runner.os == 'macOS'}}
run: |
python -m pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -m "integration_test and not fails_on_macos_github_workflow" --dist loadgroup
python -m pytest tests/everest -n 4 --cov=ert --cov=everest --cov=_ert --cov-report=xml:cov1.xml --junit-xml=junit.xml -o junit_family=legacy -m "not skip_mac_ci" --dist loadgroup -sv

- name: Build Documentation
if: inputs.test-type == 'doc'
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
@@ -1,3 +1,4 @@
default_install_hook_types: [pre-commit]
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
@@ -16,3 +17,14 @@ repos:
- id: ruff
args: [ --fix ]
- id: ruff-format

- repo: local
hooks:
- id: pytest
name: pytest
entry: pytest
args: [-n, auto, tests/ert/unit_tests, tests/everest, --hypothesis-profile=fast, -m, not integration_test]
language: system
types: [python]
stages: [pre-push]
pass_filenames: false
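Because the new hook declares `stages: [pre-push]` and `default_install_hook_types` only lists `pre-commit`, a plain `pre-commit install` leaves the test hook dormant; it only fires on `git push` after the pre-push hook type is installed explicitly:

```sh
pre-commit install --hook-type pre-push
# After this, every `git push` first runs the fast unit-test subset
```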
10 changes: 9 additions & 1 deletion README.md
@@ -77,7 +77,7 @@ There are many kinds of tests in the `tests` directory, while iterating on your
code you can run a fast subset of the tests with

```sh
pytest -n auto --hypothesis-profile=fast tests/ert/unit_tests -m "not integration_test"
pytest -n auto --hypothesis-profile=fast tests/ert/unit_tests tests/everest -m "not integration_test"
```

[Git LFS](https://git-lfs.com/) must be installed to get all the files. This is
@@ -124,6 +124,14 @@ $ pip install pre-commit
$ pre-commit install
```

There is also a pre-push hook configured in `pre-commit` to run a collection of
relatively fast tests. To install this hook:

```sh
$ pre-commit install --hook-type pre-push
```


### Trouble with setup

As a simple test of your `ert` installation, you may try to run one of the
12 changes: 11 additions & 1 deletion justfile
@@ -10,4 +10,14 @@ snake_oil:

# execute rapid unittests
rapid-tests:
nice pytest -n auto tests/ert/unit_tests --hypothesis-profile=fast -m "not integration_test"
nice pytest -n auto tests/ert/unit_tests tests/everest --hypothesis-profile=fast -m "not integration_test"

check-all:
mypy src/ert src/everest
pre-commit run --all-files
pytest tests/everest -n 4 -m everest_models_test --dist loadgroup
pytest tests/everest -n 4 -m integration_test --dist loadgroup
pytest tests/ert/ui_tests/ --mpl --dist loadgroup
pytest tests/ert/unit_tests/ -n 4 --dist loadgroup
pytest --doctest-modules src/ --ignore src/ert/dark_storage
pytest tests/ert/performance_tests --benchmark-disable --dist loadgroup
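With this recipe in place, the full local check mirroring CI becomes a single command (assuming `just` is installed):

```sh
just check-all    # type-check, lint hooks, and all test suites
just rapid-tests  # the fast subset also used by the pre-push hook
```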
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -171,7 +171,7 @@ markers = [
"slow",
"everest_models_test",
"integration_test",
"fails_on_macos_github_workflow", # Tests marked fail due to gui-related issues
"skip_mac_ci",
]
log_cli = "false"
asyncio_mode = "auto"
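The `fails_on_macos_github_workflow` marker is replaced by the more general `skip_mac_ci`, matching the `-m "not skip_mac_ci"` selection the macOS jobs now use. A hypothetical test showing how the renamed marker would be applied (the test body is illustrative, not from this PR):

```python
import pytest


@pytest.mark.skip_mac_ci  # deselected on macOS runners via -m "not skip_mac_ci"
def test_needs_linux_only_tooling():
    ...
```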
54 changes: 0 additions & 54 deletions src/_ert/async_utils.py

This file was deleted.

18 changes: 16 additions & 2 deletions src/_ert/forward_model_runner/client.py
@@ -18,6 +18,8 @@ class ClientConnectionError(Exception):
CONNECT_MSG = b"CONNECT"
DISCONNECT_MSG = b"DISCONNECT"
ACK_MSG = b"ACK"
HEARTBEAT_MSG = b"BEAT"
HEARTBEAT_TIMEOUT = 5.0


class Client:
@@ -83,7 +85,7 @@ async def connect(self) -> None:
await self._term_receiver_task()
self._receiver_task = asyncio.create_task(self._receiver())
try:
await self.send(CONNECT_MSG, retries=1)
await self.send(CONNECT_MSG)
except ClientConnectionError:
await self._term_receiver_task()
self.term()
@@ -93,11 +95,23 @@ async def process_message(self, msg: str) -> None:
raise NotImplementedError("Only monitor can receive messages!")

async def _receiver(self) -> None:
last_heartbeat_time: float | None = None
while True:
try:
_, raw_msg = await self.socket.recv_multipart()
if raw_msg == ACK_MSG:
self._ack_event.set()
elif raw_msg == HEARTBEAT_MSG:
if (
last_heartbeat_time
and (asyncio.get_running_loop().time() - last_heartbeat_time)
> 2 * HEARTBEAT_TIMEOUT
):
await self.socket.send_multipart([b"", CONNECT_MSG])
logger.warning(
f"{self.dealer_id} heartbeat failed - reconnecting."
)
last_heartbeat_time = asyncio.get_running_loop().time()
else:
await self.process_message(raw_msg.decode("utf-8"))
except zmq.ZMQError as exc:
@@ -144,5 +158,5 @@ async def send(self, message: str | bytes, retries: int | None = None) -> None:
self.socket.connect(self.url)
backoff = min(backoff * 2, 10) # Exponential backoff
raise ClientConnectionError(
f"{self.dealer_id} Failed to send {message!r} after retries!"
f"{self.dealer_id} Failed to send {message!r} to {self.url} after retries!"
)
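The receiver now measures the gap between `HEARTBEAT_MSG` frames and, once more than two heartbeat intervals pass without one, re-sends `CONNECT_MSG` so the client re-registers with a restarted server. For context, a minimal sketch of the server-side counterpart the client expects: a periodic beat to every known dealer identity (an illustrative sketch, not the actual evaluator implementation):

```python
import asyncio

import zmq.asyncio

HEARTBEAT_MSG = b"BEAT"
HEARTBEAT_TIMEOUT = 5.0  # matches the client-side constant


async def heartbeat_loop(
    router: zmq.asyncio.Socket, dealer_ids: set[bytes]
) -> None:
    """Beat every connected dealer periodically so clients can detect a
    dead or restarted server by the absence of BEAT frames."""
    while True:
        for dealer_id in dealer_ids:
            # A ROUTER socket addresses a peer by prefixing the outgoing
            # message with that peer's identity frame.
            await router.send_multipart([dealer_id, b"", HEARTBEAT_MSG])
        await asyncio.sleep(HEARTBEAT_TIMEOUT)
```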