From 72d623a9af26ef271d6ea24dfa6f611c97e1dfb8 Mon Sep 17 00:00:00 2001 From: Irfan Alibay Date: Thu, 5 Dec 2024 15:18:12 +0000 Subject: [PATCH] Compute selection: deviceIndex & enforce 1 thread in vacuum (#752) * Fix openmm compute platform selection for issues 739 and 704 * Add rever entry * fix typing * Remove erroneous extra file * fix gufe keys * Update news/compute_selection_fixes.rst Co-authored-by: Mike Henry <11765982+mikemhenry@users.noreply.github.com> * Update omm_compute.py * import os * NETCDF3_64BIT is now NETCDF3_64BIT_OFFSET --------- Co-authored-by: Mike Henry <11765982+mikemhenry@users.noreply.github.com> --- news/compute_selection_fixes.rst | 26 ++++++++++++++ openfe/protocols/openmm_afe/base.py | 28 +++++++++++---- .../protocols/openmm_md/plain_md_methods.py | 10 +++--- .../openmm_rfe/_rfe_utils/__init__.py | 1 - .../protocols/openmm_rfe/equil_rfe_methods.py | 12 ++++--- .../omm_compute.py} | 34 ++++++++++++++++--- openfe/protocols/openmm_utils/omm_settings.py | 12 +++++++ .../test_openmm_equil_rfe_protocols.py | 6 ++-- .../tests/protocols/test_rfe_tokenization.py | 11 ++++-- .../test_solvation_afe_tokenization.py | 22 +++++++++--- openfe/utils/handle_trajectories.py | 2 +- 11 files changed, 133 insertions(+), 31 deletions(-) create mode 100644 news/compute_selection_fixes.rst rename openfe/protocols/{openmm_rfe/_rfe_utils/compute.py => openmm_utils/omm_compute.py} (55%) diff --git a/news/compute_selection_fixes.rst b/news/compute_selection_fixes.rst new file mode 100644 index 000000000..6ab5f6c7c --- /dev/null +++ b/news/compute_selection_fixes.rst @@ -0,0 +1,26 @@ +**Added:** + +* OpenMMEngineSettings now has a `gpu_device_index` attribute + allowing users to pass through a list of ints to select the + GPU devices to run their simulations on. + +**Changed:** + +* `openfe.protocols.openmm_rfe._rfe_utils.compute` has been moved + to `openfe.protocols.openmm_utils.omm_compute`. + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* OpenMM CPU vacuum calculations now enforce the use of a single CPU to avoid large performance losses. + +**Security:** + +* diff --git a/openfe/protocols/openmm_afe/base.py b/openfe/protocols/openmm_afe/base.py index 4e201d220..633ec884a 100644 --- a/openfe/protocols/openmm_afe/base.py +++ b/openfe/protocols/openmm_afe/base.py @@ -58,12 +58,13 @@ MultiStateSimulationSettings, OpenMMEngineSettings, IntegratorSettings, LambdaSettings, MultiStateOutputSettings, ThermoSettings, OpenFFPartialChargeSettings, + OpenMMSystemGeneratorFFSettings, ) -from openfe.protocols.openmm_rfe._rfe_utils import compute from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocolUnit from ..openmm_utils import ( settings_validation, system_creation, - multistate_analysis, charge_generation + multistate_analysis, charge_generation, + omm_compute, ) from openfe.utils import without_oechem_backend @@ -175,6 +176,7 @@ def _pre_equilibrate( settings : dict[str, SettingsBaseModel] A dictionary of settings objects. Expects the following entries: + * `forcefield_settings` * `engine_settings` * `thermo_settings` * `integrator_settings` @@ -189,8 +191,12 @@ def _pre_equilibrate( Equilibrated system positions """ # Prep the simulation object - platform = compute.get_openmm_platform( - settings['engine_settings'].compute_platform + # Restrict CPU count if running vacuum simulation + restrict_cpu = settings['forcefield_settings'].nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=settings['engine_settings'].compute_platform, + gpu_device_index=settings['engine_settings'].gpu_device_index, + restrict_cpu_count=restrict_cpu ) integrator = openmm.LangevinMiddleIntegrator( @@ -710,6 +716,7 @@ def _get_reporter( def _get_ctx_caches( self, + forcefield_settings: OpenMMSystemGeneratorFFSettings, engine_settings: OpenMMEngineSettings ) -> tuple[openmmtools.cache.ContextCache, openmmtools.cache.ContextCache]: """ @@ -717,7 +724,8 @@ def _get_ctx_caches( Parameters ---------- - engine_settings : OpenMMEngineSettings, + forcefield_settings: OpenMMSystemGeneratorFFSettings + engine_settings : OpenMMEngineSettings Returns ------- @@ -726,8 +734,13 @@ def _get_ctx_caches( sampler_context_cache : openmmtools.cache.ContextCache The sampler state context cache. """ - platform = compute.get_openmm_platform( - engine_settings.compute_platform, + # Get the compute platform + # Set the number of CPUs to 1 if running a vacuum simulation + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=engine_settings.compute_platform, + gpu_device_index=engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) energy_context_cache = openmmtools.cache.ContextCache( @@ -1026,6 +1039,7 @@ def run(self, dry=False, verbose=True, try: # 12. Get context caches energy_ctx_cache, sampler_ctx_cache = self._get_ctx_caches( + settings['forcefield_settings'], settings['engine_settings'] ) diff --git a/openfe/protocols/openmm_md/plain_md_methods.py b/openfe/protocols/openmm_md/plain_md_methods.py index 1ac147714..25ae1953b 100644 --- a/openfe/protocols/openmm_md/plain_md_methods.py +++ b/openfe/protocols/openmm_md/plain_md_methods.py @@ -43,10 +43,9 @@ ) from openff.toolkit.topology import Molecule as OFFMolecule -from openfe.protocols.openmm_rfe._rfe_utils import compute from openfe.protocols.openmm_utils import ( system_validation, settings_validation, system_creation, - charge_generation, + charge_generation, omm_compute ) logger = logging.getLogger(__name__) @@ -623,8 +622,11 @@ def run(self, *, dry=False, verbose=True, ) # 10. Get platform - platform = compute.get_openmm_platform( - protocol_settings.engine_settings.compute_platform + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=protocol_settings.engine_settings.compute_platform, + gpu_device_index=protocol_settings.engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) # 11. Set the integrator diff --git a/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py b/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py index 81fb48a67..b8cca820d 100644 --- a/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py +++ b/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py @@ -1,5 +1,4 @@ from . import ( - compute, lambdaprotocol, multistate, relative, diff --git a/openfe/protocols/openmm_rfe/equil_rfe_methods.py b/openfe/protocols/openmm_rfe/equil_rfe_methods.py index ae3f19619..2b80370e0 100644 --- a/openfe/protocols/openmm_rfe/equil_rfe_methods.py +++ b/openfe/protocols/openmm_rfe/equil_rfe_methods.py @@ -61,7 +61,7 @@ ) from ..openmm_utils import ( system_validation, settings_validation, system_creation, - multistate_analysis, charge_generation + multistate_analysis, charge_generation, omm_compute, ) from . import _rfe_utils from ...utils import without_oechem_backend, log_system_probe @@ -933,9 +933,13 @@ def run(self, *, dry=False, verbose=True, bfactors=bfactors, ) - # 10. Get platform - platform = _rfe_utils.compute.get_openmm_platform( - protocol_settings.engine_settings.compute_platform + # 10. Get compute platform + # restrict to a single CPU if running vacuum + restrict_cpu = forcefield_settings.nonbonded_method.lower() == 'nocutoff' + platform = omm_compute.get_openmm_platform( + platform_name=protocol_settings.engine_settings.compute_platform, + gpu_device_index=protocol_settings.engine_settings.gpu_device_index, + restrict_cpu_count=restrict_cpu ) # 11. Set the integrator diff --git a/openfe/protocols/openmm_rfe/_rfe_utils/compute.py b/openfe/protocols/openmm_utils/omm_compute.py similarity index 55% rename from openfe/protocols/openmm_rfe/_rfe_utils/compute.py rename to openfe/protocols/openmm_utils/omm_compute.py index b3bee28f6..af48a96a6 100644 --- a/openfe/protocols/openmm_rfe/_rfe_utils/compute.py +++ b/openfe/protocols/openmm_utils/omm_compute.py @@ -1,23 +1,40 @@ # This code is part of OpenFE and is licensed under the MIT license. # For details, see https://github.com/OpenFreeEnergy/openfe # Adapted Perses' perses.app.setup_relative_calculation.get_openmm_platform +from typing import Optional import warnings import logging +import os logger = logging.getLogger(__name__) -def get_openmm_platform(platform_name=None): +def get_openmm_platform( + platform_name: Optional[str] = None, + gpu_device_index: Optional[list[int]] = None, + restrict_cpu_count: bool = False +): """ Return OpenMM's platform object based on given name. Setting to mixed precision if using CUDA or OpenCL. Parameters ---------- - platform_name : str, optional, default=None + platform_name : Optional[str] String with the platform name. If None, it will use the fastest platform supporting mixed precision. + Default ``None``. + gpu_device_index : Optional[list[str]] + GPU device index selection. If ``None`` the default OpenMM + GPU selection will be used. + See the `OpenMM platform properties documentation `_ + for more details. + Default ``None``. + restrict_cpu_count : bool + Optional hint to restrict the CPU count to 1 when + ``platform_name`` is CPU. This allows Protocols to ensure + that no large performance in cases like vacuum simulations. Returns ------- @@ -44,16 +61,23 @@ def get_openmm_platform(platform_name=None): # Set precision and properties name = platform.getName() if name in ['CUDA', 'OpenCL']: - platform.setPropertyDefaultValue( - 'Precision', 'mixed') + platform.setPropertyDefaultValue('Precision', 'mixed') + if gpu_device_index is not None: + index_list = ','.join(str(i) for i in gpu_device_index) + platform.setPropertyDefaultValue('DeviceIndex', index_list) + if name == 'CUDA': platform.setPropertyDefaultValue( 'DeterministicForces', 'true') if name != 'CUDA': - wmsg = (f"Non-GPU platform selected: {name}, this may significantly " + wmsg = (f"Non-CUDA platform selected: {name}, this may significantly " "impact simulation performance") warnings.warn(wmsg) logging.warning(wmsg) + if name == 'CPU' and restrict_cpu_count: + threads = os.getenv("OPENMM_CPU_THREADS", '1') + platform.setPropertyDefaultValue('Threads', threads) + return platform diff --git a/openfe/protocols/openmm_utils/omm_settings.py b/openfe/protocols/openmm_utils/omm_settings.py index d52db9b41..63cb5789c 100644 --- a/openfe/protocols/openmm_utils/omm_settings.py +++ b/openfe/protocols/openmm_utils/omm_settings.py @@ -311,6 +311,18 @@ class OpenMMEngineSettings(SettingsBaseModel): OpenMM compute platform to perform MD integration with. If ``None``, will choose fastest available platform. Default ``None``. """ + gpu_device_index: Optional[list[int]] = None + """ + List of integer indices for the GPU device to select when + ``compute_platform`` is either set to ``CUDA`` or ``OpenCL``. + + If ``None``, the default OpenMM GPU selection behaviour is used. + + See the `OpenMM platform properties documentation `_ + for more details. + + Default ``None``. + """ class IntegratorSettings(SettingsBaseModel): diff --git a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py index a0dcb3b66..9b3183585 100644 --- a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py +++ b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py @@ -38,15 +38,15 @@ from openfe.protocols.openmm_rfe.equil_rfe_methods import ( _validate_alchemical_components, _get_alchemical_charge_difference ) -from openfe.protocols.openmm_utils import system_creation +from openfe.protocols.openmm_utils import system_creation, omm_compute from openfe.protocols.openmm_utils.charge_generation import ( HAS_NAGL, HAS_OPENEYE, HAS_ESPALOMA ) def test_compute_platform_warn(): - with pytest.warns(UserWarning, match="Non-GPU platform selected: CPU"): - openmm_rfe._rfe_utils.compute.get_openmm_platform('CPU') + with pytest.warns(UserWarning, match="Non-CUDA platform selected: CPU"): + omm_compute.get_openmm_platform('CPU') def test_append_topology(benzene_complex_system, toluene_complex_system): diff --git a/openfe/tests/protocols/test_rfe_tokenization.py b/openfe/tests/protocols/test_rfe_tokenization.py index 07d6c3c4b..1e765ca0d 100644 --- a/openfe/tests/protocols/test_rfe_tokenization.py +++ b/openfe/tests/protocols/test_rfe_tokenization.py @@ -39,13 +39,20 @@ def instance(self): class TestRelativeHybridTopologyProtocol(GufeTokenizableTestsMixin): cls = openmm_rfe.RelativeHybridTopologyProtocol - key = "RelativeHybridTopologyProtocol-fbc7c8ac0f58808ad4430a155453932f" - repr = f"<{key}>" + key = None + repr = "" + key = None + repr = "AbsoluteSolvationProtocol-" @pytest.fixture() def instance(self, protocol): return protocol + def test_repr(self, instance): + """ + Overwrites the base `test_repr` call. + """ + assert isinstance(repr(instance), str) + assert self.repr in repr(instance) + class TestAbsoluteSolvationSolventUnit(GufeTokenizableTestsMixin): cls = openmm_afe.AbsoluteSolvationSolventUnit @@ -93,9 +100,16 @@ def test_repr(self, instance): class TestAbsoluteSolvationProtocolResult(GufeTokenizableTestsMixin): cls = openmm_afe.AbsoluteSolvationProtocolResult - key = "AbsoluteSolvationProtocolResult-7f80c1cf5a526bde45d385cee7352428" - repr = f"<{key}>" + key = None + repr = "AbsoluteSolvationProtocolResult-" @pytest.fixture() def instance(self, protocol_result): return protocol_result + + def test_repr(self, instance): + """ + Overwrites the base `test_repr` call. + """ + assert isinstance(repr(instance), str) + assert self.repr in repr(instance) diff --git a/openfe/utils/handle_trajectories.py b/openfe/utils/handle_trajectories.py index a10d376ee..1341dbeec 100644 --- a/openfe/utils/handle_trajectories.py +++ b/openfe/utils/handle_trajectories.py @@ -78,7 +78,7 @@ def _create_new_dataset(filename: Path, n_atoms: int, AMBER Conventions compliant NetCDF dataset to store information contained in MultiState reporter generated NetCDF file. """ - ncfile = nc.Dataset(filename, 'w', format='NETCDF3_64BIT') + ncfile = nc.Dataset(filename, 'w', format='NETCDF3_64BIT_OFFSET') ncfile.Conventions = 'AMBER' ncfile.ConventionVersion = "1.0" ncfile.application = "openfe"