From 174389c7513c8bb110c05da54bc9d680935133b8 Mon Sep 17 00:00:00 2001 From: Yurii Kondrakov Date: Tue, 3 Dec 2024 11:10:50 -0500 Subject: [PATCH] Add nouveau check and modprobe to install (#359) * Add nouveau check and modprobe to install the module * https://bugs.launchpad.net/ubuntu/+source/ubuntu-drivers-common/+bug/2090502 --- src/hw_tools.py | 9 ++++++++- tests/unit/test_hw_tools.py | 19 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/hw_tools.py b/src/hw_tools.py index 3d8f73ab..f617fc88 100644 --- a/src/hw_tools.py +++ b/src/hw_tools.py @@ -277,13 +277,20 @@ def install(self) -> None: logger.info("NVIDIA driver already installed in the machine") return + with open("/proc/modules", encoding="utf-8") as modules: + if "nouveau" in modules.read(): + logger.error("Nouveau driver is loaded. Unload it before installing NVIDIA driver") + raise ResourceInstallationError(self._name) + logger.info("Installing NVIDIA driver") apt.add_package("ubuntu-drivers-common", update_cache=True) try: # This can be changed to check_call and not rely in the output if this is fixed # https://github.com/canonical/ubuntu-drivers-common/issues/106 - result = subprocess.check_output("ubuntu-drivers install --gpgpu".split(), text=True) + # https://bugs.launchpad.net/ubuntu/+source/ubuntu-drivers-common/+bug/2090502 + result = subprocess.check_output("ubuntu-drivers --gpgpu install".split(), text=True) + subprocess.check_call("modprobe nvidia".split()) except subprocess.CalledProcessError as err: logger.error("Failed to install the NVIDIA driver: %s", err) diff --git a/tests/unit/test_hw_tools.py b/tests/unit/test_hw_tools.py index 55d29dc3..481d63de 100644 --- a/tests/unit/test_hw_tools.py +++ b/tests/unit/test_hw_tools.py @@ -1219,7 +1219,7 @@ def test_dcgm_create_custom_metrics_copy_fail( def test_nvidia_driver_strategy_install_success( - mock_path, mock_check_output, mock_apt_lib, nvidia_driver_strategy + mock_path, mock_check_output, mock_apt_lib, mock_check_call, nvidia_driver_strategy ): nvidia_version = mock.MagicMock() nvidia_version.exists.return_value = False @@ -1228,7 +1228,8 @@ def test_nvidia_driver_strategy_install_success( nvidia_driver_strategy.install() mock_apt_lib.add_package.assert_called_once_with("ubuntu-drivers-common", update_cache=True) - mock_check_output.assert_called_once_with("ubuntu-drivers install --gpgpu".split(), text=True) + mock_check_output.assert_called_once_with("ubuntu-drivers --gpgpu install".split(), text=True) + mock_check_call.assert_called_once_with("modprobe nvidia".split()) def test_install_nvidia_drivers_already_installed( @@ -1244,6 +1245,20 @@ def test_install_nvidia_drivers_already_installed( mock_check_output.assert_not_called() +def test_install_nvidia_drivers_nouveau_installed(mock_path, nvidia_driver_strategy, mock_apt_lib): + nvidia_version = mock.MagicMock() + nvidia_version.exists.return_value = False + mock_path.return_value = nvidia_version + mocked_open = mock.mock_open(read_data="nouveau") + + with mock.patch("builtins.open", mocked_open): + with pytest.raises(ResourceInstallationError): + nvidia_driver_strategy.install() + + mock_apt_lib.add_package.assert_not_called() + mocked_open.assert_called_once_with("/proc/modules", encoding="utf-8") + + def test_install_nvidia_drivers_subprocess_exception( mock_path, mock_check_output, mock_apt_lib, nvidia_driver_strategy ):