def install_intel_mpi_benchmarking_tool(
    self, tool_names: List[str] = ["IMB-MPI1"]
) -> None:
    """Build Intel MPI Benchmarks (IMB) binaries from source on CBL-Mariner.

    On every other OS this returns immediately without doing anything,
    because the benchmark tools are included in those distros' packages.
    An MPI package providing ``mpicc`` and ``mpicxx`` is assumed to be
    built and installed on the node already.

    Args:
        tool_names: make targets to build in the cloned source tree, for
            example ``["IMB-MPI1", "IMB-RMA", "IMB-NBC"]``. The default list
            object is shared between calls; it is only read here, never
            modified.

    Raises:
        AssertionError: if ``mpicc`` or ``mpicxx`` cannot be found on the
            node.
    """
    node = self._node
    if not isinstance(node.os, CBLMariner):
        # These tools are included in other distro packages
        return

    # Work on a private snapshot so the shared default list can never be
    # aliased or mutated, and so a target named twice is built only once.
    targets = tuple(dict.fromkeys(tool_names))

    # Clone Intel MPI Benchmarks into the node's working path.
    # NOTE(review): if Git.clone raises when the target directory already
    # exists, calling this method twice on the same node will fail — confirm.
    node.tools[Git].clone(
        url="https://github.com/intel/mpi-benchmarks.git", cwd=node.working_path
    )
    imb_src_folder = node.get_pure_path(f"{node.working_path}/mpi-benchmarks")

    # Locate both compiler wrappers before touching anything, so a missing
    # wrapper fails the run without any permission change being applied.
    mpicc_path = self._find_mpi_compiler("mpicc")
    mpicxx_path = self._find_mpi_compiler("mpicxx")

    chmod = node.tools[Chmod]
    chmod.chmod(mpicc_path, "755", sudo=True)
    chmod.chmod(mpicxx_path, "755", sudo=True)

    # The Make tool lookup does not depend on the target; do it once.
    make = node.tools[Make]
    for target in targets:
        # tool_names example: ["IMB-MPI1", "IMB-RMA", "IMB-NBC"]
        make.make(
            f"{target} CC={mpicc_path} CXX={mpicxx_path}",
            cwd=imb_src_folder,
            sudo=True,
            shell=False,
            sendYesCmd=False,
        )
        chmod.chmod(f"{imb_src_folder}/{target}", "755", sudo=True)


def _find_mpi_compiler(self, compiler_name: str) -> str:
    """Return the first file named *compiler_name* found under ``/``.

    The whole filesystem is searched with sudo. When several matches exist
    the first one reported by the Find tool is used.

    Raises:
        AssertionError: if nothing is found or the first match is empty.
    """
    node = self._node
    matches = node.tools[Find].find_files(
        node.get_pure_path("/"), compiler_name, sudo=True
    )
    failure_message = f"Could not find location of {compiler_name} from MPI package"
    assert_that(len(matches)).described_as(failure_message).is_greater_than(0)
    compiler_path = matches[0]
    assert_that(compiler_path).described_as(failure_message).is_not_empty()
    return compiler_path
+ command = "yes '' | " + result = self.node.execute( - f"yes '' | make -j{thread_count} {arguments}", + f"{command} make -j{thread_count} {arguments}", cwd=cwd, timeout=timeout, sudo=sudo, - shell=True, + shell=shell, update_envs=update_envs, expected_exit_code=expected_exit_code, expected_exit_code_failure_message="Failed to make", diff --git a/microsoft/testsuites/hpc/infinibandsuite.py b/microsoft/testsuites/hpc/infinibandsuite.py index 7b02f3ab1b..91ed8a9435 100644 --- a/microsoft/testsuites/hpc/infinibandsuite.py +++ b/microsoft/testsuites/hpc/infinibandsuite.py @@ -15,7 +15,7 @@ simple_requirement, ) from lisa.features import AvailabilitySetEnabled, Infiniband, Sriov -from lisa.operating_system import BSD, Windows +from lisa.operating_system import BSD, CBLMariner, Windows from lisa.sut_orchestrator.azure.tools import Waagent from lisa.tools import Find, KernelConfig, Ls, Modprobe, Ssh from lisa.util import ( @@ -286,6 +286,9 @@ def verify_intel_mpi(self, environment: Environment, log: Logger) -> None: client_ssh.enable_public_key(server_ssh.generate_key_pairs()) server_ssh.add_known_host(client_ip) client_ssh.add_known_host(server_ip) + sudo=False + if isinstance(server_node.os, CBLMariner): + sudo=True # Note: Using bash because script is not supported by Dash # sh points to dash on Ubuntu @@ -295,6 +298,7 @@ def verify_intel_mpi(self, environment: Environment, log: Logger) -> None: "-env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 " "-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa " "/opt/intel/oneapi/mpi/2021.1.1/bin/IMB-MPI1 pingpong", + sudo=sudo, expected_exit_code=0, expected_exit_code_failure_message="Failed intra-node pingpong test " "with intel mpi", @@ -306,6 +310,7 @@ def verify_intel_mpi(self, environment: Environment, log: Logger) -> None: "-env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 " "-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa " "/opt/intel/oneapi/mpi/2021.1.1/bin/IMB-MPI1 pingpong", + 
sudo=sudo, expected_exit_code=0, expected_exit_code_failure_message="Failed inter-node pingpong test " "with intel mpi", @@ -319,6 +324,7 @@ def verify_intel_mpi(self, environment: Environment, log: Logger) -> None: "-n 44 -env I_MPI_FABRICS=shm:ofi -env SECS_PER_SAMPLE=600 " "-env FI_PROVIDER=mlx -env I_MPI_DEBUG=5 -env I_MPI_PIN_DOMAIN=numa " f"/opt/intel/oneapi/mpi/2021.1.1/bin/{test}", + sudo=sudo, expected_exit_code=0, expected_exit_code_failure_message=f"Failed {test} test with intel mpi", timeout=3000, @@ -360,10 +366,13 @@ def verify_open_mpi(self, environment: Environment, log: Logger) -> None: raise SkippedException(err) run_in_parallel([server_ib.install_open_mpi, client_ib.install_open_mpi]) - server_node.execute("ldconfig", sudo=True) client_node.execute("ldconfig", sudo=True) + # Only for mariner, we need to build intel benchmarking tools + # as they are not included in our packages + server_ib.install_intel_mpi_benchmarking_tool() + # Restart the ssh sessions for changes to /etc/security/limits.conf # to take effect server_node.close() @@ -386,7 +395,7 @@ def verify_open_mpi(self, environment: Environment, log: Logger) -> None: # Ping Pong test find = server_node.tools[Find] find_results = find.find_files( - server_node.get_pure_path("/usr"), "IMB-MPI1", sudo=True + server_node.get_pure_path("/"), "IMB-MPI1", sudo=True ) assert_that(len(find_results)).described_as( "Could not find location of IMB-MPI1 for Open MPI" @@ -407,7 +416,7 @@ def verify_open_mpi(self, environment: Environment, log: Logger) -> None: # IMB-MPI Tests find_results = find.find_files( - server_node.get_pure_path("/usr"), "IMB-MPI1", sudo=True + server_node.get_pure_path("/"), "IMB-MPI1", sudo=True ) assert_that(len(find_results)).described_as( "Could not find location of Open MPI test: IMB-MPI1" @@ -417,7 +426,7 @@ def verify_open_mpi(self, environment: Environment, log: Logger) -> None: "Could not find location of Open MPI test: IMB-MPI1" ).is_not_empty() server_node.execute( - 
f"/usr/local/bin/mpirun --host {server_ip},{client_ip} " + f"/usr/local/bin/mpirun -hosts {server_ip},{client_ip} " "-n 2 --mca btl self,vader,openib --mca btl_openib_cq_size 4096 " "--mca btl_openib_allow_ib 1 --mca " f"btl_openib_warn_no_device_params_found 0 {test_path}", @@ -571,6 +580,12 @@ def verify_mvapich_mpi(self, environment: Environment, log: Logger) -> None: raise SkippedException(err) run_in_parallel([server_ib.install_mvapich_mpi, client_ib.install_mvapich_mpi]) + test_names = ["IMB-MPI1", "IMB-RMA", "IMB-NBC"] + # Only for mariner, we need to build intel benchmarking tools + # as they are not included in our packages + server_ib.install_intel_mpi_benchmarking_tool(tool_names=test_names) + + server_node.execute("ldconfig", sudo=True) # Restart the ssh sessions for changes to /etc/security/limits.conf # to take effect @@ -590,13 +605,15 @@ def verify_mvapich_mpi(self, environment: Environment, log: Logger) -> None: client_ssh.enable_public_key(server_ssh.generate_key_pairs()) server_ssh.add_known_host(client_ip) client_ssh.add_known_host(server_ip) + sudo=False + if isinstance(server_node.os, CBLMariner): + sudo=True # Run MPI tests find = server_node.tools[Find] - test_names = ["IMB-MPI1", "IMB-RMA", "IMB-NBC"] for test in test_names: find_results = find.find_files( - server_node.get_pure_path("/usr"), test, sudo=True + server_node.get_pure_path("/"), test, sudo=True ) assert_that(len(find_results)).described_as( f"Could not find location of MVAPICH MPI test: {test}" @@ -611,6 +628,7 @@ def verify_mvapich_mpi(self, environment: Environment, log: Logger) -> None: expected_exit_code=0, expected_exit_code_failure_message=f"Failed {test} test " "with MVAPICH MPI", + sudo=sudo ) def _check_nd_enabled(self, node: Node) -> None: