From 057d688b555beb462114bf5b47ce46228cbbc9b2 Mon Sep 17 00:00:00 2001 From: muthusamy Date: Wed, 12 Jun 2024 10:00:53 +0000 Subject: [PATCH 01/10] amd-smi [CPU]: Added Support to get number of threads per core Change-Id: I7e6500f3f53068a3483b64a54d78ac9e1d9cd183 --- include/amd_smi/amdsmi.h | 11 +++++++++++ py-interface/amdsmi_wrapper.py | 4 ++++ src/amd_smi/amd_smi.cc | 16 ++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 33ac03e1..4a1a8072 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -4923,6 +4923,17 @@ amdsmi_status_t amdsmi_get_cpu_socket_energy(amdsmi_processor_handle processor_h * @{ */ +/** + * @brief Get Number of threads Per Core. + * + * @platform{cpu_bm} + * + * @param[in,out] threads_per_core - Input buffer to return the Number of threads Per Core + * + * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail + */ +amdsmi_status_t amdsmi_get_threads_per_core(uint32_t *threads_per_core); + /** * @brief Get SMU Firmware Version. 
* diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 570dc741..6332e2ee 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -2259,6 +2259,9 @@ class struct_amdsmi_hsmp_metrics_table_t(Structure): amdsmi_get_cpu_socket_energy = _libraries['libamd_smi.so'].amdsmi_get_cpu_socket_energy amdsmi_get_cpu_socket_energy.restype = amdsmi_status_t amdsmi_get_cpu_socket_energy.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint64)] +amdsmi_get_threads_per_core = _libraries['libamd_smi.so'].amdsmi_get_threads_per_core +amdsmi_get_threads_per_core.restype = amdsmi_status_t +amdsmi_get_threads_per_core.argtypes = [ctypes.POINTER(ctypes.c_uint32)] amdsmi_get_cpu_smu_fw_version = _libraries['libamd_smi.so'].amdsmi_get_cpu_smu_fw_version amdsmi_get_cpu_smu_fw_version.restype = amdsmi_status_t amdsmi_get_cpu_smu_fw_version.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_smu_fw_version_t)] @@ -2610,6 +2613,7 @@ class struct_amdsmi_hsmp_metrics_table_t(Structure): 'amdsmi_get_cpu_fclk_mclk', 'amdsmi_get_cpu_hsmp_proto_ver', 'amdsmi_get_cpu_model', 'amdsmi_get_cpu_prochot_status', 'amdsmi_get_cpu_pwr_svi_telemetry_all_rails', + 'amdsmi_get_threads_per_core', 'amdsmi_get_cpu_smu_fw_version', 'amdsmi_get_cpu_socket_c0_residency', 'amdsmi_get_cpu_socket_current_active_freq_limit', diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 79c82d52..0d5443ea 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -2231,6 +2231,22 @@ static amdsmi_status_t amdsmi_errno_to_esmi_status(amdsmi_status_t status) return AMDSMI_STATUS_SUCCESS; } +amdsmi_status_t amdsmi_get_threads_per_core(uint32_t *threads_per_core) +{ + amdsmi_status_t status; + uint32_t esmi_threads_per_core; + + AMDSMI_CHECK_INIT(); + + status = static_cast(esmi_threads_per_core_get(&esmi_threads_per_core)); + if (status != AMDSMI_STATUS_SUCCESS) + return amdsmi_errno_to_esmi_status(status); + + *threads_per_core 
= esmi_threads_per_core; + + return AMDSMI_STATUS_SUCCESS; +} + amdsmi_status_t amdsmi_get_cpu_hsmp_proto_ver(amdsmi_processor_handle processor_handle, uint32_t *proto_ver) { From a20db864b821bcd81dc7dbc6f1667e271d8112a4 Mon Sep 17 00:00:00 2001 From: "Oliveira, Daniel" Date: Tue, 18 Jun 2024 13:51:29 -0500 Subject: [PATCH 02/10] fix: [SWDEV-466302] [rocm/amd_smi_lib] Fixes `amdsmi_get_gpu_process_list` now requires sudo to access pid and memory information Code changes related to the following: * amdsmi_get_gpu_process_list() * CLI Change-Id: I72b154c220276b354c350fcc067c9a7c32e6c173 Signed-off-by: Oliveira, Daniel --- amdsmi_cli/amdsmi_commands.py | 2 -- include/amd_smi/amdsmi.h | 17 ++++++----------- src/amd_smi/amd_smi.cc | 10 +--------- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 6a078ffb..7ea9ae25 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -2631,8 +2631,6 @@ def process(self, args, multiple_devices=False, watching_output=False, try: process_list = amdsmi_interface.amdsmi_get_gpu_process_list(args.gpu) except amdsmi_exception.AmdSmiLibraryException as e: - if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM: - raise PermissionError('Command requires elevation') from e logging.debug("Failed to get process list for gpu %s | %s", gpu_id, e.get_error_info()) raise e diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 4a1a8072..df2137f6 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -4830,10 +4830,6 @@ amdsmi_get_gpu_vram_usage(amdsmi_processor_handle processor_handle, amdsmi_vram_ * Holding a copy of max_process before it is passed in will be helpful for monitoring * the allocations done upon each call since the max_process will permanently be changed * to reflect the actual number of processes running. 
- * Note: For the specific cases where the return status is AMDSMI_STATUS_NO_PERM only. - * The list of process and size are AMDSMI_STATUS_SUCCESS, however there are - * processes details not fully retrieved due to permissions. - * * * @param[out] list Reference to a user-provided buffer where the process * list will be returned. This buffer must contain at least @@ -4841,17 +4837,16 @@ amdsmi_get_gpu_vram_usage(amdsmi_processor_handle processor_handle, amdsmi_vram_ * by user. * * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, - * | ::AMDSMI_STATUS_NO_PERM on success, but not all details from process retrieved, * | ::AMDSMI_STATUS_OUT_OF_RESOURCES, filled list buffer with data, but number of * actual running processes is larger than the size provided. * */ - // Note: If the reserved size for processes is smaller than the number of - // actual processes running. The AMDSMI_STATUS_OUT_OF_RESOURCES is - // an indication the caller should handle the situation (resize). - // The max_processes is always changed to reflect the actual size of - // list of processes running, so the caller knows where it is at. - // + // Note: If the reserved size for processes is smaller than the number of + // actual processes running. The AMDSMI_STATUS_OUT_OF_RESOURCES is + // an indication the caller should handle the situation (resize). + // The max_processes is always changed to reflect the actual size of + // list of processes running, so the caller knows where it is at. 
+ // amdsmi_status_t amdsmi_get_gpu_process_list(amdsmi_processor_handle processor_handle, uint32_t *max_processes, amdsmi_proc_info_t *list); diff --git a/src/amd_smi/amd_smi.cc b/src/amd_smi/amd_smi.cc index 0d5443ea..0ae21147 100644 --- a/src/amd_smi/amd_smi.cc +++ b/src/amd_smi/amd_smi.cc @@ -1889,15 +1889,9 @@ amdsmi_get_gpu_process_list(amdsmi_processor_handle processor_handle, uint32_t * const auto max_processes_original_size(*max_processes); auto idx = uint32_t(0); - auto is_required_previlegies_required(false); for (auto& process : compute_process_list) { if (idx < *max_processes) { list[idx++] = static_cast(process.second); - // Note: If we could not read the process info for an existing process, - // that is likely a permission error. - if (!is_required_previlegies_required && std::string(process.second.name).empty()) { - is_required_previlegies_required = true; - } } else { break; } @@ -1910,11 +1904,9 @@ amdsmi_get_gpu_process_list(amdsmi_processor_handle processor_handle, uint32_t * // list of processes running, so the caller knows where it is at. // Holding a copy of max_process before it is passed in will be helpful // for the caller. - status_code = is_required_previlegies_required - ? amdsmi_status_t::AMDSMI_STATUS_NO_PERM : AMDSMI_STATUS_SUCCESS; *max_processes = static_cast(compute_process_list.size()); return (max_processes_original_size >= static_cast(compute_process_list.size())) - ? status_code : amdsmi_status_t::AMDSMI_STATUS_OUT_OF_RESOURCES; + ? 
AMDSMI_STATUS_SUCCESS : amdsmi_status_t::AMDSMI_STATUS_OUT_OF_RESOURCES; } amdsmi_status_t From 413c9ef6fe079d9ef21a1f7bfff495f077ee77ec Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Fri, 21 Jun 2024 16:12:02 -0500 Subject: [PATCH 03/10] SWDEV-466302 - Changed blank processes to N/A & Updated Docs Change-Id: I2d68430dda8036879f58b0f1dea5d2825b441179 --- CHANGELOG.md | 35 ++++++ docs/how-to/using-amdsmi-for-python.md | 162 ++++++++++++++++--------- py-interface/README.md | 17 +-- py-interface/amdsmi_interface.py | 8 +- 4 files changed, 158 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd9fc2a..208c43f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,41 @@ Added `AMDSMI_EVT_NOTIF_RING_HANG` to the possible events in the `amdsmi_evt_not ### Optimizations +- **Removed elevated permission requirements for `amdsmi_get_gpu_process_list()`**. +Previously, if a process with elevated permissions was running, amd-smi would require sudo to display all output. Now amd-smi will populate all process data and return N/A for elevated process names instead. However, if run with sudo, you will be able to see the name like so: + +```shell +$ amd-smi process +GPU: 0 + PROCESS_INFO: + NAME: N/A + PID: 1693982 + MEMORY_USAGE: + GTT_MEM: 0.0 B + CPU_MEM: 0.0 B + VRAM_MEM: 10.1 GB + MEM_USAGE: 0.0 B + USAGE: + GFX: 0 ns + ENC: 0 ns +``` + +```shell +$ sudo amd-smi process +GPU: 0 + PROCESS_INFO: + NAME: TransferBench + PID: 1693982 + MEMORY_USAGE: + GTT_MEM: 0.0 B + CPU_MEM: 0.0 B + VRAM_MEM: 10.1 GB + MEM_USAGE: 0.0 B + USAGE: + GFX: 0 ns + ENC: 0 ns +``` + - **Updated naming for `amdsmi_set_gpu_clear_sram_data()` to `amdsmi_clean_gpu_local_data()`**. Changed the naming to be more accurate to what the function was doing. This change also extends to the CLI where we changed the `clear-sram-data` command to `clean_local_data`. 
diff --git a/docs/how-to/using-amdsmi-for-python.md b/docs/how-to/using-amdsmi-for-python.md index 18d4246f..5ab542f9 100644 --- a/docs/how-to/using-amdsmi-for-python.md +++ b/docs/how-to/using-amdsmi-for-python.md @@ -1,9 +1,7 @@ - # AMD SMI Python Library ## Requirements - * Python 3.6+ 64-bit * Driver must be loaded for amdsmi_init() to pass @@ -11,7 +9,6 @@ ### Folder structure - File Name | Note ---|--- `__init__.py` | Python package initialization file @@ -20,7 +17,7 @@ File Name | Note `amdsmi_exception.py` | Amdsmi exceptions python file `README.md` | Documentation -## Usage +### Usage `amdsmi` folder should be copied and placed next to importing script. It should be imported as: @@ -45,17 +42,15 @@ To initialize amdsmi lib, amdsmi_init() must be called before all other calls to To close connection to driver, amdsmi_shut_down() must be the last call. -## Exceptions +### Exceptions All exceptions are in `amdsmi_exception.py` file. Exceptions that can be thrown are: * `AmdSmiException`: base amdsmi exception class * `AmdSmiLibraryException`: derives base `AmdSmiException` class and represents errors that can occur in amdsmi-lib. - When this exception is thrown, `err_code` and `err_info` are set. `err_code` is an integer that corresponds to errors that can occur in amdsmi-lib and `err_info` is a string that explains the error that occurred. 
- Example: ```python @@ -124,7 +119,6 @@ except AmdSmiException as e: ### amdsmi_shut_down - Description: Finalize and close connection to driver Input parameters: `None` @@ -226,7 +220,6 @@ except AmdSmiException as e: Description: Return socket name Input parameters: - `socket_handle` socket handle Output: Socket name @@ -423,13 +416,13 @@ Input parameters: Output: Dictionary with fields -Field | Description ----|--- -`power_cap` | power capability -`dpm_cap` | dynamic power management capability -`default_power_cap` | default power capability -`min_power_cap` | min power capability -`max_power_cap` | max power capability +Field | Description | Units +---|---|--- +`power_cap` | power capability | uW +`dpm_cap` | dynamic power management capability | MHz +`default_power_cap` | default power capability | uW +`min_power_cap` | min power capability | uW +`max_power_cap` | max power capability | uW Exceptions that can be thrown by `amdsmi_get_power_cap_info` function: @@ -504,10 +497,9 @@ Input parameters: * `processor_handle` device which to query Output: List of Dictionaries containing cache information following the schema below: - Schema: -``` +```JSON { cache_properties: { @@ -519,7 +511,6 @@ Schema: max_num_cu_shared: {"type" : "number"}, num_cache_instance: {"type" : "number"} } - ``` Field | Description @@ -688,8 +679,11 @@ Output: Dictionary with fields Field | Description ---|--- +`current_socket_power` | current socket power `average_socket_power` | average socket power `gfx_voltage` | voltage gfx +`soc_voltage` | voltage soc +`mem_voltage` | voltage mem `power_limit` | power limit Exceptions that can be thrown by `amdsmi_get_power_info` function: @@ -708,8 +702,11 @@ try: else: for device in devices: power_measure = amdsmi_get_power_info(device) + print(power_measure['current_socket_power']) print(power_measure['average_socket_power']) print(power_measure['gfx_voltage']) + print(power_measure['soc_voltage']) + print(power_measure['mem_voltage']) 
print(power_measure['power_limit']) except AmdSmiException as e: print(e) @@ -780,9 +777,11 @@ Output: Dictionary with fields Field | Description ---|--- -`cur_clk` | Current clock for given clock type -`max_clk` | Maximum clock for given clock type +`clk` | Current clock for given clock type `min_clk` | Minimum clock for given clock type +`max_clk` | Maximum clock for given clock type +`clk_locked` | flag only supported on GFX clock domain +`clk_deep_sleep` | clock deep sleep mode flag Exceptions that can be thrown by `amdsmi_get_clock_info` function: @@ -800,9 +799,11 @@ try: else: for device in devices: clock_measure = amdsmi_get_clock_info(device, AmdSmiClkType.GFX) - print(clock_measure['cur_clk']) + print(clock_measure['clk']) print(clock_measure['min_clk']) print(clock_measure['max_clk']) + print(clock_measure['clk_locked']) + print(clock_measure['clk_deep_sleep']) except AmdSmiException as e: print(e) ``` @@ -854,7 +855,7 @@ Input parameters: * `processor_handle` device which to query -Output: List consisting of dictionaries with fields for each bad page found +Output: List consisting of dictionaries with fields for each bad page found; can be an empty list Field | Description ---|--- @@ -879,7 +880,7 @@ try: else: for device in devices: bad_page_info = amdsmi_get_gpu_bad_page_info(device) - if not len(bad_page_info): + if not bad_page_info: # Can be empty list print("No bad pages found") continue for bad_page in bad_page_info: @@ -891,9 +892,56 @@ except AmdSmiException as e: print(e) ``` +### amdsmi_get_gpu_memory_reserved_pages + +Description: Returns reserved memory page info for the given GPU. 
+It is not supported on virtual machine guest + +Input parameters: + +* `processor_handle` device which to query + +Output: List consisting of dictionaries with fields for each reserved memory page found; can be an empty list + +Field | Description +---|--- +`value` | Value of memory reserved page +`page_address` | Address of memory reserved page +`page_size` | Size of memory reserved page +`status` | Status of memory reserved page + +Exceptions that can be thrown by `amdsmi_get_gpu_memory_reserved_pages` function: + +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: + +```python +try: + devices = amdsmi_get_processor_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + reserved_memory_page_info = amdsmi_get_gpu_memory_reserved_pages(device) + if not reserved_memory_page_info: # Can be empty list + print("No memory reserved pages found") + continue + for reserved_memory_page in reserved_memory_page_info: + print(reserved_memory_page["value"]) + print(reserved_memory_page["page_address"]) + print(reserved_memory_page["page_size"]) + print(reserved_memory_page["status"]) +except AmdSmiException as e: + print(e) +``` + + ### amdsmi_get_gpu_process_list -Description: Returns the list of processes running on the target GPU; May require root level access +Description: Returns the list of processes running on the target GPU; Requires root level access to display root process names; otherwise will return "N/A" Input parameters: @@ -903,7 +951,7 @@ Output: List of Dictionaries with the corresponding fields; empty list if no run Field | Description ---|--- -`name` | Name of process +`name` | Name of process. If user does not have permission this will be "N/A" `pid` | Process ID `mem` | Process memory usage `engine_usage` |
Subfield Description
`gfx`GFX engine usage in ns
`enc`Encode engine usage in ns
@@ -1109,8 +1157,9 @@ Event Type | Description ---|------ `VMFAULT` | VM page fault `THERMAL_THROTTLE` | thermal throttle -`GPU_PRE_RESET` | gpu pre reset +`GPU_PRE_RESET` | gpu pre reset `GPU_POST_RESET` | gpu post reset +`RING_HANG` | ring hang event #### read @@ -1187,7 +1236,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_pci_bandwidth(device, 0) + amdsmi_set_gpu_pci_bandwidth(device, 0) except AmdSmiException as e: print(e) ``` @@ -1547,8 +1596,12 @@ try: print("No GPUs on machine") else: for device in devices: - memory = amdsmi_get_gpu_memory_total(device) - print(memory) + vram_memory_total = amdsmi_get_gpu_memory_total(device, amdsmi_interface.AmdSmiMemoryType.VRAM) + print(vram_memory_total) + vis_vram_memory_total = amdsmi_get_gpu_memory_total(device, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM) + print(vis_vram_memory_total) + gtt_memory_total = amdsmi_get_gpu_memory_total(device, amdsmi_interface.AmdSmiMemoryType.GTT) + print(gtt_memory_total) except AmdSmiException as e: print(e) ``` @@ -1583,7 +1636,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_od_clk_info( + amdsmi_set_gpu_od_clk_info( device, AmdSmiFreqInd.AMDSMI_FREQ_IND_MAX, 1000, @@ -1619,8 +1672,12 @@ try: print("No GPUs on machine") else: for device in devices: - memory = amdsmi_get_gpu_memory_usage(device) - print(memory) + vram_memory_usage = amdsmi_get_gpu_memory_usage(device, amdsmi_interface.AmdSmiMemoryType.VRAM) + print(vram_memory_usage) + vis_vram_memory_usage = amdsmi_get_gpu_memory_usage(device, amdsmi_interface.AmdSmiMemoryType.VIS_VRAM) + print(vis_vram_memory_usage) + gtt_memory_usage = amdsmi_get_gpu_memory_usage(device, amdsmi_interface.AmdSmiMemoryType.GTT) + print(gtt_memory_usage) except AmdSmiException as e: print(e) ``` @@ -1654,7 +1711,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_od_volt_info(device, 1, 1000, 980) + amdsmi_set_gpu_od_volt_info(device, 1, 1000, 
980) except AmdSmiException as e: print(e) ``` @@ -2036,7 +2093,7 @@ except AmdSmiException as e: ``` ### amdsmi_clean_gpu_local_data -Description: Clear the local data of the given device. This can be called between user logins to prevent information leak. +Description: Clear the SRAM data of the given device. This can be called between user logins to prevent information leak. Input parameters: @@ -2130,15 +2187,16 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_clk_freq(device, AmdSmiClkType.SYS) + amdsmi_get_clk_freq(device, AmdSmiClkType.SYS) except AmdSmiException as e: print(e) ``` ### amdsmi_get_gpu_od_volt_info -Description: This function retrieves the voltage/frequency curve information -It is not supported on virtual machine guest +Description: This function retrieves the voltage/frequency curve information. +If the num_regions is 0 then the voltage curve is not supported. +It is not supported on virtual machine guest. Input parameters: @@ -2152,8 +2210,8 @@ Field | Description `curr_mclk_range` |
Subfield Description
`lower_bound`lower bound mclk range
`upper_bound`upper bound mclk range
`sclk_freq_limits` |
Subfield Description
`lower_bound`lower bound sclk range limit
`upper_bound`upper bound sclk range limit
`mclk_freq_limits` |
Subfield Description
`lower_bound`lower bound mclk range limit
`upper_bound`upper bound mclk range limit
-`curve.vc_points` | The number of supported frequencies -`num_regions` | The current frequency index +`curve.vc_points` | List of voltage curve points +`num_regions` | The number of voltage curve regions Exceptions that can be thrown by `amdsmi_get_gpu_od_volt_info` function: @@ -2170,7 +2228,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_od_volt_info(dev) + amdsmi_get_gpu_od_volt_info(dev) except AmdSmiException as e: print(e) ``` @@ -2214,7 +2272,7 @@ Output: Dictionary with fields `current_dclk0` | Current dclk0 | MHz `current_vclk1` | Current vclk1 | MHz `current_dclk1` | Current dclk1 | MHz -`throttle_status` | Current throttle status | MHz +`throttle_status` | Current throttle status | bool `current_fan_speed` | Current fan speed | RPM `pcie_link_width` | PCIe link width (number of lanes) | lanes `pcie_link_speed` | PCIe link speed in 0.1 GT/s (Giga Transfers per second) | GT/s @@ -2262,7 +2320,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_metrics_info(dev) + amdsmi_get_gpu_metrics_info(dev) except AmdSmiException as e: print(e) ``` @@ -2299,7 +2357,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_od_volt_curve_regions(device, 3) + amdsmi_get_gpu_od_volt_curve_regions(device, 3) except AmdSmiException as e: print(e) ``` @@ -2337,7 +2395,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_power_profile_presets(device, 0) + amdsmi_get_gpu_power_profile_presets(device, 0) except AmdSmiException as e: print(e) ``` @@ -2566,7 +2624,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_perf_level(device, AmdSmiDevPerfLevel.STABLE_PEAK) + amdsmi_set_gpu_perf_level(device, AmdSmiDevPerfLevel.STABLE_PEAK) except AmdSmiException as e: print(e) ``` @@ -2869,7 +2927,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_overdrive_level(device, 0) + 
amdsmi_set_gpu_overdrive_level(device, 0) except AmdSmiException as e: print(e) ``` @@ -3330,13 +3388,8 @@ Example: ```python try: - devices = amdsmi_get_processor_handles() - if len(devices) == 0: - print("No GPUs on machine") - else: - for device in devices: - version = amdsmi_get_lib_version() - print(version) + version = amdsmi_get_lib_version() + print(version) except AmdSmiException as e: print(e) ``` @@ -3748,6 +3801,7 @@ except AmdSmiException as e: ### amdsmi_get_processor_info **Note: CURRENTLY HARDCODED TO RETURN EMPTY VALUES** + Description: Return processor name Input parameters: diff --git a/py-interface/README.md b/py-interface/README.md index fdfdbefe..5ab542f9 100644 --- a/py-interface/README.md +++ b/py-interface/README.md @@ -2,12 +2,12 @@ ## Requirements -* python 3.6+ 64-bit -* driver must be loaded for amdsmi_init() to pass +* Python 3.6+ 64-bit +* Driver must be loaded for amdsmi_init() to pass ## Overview -## Folder structure +### Folder structure File Name | Note ---|--- @@ -17,7 +17,7 @@ File Name | Note `amdsmi_exception.py` | Amdsmi exceptions python file `README.md` | Documentation -## Usage +### Usage `amdsmi` folder should be copied and placed next to importing script. It should be imported as: @@ -42,7 +42,7 @@ To initialize amdsmi lib, amdsmi_init() must be called before all other calls to To close connection to driver, amdsmi_shut_down() must be the last call. -## Exceptions +### Exceptions All exceptions are in `amdsmi_exception.py` file. 
Exceptions that can be thrown are: @@ -192,6 +192,7 @@ except AmdSmiException as e: ### amdsmi_get_socket_handles **Note: CURRENTLY HARDCODED TO RETURN DUMMY DATA** + Description: Returns list of socket device handle objects on current machine Input parameters: `None` @@ -215,6 +216,7 @@ except AmdSmiException as e: ### amdsmi_get_socket_info **Note: CURRENTLY HARDCODED TO RETURN EMPTY VALUES** + Description: Return socket name Input parameters: @@ -939,7 +941,7 @@ except AmdSmiException as e: ### amdsmi_get_gpu_process_list -Description: Returns the list of processes running on the target GPU; May require root level access +Description: Returns the list of processes running on the target GPU; Requires root level access to display root process names; otherwise will return "N/A" Input parameters: @@ -949,7 +951,7 @@ Output: List of Dictionaries with the corresponding fields; empty list if no run Field | Description ---|--- -`name` | Name of process +`name` | Name of process. If user does not have permission this will be "N/A" `pid` | Process ID `mem` | Process memory usage `engine_usage` |
Subfield Description
`gfx`GFX engine usage in ns
`enc`Encode engine usage in ns
@@ -3799,6 +3801,7 @@ except AmdSmiException as e: ### amdsmi_get_processor_info **Note: CURRENTLY HARDCODED TO RETURN EMPTY VALUES** + Description: Return processor name Input parameters: diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 928fc1b4..391a7422 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -1991,8 +1991,7 @@ def amdsmi_get_gpu_process_list( # This will get populated with the number of processes found max_processes = ctypes.c_uint32(MAX_NUM_PROCESSES) - process_list = (amdsmi_wrapper.amdsmi_proc_info_t * - max_processes.value)() + process_list = (amdsmi_wrapper.amdsmi_proc_info_t * max_processes.value)() _check_res( amdsmi_wrapper.amdsmi_get_gpu_process_list( processor_handle, ctypes.byref(max_processes), process_list @@ -2001,8 +2000,11 @@ def amdsmi_get_gpu_process_list( result = [] for index in range(max_processes.value): + process_name = process_list[index].name.decode("utf-8").strip() + if process_name == "": + process_name = "N/A" result.append({ - "name": process_list[index].name.decode("utf-8"), + "name": process_name, "pid": process_list[index].pid, "mem": process_list[index].mem, "engine_usage": { From 943c74b6ef87455ea454c19c205630b1afc3e83d Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Mon, 24 Jun 2024 10:35:34 -0500 Subject: [PATCH 04/10] SWDEV-446724 - Version command human readable output file format fix Signed-off-by: Maisam Arif Change-Id: I94de5b03355b503ade307f2a3881acd07266d6c7 --- amdsmi_cli/amdsmi_commands.py | 12 +++++++++--- amdsmi_cli/amdsmi_logger.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 7ea9ae25..16823c4b 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -115,9 +115,15 @@ def version(self, args): self.logger.output['rocm_version'] = f'{rocm_version_str}' if self.logger.is_human_readable_format(): - print(f'AMDSMI 
Tool: {__version__} | '\ - f'AMDSMI Library version: {amdsmi_lib_version_str} | ' \ - f'ROCm version: {rocm_version_str}') + human_readable_output = f"AMDSMI Tool: {__version__} | " \ + f"AMDSMI Library version: {amdsmi_lib_version_str} | " \ + f"ROCm version: {rocm_version_str}" + # Custom human readable handling for version + if self.logger.destination == 'stdout': + print(human_readable_output) + else: + with self.logger.destination.open('a') as output_file: + output_file.write(human_readable_output + '\n') elif self.logger.is_json_format() or self.logger.is_csv_format(): self.logger.print_output() diff --git a/amdsmi_cli/amdsmi_logger.py b/amdsmi_cli/amdsmi_logger.py index c0ffe5a8..b54d6698 100644 --- a/amdsmi_cli/amdsmi_logger.py +++ b/amdsmi_cli/amdsmi_logger.py @@ -465,7 +465,7 @@ def print_output(self, multiple_device_enabled=False, watching_output=False, tab self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output) else: self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled, - watching_output=watching_output) + watching_output=watching_output) def _print_json_output(self, multiple_device_enabled=False, watching_output=False): From a3758f82dcac6417549f7cd571e8b1389016abef Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Mon, 24 Jun 2024 12:07:34 -0500 Subject: [PATCH 05/10] SWDEV-457854 - Unified BM error codes Signed-off-by: Maisam Arif Change-Id: I5b232de3b598bd3146eb0528f61c628da93278d9 --- CHANGELOG.md | 7 +++++ amdsmi_cli/amdsmi_cli_exceptions.py | 47 ++++++++++++++++++++++------- amdsmi_cli/amdsmi_parser.py | 18 +++++++++-- 3 files changed, 58 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 208c43f0..1a92e9a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,13 @@ Added `AMDSMI_EVT_NOTIF_RING_HANG` to the possible events in the `amdsmi_evt_not ### Optimizations +- **Updated CLI error strings to specify invalid device type queried** + 
+```shell +$ amd-smi static --asic --gpu 123123 +Can not find a device: GPU '123123' Error code: -3 +``` + - **Removed elevated permission requirements for `amdsmi_get_gpu_process_list()`**. Previously if a processes with elevated permissions was running amd-smi would required sudo to display all output. Now amd-smi will populate all process data and return N/A for elevated process names instead. However if ran with sudo you will be able to see the name like so: diff --git a/amdsmi_cli/amdsmi_cli_exceptions.py b/amdsmi_cli/amdsmi_cli_exceptions.py index c61e7d94..ab8bff5d 100644 --- a/amdsmi_cli/amdsmi_cli_exceptions.py +++ b/amdsmi_cli/amdsmi_cli_exceptions.py @@ -69,6 +69,7 @@ def __init__(self): self.stdout_message = '' self.message = '' self.output_format = '' + self.device_type = '' def __str__(self): # Return message according to the current output format @@ -83,7 +84,7 @@ def __str__(self): class AmdSmiInvalidCommandException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -1 self.command = command @@ -98,7 +99,7 @@ def __init__(self, command, outputformat): class AmdSmiInvalidParameterException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -2 self.command = command @@ -113,13 +114,22 @@ def __init__(self, command, outputformat): class AmdSmiDeviceNotFoundException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str, gpu: bool, cpu: bool, core: bool): super().__init__() self.value = -3 self.command = command self.output_format = outputformat - common_message = f"Can not find a device with the corresponding identifier: '{self.command}'" + # Handle different devices + self.device_type = "" + if gpu: + self.device_type = "GPU" + elif cpu: + self.device_type = "CPU" + elif core: + self.device_type = "CPU CORE" + 
+ common_message = f"Can not find a device: {self.device_type} '{self.command}'" self.json_message["error"] = common_message self.json_message["code"] = self.value @@ -128,7 +138,7 @@ def __init__(self, command, outputformat): class AmdSmiInvalidFilePathException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -4 self.command = command @@ -143,7 +153,7 @@ def __init__(self, command, outputformat): class AmdSmiInvalidParameterValueException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -5 self.command = command @@ -158,7 +168,7 @@ def __init__(self, command, outputformat): class AmdSmiMissingParameterValueException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -6 self.command = command @@ -172,8 +182,23 @@ def __init__(self, command, outputformat): self.stdout_message = f"{common_message} Error code: {self.value}" +class AmdSmiNotSupportedCommandException(AmdSmiException): + def __init__(self, command, outputformat: str): + super().__init__() + self.value = -7 + self.command = command + self.output_format = outputformat + + common_message = f"Command '{self.command}' is not supported on the system. Run '--help' for more info." 
+ + self.json_message["error"] = common_message + self.json_message["code"] = self.value + self.csv_message = f"error,code\n{common_message}, {self.value}" + self.stdout_message = f"{common_message} Error code: {self.value}" + + class AmdSmiParameterNotSupportedException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -8 self.command = command @@ -188,7 +213,7 @@ def __init__(self, command, outputformat): class AmdSmiRequiredCommandException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -9 self.command = command @@ -203,7 +228,7 @@ def __init__(self, command, outputformat): class AmdSmiUnknownErrorException(AmdSmiException): - def __init__(self, command, outputformat): + def __init__(self, command, outputformat: str): super().__init__() self.value = -100 self.command = command @@ -218,7 +243,7 @@ def __init__(self, command, outputformat): class AmdSmiAMDSMIErrorException(AmdSmiException): - def __init__(self, outputformat, error_code): + def __init__(self, outputformat: str, error_code): super().__init__() self.value = -1000 - abs(error_code) self.smilibcode = error_code diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 10cebf4b..f545650c 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -111,6 +111,11 @@ def __init__(self, version, list, static, firmware, bad_pages, metric, help="Descriptions:", metavar='') + # Store possible subcommands for later errors + self.possible_commands = ['version', 'list', 'static', 'firmware', 'ucode', 'bad-pages', + 'metric', 'process', 'profile', 'event', 'topology', 'set', + 'reset', 'monitor', 'xgmi'] + # Add all subparsers self._add_version_parser(self.subparsers, version) self._add_list_parser(self.subparsers, list) @@ -257,7 +262,9 @@ def __call__(self, parser, args, values, 
option_string=None): if selected_device_handles == '': raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--gpu", _GPUSelectAction.ouputformat) else: - raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, _GPUSelectAction.ouputformat) + raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, + _GPUSelectAction.ouputformat, + True, False, False) return _GPUSelectAction @@ -283,7 +290,8 @@ def __call__(self, parser, args, values, option_string=None): raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--cpu", _CPUSelectAction.ouputformat) else: raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, - _CPUSelectAction.ouputformat) + _CPUSelectAction.ouputformat, + False, True, False) return _CPUSelectAction @@ -308,7 +316,8 @@ def __call__(self, parser, args, values, option_string=None): raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--core", _CoreSelectAction.ouputformat) else: raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, - _CoreSelectAction.ouputformat) + _CoreSelectAction.ouputformat, + False, False, True) return _CoreSelectAction @@ -1232,6 +1241,9 @@ def error(self, message): l = len("argument : invalid choice: ") + 1 message = message[l:] message = message.split("'")[0] + # Check if the command is possible in other system configurations and error accordingly + if message in self.possible_commands: + raise amdsmi_cli_exceptions.AmdSmiNotSupportedCommandException(message, outputformat) raise amdsmi_cli_exceptions.AmdSmiInvalidCommandException(message, outputformat) elif "unrecognized arguments: " in message: l = len("unrecognized arguments: ") From 7a617e6ef2f1996e64c08b72d37d2f6a09a7dcfe Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Tue, 25 Jun 2024 10:03:33 -0500 Subject: [PATCH 06/10] Make the the devInfoTypesStrings.at(type) exception safe Wrap it in a function to make it exception 
safe. Change-Id: I29835993ae4fe2b7aa1a7027fab88c05ba89e6e3 --- rocm_smi/include/rocm_smi/rocm_smi_device.h | 1 + rocm_smi/src/rocm_smi.cc | 53 ++++++++++----------- rocm_smi/src/rocm_smi_device.cc | 47 ++++++++++-------- rocm_smi/src/rocm_smi_main.cc | 2 +- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/rocm_smi/include/rocm_smi/rocm_smi_device.h b/rocm_smi/include/rocm_smi/rocm_smi_device.h index 00b553d2..426a9ad0 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi_device.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_device.h @@ -261,6 +261,7 @@ class Device { AMGpuMetricsPublicLatestTupl_t dev_copy_internal_to_external_metrics(); static const std::map devInfoTypesStrings; + static const char* get_type_string(DevInfoTypes type); private: std::shared_ptr monitor_; diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 28131249..12e26959 100755 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -83,7 +83,6 @@ using amd::smi::monitorTypesToString; using amd::smi::getRSMIStatusString; using amd::smi::AMDGpuMetricsUnitType_t; using amd::smi::AMDGpuMetricTypeId_t; -auto &devInfoTypesStrings = amd::smi::Device::devInfoTypesStrings; static const uint32_t kMaxOverdriveLevel = 20; static const float kEnergyCounterResolution = 15.3F; @@ -3849,7 +3848,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | inside success fallback... " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); LOG_DEBUG(ss); @@ -3860,7 +3859,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | after fallback... 
" << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: total = " << std::to_string(*total) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -3929,7 +3928,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " no fallback needed! - " << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(ret); @@ -3940,7 +3939,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | in fallback == success ..." << " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | Data: total = " << std::to_string(total) << " | ret = " << getRSMIStatusString(RSMI_STATUS_SUCCESS); @@ -3951,7 +3950,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, ss << __PRETTY_FUNCTION__ << " | at end!!!! after fallback ..." 
<< " | Device #: " << std::to_string(dv_ind) - << " | Type = " << devInfoTypesStrings.at(mem_type_file) + << " | Type = " << amd::smi::Device::get_type_string(mem_type_file) << " | Data: Used = " << std::to_string(*used) << " | ret = " << getRSMIStatusString(ret); LOG_DEBUG(ss); @@ -5234,7 +5233,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: len was 0 or compute_partition variable was null" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -5253,7 +5252,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: could not retrieve current compute partition" << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5270,7 +5269,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: requested size was insufficient" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |"; @@ -5282,7 +5281,7 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << compute_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5342,7 +5341,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | 
Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Cause: requested setting was invalid" << " | Returning = " @@ -5361,7 +5360,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Cause: not an available compute partition setting" << " | Returning = " @@ -5381,7 +5380,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Cause: could retrieve current compute partition or retrieved" << " unexpected data" << " | Returning = " @@ -5397,7 +5396,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success - compute partition was already set at requested value" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -5423,7 +5422,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << newComputePartitionStr << " | Returning = " << getRSMIStatusString(returnResponse) << " |"; @@ -5495,7 +5494,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << 
devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: device board name does not support this action" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED) << " |"; @@ -5516,7 +5515,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: requested setting was invalid" << " | Returning = " << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |"; @@ -5537,7 +5536,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could retrieve current memory partition or retrieved" << " unexpected data" << " | Returning = " @@ -5554,7 +5553,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " setting" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(RSMI_STATUS_SUCCESS) << " |"; @@ -5576,7 +5575,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: issue writing reqested setting of " + newMemoryPartition << " | Returning = " << getRSMIStatusString(err) << " |"; @@ -5590,7 +5589,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind, << " | Success - if restart completed successfully" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << 
amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << newMemoryPartition << " | Returning = " << getRSMIStatusString(restartRet) << " |"; @@ -5612,7 +5611,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: user sent invalid arguments, len = 0 or memory partition" << " was a null ptr" << " | Returning = " @@ -5632,7 +5631,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5650,7 +5649,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Fail " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Cause: could not successfully retrieve current memory partition " << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5662,7 +5661,7 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition, << " | Success " << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << memory_partition << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5701,7 +5700,7 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << 
devInfoTypesStrings.at(amd::smi::kDevComputePartition) + << amd::smi::Device::get_type_string(amd::smi::kDevComputePartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; @@ -5740,7 +5739,7 @@ rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) { << " | Success - if original boot state was not unknown or valid setting" << " | Device #: " << dv_ind << " | Type: " - << devInfoTypesStrings.at(amd::smi::kDevMemoryPartition) + << amd::smi::Device::get_type_string(amd::smi::kDevMemoryPartition) << " | Data: " << bootState << " | Returning = " << getRSMIStatusString(ret) << " |"; diff --git a/rocm_smi/src/rocm_smi_device.cc b/rocm_smi/src/rocm_smi_device.cc index 5eafc455..e0ebe8a0 100755 --- a/rocm_smi/src/rocm_smi_device.cc +++ b/rocm_smi/src/rocm_smi_device.cc @@ -746,7 +746,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { if (ret != 0) { ss << __PRETTY_FUNCTION__ << " | Issue: File did not exist - SYSFS file (" << sysfs_path - << ") for DevInfoInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoInfoType (" << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -755,7 +755,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { ss << __PRETTY_FUNCTION__ << " | Issue: File is not a regular file - SYSFS file (" << sysfs_path << ") for " - << "DevInfoInfoType (" << devInfoTypesStrings.at(type) << ")," + << "DevInfoInfoType (" << get_type_string(type) << ")," << " returning ENOENT (" << std::strerror(ENOENT) << ")"; LOG_ERROR(ss); return ENOENT; @@ -766,7 +766,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { if (!fs->is_open()) { ss << __PRETTY_FUNCTION__ << " | Issue: Could not open - SYSFS file (" << sysfs_path << ") for " - << "DevInfoInfoType (" << devInfoTypesStrings.at(type) << "), " + << "DevInfoInfoType (" << get_type_string(type) << "), " << ", returning " << 
std::to_string(errno) << " (" << std::strerror(errno) << ")"; LOG_ERROR(ss); @@ -775,7 +775,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) { ss << __PRETTY_FUNCTION__ << " | Successfully opened SYSFS file (" << sysfs_path - << ") for DevInfoInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoInfoType (" << get_type_string(type) << ")"; LOG_INFO(ss); return 0; @@ -792,7 +792,7 @@ int Device::readDebugInfoStr(DevInfoTypes type, std::string *retStr) { ret = openDebugFileStream(type, &fs); if (ret != 0) { ss << "Could not read debugInfoStr for DevInfoType (" - << devInfoTypesStrings.at(type)<< "), returning " + << get_type_string(type)<< "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -806,7 +806,7 @@ int Device::readDebugInfoStr(DevInfoTypes type, std::string *retStr) { fs.close(); ss << "Successfully read debugInfoStr for DevInfoType (" - << devInfoTypesStrings.at(type)<< "), retString= " << *retStr; + << get_type_string(type)<< "), retString= " << *retStr; LOG_INFO(ss); return 0; @@ -822,7 +822,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { ret = openSysfsFileStream(type, &fs); if (ret != 0) { ss << "Could not read device info string for DevInfoType (" - << devInfoTypesStrings.at(type) << "), returning " + << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -832,7 +832,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { fs.close(); ss << __PRETTY_FUNCTION__ << "Successfully read device info string for DevInfoType (" << - devInfoTypesStrings.at(type) << "): " + *retStr + get_type_string(type) << "): " + *retStr << " | " << (fs.is_open() ? " File stream is opened" : " File stream is closed") << " | " << (fs.bad() ? 
"[ERROR] Bad read operation" : @@ -867,7 +867,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.close(); ss << __PRETTY_FUNCTION__ << " | Issue: Could not open fileStream; " << "Could not write device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning " << std::to_string(ret); LOG_ERROR(ss); return ret; @@ -878,7 +878,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.flush(); fs.close(); ss << "Successfully wrote device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning RSMI_STATUS_SUCCESS"; LOG_INFO(ss); ret = RSMI_STATUS_SUCCESS; @@ -892,7 +892,7 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr, fs.close(); ss << __PRETTY_FUNCTION__ << " | Issue: Could not write to file; " << "Could not write device info string (" << valStr - << ") for DevInfoType (" << devInfoTypesStrings.at(type) + << ") for DevInfoType (" << get_type_string(type) << "), returning " << getRSMIStatusString(ErrnoToRsmiStatus(ret)); ss << " | " << (fs.is_open() ? 
"[ERROR] File stream open" : @@ -983,20 +983,29 @@ int Device::readDevInfoLine(DevInfoTypes type, std::string *line) { ret = openSysfsFileStream(type, &fs); if (ret != 0) { ss << "Could not read DevInfoLine for DevInfoType (" - << devInfoTypesStrings.at(type) << ")"; + << get_type_string(type) << ")"; LOG_ERROR(ss); return ret; } std::getline(fs, *line); ss << "Successfully read DevInfoLine for DevInfoType (" - << devInfoTypesStrings.at(type) << "), returning *line = " + << get_type_string(type) << "), returning *line = " << *line; LOG_INFO(ss); return 0; } +const char* Device::get_type_string(DevInfoTypes type) { + auto ite = devInfoTypesStrings.find(type); + if (ite != devInfoTypesStrings.end()) { + return ite->second; + } + + return "Unknown"; + +} int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, void *p_binary_data) { auto sysfs_path = path_; @@ -1009,7 +1018,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, ptr = fopen(sysfs_path.c_str(), "rb"); if (!ptr) { ss << "Could not read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << " - SYSFS (" << sysfs_path << ")" << ", returning " << std::to_string(errno) << " (" << std::strerror(errno) << ")"; @@ -1021,7 +1030,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, fclose(ptr); if ((num*b_size) != b_size) { ss << "Could not read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ") - SYSFS (" + << get_type_string(type) << ") - SYSFS (" << sysfs_path << "), binary size error; " << "[buff: " << p_binary_data @@ -1035,7 +1044,7 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size, return ENOENT; } ss << "Successfully read DevInfoBinary for DevInfoType (" - << devInfoTypesStrings.at(type) << ") - SYSFS (" + << get_type_string(type) << ") - SYSFS (" << sysfs_path << "), returning binaryData = " << p_binary_data << "; byte_size = " << std::dec << 
static_cast(b_size); @@ -1067,7 +1076,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, if (retVec->empty()) { ss << "Read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << ", but contained no string lines"; LOG_ERROR(ss); return ENXIO; @@ -1085,12 +1094,12 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, if (!allLines.empty()) { ss << "Successfully read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ") " + << get_type_string(type) << ") " << ", returning lines read = " << allLines; LOG_INFO(ss); } else { ss << "Read devInfoMultiLineStr for DevInfoType (" - << devInfoTypesStrings.at(type) << ")" + << get_type_string(type) << ")" << ", but lines were empty"; LOG_INFO(ss); return ENXIO; diff --git a/rocm_smi/src/rocm_smi_main.cc b/rocm_smi/src/rocm_smi_main.cc index 3b27d6ae..4c6b0190 100755 --- a/rocm_smi/src/rocm_smi_main.cc +++ b/rocm_smi/src/rocm_smi_main.cc @@ -560,7 +560,7 @@ std::string RocmSMI::getRSMIEnvVarInfo(void) { for (auto it=env_vars_.enum_overrides.begin(); it != env_vars_.enum_overrides.end(); ++it) { DevInfoTypes type = static_cast(*it); - ss << (std::to_string(*it) + " (" + Device::devInfoTypesStrings.at(type) + ")"); + ss << (std::to_string(*it) + " (" + Device::get_type_string(type) + ")"); auto temp_it = it; if(++temp_it != env_vars_.enum_overrides.end()) { ss << ", "; From 27cd092c6d1ccf91e7984c782ce6fdb1df03f1b5 Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Fri, 28 Jun 2024 08:58:52 -0500 Subject: [PATCH 07/10] Updated error code naming to be inline with Host Signed-off-by: Maisam Arif Change-Id: I6c66640742a25361ad6399763ee442598ffb0ac6 --- amdsmi_cli/amdsmi_cli_exceptions.py | 2 +- amdsmi_cli/amdsmi_parser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/amdsmi_cli/amdsmi_cli_exceptions.py b/amdsmi_cli/amdsmi_cli_exceptions.py index ab8bff5d..fe6cc79f 100644 --- a/amdsmi_cli/amdsmi_cli_exceptions.py 
+++ b/amdsmi_cli/amdsmi_cli_exceptions.py @@ -182,7 +182,7 @@ def __init__(self, command, outputformat: str): self.stdout_message = f"{common_message} Error code: {self.value}" -class AmdSmiNotSupportedCommandException(AmdSmiException): +class AmdSmiCommandNotSupportedException(AmdSmiException): def __init__(self, command, outputformat: str): super().__init__() self.value = -7 diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index f545650c..359d51fc 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -1243,7 +1243,7 @@ def error(self, message): message = message.split("'")[0] # Check if the command is possible in other system configurations and error accordingly if message in self.possible_commands: - raise amdsmi_cli_exceptions.AmdSmiNotSupportedCommandException(message, outputformat) + raise amdsmi_cli_exceptions.AmdSmiCommandNotSupportedException(message, outputformat) raise amdsmi_cli_exceptions.AmdSmiInvalidCommandException(message, outputformat) elif "unrecognized arguments: " in message: l = len("unrecognized arguments: ") From 6e5c4b422a319788ad5b1704e8c72f1c419faa34 Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Thu, 20 Jun 2024 13:05:51 -0500 Subject: [PATCH 08/10] Added dmon alias for 'amd-smi monitor' Signed-off-by: Maisam Arif Change-Id: I4a787034bd7ab1a0d08d8cfdd038add5c35cdea4 --- CHANGELOG.md | 2 ++ amdsmi_cli/README.md | 7 ++++--- amdsmi_cli/amdsmi_parser.py | 6 +++--- docs/how-to/using-AMD-SMI-CLI-tool.md | 3 ++- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a92e9a4..11ca692f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Additions +- **`amd-smi dmon` is now available as an alias to `amd-smi monitor`**. + - **Added optional process table under `amd-smi monitor -q`**. 
The monitor subcommand within the CLI Tool now has the `-q` option to enable an optional process table underneath the original monitored output. diff --git a/amdsmi_cli/README.md b/amdsmi_cli/README.md index f0dc69c3..b48a5883 100644 --- a/amdsmi_cli/README.md +++ b/amdsmi_cli/README.md @@ -73,7 +73,7 @@ Type "help", "copyright", "credits" or "license" for more information. ## Usage -amd-smi will report the version and current platform detected when running the command without arguments: +AMD-SMI reports the version and current platform detected when running the command line interface (CLI) without arguments: ``` bash ~$ amd-smi @@ -97,7 +97,7 @@ AMD-SMI Commands: topology Displays topology information of the devices set Set options for devices reset Reset options for devices - monitor Monitor metrics for target devices + monitor (dmon) Monitor metrics for target devices xgmi Displays xgmi information of the devices ``` @@ -594,7 +594,7 @@ Command Modifiers: usage: amd-smi monitor [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL] [-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...]] [-w INTERVAL] [-W TIME] [-i ITERATIONS] [-p] [-t] [-u] [-m] [-n] - [-d] [-e] [-v] [-r] + [-d] [-e] [-v] [-r] [-q] Monitor a target device for the specified arguments. If no arguments are provided, all arguments will be enabled. @@ -629,6 +629,7 @@ Monitor Arguments: -e, --ecc Monitor ECC single bit, ECC double bit, and PCIe replay error counts -v, --vram-usage Monitor memory usage in MB -r, --pcie Monitor PCIe bandwidth in Mb/s + -q, --process Enable Process information table below monitor output Command Modifiers: --json Displays output in JSON format (human readable by default). 
diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 359d51fc..4d2984d6 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -111,10 +111,10 @@ def __init__(self, version, list, static, firmware, bad_pages, metric, help="Descriptions:", metavar='') - # Store possible subcommands for later errors + # Store possible subcommands & aliases for later errors self.possible_commands = ['version', 'list', 'static', 'firmware', 'ucode', 'bad-pages', 'metric', 'process', 'profile', 'event', 'topology', 'set', - 'reset', 'monitor', 'xgmi'] + 'reset', 'monitor', 'dmon', 'xgmi'] # Add all subparsers self._add_version_parser(self.subparsers, version) @@ -1138,7 +1138,7 @@ def _add_monitor_parser(self, subparsers, func): process_help = "Enable Process information table below monitor output" # Create monitor subparser - monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help) + monitor_parser = subparsers.add_parser('monitor', help=monitor_help, description=monitor_subcommand_help, aliases=["dmon"]) monitor_parser._optionals.title = monitor_optionals_title monitor_parser.formatter_class=lambda prog: AMDSMISubparserHelpFormatter(prog) monitor_parser.set_defaults(func=func) diff --git a/docs/how-to/using-AMD-SMI-CLI-tool.md b/docs/how-to/using-AMD-SMI-CLI-tool.md index 6c22596f..60d04498 100644 --- a/docs/how-to/using-AMD-SMI-CLI-tool.md +++ b/docs/how-to/using-AMD-SMI-CLI-tool.md @@ -521,7 +521,7 @@ Command Modifiers: usage: amd-smi monitor [-h] [--json | --csv] [--file FILE] [--loglevel LEVEL] [-g GPU [GPU ...] | -U CPU [CPU ...] | -O CORE [CORE ...]] [-w INTERVAL] [-W TIME] [-i ITERATIONS] [-p] [-t] [-u] [-m] [-n] - [-d] [-e] [-v] [-r] + [-d] [-e] [-v] [-r] [-q] Monitor a target device for the specified arguments. If no arguments are provided, all arguments will be enabled. 
@@ -556,6 +556,7 @@ Monitor Arguments: -e, --ecc Monitor ECC single bit, ECC double bit, and PCIe replay error counts -v, --vram-usage Monitor memory usage in MB -r, --pcie Monitor PCIe bandwidth in Mb/s + -q, --process Enable Process information table below monitor output Command Modifiers: --json Displays output in JSON format (human readable by default). From 7194aaebf32ece0431220384b52b48224fee03e8 Mon Sep 17 00:00:00 2001 From: Charis Poag Date: Tue, 2 Jul 2024 15:28:55 -0500 Subject: [PATCH 09/10] [SWDEV-455442/SWDEV-464645] Add back voltage curve testing for MI300 Validation requires running tests for MI300 systems, this update removes the exclusion for these systems. Change-Id: Idacf3e8bf0bd569f1cfa6192af47993eb5440ee6 --- tests/amd_smi_test/amdsmitst.exclude | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tests/amd_smi_test/amdsmitst.exclude b/tests/amd_smi_test/amdsmitst.exclude index 8f955bae..c49791c1 100644 --- a/tests/amd_smi_test/amdsmitst.exclude +++ b/tests/amd_smi_test/amdsmitst.exclude @@ -56,18 +56,6 @@ FILTER[sienna_cichlid]=\ $BLACKLIST_ALL_ASICS\ "amdsmitstReadWrite.TestPerfLevelReadWrite" -# SWDEV-391407 -# aqua_vanjaram and later systems show 'ip discovery' in -# /sys/class/kfd/kfd/topology/nodes/*/name -# -# For those systems gfx_target_version must be used. It can be found in -# /sys/class/kfd/kfd/topology/nodes/*/properties -FILTER[90400]=\ -$BLACKLIST_ALL_ASICS\ -# "amdsmitstReadOnly.TestVoltCurvRead" -FILTER[90401]=${FILTER[90400]} -FILTER[90402]=${FILTER[90400]} - # SWDEV-321166 FILTER[virtualization]=\ $BLACKLIST_ALL_ASICS\ @@ -77,3 +65,14 @@ $BLACKLIST_ALL_ASICS\ "amdsmitstReadWrite.TestOverdriveReadWrite:"\ "amdsmitstReadWrite.TestPowerReadWrite:"\ "amdsmitstReadWrite.TestPowerCapReadWrite" + +# aqua_vanjaram and later systems show 'ip discovery' in +# /sys/class/kfd/kfd/topology/nodes/*/name +# +# For those systems gfx_target_version must be used. 
It can be found in +# /sys/class/kfd/kfd/topology/nodes/*/properties +# +# ex. +# FILTER[90400]=\ +# $BLACKLIST_ALL_ASICS\ +# "amdsmitstReadOnly.TestVoltCurvRead" From 548938389de2a8f157405fb0f45b443cac71abc6 Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Wed, 10 Jul 2024 19:14:49 -0500 Subject: [PATCH 10/10] Bump Version to 24.6.2.0 Signed-off-by: Maisam Arif Change-Id: Ic389b6783514e88c43958ff5d3413a4c4a8a884f --- CMakeLists.txt | 2 +- amdsmi_cli/README.md | 2 +- docs/doxygen/Doxyfile | 2 +- docs/how-to/using-AMD-SMI-CLI-tool.md | 2 +- include/amd_smi/amdsmi.h | 2 +- py-interface/amdsmi_wrapper.py | 8 ++++---- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67cc63ce..882ab7a1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ find_program(GIT NAMES git) ## Setup the package version based on git tags. set(PKG_VERSION_GIT_TAG_PREFIX "amdsmi_pkg_ver") -get_package_version_number("24.6.1" ${PKG_VERSION_GIT_TAG_PREFIX} GIT) +get_package_version_number("24.6.2" ${PKG_VERSION_GIT_TAG_PREFIX} GIT) message("Package version: ${PKG_VERSION_STR}") set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${CPACK_PACKAGE_VERSION_MAJOR}") set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${CPACK_PACKAGE_VERSION_MINOR}") diff --git a/amdsmi_cli/README.md b/amdsmi_cli/README.md index b48a5883..12af1461 100644 --- a/amdsmi_cli/README.md +++ b/amdsmi_cli/README.md @@ -79,7 +79,7 @@ AMD-SMI reports the version and current platform detected when running the comma ~$ amd-smi usage: amd-smi [-h] ... 
-AMD System Management Interface | Version: 24.6.1.0 | ROCm version: 6.2.0 | Platform: Linux Baremetal +AMD System Management Interface | Version: 24.6.2.0 | ROCm version: 6.2.0 | Platform: Linux Baremetal options: -h, --help show this help message and exit diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 8f1b03c9..ed6ff05a 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -48,7 +48,7 @@ PROJECT_NAME = AMD SMI # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "24.6.1.0" +PROJECT_NUMBER = "24.6.2.0" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/how-to/using-AMD-SMI-CLI-tool.md b/docs/how-to/using-AMD-SMI-CLI-tool.md index 60d04498..4438c321 100644 --- a/docs/how-to/using-AMD-SMI-CLI-tool.md +++ b/docs/how-to/using-AMD-SMI-CLI-tool.md @@ -6,7 +6,7 @@ AMD-SMI reports the version and current platform detected when running the comma ~$ amd-smi usage: amd-smi [-h] ... -AMD System Management Interface | Version: 24.6.1.0 | ROCm version: 6.2.0 | Platform: Linux Baremetal +AMD System Management Interface | Version: 24.6.2.0 | ROCm version: 6.2.0 | Platform: Linux Baremetal options: -h, --help show this help message and exit diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index df2137f6..2ab20a5b 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -154,7 +154,7 @@ typedef enum { #define AMDSMI_LIB_VERSION_MAJOR 6 //! Minor version should be updated for each API change, but without changing headers -#define AMDSMI_LIB_VERSION_MINOR 1 +#define AMDSMI_LIB_VERSION_MINOR 2 //! 
Release version should be set to 0 as default and can be updated by the PMs for each CSP point release #define AMDSMI_LIB_VERSION_RELEASE 0 diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 6332e2ee..870512cc 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -2613,7 +2613,6 @@ class struct_amdsmi_hsmp_metrics_table_t(Structure): 'amdsmi_get_cpu_fclk_mclk', 'amdsmi_get_cpu_hsmp_proto_ver', 'amdsmi_get_cpu_model', 'amdsmi_get_cpu_prochot_status', 'amdsmi_get_cpu_pwr_svi_telemetry_all_rails', - 'amdsmi_get_threads_per_core', 'amdsmi_get_cpu_smu_fw_version', 'amdsmi_get_cpu_socket_c0_residency', 'amdsmi_get_cpu_socket_current_active_freq_limit', @@ -2672,9 +2671,10 @@ class struct_amdsmi_hsmp_metrics_table_t(Structure): 'amdsmi_get_processor_info', 'amdsmi_get_processor_type', 'amdsmi_get_soc_pstate', 'amdsmi_get_socket_handles', 'amdsmi_get_socket_info', 'amdsmi_get_temp_metric', - 'amdsmi_get_utilization_count', 'amdsmi_get_xgmi_info', - 'amdsmi_get_xgmi_plpd', 'amdsmi_gpu_block_t', - 'amdsmi_gpu_cache_info_t', 'amdsmi_gpu_control_counter', + 'amdsmi_get_threads_per_core', 'amdsmi_get_utilization_count', + 'amdsmi_get_xgmi_info', 'amdsmi_get_xgmi_plpd', + 'amdsmi_gpu_block_t', 'amdsmi_gpu_cache_info_t', + 'amdsmi_gpu_control_counter', 'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter', 'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_metrics_t', 'amdsmi_gpu_read_counter', 'amdsmi_gpu_xgmi_error_status',