Skip to content

Commit

Permalink
Support new amdgpu fdinfo interface kernel >=5.19
Browse files Browse the repository at this point in the history
  • Loading branch information
Syllo committed Aug 21, 2022
1 parent 994d0ee commit c0031ee
Show file tree
Hide file tree
Showing 5 changed files with 275 additions and 125 deletions.
8 changes: 7 additions & 1 deletion include/nvtop/extract_gpuinfo_common.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2021 Maxime Schmitt <[email protected]>
* Copyright (C) 2021-2022 Maxime Schmitt <[email protected]>
*
* This file is part of Nvtop.
*
Expand Down Expand Up @@ -110,6 +110,9 @@ enum gpu_process_type {
enum gpuinfo_process_info_valid {
gpuinfo_process_cmdline_valid,
gpuinfo_process_user_name_valid,
gpuinfo_process_gpu_gfx_engine_used,
gpuinfo_process_gpu_enc_engine_used,
gpuinfo_process_gpu_dec_engine_used,
gpuinfo_process_gpu_usage_valid,
gpuinfo_process_gpu_encoder_valid,
gpuinfo_process_gpu_decoder_valid,
Expand All @@ -126,6 +129,9 @@ struct gpu_process {
pid_t pid; // Process ID
char *cmdline; // Process command line
char *user_name; // Process User Name
uint64_t gfx_engine_used; // Time in nanoseconds this process spent using the GPU gfx
uint64_t enc_engine_used; // Time in nanoseconds this process spent using the GPU encoder
uint64_t dec_engine_used; // Time in nanoseconds this process spent using the GPU decoder
unsigned gpu_usage; // Percentage of GPU used by the process
unsigned encode_usage; // Percentage of GPU encoder used by the process
unsigned decode_usage; // Percentage of GPU decoder used by the process
Expand Down
7 changes: 6 additions & 1 deletion include/nvtop/time.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2018 Maxime Schmitt <[email protected]>
* Copyright (C) 2018-2022 Maxime Schmitt <[email protected]>
*
* This file is part of Nvtop.
*
Expand All @@ -24,6 +24,7 @@

#include <stdbool.h>
#include <time.h>
#include <stdint.h>

#ifdef CLOCK_MONOTONIC_RAW
#define NVTOP_CLOCK CLOCK_MONOTONIC_RAW
Expand All @@ -49,6 +50,10 @@ inline double nvtop_difftime(nvtop_time t0, nvtop_time t1) {
return secdiff;
}

/*
 * Elapsed time from t0 to t1, expressed in nanoseconds.
 *
 * The whole-second difference is scaled to nanoseconds, then the nanosecond
 * field of t1 is added and the one of t0 subtracted. Everything is done in
 * unsigned 64-bit arithmetic, so the result is only meaningful when t1 is not
 * earlier than t0.
 */
inline uint64_t nvtop_difftime_u64(nvtop_time t0, nvtop_time t1) {
  const uint64_t whole_seconds = (uint64_t)(t1.tv_sec - t0.tv_sec);
  uint64_t elapsed_ns = whole_seconds * 1000000000;
  elapsed_ns += t1.tv_nsec;
  elapsed_ns -= t0.tv_nsec;
  return elapsed_ns;
}

inline nvtop_time nvtop_hmns_to_time(unsigned hour, unsigned minutes,
unsigned long nanosec) {
nvtop_time t = {hour * 60 * 60 + 60 * minutes + nanosec / 1000000,
Expand Down
139 changes: 63 additions & 76 deletions src/extract_gpuinfo.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
*
* Copyright (C) 2017-2021 Maxime Schmitt <[email protected]>
* Copyright (C) 2017-2022 Maxime Schmitt <[email protected]>
*
* This file is part of Nvtop.
*
Expand Down Expand Up @@ -160,85 +160,74 @@ bool gpuinfo_fix_dynamic_info_from_process_info(struct list_head *devices) {
}
#undef MYMIN

static void gpuinfo_populate_process_infos(struct list_head *devices) {
struct gpu_info *device;

list_for_each_entry(device, devices, list) {
for (unsigned j = 0; j < device->processes_count; ++j) {
pid_t current_pid = device->processes[j].pid;
struct process_info_cache *cached_pid_info;
static void gpuinfo_populate_process_info(struct gpu_info *device) {
for (unsigned j = 0; j < device->processes_count; ++j) {
pid_t current_pid = device->processes[j].pid;
struct process_info_cache *cached_pid_info;

HASH_FIND_PID(cached_process_info, &current_pid, cached_pid_info);
HASH_FIND_PID(cached_process_info, &current_pid, cached_pid_info);
if (!cached_pid_info) {
HASH_FIND_PID(updated_process_info, &current_pid, cached_pid_info);
if (!cached_pid_info) {
HASH_FIND_PID(updated_process_info, &current_pid, cached_pid_info);
if (!cached_pid_info) {
// Newly encountered pid
cached_pid_info = malloc(sizeof(*cached_pid_info));
cached_pid_info->pid = current_pid;
get_username_from_pid(current_pid, &cached_pid_info->user_name);
get_command_from_pid(current_pid, &cached_pid_info->cmdline);
cached_pid_info->last_total_consumed_cpu_time = -1.;
HASH_ADD_PID(updated_process_info, cached_pid_info);
}
} else {
// Already encountered so delete from cached list to avoid freeing
// memory at the end of this function
HASH_DEL(cached_process_info, cached_pid_info);
// Newly encountered pid
cached_pid_info = calloc(1, sizeof(*cached_pid_info));
cached_pid_info->pid = current_pid;
get_username_from_pid(current_pid, &cached_pid_info->user_name);
get_command_from_pid(current_pid, &cached_pid_info->cmdline);
cached_pid_info->last_total_consumed_cpu_time = -1.;
HASH_ADD_PID(updated_process_info, cached_pid_info);
}
} else {
// Already encountered so delete from cached list to avoid freeing
// memory in gpuinfo_clean_old_cache
HASH_DEL(cached_process_info, cached_pid_info);
HASH_ADD_PID(updated_process_info, cached_pid_info);
}

if (cached_pid_info->cmdline) {
device->processes[j].cmdline = cached_pid_info->cmdline;
SET_VALID(gpuinfo_process_cmdline_valid, device->processes[j].valid);
}
if (cached_pid_info->user_name) {
device->processes[j].user_name = cached_pid_info->user_name;
SET_VALID(gpuinfo_process_user_name_valid,
device->processes[j].valid);
}
if (cached_pid_info->cmdline) {
device->processes[j].cmdline = cached_pid_info->cmdline;
SET_VALID(gpuinfo_process_cmdline_valid, device->processes[j].valid);
}
if (cached_pid_info->user_name) {
device->processes[j].user_name = cached_pid_info->user_name;
SET_VALID(gpuinfo_process_user_name_valid, device->processes[j].valid);
}

struct process_cpu_usage cpu_usage;
if (get_process_info(current_pid, &cpu_usage)) {
if (cached_pid_info->last_total_consumed_cpu_time > -1.) {
double usage_percent =
round(100. *
(cpu_usage.total_user_time + cpu_usage.total_kernel_time -
cached_pid_info->last_total_consumed_cpu_time) /
nvtop_difftime(cached_pid_info->last_measurement_timestamp,
cpu_usage.timestamp));
device->processes[j].cpu_usage = (unsigned)usage_percent;
} else {
device->processes[j].cpu_usage = 0;
}
SET_VALID(gpuinfo_process_cpu_usage_valid,
device->processes[j].valid);
cached_pid_info->last_measurement_timestamp = cpu_usage.timestamp;
cached_pid_info->last_total_consumed_cpu_time =
cpu_usage.total_kernel_time + cpu_usage.total_user_time;
device->processes[j].cpu_memory_res = cpu_usage.resident_memory;
SET_VALID(gpuinfo_process_cpu_memory_res_valid,
device->processes[j].valid);
device->processes[j].cpu_memory_virt = cpu_usage.virtual_memory;
SET_VALID(gpuinfo_process_cpu_memory_virt_valid,
device->processes[j].valid);
struct process_cpu_usage cpu_usage;
if (get_process_info(current_pid, &cpu_usage)) {
if (cached_pid_info->last_total_consumed_cpu_time > -1.) {
double usage_percent = round(
100. *
(cpu_usage.total_user_time + cpu_usage.total_kernel_time - cached_pid_info->last_total_consumed_cpu_time) /
nvtop_difftime(cached_pid_info->last_measurement_timestamp, cpu_usage.timestamp));
device->processes[j].cpu_usage = (unsigned)usage_percent;
} else {
cached_pid_info->last_total_consumed_cpu_time = -1;
device->processes[j].cpu_usage = 0;
}
SET_VALID(gpuinfo_process_cpu_usage_valid, device->processes[j].valid);
cached_pid_info->last_measurement_timestamp = cpu_usage.timestamp;
cached_pid_info->last_total_consumed_cpu_time = cpu_usage.total_kernel_time + cpu_usage.total_user_time;
device->processes[j].cpu_memory_res = cpu_usage.resident_memory;
SET_VALID(gpuinfo_process_cpu_memory_res_valid, device->processes[j].valid);
device->processes[j].cpu_memory_virt = cpu_usage.virtual_memory;
SET_VALID(gpuinfo_process_cpu_memory_virt_valid, device->processes[j].valid);
} else {
cached_pid_info->last_total_consumed_cpu_time = -1;
}

// Process memory usage percent of total device memory
if (IS_VALID(gpuinfo_total_memory_valid, device->dynamic_info.valid) &&
IS_VALID(gpuinfo_process_gpu_memory_usage_valid,
device->processes[j].valid)) {
float percentage =
roundf(100.f * (float)device->processes[j].gpu_memory_usage /
(float)device->dynamic_info.total_memory);
device->processes[j].gpu_memory_percentage = (unsigned)percentage;
assert(device->processes[j].gpu_memory_percentage <= 100);
SET_VALID(gpuinfo_process_gpu_memory_percentage_valid,
device->processes[j].valid);
}
// Process memory usage percent of total device memory
if (IS_VALID(gpuinfo_total_memory_valid, device->dynamic_info.valid) &&
IS_VALID(gpuinfo_process_gpu_memory_usage_valid, device->processes[j].valid)) {
float percentage =
roundf(100.f * (float)device->processes[j].gpu_memory_usage / (float)device->dynamic_info.total_memory);
device->processes[j].gpu_memory_percentage = (unsigned)percentage;
assert(device->processes[j].gpu_memory_percentage <= 100);
SET_VALID(gpuinfo_process_gpu_memory_percentage_valid, device->processes[j].valid);
}
}
}

static void gpuinfo_clean_old_cache(void) {
struct process_info_cache *pid_not_encountered, *tmp;
HASH_ITER(hh, cached_process_info, pid_not_encountered, tmp) {
HASH_DEL(cached_process_info, pid_not_encountered);
Expand All @@ -254,16 +243,14 @@ bool gpuinfo_refresh_processes(struct list_head *devices) {
struct gpu_info *device;

list_for_each_entry(device, devices, list) {
unsigned processes_count = 0;
struct gpu_process *processes = NULL;
device->vendor->get_running_processes(device,
&processes_count, &processes);
free(device->processes);
device->processes = processes;
device->processes_count = processes_count;
device->processes = NULL;
device->processes_count = 0;
device->vendor->get_running_processes(device, &device->processes_count, &device->processes);
gpuinfo_populate_process_info(device);
}

gpuinfo_populate_process_infos(devices);
gpuinfo_clean_old_cache();

return true;
}
Expand Down
Loading

0 comments on commit c0031ee

Please sign in to comment.