Skip to content

Commit

Permalink
Timeout added to ps call. Handling of unknown cpuconsumptiontime added.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Paul Nilsson committed Jan 26, 2024
1 parent bd00ca6 commit 9b9873c
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
2 changes: 2 additions & 0 deletions pilot/util/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ def job_monitor_tasks(job, mt, args): # noqa: C901
job.cpuconsumptiontime = int(round(cpuconsumptiontime))
job.cpuconversionfactor = 1.0
logger.info(f'(instant) CPU consumption time for pid={job.pid}: {cpuconsumptiontime} (rounded to {job.cpuconsumptiontime})')
elif _cpuconsumptiontime == -1:
logger.warning('could not get CPU consumption time')
else:
logger.warning(f'process {job.pid} is no longer using CPU - aborting')
return 0, ""
Expand Down
8 changes: 6 additions & 2 deletions pilot/util/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,8 +568,12 @@ def get_current_cpu_consumption_time(pid):

# get all the child processes
children = []
_, ps_cache, _ = execute("ps -eo pid,ppid -m", mute=True)
find_processes_in_group(children, pid, ps_cache)
_, ps_cache, _ = execute("ps -eo pid,ppid -m", mute=True, timeout=60)
if ps_cache:
find_processes_in_group(children, pid, ps_cache)
else:
logger.warning('failed to get ps_cache')
return -1

cpuconsumptiontime = 0
for _pid in children:
Expand Down

0 comments on commit 9b9873c

Please sign in to comment.