diff --git a/pilot/control/monitor.py b/pilot/control/monitor.py index 204d6e1a..4f4c89bb 100644 --- a/pilot/control/monitor.py +++ b/pilot/control/monitor.py @@ -298,7 +298,7 @@ def get_proper_pilot_heartbeat() -> int: return int(config.Pilot.pilot_heartbeat) except Exception as exc: logger.warning(f'detected outdated config file: please update default.cfg: {exc}') - return 10 * 60 + return 60 def run_checks(queues: Any, args: Any) -> None: @@ -325,6 +325,7 @@ def run_checks(queues: Any, args: Any) -> None: if is_pilot_check(check='pilot_heartbeat'): last_heartbeat = time.time() - args.pilot_heartbeat _pilot_heartbeat = get_proper_pilot_heartbeat() + if last_heartbeat > _pilot_heartbeat: detected_job_suspension = True if last_heartbeat > 10 * 60 else False if detected_job_suspension: diff --git a/pilot/util/heartbeat.py b/pilot/util/heartbeat.py index 44a077ce..09105243 100644 --- a/pilot/util/heartbeat.py +++ b/pilot/util/heartbeat.py @@ -137,7 +137,7 @@ def time_since_suspension() -> int: if time_since_detection: # reset the time since detection to zero update_pilot_heartbeat(time.time(), False, 0) - + logger.info('reset time since detection to zero') return time_since_detection return 0