From a80e8b2fb784442e96f877c7cff904b595531619 Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Wed, 29 Nov 2023 16:53:21 +0100 Subject: [PATCH] Pydocstyle and pylint updates --- pilot/scripts/cpu_arch.py | 69 +++++++++++----- pilot/scripts/data_api_stagein.py | 2 +- pilot/scripts/open_remote_file.py | 123 ++++++++++++++-------------- pilot/scripts/rucio_api_download.py | 5 +- pilot/scripts/stagein.py | 103 +++++++++++++++++------ pilot/scripts/stageout.py | 5 +- 6 files changed, 192 insertions(+), 115 deletions(-) diff --git a/pilot/scripts/cpu_arch.py b/pilot/scripts/cpu_arch.py index 4a4db9b5..144e11e5 100755 --- a/pilot/scripts/cpu_arch.py +++ b/pilot/scripts/cpu_arch.py @@ -20,6 +20,8 @@ # - Alaettin Serhan Mete, alaettin.serhan.mete@cern.ch, 2023 # - Paul Nilsson, paul.nilsson@cern.ch, 2023 +"""Script for reporting CPU architecture.""" + import argparse import logging import re @@ -32,12 +34,14 @@ must_not_v2 = [] -def get_flags_cpuinfo(): +def get_flags_cpuinfo() -> dict: """ - Get the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the /proc/cpuinfo + Get the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the /proc/cpuinfo. + + :return: dictionary containing the CPU (model) name, number of cores of the corresponding CPU and the CPU flags (dict). """ cpu, cpu_core, flags = None, None, None - with open('/proc/cpuinfo', 'r') as fiile: + with open('/proc/cpuinfo', 'r', encoding='utf-8') as fiile: for line in fiile.readlines(): if 'model name' in line: cpu = line.split(':')[-1].strip() @@ -48,13 +52,18 @@ def get_flags_cpuinfo(): if all([cpu, cpu_core, flags]): return {"cpu": cpu, "cpu_core": cpu_core, "flags": flags} + return {} + -def get_flags_pilotlog(pilotlogname): +def get_flags_pilotlog(pilotlogname: str) -> dict: """ - Get the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the downloaded pilotlog + Get the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the downloaded pilotlog. + + :param pilotlogname: full path to the pilotlog (str) + :return: dictionary containing the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags (dict). """ site, cpu, cpu_core, flags = None, None, None, None - with open(pilotlogname, 'r') as fiile: + with open(pilotlogname, 'r', encoding='utf-8') as fiile: for line in fiile.readlines(): if 'PANDA_RESOURCE' in line: site = line.split('=')[-1].strip() @@ -67,10 +76,13 @@ def get_flags_pilotlog(pilotlogname): if all([site, cpu, cpu_core, flags]): return {"site": site, "cpu": cpu, "cpu_core": cpu_core, "flags": flags} + return {} + def set_naive(): """ - Make a decision on the CPU architecture based on the simplified lists (must_'s) of flags + Make a decision on the CPU architecture based on the simplified lists (must_'s) of flags. + The must_not_'s have been left blank, these could be filled if need be """ global must_v4 @@ -92,13 +104,16 @@ def set_naive(): def set_gcc(): """ - Make a decision on the CPU architecture based on the modified lists (must_'s) of flags from gcc: LAHF_SAHF --> LAHF_LM; LZCNT --> ABM; removal of SSE3 + Make a decision on the CPU architecture based on the modified lists (must_'s) of flags from gcc. + + LAHF_SAHF --> LAHF_LM; LZCNT --> ABM; removal of SSE3. + References: https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v4.c;hb=324bec558e95584e8c1997575ae9d75978af59f1 https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v3.c;hb=324bec558e95584e8c1997575ae9d75978af59f1 https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v2.c;hb=324bec558e95584e8c1997575ae9d75978af59f1 - The must_not_'s have been left blank, these could be filled if need be + The must_not_'s have been left blank, these could be filled if need be. """ global must_v4 global must_not_v4 @@ -119,49 +134,61 @@ def set_gcc(): must_not_v2 = [] -def check_flags(must, must_not, flags): +def check_flags(must: list, must_not: list, flags: list) -> bool: """ - Matching of the actual CPU flags w.r.t. the lists of flags defined for deciding on architecture + Match the actual CPU flags w.r.t. the lists of flags defined for deciding on architecture. + + :param must: list of flags that must be present (list) + :param must_not: list of flags that must not be present (list) + :param flags: list of actual flags (list) + :return: True if the actual flags match the must and must_not lists, False otherwise (bool). """ failed = False for flag in must: - if not any([re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags]): + if not any(re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags): logging.debug(f"Missing must-have: {flag}") failed = True + for flag in must_not: - if not any([re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags]): + if not any(re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags): logging.debug(f"Present must-not-have: {flag}") failed = True + return failed -def all_version_checks(flag_string, name): +def all_version_checks(flag_string: str, name: str) -> str: """ - Architecture is assigned to the CPU based on the check_flags() function + Check the CPU flags against the lists of flags for all versions of the CPU architecture. + + Architecture is assigned to the CPU based on the check_flags() function. + + :param flag_string: string containing the CPU flags (str) + :param name: name of the CPU (str) + :return: architecture of the CPU (str). """ flag_list = flag_string.split() logging.debug(f"-------Checking V4 for {name}--------") failed_v4 = check_flags(must_v4, must_not_v4, flag_list) if not failed_v4: return "x86-64-v4" - else: - pass + logging.debug(f"-------Checking V3 for {name}--------") failed_v3 = check_flags(must_v3, must_not_v3, flag_list) if not failed_v3: return "x86-64-v3" - else: - pass + logging.debug(f"-------Checking V2 for {name}--------") failed_v2 = check_flags(must_v2, must_not_v2, flag_list) if not failed_v2: return "x86-64-v2" - else: - pass + logging.debug(f"-------Defaulting {name} to V1--------") if failed_v2 and failed_v3 and failed_v4: return "x86-64-v1" + return "" + if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/pilot/scripts/data_api_stagein.py b/pilot/scripts/data_api_stagein.py index 87cc0906..c5fda1de 100644 --- a/pilot/scripts/data_api_stagein.py +++ b/pilot/scripts/data_api_stagein.py @@ -19,7 +19,7 @@ # Authors: # - Paul Nilsson, paul.nilsson@cern.ch, 2018-23 -# This script shows how to use the Data API stage-in client to download a file from storage +"""This script shows how to use the Data API stage-in client to download a file from storage.""" from pilot.api import data from pilot.info import InfoService, FileSpec, infosys diff --git a/pilot/scripts/open_remote_file.py b/pilot/scripts/open_remote_file.py index 3ef563ee..d665c46d 100644 --- a/pilot/scripts/open_remote_file.py +++ b/pilot/scripts/open_remote_file.py @@ -18,17 +18,22 @@ # Authors: # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23 +"""Script for remote file open verification.""" + import argparse import functools import os import logging import queue -import ROOT import signal import subprocess +import sys import threading import traceback from collections import namedtuple +from typing import Any + +import ROOT from pilot.util.config import config from pilot.util.filehandling import ( @@ -43,13 +48,12 @@ logger = logging.getLogger(__name__) -def get_args(): +def get_args() -> argparse.Namespace: """ Return the args from the arg parser. :return: args (arg parser object). """ - arg_parser = argparse.ArgumentParser() arg_parser.add_argument('-d', @@ -81,15 +85,12 @@ def get_args(): return arg_parser.parse_args() -def message(msg): +def message(msg: str): """ Print message to stdout or to log. - Note: not using lazy formatting. - :param msg: message (string). - :return: + :param msg: message (str). """ - if logger: logger.info(msg) # make sure that stdout buffer gets flushed - in case of time-out exceptions @@ -98,114 +99,113 @@ def message(msg): print(msg, flush=True) # always write message to instant log file (message might otherwise get lost in case of time-outs) - with open(config.Pilot.remotefileverification_instant, 'a') as _file: + with open(config.Pilot.remotefileverification_instant, 'a', encoding='utf-8') as _file: _file.write(msg + '\n') -def get_file_lists(turls): +def get_file_lists(turls_string: str) -> dict: """ Return a dictionary with the turls. + Format: {'turls': } - :param turls: comma separated turls (string) - :return: turls dictionary. + :param turls_string: comma separated turls (str) + :return: turls dictionary (dict). """ - _turls = [] try: - _turls = turls.split(',') - except Exception as error: - message(f"exception caught: {error}") + _turls = turls_string.split(',') + except Exception as _error: + message(f"exception caught: {_error}") return {'turls': _turls} -def try_open_file(turl, queues): +# pylint: disable=useless-param-doc +def try_open_file(turl_str: str, _queues: namedtuple): """ Attempt to open a remote file. + Successfully opened turls will be put in the queues.opened queue. Unsuccessful turls will be placed in the queues.unopened queue. - :param turl: turl (string). - :param queues: queues collection. - :return: + :param turl_str: turl (str) + :param _queues: Namedtuple containing queues for opened and unopened turls. + Should have 'opened' and 'unopened' attributes to store respective turls. """ - turl_opened = False _timeout = 30 * 1000 # 30 s per file try: _ = ROOT.TFile.SetOpenTimeout(_timeout) # message(f"internal TFile.Open() time-out set to {_timeout} ms") - message(f'opening {turl}') - in_file = ROOT.TFile.Open(turl) + message(f'opening {turl_str}') + in_file = ROOT.TFile.Open(turl_str) except Exception as exc: message(f'caught exception: {exc}') else: if in_file and in_file.IsOpen(): in_file.Close() turl_opened = True - message(f'closed {turl}') - queues.opened.put(turl) if turl_opened else queues.unopened.put(turl) - queues.result.put(turl) + message(f'closed {turl_str}') + if turl_opened: + _queues.opened.put(turl_str) + else: + _queues.unopened.put(turl_str) + _queues.result.put(turl_str) -def spawn_file_open_thread(queues, file_list): +# pylint: disable=useless-param-doc +def spawn_file_open_thread(_queues: Any, file_list: list) -> threading.Thread: """ - Spawn a thread for the try_open_file(). + Spawn a thread for the try_open_file().. - :param queues: queue collection. - :param file_list: files to open (list). - :return: thread. + :param _queues: queue collection (Any) + :param file_list: files to open (list) + :return: thread (threading.Thread). """ - - thread = None + _thread = None try: - turl = file_list.pop(0) + _turl = file_list.pop(0) except IndexError: pass else: # create and start thread for the current turl - thread = threading.Thread(target=try_open_file, args=(turl, queues)) - thread.daemon = True - thread.start() + _thread = threading.Thread(target=try_open_file, args=(_turl, _queues)) + _thread.daemon = True + _thread.start() - return thread + return _thread -def register_signals(signals, args): +def register_signals(signals: list, _args: Any): """ Register kill signals for intercept function. - :param signals: list of signals. - :param args: pilot args. - :return: + :param signals: list of signals (list) + :param _args: pilot arguments object (Any). """ - for sig in signals: - signal.signal(sig, functools.partial(interrupt, args)) + signal.signal(sig, functools.partial(interrupt, _args)) -def interrupt(args, signum, frame): +def interrupt(_args: Any, signum: Any, frame: Any): """ + Receive and handle kill signals. + Interrupt function on the receiving end of kill signals. This function is forwarded any incoming signals (SIGINT, SIGTERM, etc) and will set abort_job which instructs the threads to abort the job. - :param args: pilot arguments. + :param _args: pilot arguments object (Any). :param signum: signal. :param frame: stack/execution frame pointing to the frame that was interrupted by the signal. - :return: """ - - if args.signal: + if _args.signal: logger.warning('process already being killed') return - try: - sig = [v for v, k in signal.__dict__.iteritems() if k == signum][0] - except Exception: - sig = [v for v, k in list(signal.__dict__.items()) if k == signum][0] + sig = [v for v, k in list(signal.__dict__.items()) if k == signum][0] tmp = '\n'.join(traceback.format_stack(frame)) logger.warning(f'caught signal: {sig} in FRAME=\n{tmp}') cmd = f'ps aux | grep {os.getpid()}' @@ -213,15 +213,11 @@ def interrupt(args, signum, frame): logger.info(f'{cmd}:\n{out}') logger.warning(f'will terminate pid={os.getpid()}') logging.shutdown() - args.signal = sig + _args.signal = sig kill_processes(os.getpid()) -if __name__ == '__main__': - """ - Main function of the remote file open script. - """ - +if __name__ == '__main__': # noqa: C901 # get the args from the arg parser args = get_args() args.debug = True @@ -232,11 +228,11 @@ def interrupt(args, signum, frame): logname = config.Pilot.remotefileverification_log except Exception as error: print(f"caught exception: {error} (skipping remote file open verification)") - exit(1) + sys.exit(1) else: if not logname: print("remote file open verification not desired") - exit(0) + sys.exit(0) establish_logging(debug=args.debug, nopilotlog=args.nopilotlog, filename=logname) logger = logging.getLogger(__name__) @@ -279,7 +275,8 @@ def interrupt(args, signum, frame): threads.append(thread) # wait until all threads have finished - [_thread.join() for _thread in threads] + for thread in threads: + thread.join() opened_turls = list(queues.opened.queue) opened_turls.sort() @@ -297,4 +294,4 @@ def interrupt(args, signum, frame): message('no TURLs to verify') message('file remote open script has finished') - exit(0) + sys.exit(0) diff --git a/pilot/scripts/rucio_api_download.py b/pilot/scripts/rucio_api_download.py index 2c3f64fc..7473b73e 100644 --- a/pilot/scripts/rucio_api_download.py +++ b/pilot/scripts/rucio_api_download.py @@ -19,12 +19,13 @@ # Authors: # - Paul Nilsson, paul.nilsson@cern.ch, 2018-23 -# This script demonstrates how to download a file using the Rucio download client. +"""This script demonstrates how to download a file using the Rucio download client.""" + # Note: Rucio needs to be setup with 'lsetup rucio'. try: from rucio.client.downloadclient import DownloadClient -except Exception: +except ImportError: print("Rucio client has not been setup, please run \'lsetup rucio\' first") else: f_ific = {'did_scope': 'mc16_13TeV', 'did': 'mc16_13TeV:EVNT.16337107._000147.pool.root.1', diff --git a/pilot/scripts/stagein.py b/pilot/scripts/stagein.py index 684fc2e3..66e9c50f 100644 --- a/pilot/scripts/stagein.py +++ b/pilot/scripts/stagein.py @@ -19,7 +19,10 @@ # Authors: # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23 +"""This script is executed by the pilot in a container to perform stage-in of input files.""" + import argparse +import logging import os import re @@ -39,8 +42,6 @@ from pilot.util.loggingsupport import establish_logging from pilot.util.tracereport import TraceReport -import logging - # error codes GENERAL_ERROR = 1 NO_QUEUENAME = 2 @@ -56,13 +57,12 @@ TRANSFER_ERROR = 12 -def get_args(): +def get_args() -> argparse.Namespace: """ Return the args from the arg parser. :return: args (arg parser object). """ - arg_parser = argparse.ArgumentParser() arg_parser.add_argument('-d', @@ -198,9 +198,14 @@ def get_args(): return arg_parser.parse_args() -def str2bool(_str): - """ Helper function to convert string to bool """ +def str2bool(_str: str) -> bool: + """ + Convert string to bool. + :param _str: string to be converted (str) + :return: boolean (bool) + :raise: argparse.ArgumentTypeError. + """ if isinstance(_str, bool): return _str if _str.lower() in ('yes', 'true', 't', 'y', '1'): @@ -211,14 +216,14 @@ def str2bool(_str): raise argparse.ArgumentTypeError('Boolean value expected.') -def verify_args(): +def verify_args() -> int: """ Make sure required arguments are set, and if they are not then set them. + (deprecated) :return: internal error code (int). """ - ret = 0 if not args.workdir: args.workdir = os.getcwd() @@ -266,11 +271,22 @@ def verify_args(): return ret -def message(msg): +def message(msg: str): + """ + Print message to stdout or to log. + + :param msg: message (str). + """ print(msg) if not logger else logger.info(msg) -def str_to_int_list(_list): +def str_to_int_list(_list: list) -> list: + """ + Convert list of strings to list of integers. + + :param _list: list of strings (list) + :return: list of integers (list). + """ _new_list = [] for val in _list: try: @@ -281,13 +297,42 @@ def str_to_int_list(_list): return _new_list -def str_to_bool_list(_list): +def str_to_bool_list(_list: list) -> list: + """ + Convert list of strings to list of booleans. + + :param _list: list of strings (list) + :return: list of booleans (list). + """ changes = {"True": True, "False": False, "None": None, "NULL": None} return [changes.get(x, x) for x in _list] -def get_file_lists(lfns, scopes, filesizes, checksums, allowlans, allowwans, directaccesslans, directaccesswans, istars, - accessmodes, storagetokens, guids): +def get_file_lists(lfns: str, scopes: str, filesizes: str, checksums: str, allowlans: str, allowwans: str, + directaccesslans: str, directaccesswans: str, istars: str, accessmodes: str, + storagetokens: str, guids: str) -> dict: + """ + Return a dictionary with the file lists. + + Format: {'lfns': , 'scopes': , 'filesizes': , 'checksums': , + 'allowlans': , 'allowwans': , 'directaccesslans': , + 'directaccesswans': , 'istars': , 'accessmodes': , + 'storagetokens': , 'guids': } + + :param lfns: comma separated lfns (str) + :param scopes: comma separated scopes (str) + :param filesizes: comma separated filesizes (str) + :param checksums: comma separated checksums (str) + :param allowlans: comma separated allowlans (str) + :param allowwans: comma separated allowwans (str) + :param directaccesslans: comma separated directaccesslans (str) + :param directaccesswans: comma separated directaccesswans (str) + :param istars: comma separated istars (str) + :param accessmodes: comma separated accessmodes (str) + :param storagetokens: comma separated storagetokens (str) + :param guids: comma separated guids (str) + :return: file lists dictionary (dict). + """ _lfns = [] _scopes = [] _filesizes = [] @@ -324,36 +369,42 @@ def get_file_lists(lfns, scopes, filesizes, checksums, allowlans, allowwans, dir class Job: - """ - A minimal implementation of the Pilot Job class with data members necessary for the trace report only. - """ + """A minimal implementation of the Pilot Job class with data members necessary for the trace report only.""" produserid = "" jobid = "" taskid = "" jobdefinitionid = "" - def __init__(self, produserid="", jobid="", taskid="", jobdefinitionid=""): + def __init__(self, produserid: str = "", jobid: str = "", taskid: str = "", jobdefinitionid: str = ""): + """ + Initialize the Job class. + + :param produserid: produserid (str) + :param jobid: jobid (str) + :param taskid: taskid (str) + :param jobdefinitionid: jobdefinitionid (str). + """ self.produserid = produserid.replace('%20', ' ') self.jobid = jobid self.taskid = taskid self.jobdefinitionid = jobdefinitionid -def add_to_dictionary(dictionary, key, value1, value2, value3, value4): +def add_to_dictionary(dictionary: dict, key: str, value1: str, value2: str, value3: str, value4: str) -> dict: """ Add key: [value1, value2, ..] to dictionary. + In practice; lfn: [status, status_code, turl, DDM endpoint]. - :param dictionary: dictionary to be updated. - :param key: lfn key to be added (string). - :param value1: status to be added to list belonging to key (string). - :param value2: status_code to be added to list belonging to key (string). - :param value3: turl (string). - :param value4: DDM endpoint (string). - :return: updated dictionary. + :param dictionary: dictionary to be updated (dict) + :param key: lfn key to be added (str) + :param value1: status to be added to list belonging to key (str) + :param value2: status_code to be added to list belonging to key (str) + :param value3: turl (str) + :param value4: DDM endpoint (str) + :return: updated dictionary (dict). """ - dictionary[key] = [value1, value2, value3, value4] return dictionary diff --git a/pilot/scripts/stageout.py b/pilot/scripts/stageout.py index ec5a3907..6926105c 100644 --- a/pilot/scripts/stageout.py +++ b/pilot/scripts/stageout.py @@ -19,7 +19,10 @@ # Authors: # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23 +"""This script is executed by the pilot in a container to perform stage-out of output files.""" + import argparse +import logging import os import re @@ -36,8 +39,6 @@ from pilot.util.loggingsupport import establish_logging from pilot.util.tracereport import TraceReport -import logging - errors = ErrorCodes() # error codes