From a80e8b2fb784442e96f877c7cff904b595531619 Mon Sep 17 00:00:00 2001
From: Paul Nilsson <palnilsson70@gmail.com>
Date: Wed, 29 Nov 2023 16:53:21 +0100
Subject: [PATCH] Pydocstyle and pylint updates

---
 pilot/scripts/cpu_arch.py           |  69 +++++++++++-----
 pilot/scripts/data_api_stagein.py   |   2 +-
 pilot/scripts/open_remote_file.py   | 123 ++++++++++++++--------------
 pilot/scripts/rucio_api_download.py |   5 +-
 pilot/scripts/stagein.py            | 103 +++++++++++++++++------
 pilot/scripts/stageout.py           |   5 +-
 6 files changed, 192 insertions(+), 115 deletions(-)

diff --git a/pilot/scripts/cpu_arch.py b/pilot/scripts/cpu_arch.py
index 4a4db9b5..144e11e5 100755
--- a/pilot/scripts/cpu_arch.py
+++ b/pilot/scripts/cpu_arch.py
@@ -20,6 +20,8 @@
 # - Alaettin Serhan Mete, alaettin.serhan.mete@cern.ch, 2023
 # - Paul Nilsson, paul.nilsson@cern.ch, 2023
 
+"""Script for reporting CPU architecture."""
+
 import argparse
 import logging
 import re
@@ -32,12 +34,14 @@
 must_not_v2 = []
 
 
-def get_flags_cpuinfo():
+def get_flags_cpuinfo() -> dict:
     """
-    Get the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the /proc/cpuinfo
+    Get the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the /proc/cpuinfo.
+
+    :return: dictionary containing the CPU (model) name, number of cores of the corresponding CPU and the CPU flags (dict).
     """
     cpu, cpu_core, flags = None, None, None
-    with open('/proc/cpuinfo', 'r') as fiile:
+    with open('/proc/cpuinfo', 'r', encoding='utf-8') as fiile:
         for line in fiile.readlines():
             if 'model name' in line:
                 cpu = line.split(':')[-1].strip()
@@ -48,13 +52,18 @@ def get_flags_cpuinfo():
             if all([cpu, cpu_core, flags]):
                 return {"cpu": cpu, "cpu_core": cpu_core, "flags": flags}
 
+            return {}
+
 
-def get_flags_pilotlog(pilotlogname):
+def get_flags_pilotlog(pilotlogname: str) -> dict:
     """
-    Get the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the downloaded pilotlog
+    Get the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags from the downloaded pilotlog.
+
+    :param pilotlogname: full path to the pilotlog (str)
+    :return: dictionary containing the site/queue name, the CPU (model) name, number of cores of the corresponding CPU and the CPU flags (dict).
     """
     site, cpu, cpu_core, flags = None, None, None, None
-    with open(pilotlogname, 'r') as fiile:
+    with open(pilotlogname, 'r', encoding='utf-8') as fiile:
         for line in fiile.readlines():
             if 'PANDA_RESOURCE' in line:
                 site = line.split('=')[-1].strip()
@@ -67,10 +76,13 @@ def get_flags_pilotlog(pilotlogname):
             if all([site, cpu, cpu_core, flags]):
                 return {"site": site, "cpu": cpu, "cpu_core": cpu_core, "flags": flags}
 
+            return {}
+
 
 def set_naive():
     """
-    Make a decision on the CPU architecture based on the simplified lists (must_'s) of flags
+    Make a decision on the CPU architecture based on the simplified lists (must_'s) of flags.
+
     The must_not_'s have been left blank, these could be filled if need be
     """
     global must_v4
@@ -92,13 +104,16 @@ def set_naive():
 
 def set_gcc():
     """
-    Make a decision on the CPU architecture based on the modified lists (must_'s) of flags from gcc: LAHF_SAHF --> LAHF_LM; LZCNT --> ABM; removal of SSE3
+    Make a decision on the CPU architecture based on the modified lists (must_'s) of flags from gcc.
+
+    LAHF_SAHF --> LAHF_LM; LZCNT --> ABM; removal of SSE3.
+
     References:
         https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v4.c;hb=324bec558e95584e8c1997575ae9d75978af59f1
         https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v3.c;hb=324bec558e95584e8c1997575ae9d75978af59f1
         https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/testsuite/gcc.target/i386/x86-64-v2.c;hb=324bec558e95584e8c1997575ae9d75978af59f1
 
-    The must_not_'s have been left blank, these could be filled if need be
+    The must_not_'s have been left blank, these could be filled if need be.
     """
     global must_v4
     global must_not_v4
@@ -119,49 +134,61 @@ def set_gcc():
     must_not_v2 = []
 
 
-def check_flags(must, must_not, flags):
+def check_flags(must: list, must_not: list, flags: list) -> bool:
     """
-    Matching of the actual CPU flags w.r.t. the lists of flags defined for deciding on architecture
+    Match the actual CPU flags w.r.t. the lists of flags defined for deciding on architecture.
+
+    :param must: list of flags that must be present (list)
+    :param must_not: list of flags that must not be present (list)
+    :param flags: list of actual flags (list)
+    :return: True if the actual flags match the must and must_not lists, False otherwise (bool).
     """
     failed = False
     for flag in must:
-        if not any([re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags]):
+        if not any(re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags):
             logging.debug(f"Missing must-have: {flag}")
             failed = True
+
     for flag in must_not:
-        if not any([re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags]):
+        if not any(re.match(flag, test_flag, re.IGNORECASE) for test_flag in flags):
             logging.debug(f"Present must-not-have: {flag}")
             failed = True
+
     return failed
 
 
-def all_version_checks(flag_string, name):
+def all_version_checks(flag_string: str, name: str) -> str:
     """
-    Architecture is assigned to the CPU based on the check_flags() function
+    Check the CPU flags against the lists of flags for all versions of the CPU architecture.
+
+    Architecture is assigned to the CPU based on the check_flags() function.
+
+    :param flag_string: string containing the CPU flags (str)
+    :param name: name of the CPU (str)
+    :return: architecture of the CPU (str).
     """
     flag_list = flag_string.split()
     logging.debug(f"-------Checking V4 for {name}--------")
     failed_v4 = check_flags(must_v4, must_not_v4, flag_list)
     if not failed_v4:
         return "x86-64-v4"
-    else:
-        pass
+
     logging.debug(f"-------Checking V3 for {name}--------")
     failed_v3 = check_flags(must_v3, must_not_v3, flag_list)
     if not failed_v3:
         return "x86-64-v3"
-    else:
-        pass
+
     logging.debug(f"-------Checking V2 for {name}--------")
     failed_v2 = check_flags(must_v2, must_not_v2, flag_list)
     if not failed_v2:
         return "x86-64-v2"
-    else:
-        pass
+
     logging.debug(f"-------Defaulting {name} to V1--------")
     if failed_v2 and failed_v3 and failed_v4:
         return "x86-64-v1"
 
+    return ""
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
diff --git a/pilot/scripts/data_api_stagein.py b/pilot/scripts/data_api_stagein.py
index 87cc0906..c5fda1de 100644
--- a/pilot/scripts/data_api_stagein.py
+++ b/pilot/scripts/data_api_stagein.py
@@ -19,7 +19,7 @@
 # Authors:
 # - Paul Nilsson, paul.nilsson@cern.ch, 2018-23
 
-# This script shows how to use the Data API stage-in client to download a file from storage
+"""This script shows how to use the Data API stage-in client to download a file from storage."""
 
 from pilot.api import data
 from pilot.info import InfoService, FileSpec, infosys
diff --git a/pilot/scripts/open_remote_file.py b/pilot/scripts/open_remote_file.py
index 3ef563ee..d665c46d 100644
--- a/pilot/scripts/open_remote_file.py
+++ b/pilot/scripts/open_remote_file.py
@@ -18,17 +18,22 @@
 # Authors:
 # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23
 
+"""Script for remote file open verification."""
+
 import argparse
 import functools
 import os
 import logging
 import queue
-import ROOT
 import signal
 import subprocess
+import sys
 import threading
 import traceback
 from collections import namedtuple
+from typing import Any
+
+import ROOT
 
 from pilot.util.config import config
 from pilot.util.filehandling import (
@@ -43,13 +48,12 @@
 logger = logging.getLogger(__name__)
 
 
-def get_args():
+def get_args() -> argparse.Namespace:
     """
     Return the args from the arg parser.
 
     :return: args (arg parser object).
     """
-
     arg_parser = argparse.ArgumentParser()
 
     arg_parser.add_argument('-d',
@@ -81,15 +85,12 @@ def get_args():
     return arg_parser.parse_args()
 
 
-def message(msg):
+def message(msg: str):
     """
     Print message to stdout or to log.
-    Note: not using lazy formatting.
 
-    :param msg: message (string).
-    :return:
+    :param msg: message (str).
     """
-
     if logger:
         logger.info(msg)
         # make sure that stdout buffer gets flushed - in case of time-out exceptions
@@ -98,114 +99,113 @@ def message(msg):
         print(msg, flush=True)
 
     # always write message to instant log file (message might otherwise get lost in case of time-outs)
-    with open(config.Pilot.remotefileverification_instant, 'a') as _file:
+    with open(config.Pilot.remotefileverification_instant, 'a', encoding='utf-8') as _file:
         _file.write(msg + '\n')
 
 
-def get_file_lists(turls):
+def get_file_lists(turls_string: str) -> dict:
     """
     Return a dictionary with the turls.
+
     Format: {'turls': <turl list>}
 
-    :param turls: comma separated turls (string)
-    :return: turls dictionary.
+    :param turls_string: comma separated turls (str)
+    :return: turls dictionary (dict).
     """
-
     _turls = []
 
     try:
-        _turls = turls.split(',')
-    except Exception as error:
-        message(f"exception caught: {error}")
+        _turls = turls_string.split(',')
+    except Exception as _error:
+        message(f"exception caught: {_error}")
 
     return {'turls': _turls}
 
 
-def try_open_file(turl, queues):
+# pylint: disable=useless-param-doc
+def try_open_file(turl_str: str, _queues: namedtuple):
     """
     Attempt to open a remote file.
+
     Successfully opened turls will be put in the queues.opened queue. Unsuccessful turls will be placed in
     the queues.unopened queue.
 
-    :param turl: turl (string).
-    :param queues: queues collection.
-    :return:
+    :param turl_str: turl (str)
+    :param _queues: Namedtuple containing queues for opened and unopened turls.
+                    Should have 'opened' and 'unopened' attributes to store respective turls.
     """
-
     turl_opened = False
     _timeout = 30 * 1000  # 30 s per file
     try:
         _ = ROOT.TFile.SetOpenTimeout(_timeout)
         # message(f"internal TFile.Open() time-out set to {_timeout} ms")
-        message(f'opening {turl}')
-        in_file = ROOT.TFile.Open(turl)
+        message(f'opening {turl_str}')
+        in_file = ROOT.TFile.Open(turl_str)
     except Exception as exc:
         message(f'caught exception: {exc}')
     else:
         if in_file and in_file.IsOpen():
             in_file.Close()
             turl_opened = True
-            message(f'closed {turl}')
-    queues.opened.put(turl) if turl_opened else queues.unopened.put(turl)
-    queues.result.put(turl)
+            message(f'closed {turl_str}')
+    if turl_opened:
+        _queues.opened.put(turl_str)
+    else:
+        _queues.unopened.put(turl_str)
+    _queues.result.put(turl_str)
 
 
-def spawn_file_open_thread(queues, file_list):
+# pylint: disable=useless-param-doc
+def spawn_file_open_thread(_queues: Any, file_list: list) -> threading.Thread:
     """
-    Spawn a thread for the try_open_file().
+    Spawn a thread for the try_open_file()..
 
-    :param queues: queue collection.
-    :param file_list: files to open (list).
-    :return: thread.
+    :param _queues: queue collection (Any)
+    :param file_list: files to open (list)
+    :return: thread (threading.Thread).
     """
-
-    thread = None
+    _thread = None
     try:
-        turl = file_list.pop(0)
+        _turl = file_list.pop(0)
     except IndexError:
         pass
     else:
         # create and start thread for the current turl
-        thread = threading.Thread(target=try_open_file, args=(turl, queues))
-        thread.daemon = True
-        thread.start()
+        _thread = threading.Thread(target=try_open_file, args=(_turl, _queues))
+        _thread.daemon = True
+        _thread.start()
 
-    return thread
+    return _thread
 
 
-def register_signals(signals, args):
+def register_signals(signals: list, _args: Any):
     """
     Register kill signals for intercept function.
 
-    :param signals: list of signals.
-    :param args: pilot args.
-    :return:
+    :param signals: list of signals (list)
+    :param _args: pilot arguments object (Any).
     """
-
     for sig in signals:
-        signal.signal(sig, functools.partial(interrupt, args))
+        signal.signal(sig, functools.partial(interrupt, _args))
 
 
-def interrupt(args, signum, frame):
+def interrupt(_args: Any, signum: Any, frame: Any):
     """
+    Receive and handle kill signals.
+
     Interrupt function on the receiving end of kill signals.
     This function is forwarded any incoming signals (SIGINT, SIGTERM, etc) and will set abort_job which instructs
     the threads to abort the job.
 
-    :param args: pilot arguments.
+    :param _args: pilot arguments object (Any).
     :param signum: signal.
     :param frame: stack/execution frame pointing to the frame that was interrupted by the signal.
-    :return:
     """
-
-    if args.signal:
+    if _args.signal:
         logger.warning('process already being killed')
         return
 
-    try:
-        sig = [v for v, k in signal.__dict__.iteritems() if k == signum][0]
-    except Exception:
-        sig = [v for v, k in list(signal.__dict__.items()) if k == signum][0]
+    sig = [v for v, k in list(signal.__dict__.items()) if k == signum][0]
     tmp = '\n'.join(traceback.format_stack(frame))
     logger.warning(f'caught signal: {sig} in FRAME=\n{tmp}')
     cmd = f'ps aux | grep {os.getpid()}'
@@ -213,15 +213,11 @@ def interrupt(args, signum, frame):
     logger.info(f'{cmd}:\n{out}')
     logger.warning(f'will terminate pid={os.getpid()}')
     logging.shutdown()
-    args.signal = sig
+    _args.signal = sig
     kill_processes(os.getpid())
 
 
-if __name__ == '__main__':
-    """
-    Main function of the remote file open script.
-    """
-
+if __name__ == '__main__':  # noqa: C901
     # get the args from the arg parser
     args = get_args()
     args.debug = True
@@ -232,11 +228,11 @@ def interrupt(args, signum, frame):
         logname = config.Pilot.remotefileverification_log
     except Exception as error:
         print(f"caught exception: {error} (skipping remote file open verification)")
-        exit(1)
+        sys.exit(1)
     else:
         if not logname:
             print("remote file open verification not desired")
-            exit(0)
+            sys.exit(0)
 
     establish_logging(debug=args.debug, nopilotlog=args.nopilotlog, filename=logname)
     logger = logging.getLogger(__name__)
@@ -279,7 +275,8 @@ def interrupt(args, signum, frame):
                 threads.append(thread)
 
         # wait until all threads have finished
-        [_thread.join() for _thread in threads]
+        for thread in threads:
+            thread.join()
 
         opened_turls = list(queues.opened.queue)
         opened_turls.sort()
@@ -297,4 +294,4 @@ def interrupt(args, signum, frame):
         message('no TURLs to verify')
 
     message('file remote open script has finished')
-    exit(0)
+    sys.exit(0)
diff --git a/pilot/scripts/rucio_api_download.py b/pilot/scripts/rucio_api_download.py
index 2c3f64fc..7473b73e 100644
--- a/pilot/scripts/rucio_api_download.py
+++ b/pilot/scripts/rucio_api_download.py
@@ -19,12 +19,13 @@
 # Authors:
 # - Paul Nilsson, paul.nilsson@cern.ch, 2018-23
 
-# This script demonstrates how to download a file using the Rucio download client.
+"""This script demonstrates how to download a file using the Rucio download client."""
+
 # Note: Rucio needs to be setup with 'lsetup rucio'.
 
 try:
     from rucio.client.downloadclient import DownloadClient
-except Exception:
+except ImportError:
     print("Rucio client has not been setup, please run \'lsetup rucio\' first")
 else:
     f_ific = {'did_scope': 'mc16_13TeV', 'did': 'mc16_13TeV:EVNT.16337107._000147.pool.root.1',
diff --git a/pilot/scripts/stagein.py b/pilot/scripts/stagein.py
index 684fc2e3..66e9c50f 100644
--- a/pilot/scripts/stagein.py
+++ b/pilot/scripts/stagein.py
@@ -19,7 +19,10 @@
 # Authors:
 # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23
 
+"""This script is executed by the pilot in a container to perform stage-in of input files."""
+
 import argparse
+import logging
 import os
 import re
 
@@ -39,8 +42,6 @@
 from pilot.util.loggingsupport import establish_logging
 from pilot.util.tracereport import TraceReport
 
-import logging
-
 # error codes
 GENERAL_ERROR = 1
 NO_QUEUENAME = 2
@@ -56,13 +57,12 @@
 TRANSFER_ERROR = 12
 
 
-def get_args():
+def get_args() -> argparse.Namespace:
     """
     Return the args from the arg parser.
 
     :return: args (arg parser object).
     """
-
     arg_parser = argparse.ArgumentParser()
 
     arg_parser.add_argument('-d',
@@ -198,9 +198,14 @@ def get_args():
     return arg_parser.parse_args()
 
 
-def str2bool(_str):
-    """ Helper function to convert string to bool """
+def str2bool(_str: str) -> bool:
+    """
+    Convert string to bool.
 
+    :param _str: string to be converted (str)
+    :return: boolean (bool)
+    :raise: argparse.ArgumentTypeError.
+    """
     if isinstance(_str, bool):
         return _str
     if _str.lower() in ('yes', 'true', 't', 'y', '1'):
@@ -211,14 +216,14 @@ def str2bool(_str):
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
 
-def verify_args():
+def verify_args() -> int:
     """
     Make sure required arguments are set, and if they are not then set them.
+
     (deprecated)
 
     :return: internal error code (int).
     """
-
     ret = 0
     if not args.workdir:
         args.workdir = os.getcwd()
@@ -266,11 +271,22 @@ def verify_args():
     return ret
 
 
-def message(msg):
+def message(msg: str):
+    """
+    Print message to stdout or to log.
+
+    :param msg: message (str).
+    """
     print(msg) if not logger else logger.info(msg)
 
 
-def str_to_int_list(_list):
+def str_to_int_list(_list: list) -> list:
+    """
+    Convert list of strings to list of integers.
+
+    :param _list: list of strings (list)
+    :return: list of integers (list).
+    """
     _new_list = []
     for val in _list:
         try:
@@ -281,13 +297,42 @@ def str_to_int_list(_list):
     return _new_list
 
 
-def str_to_bool_list(_list):
+def str_to_bool_list(_list: list) -> list:
+    """
+    Convert list of strings to list of booleans.
+
+    :param _list: list of strings (list)
+    :return: list of booleans (list).
+    """
     changes = {"True": True, "False": False, "None": None, "NULL": None}
     return [changes.get(x, x) for x in _list]
 
 
-def get_file_lists(lfns, scopes, filesizes, checksums, allowlans, allowwans, directaccesslans, directaccesswans, istars,
-                   accessmodes, storagetokens, guids):
+def get_file_lists(lfns: str, scopes: str, filesizes: str, checksums: str, allowlans: str, allowwans: str,
+                   directaccesslans: str, directaccesswans: str, istars: str, accessmodes: str,
+                   storagetokens: str, guids: str) -> dict:
+    """
+    Return a dictionary with the file lists.
+
+    Format: {'lfns': <lfn list>, 'scopes': <scope list>, 'filesizes': <filesize list>, 'checksums': <checksum list>,
+                'allowlans': <allowlan list>, 'allowwans': <allowwan list>, 'directaccesslans': <directaccesslan list>,
+                'directaccesswans': <directaccesswan list>, 'istars': <istar list>, 'accessmodes': <accessmode list>,
+                'storagetokens': <storagetoken list>, 'guids': <guid list>}
+
+    :param lfns: comma separated lfns (str)
+    :param scopes: comma separated scopes (str)
+    :param filesizes: comma separated filesizes (str)
+    :param checksums: comma separated checksums (str)
+    :param allowlans: comma separated allowlans (str)
+    :param allowwans: comma separated allowwans (str)
+    :param directaccesslans: comma separated directaccesslans (str)
+    :param directaccesswans: comma separated directaccesswans (str)
+    :param istars: comma separated istars (str)
+    :param accessmodes: comma separated accessmodes (str)
+    :param storagetokens: comma separated storagetokens (str)
+    :param guids: comma separated guids (str)
+    :return: file lists dictionary (dict).
+    """
     _lfns = []
     _scopes = []
     _filesizes = []
@@ -324,36 +369,42 @@ def get_file_lists(lfns, scopes, filesizes, checksums, allowlans, allowwans, dir
 
 
 class Job:
-    """
-    A minimal implementation of the Pilot Job class with data members necessary for the trace report only.
-    """
+    """A minimal implementation of the Pilot Job class with data members necessary for the trace report only."""
 
     produserid = ""
     jobid = ""
     taskid = ""
     jobdefinitionid = ""
 
-    def __init__(self, produserid="", jobid="", taskid="", jobdefinitionid=""):
+    def __init__(self, produserid: str = "", jobid: str = "", taskid: str = "", jobdefinitionid: str = ""):
+        """
+        Initialize the Job class.
+
+        :param produserid: produserid (str)
+        :param jobid: jobid (str)
+        :param taskid: taskid (str)
+        :param jobdefinitionid: jobdefinitionid (str).
+        """
         self.produserid = produserid.replace('%20', ' ')
         self.jobid = jobid
         self.taskid = taskid
         self.jobdefinitionid = jobdefinitionid
 
 
-def add_to_dictionary(dictionary, key, value1, value2, value3, value4):
+def add_to_dictionary(dictionary: dict, key: str, value1: str, value2: str, value3: str, value4: str) -> dict:
     """
     Add key: [value1, value2, ..] to dictionary.
+
     In practice; lfn: [status, status_code, turl, DDM endpoint].
 
-    :param dictionary: dictionary to be updated.
-    :param key: lfn key to be added (string).
-    :param value1: status to be added to list belonging to key (string).
-    :param value2: status_code to be added to list belonging to key (string).
-    :param value3: turl (string).
-    :param value4: DDM endpoint (string).
-    :return: updated dictionary.
+    :param dictionary: dictionary to be updated (dict)
+    :param key: lfn key to be added (str)
+    :param value1: status to be added to list belonging to key (str)
+    :param value2: status_code to be added to list belonging to key (str)
+    :param value3: turl (str)
+    :param value4: DDM endpoint (str)
+    :return: updated dictionary (dict).
     """
-
     dictionary[key] = [value1, value2, value3, value4]
     return dictionary
 
diff --git a/pilot/scripts/stageout.py b/pilot/scripts/stageout.py
index ec5a3907..6926105c 100644
--- a/pilot/scripts/stageout.py
+++ b/pilot/scripts/stageout.py
@@ -19,7 +19,10 @@
 # Authors:
 # - Paul Nilsson, paul.nilsson@cern.ch, 2020-23
 
+"""This script is executed by the pilot in a container to perform stage-out of output files."""
+
 import argparse
+import logging
 import os
 import re
 
@@ -36,8 +39,6 @@
 from pilot.util.loggingsupport import establish_logging
 from pilot.util.tracereport import TraceReport
 
-import logging
-
 errors = ErrorCodes()
 
 # error codes