From befa5405e6085f26b331b696b23b3d8c90805a1a Mon Sep 17 00:00:00 2001 From: Paul Nilsson Date: Tue, 21 Nov 2023 11:42:44 +0100 Subject: [PATCH] Pydocstyle + pylint updates --- PILOTVERSION | 2 +- pilot/info/dataloader.py | 98 ++++++++++++++++++++++++++-------------- pilot/util/constants.py | 2 +- 3 files changed, 67 insertions(+), 35 deletions(-) diff --git a/PILOTVERSION b/PILOTVERSION index 126b0124..977858c9 100644 --- a/PILOTVERSION +++ b/PILOTVERSION @@ -1 +1 @@ -3.7.1.15 \ No newline at end of file +3.7.1.18 \ No newline at end of file diff --git a/pilot/info/dataloader.py b/pilot/info/dataloader.py index fc8c11c1..9bea57e0 100644 --- a/pilot/info/dataloader.py +++ b/pilot/info/dataloader.py @@ -74,7 +74,7 @@ def get_file_last_update_time(cls, fname: str) -> datetime or None: """ try: lastupdate = datetime.fromtimestamp(os.stat(fname).st_mtime) - except Exception: + except OSError: lastupdate = None return lastupdate @@ -105,12 +105,36 @@ def _readfile(url: str) -> str: :return: file content (str). """ if os.path.isfile(url): - with open(url, "r") as f: - content = f.read() + try: + with open(url, "r", encoding='utf-8') as f: + content = f.read() + except (OSError, UnicodeDecodeError) as exc: + logger.warning(f"failed to read file {url}: {exc}") + content = "" + return content return "" + def _readurl(url: str, _timeout: int = 20) -> str: + """ + Read url content. + + :param url: url (str) + :return: url content (str). 
+ """ + req = urllib.request.Request(url) + req.add_header('User-Agent', ctx.user_agent) + try: + with urllib.request.urlopen(req, context=ctx.ssl_context, timeout=_timeout) as response: + content = response.read() + except urllib.error.URLError as exc: + logger.warning(f"error occurred with urlopen: {exc.reason}") + # Handle the error, set content to None or handle as needed + content = "" + + return content + content = None if url and cls.is_file_expired(fname, cache_time): # load data into temporary cache file for trial in range(1, nretry + 1): @@ -125,10 +149,10 @@ def _readfile(url: str) -> str: logger.info(f'[attempt={trial}/{nretry}] loading data from url {url}') req = urllib.request.Request(url) req.add_header('User-Agent', ctx.user_agent) - content = urllib.request.urlopen(req, context=ctx.ssl_context, timeout=20).read() + content = _readurl(url) if fname: # save to cache - with open(fname, "w+") as _file: + with open(fname, "w+", encoding='utf-8') as _file: if isinstance(content, bytes): # if-statement will always be needed for python 3 content = content.decode("utf-8") @@ -154,30 +178,31 @@ def _readfile(url: str) -> str: # read data from old cache fname try: - with open(fname, 'r') as f: + with open(fname, 'r', encoding='utf-8') as f: content = f.read() - except Exception as exc: + except (OSError, UnicodeDecodeError) as exc: logger.warning(f"cache file={fname} is not available: {exc} .. skipped") return None return content @classmethod - def load_data(cls, sources, priority, cache_time=60, parser=None): + def load_data(cls, sources: dict, priority: list, cache_time: int = 60, parser: Any = None) -> Any: """ Download data from various sources (prioritized). 
+ Try to get data from sources according to priority values passed Expected format of source entry: - sources = {'NAME':{'url':"source url", 'nretry':int, 'fname':'cache file (optional)', 'cache_time':int (optional), 'sleep_time':opt}} - - :param sources: Dict of source configuration - :param priority: Ordered list of source names - :param cache_time: Default cache time in seconds. Can be overwritten by cache_time value passed in sources dict - :param parser: Callback function to interpret/validate data which takes read data from source as input. Default is json.loads - :return: Data loaded and processed by parser callback + sources = {'NAME':{'url':"source url", 'nretry':int, 'fname':'cache file (optional)', + 'cache_time':int (optional), 'sleep_time':opt}} + + :param sources: dict of source configuration (dict) + :param priority: ordered list of source names (list) + :param cache_time: default cache time in seconds. Can be overridden by the cache_time value passed in the sources dict (int) + :param parser: callback function to interpret/validate data which takes read data from source as input. Default is json.loads (Any) + :return: data loaded and processed by parser callback (Any) """ - if not priority: # no priority set ## randomly order if need (FIX ME LATER) priority = list(sources.keys()) @@ -217,37 +242,44 @@ def jsonparser(c): return None -def merge_dict_data(d1, d2, keys=[], common=True, left=True, right=True, rec=False): +def merge_dict_data(dic1: dict, dic2: dict, keys: list = [], common: bool = True, left: bool = True, + right: bool = True, rec: bool = False) -> dict: """ - Recursively merge two dict objects - Merge content of d2 dict into copy of d1 - :param common: if True then do merge keys exist in both dicts - :param left: if True then preseve keys exist only in d1 - :param right: if True then preserve keys exist only in d2 + Recursively merge two dictionary objects. + + Merge content of dic2 dict into copy of dic1. 
+ + :param dic1: dictionary to merge into (dict) + :param dic2: dictionary to merge from (dict) + :param keys: list of keys to merge (list) + :param common: if True then merge keys that exist in both dictionaries (bool) + :param left: if True then preserve keys that exist only in dic1 (bool) + :param right: if True then preserve keys that exist only in dic2 (bool) + :param rec: if True then merge recursively (bool) + :return: merged dictionary (dict). """ - ### TODO: verify and configure logic later - if not (isinstance(d1, dict) and isinstance(d2, dict)): - return d2 + if not (isinstance(dic1, dict) and isinstance(dic2, dict)): + return dic2 - ret = d1.copy() + ret = dic1.copy() if keys and rec: - for k in set(keys) & set(d2): - ret[k] = d2[k] + for k in set(keys) & set(dic2): + ret[k] = dic2[k] return ret if common: # common - for k in set(d1) & set(d2): - ret[k] = merge_dict_data(d1[k], d2[k], keys, rec=True) + for k in set(dic1) & set(dic2): + ret[k] = merge_dict_data(dic1[k], dic2[k], keys, rec=True) if not left: # left - for k in set(d1) - set(d2): + for k in set(dic1) - set(dic2): ret.pop(k) if right: # right - for k in set(d2) - set(d1): - ret[k] = d2[k] + for k in set(dic2) - set(dic1): + ret[k] = dic2[k] return ret diff --git a/pilot/util/constants.py b/pilot/util/constants.py index e0f3d4b4..41676fa4 100644 --- a/pilot/util/constants.py +++ b/pilot/util/constants.py @@ -28,7 +28,7 @@ RELEASE = '3' # released number should be fixed at 3 for Pilot 3 VERSION = '7' # version number is '1' for first release, '0' until then, increased for bigger updates REVISION = '1' # revision number should be reset to '0' for every new version release, increased for small updates -BUILD = '16' # build number should be reset to '1' for every new development cycle +BUILD = '18' # build number should be reset to '1' for every new development cycle SUCCESS = 0 FAILURE = 1