From befa5405e6085f26b331b696b23b3d8c90805a1a Mon Sep 17 00:00:00 2001
From: Paul Nilsson <palnilsson70@gmail.com>
Date: Tue, 21 Nov 2023 11:42:44 +0100
Subject: [PATCH] Pydocstyle + pylint updates

---
 PILOTVERSION             |  2 +-
 pilot/info/dataloader.py | 98 ++++++++++++++++++++++++++--------------
 pilot/util/constants.py  |  2 +-
 3 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/PILOTVERSION b/PILOTVERSION
index 126b0124..977858c9 100644
--- a/PILOTVERSION
+++ b/PILOTVERSION
@@ -1 +1 @@
-3.7.1.15
\ No newline at end of file
+3.7.1.18
\ No newline at end of file
diff --git a/pilot/info/dataloader.py b/pilot/info/dataloader.py
index fc8c11c1..9bea57e0 100644
--- a/pilot/info/dataloader.py
+++ b/pilot/info/dataloader.py
@@ -74,7 +74,7 @@ def get_file_last_update_time(cls, fname: str) -> datetime or None:
         """
         try:
             lastupdate = datetime.fromtimestamp(os.stat(fname).st_mtime)
-        except Exception:
+        except OSError:
             lastupdate = None
 
         return lastupdate
@@ -105,12 +105,36 @@ def _readfile(url: str) -> str:
             :return: file content (str).
             """
             if os.path.isfile(url):
-                with open(url, "r") as f:
-                    content = f.read()
+                try:
+                    with open(url, "r", encoding='utf-8') as f:
+                        content = f.read()
+                except (OSError, UnicodeDecodeError) as exc:
+                    logger.warning(f"failed to read file {url}: {exc}")
+                    content = ""
+
                 return content
 
             return ""
 
+        def _readurl(url: str, _timeout: int = 20) -> str:
+            """
+            Read url content.
+
+            :param url: url (str)
+            :param _timeout: timeout passed to urlopen, in seconds (int)
+            :return: url content decoded as UTF-8, empty string if urlopen failed (str).
+            """
+            req = urllib.request.Request(url)
+            req.add_header('User-Agent', ctx.user_agent)
+            try:
+                with urllib.request.urlopen(req, context=ctx.ssl_context, timeout=_timeout) as response:
+                    content = response.read().decode('utf-8')
+            except urllib.error.URLError as exc:
+                logger.warning(f"error occurred with urlopen: {exc.reason}")
+                content = ""
+
+            return content
+
         content = None
         if url and cls.is_file_expired(fname, cache_time):  # load data into temporary cache file
             for trial in range(1, nretry + 1):
@@ -125,10 +149,10 @@ def _readfile(url: str) -> str:
                         logger.info(f'[attempt={trial}/{nretry}] loading data from url {url}')
                         req = urllib.request.Request(url)
                         req.add_header('User-Agent', ctx.user_agent)
-                        content = urllib.request.urlopen(req, context=ctx.ssl_context, timeout=20).read()
+                        content = _readurl(url)
 
                     if fname:  # save to cache
-                        with open(fname, "w+") as _file:
+                        with open(fname, "w+", encoding='utf-8') as _file:
                             if isinstance(content, bytes):  # if-statement will always be needed for python 3
                                 content = content.decode("utf-8")
 
@@ -154,30 +178,31 @@ def _readfile(url: str) -> str:
 
         # read data from old cache fname
         try:
-            with open(fname, 'r') as f:
+            with open(fname, 'r', encoding='utf-8') as f:
                 content = f.read()
-        except Exception as exc:
+        except (OSError, UnicodeDecodeError) as exc:
             logger.warning(f"cache file={fname} is not available: {exc} .. skipped")
             return None
 
         return content
 
     @classmethod
-    def load_data(cls, sources, priority, cache_time=60, parser=None):
+    def load_data(cls, sources: dict, priority: list, cache_time: int = 60, parser: Any = None) -> Any:
         """
         Download data from various sources (prioritized).
+
         Try to get data from sources according to priority values passed
 
         Expected format of source entry:
-        sources = {'NAME':{'url':"source url", 'nretry':int, 'fname':'cache file (optional)', 'cache_time':int (optional), 'sleep_time':opt}}
-
-        :param sources: Dict of source configuration
-        :param priority: Ordered list of source names
-        :param cache_time: Default cache time in seconds. Can be overwritten by cache_time value passed in sources dict
-        :param parser: Callback function to interpret/validate data which takes read data from source as input. Default is json.loads
-        :return: Data loaded and processed by parser callback
+        sources = {'NAME':{'url':"source url", 'nretry':int, 'fname':'cache file (optional)',
+                   'cache_time':int (optional), 'sleep_time':opt}}
+
+        :param sources: dict of source configuration (dict)
+        :param priority: ordered list of source names (list)
+        :param cache_time: default cache time in seconds. Can be overwritten by cache_time value passed in sources dict (int)
+        :param parser: callback function to interpret/validate data which takes read data from source as input. Default is json.loads (Any)
+        :return: data loaded and processed by parser callback (Any)
         """
-
         if not priority:  # no priority set ## randomly order if need (FIX ME LATER)
             priority = list(sources.keys())
 
@@ -217,37 +242,44 @@ def jsonparser(c):
         return None
 
 
-def merge_dict_data(d1, d2, keys=[], common=True, left=True, right=True, rec=False):
+def merge_dict_data(dic1: dict, dic2: dict, keys: list = None, common: bool = True, left: bool = True,
+                    right: bool = True, rec: bool = False) -> dict:
     """
-        Recursively merge two dict objects
-        Merge content of d2 dict into copy of d1
-        :param common: if True then do merge keys exist in both dicts
-        :param left: if True then preseve keys exist only in d1
-        :param right: if True then preserve keys exist only in d2
+    Recursively merge two dictionary objects.
+
+    Merge content of dic2 dict into copy of dic1.
+
+    :param dic1: dictionary to merge into (dict)
+    :param dic2: dictionary to merge from (dict)
+    :param keys: keys copied from dic2 when rec is set; None or empty means no restriction (list or None)
+    :param common: if True then merge keys that exist in both dictionaries (bool)
+    :param left: if True then preserve keys that exist only in dic1 (bool)
+    :param right: if True then preserve keys that exist only in dic2 (bool)
+    :param rec: set on recursive calls; together with keys it limits the merge to those keys (bool)
+    :return: merged dictionary, or dic2 if either argument is not a dictionary (dict).
     """
-
     ### TODO: verify and configure logic later
 
-    if not (isinstance(d1, dict) and isinstance(d2, dict)):
-        return d2
+    if not (isinstance(dic1, dict) and isinstance(dic2, dict)):
+        return dic2
 
-    ret = d1.copy()
+    ret = dic1.copy()
 
     if keys and rec:
-        for k in set(keys) & set(d2):
-            ret[k] = d2[k]
+        for k in set(keys) & set(dic2):
+            ret[k] = dic2[k]
         return ret
 
     if common:  # common
-        for k in set(d1) & set(d2):
-            ret[k] = merge_dict_data(d1[k], d2[k], keys, rec=True)
+        for k in set(dic1) & set(dic2):
+            ret[k] = merge_dict_data(dic1[k], dic2[k], keys, rec=True)
 
     if not left:  # left
-        for k in set(d1) - set(d2):
+        for k in set(dic1) - set(dic2):
             ret.pop(k)
 
     if right:  # right
-        for k in set(d2) - set(d1):
-            ret[k] = d2[k]
+        for k in set(dic2) - set(dic1):
+            ret[k] = dic2[k]
 
     return ret
diff --git a/pilot/util/constants.py b/pilot/util/constants.py
index e0f3d4b4..41676fa4 100644
--- a/pilot/util/constants.py
+++ b/pilot/util/constants.py
@@ -28,7 +28,7 @@
 RELEASE = '3'   # released number should be fixed at 3 for Pilot 3
 VERSION = '7'   # version number is '1' for first release, '0' until then, increased for bigger updates
 REVISION = '1'  # revision number should be reset to '0' for every new version release, increased for small updates
-BUILD = '16'     # build number should be reset to '1' for every new development cycle
+BUILD = '18'     # build number should be reset to '1' for every new development cycle
 
 SUCCESS = 0
 FAILURE = 1