fossid-ab · alexandruz · Jul 10, 2024 · Jul 9, 2024
diff --git a/.pylintrc b/.pylintrc
@@ -524,7 +524,7 @@ preferred-modules=
 
 # Exceptions that will emit a warning when being caught. Defaults to
 # "Exception"
-overgeneral-exceptions=Exception
+overgeneral-exceptions=builtins.Exception
 
 
 [TYPING]

diff --git a/README.md b/README.md
@@ -120,7 +120,7 @@ usage: workbench-agent.py [-h] --api_url API_URL --api_user API_USER
                           [--delta_only] [--reuse_identifications]
                           [--identification_reuse_type {any,only_me,specific_project,specific_scan}]
                           [--specific_code SPECIFIC_CODE]
-                          [--enable_chunk_upload]
+                          [--chunked_upload]
                           [--scan_number_of_tries SCAN_NUMBER_OF_TRIES]
                           [--scan_wait_time SCAN_WAIT_TIME] --path PATH
                           [--log LOG] [--path-result PATH_RESULT]
@@ -181,9 +181,8 @@ optional arguments:
   --target_path TARGET_PATH
                         The path on the Workbench server where the code to be scanned is stored.
                         No upload is done in this scenario.
-  --enable_chunk_upload
-                        For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using
-                        the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled.
+  --chunked_upload      For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using
+                        the header Transfer-encoding: chunked with chunks of 5MB.
   --log LOG             specify logging level. Allowed values: DEBUG, INFO, WARNING, ERROR
   --path-result PATH_RESULT
                         Save results to specified path

diff --git a/workbench-agent.py b/workbench-agent.py
@@ -9,6 +9,7 @@
 import argparse
 import random
 import base64
+import io
 import os
 import subprocess
 from argparse import RawTextHelpFormatter
@@ -78,22 +79,80 @@ def _send_request(self, payload: dict) -> dict:
             print("Problematic JSON:")
             print(response.text)
 
-    def _read_in_chunks(self, file_object, chunk_size=5120):
-        """Generator to read a file piece by piece."""
+    def _read_in_chunks(self,file_object: io.BufferedReader, chunk_size=5242880):
+        """
+        Generator to read a file piece by piece.
+
+        Args:
+            file_object (io.BufferedReader): Open binary file object to read the chunks from.
+            chunk_size (int): Size of each chunk in bytes. Defaults to 5242880 (5MB).
+        """
         while True:
             data = file_object.read(chunk_size)
             if not data:
                 break
             yield data
 
-    def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = True):
+    def _chunked_upload_request(self, scan_code: str, headers: dict, chunk: bytes):
+        """
+        Upload one chunk with a POST request, making sure the Requests library does not send a Content-Length header.
+        Args:
+            scan_code (str): The scan code where the file or files will be uploaded.
+            headers (dict) : Headers for HTTP request
+            chunk (bytes): Chunk read from large file
+        """
+        try:
+            req = requests.Request(
+                'POST',
+                self.api_url,
+                headers=headers,
+                data=chunk,
+                auth=(self.api_user, self.api_token),
+            )
+            s = requests.Session()
+            prepped = s.prepare_request(req)
+            # Remove the 'Content-Length' header so it is not sent with the chunk.
+            if 'Content-Length' in prepped.headers:
+                del prepped.headers['Content-Length']
+
+            # Send HTTP request and retrieve response
+            response = s.send(prepped)
+            # print(f"Sent headers: {response.request.headers}")
+            # print(f"response headers: {response.headers}")
+            # Retrieve the HTTP status code
+            status_code = response.status_code
+            print(f"HTTP Status Code: {status_code}")
+
+            # Check if the request was successful (status code 200)
+            if status_code == 200:
+                # Parse the JSON response
+                try:
+                    response.json()
+                except:
+                    print(f"Failed to decode json {response.text}")
+                    print(traceback.print_exc())
+                    sys.exit(1)
+            else:
+                print(f"Request failed with status code {status_code}")
+                reason = response.reason
+                print(f"Reason: {reason}")
+                response_text = response.text
+                print(f"Response Text: {response_text}")
+                sys.exit(1)
+        except IOError:
+            # Request failed (no file is opened here; requests' exceptions derive from IOError)
+            print(f"Failed to upload files to the scan {scan_code}.")
+            print(traceback.print_exc())
+            sys.exit(1)
+
+    def upload_files(self, scan_code: str, path: str, chunked_upload: bool = False):
         """
         Uploads files to the Workbench using the API's File Upload endpoint.
 
         Args:
             scan_code (str): The scan code where the file or files will be uploaded.
             path (str): Path to the file or files to upload.
-            enable_chunk_upload (bool): Enable/disable chunk upload. By default, enabled.
+            chunked_upload (bool): Use chunked upload for files bigger than 8MB. Disabled by default.
         """
         file_size = os.path.getsize(path)
         size_limit = 8 * 1024 * 1024  # 8MB in bytes. Based on the default value of post_max_size in php.ini
@@ -102,42 +161,36 @@ def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = Tr
         filename_base64 = base64.b64encode(filename.encode()).decode("utf-8")
         scan_code_base64 = base64.b64encode(scan_code.encode()).decode("utf-8")
 
-        if enable_chunk_upload and (file_size > size_limit):
+        if chunked_upload and (file_size > size_limit):
             print(f"Uploading {filename} using 'Transfer-encoding: chunks' due to file size {file_size}.")
             # Use chunked upload for files bigger than size_limit
             # First delete possible existing files because chunk uploading works by appending existing file on disk.
             self.remove_uploaded_content(filename, scan_code)
+            print("Uploading using Transfer-encoding: chunked...")
             headers = {
                 "FOSSID-SCAN-CODE": scan_code_base64,
                 "FOSSID-FILE-NAME": filename_base64,
-                'Transfer-Encoding': 'chunked'
+                'Transfer-Encoding': 'chunked',
+                'Content-Type': 'application/octet-stream'
             }
             try:
                 with open(path, "rb") as file:
-                    resp = requests.post(
-                        self.api_url,
-                        headers=headers,
-                        data=self._read_in_chunks(file, 5120),
-                        auth=(self.api_user, self.api_token),
-                        timeout=1800,
-                    )
-                    try:
-                        resp.json()
-                    except:
-                        print(f"Failed to decode json {resp.text}")
-                        print(traceback.print_exc())
-                        sys.exit(1)
+                    for chunk in self._read_in_chunks(file, 5242880):
+                        # Upload each chunk
+                        self._chunked_upload_request(scan_code, headers, chunk)
             except IOError:
                 # Error opening file
                 print(f"Failed to upload files to the scan {scan_code}.")
                 print(traceback.print_exc())
                 sys.exit(1)
+            print("Finished uploading.")
         else:
             # Regular upload, no chunk upload
             headers = {
                 "FOSSID-SCAN-CODE": scan_code_base64,
                 "FOSSID-FILE-NAME": filename_base64
             }
+            print("Uploading...")
             try:
                 with open(path, "rb") as file:
                     resp = requests.post(
@@ -147,17 +200,32 @@ def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = Tr
                         auth=(self.api_user, self.api_token),
                         timeout=1800,
                     )
-                    try:
-                        resp.json()
-                    except:
-                        print(f"Failed to decode json {resp.text}")
-                        print(traceback.print_exc())
+                    # Retrieve the HTTP status code
+                    status_code = resp.status_code
+                    print(f"HTTP Status Code: {status_code}")
+
+                    # Check if the request was successful (status code 200)
+                    if status_code == 200:
+                        # Parse the JSON response
+                        try:
+                            resp.json()
+                        except:
+                            print(f"Failed to decode json {resp.text}")
+                            print(traceback.print_exc())
+                            sys.exit(1)
+                    else:
+                        print(f"Request failed with status code {status_code}")
+                        reason = resp.reason
+                        print(f"Reason: {reason}")
+                        response_text = resp.text
+                        print(f"Response Text: {response_text}")
                         sys.exit(1)
             except IOError:
                 # Error opening file
                 print(f"Failed to upload files to the scan {scan_code}.")
                 print(traceback.print_exc())
                 sys.exit(1)
+            print("Finished uploading.")
 
     def _delete_existing_scan(self, scan_code: str):
         """
@@ -1092,11 +1160,11 @@ def non_empty_string(s):
         required=False,
     )
     optional.add_argument(
-        "--enable_chunk_upload",
+        "--chunked_upload",
         help="For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using\n"
-             "the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled.",
+             "the header Transfer-encoding: chunked with chunks of 5MB.",
         action="store_true",
-        default=True,
+        default=False,
         required=False,
     )
     required.add_argument(
@@ -1303,7 +1371,7 @@ def main():
             print(
                 "Uploading file indicated in --path parameter: {}".format(params.path)
             )
-            workbench.upload_files(params.scan_code, params.path)
+            workbench.upload_files(params.scan_code, params.path, params.chunked_upload)
         else:
             # Get all files found at given path (including in subdirectories). Exclude directories
             print(
@@ -1317,7 +1385,7 @@ def main():
                     if not os.path.isdir(os.path.join(root, filename)):
                         counter_files = counter_files + 1
                         workbench.upload_files(
-                            params.scan_code, os.path.join(root, filename), params.enable_chunk_upload
+                            params.scan_code, os.path.join(root, filename), params.chunked_upload
                         )
             print("A total of {} files uploaded".format(counter_files))
         print("Calling API scans->extracting_archives")