diff --git a/README.md b/README.md index 200ded2..8acc38c 100755 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ usage: workbench-agent.py [-h] --api_url API_URL --api_user API_USER [--delta_only] [--reuse_identifications] [--identification_reuse_type {any,only_me,specific_project,specific_scan}] [--specific_code SPECIFIC_CODE] + [--enable_chunk_upload] [--scan_number_of_tries SCAN_NUMBER_OF_TRIES] [--scan_wait_time SCAN_WAIT_TIME] --path PATH [--log LOG] [--path-result PATH_RESULT] @@ -177,6 +178,12 @@ optional arguments: --specific_code SPECIFIC_CODE The scan code used when creating the scan in Workbench. It can be based on some env var, for example: ${BUILD_NUMBER} + --target_path TARGET_PATH + The path on the Workbench server where the code to be scanned is stored. + No upload is done in this scenario. + --enable_chunk_upload + For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using + the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled. --log LOG specify logging level. Allowed values: DEBUG, INFO, WARNING, ERROR --path-result PATH_RESULT Save results to specified path diff --git a/workbench-agent.py b/workbench-agent.py index 7cf3090..35ca6ca 100755 --- a/workbench-agent.py +++ b/workbench-agent.py @@ -78,37 +78,86 @@ def _send_request(self, payload: dict) -> dict: print("Problematic JSON:") print(response.text) - def upload_files(self, scan_code: str, path: str): + def _read_in_chunks(self, file_object, chunk_size=5120): + """Generator to read a file piece by piece.""" + while True: + data = file_object.read(chunk_size) + if not data: + break + yield data + + def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = True): """ Uploads files to the Workbench using the API's File Upload endpoint. Args: scan_code (str): The scan code where the file or files will be uploaded. path (str): Path to the file or files to upload. + enable_chunk_upload (bool): Enable/disable chunk upload. By default, enabled. """ - name = base64.b64encode(os.path.basename(path).encode()).decode("utf-8") - scan_code = base64.b64encode(scan_code.encode()).decode("utf-8") - headers = {"FOSSID-SCAN-CODE": scan_code, "FOSSID-FILE-NAME": name} - try: - with open(path, "rb") as file: - resp = requests.post( - self.api_url, - headers=headers, - data=file, - auth=(self.api_user, self.api_token), - timeout=1800, - ) - try: - resp.json() - except: - print(f"Failed to decode json {resp.text}") - print(traceback.print_exc()) - sys.exit(1) - except IOError: - # Error opening file - print(f"Failed to upload files to the scan {scan_code}.") - print(traceback.print_exc()) - sys.exit(1) + file_size = os.path.getsize(path) + size_limit = 8 * 1024 * 1024 # 8MB in bytes. Based on the default value of post_max_size in php.ini + # Prepare parameters + filename = os.path.basename(path) + filename_base64 = base64.b64encode(filename.encode()).decode("utf-8") + scan_code_base64 = base64.b64encode(scan_code.encode()).decode("utf-8") + + if enable_chunk_upload and (file_size > size_limit): + print(f"Uploading {filename} using 'Transfer-encoding: chunks' due to file size {file_size}.") + # Use chunked upload for files bigger than size_limit + # First delete possible existing files because chunk uploading works by appending existing file on disk. + self.remove_uploaded_content(filename, scan_code) + headers = { + "FOSSID-SCAN-CODE": scan_code_base64, + "FOSSID-FILE-NAME": filename_base64, + 'Transfer-Encoding': 'chunked' + } + try: + with open(path, "rb") as file: + resp = requests.post( + self.api_url, + headers=headers, + data=self._read_in_chunks(file, 5120), + auth=(self.api_user, self.api_token), + timeout=1800, + ) + try: + resp.json() + except: + print(f"Failed to decode json {resp.text}") + print(traceback.print_exc()) + sys.exit(1) + except IOError: + # Error opening file + print(f"Failed to upload files to the scan {scan_code}.") + print(traceback.print_exc()) + sys.exit(1) + else: + # Regular upload, no chunk upload + headers = { + "FOSSID-SCAN-CODE": scan_code_base64, + "FOSSID-FILE-NAME": filename_base64 + } + try: + with open(path, "rb") as file: + resp = requests.post( + self.api_url, + headers=headers, + data=file, + auth=(self.api_user, self.api_token), + timeout=1800, + ) + try: + resp.json() + except: + print(f"Failed to decode json {resp.text}") + print(traceback.print_exc()) + sys.exit(1) + except IOError: + # Error opening file + print(f"Failed to upload files to the scan {scan_code}.") + print(traceback.print_exc()) + sys.exit(1) def _delete_existing_scan(self, scan_code: str): """ @@ -750,6 +799,32 @@ def run_scan( ) return response + def remove_uploaded_content(self, filename: str, scan_code: str): + """ + When using chunked uploading every new chunk is appended to existing file, for this reason we need to make sure + that initially there is no file (from previous uploading). + + Args: + filename (str): The file to be deleted + scan_code (str): The unique identifier for the scan. + """ + print("Called scans->remove_uploaded_content on file {}".format(filename)) + payload = { + "group": "scans", + "action": "remove_uploaded_content", + "data": { + "username": self.api_user, + "key": self.api_token, + "scan_code": scan_code, + "filename": filename, + }, + } + resp = self._send_request(payload) + if resp["status"] != "1": + print( + f"Cannot delete file {filename}, maybe is the first time when uploading this file? API response {resp}." + ) + class CliWrapper: """ @@ -1016,6 +1091,14 @@ def non_empty_string(s): type=str, required=False, ) + optional.add_argument( + "--enable_chunk_upload", + help="For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using\n" + "the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled.", + action="store_true", + default=True, + required=False, + ) required.add_argument( "--scan_number_of_tries", help="""Number of calls to 'check_status' till declaring the scan failed from the point of view of the agent""", @@ -1195,7 +1278,7 @@ def main(): workbench.create_webapp_scan(params.scan_code, params.project_code, params.target_path) else: print( - f"Scan with code {params.scan_code} already exists. Proceeding to uploading hashes..." + f"Scan with code {params.scan_code} already exists. Proceeding to upload..." ) # Handle blind scan differently from regular scan if params.blind_scan: @@ -1234,7 +1317,7 @@ def main(): if not os.path.isdir(os.path.join(root, filename)): counter_files = counter_files + 1 workbench.upload_files( - params.scan_code, os.path.join(root, filename) + params.scan_code, os.path.join(root, filename), params.enable_chunk_upload ) print("A total of {} files uploaded".format(counter_files)) print("Calling API scans->extracting_archives")