
Commit

Merge pull request #5 from fossid-ab/chunked_upload
Improve handling of chunked upload
alexandruz authored Jul 10, 2024
2 parents f7811b7 + f0852ad commit 5ca3184
Showing 3 changed files with 101 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
@@ -524,7 +524,7 @@ preferred-modules=

# Exceptions that will emit a warning when being caught. Defaults to
# "Exception"
overgeneral-exceptions=Exception
overgeneral-exceptions=builtins.Exception


[TYPING]
7 changes: 3 additions & 4 deletions README.md
@@ -120,7 +120,7 @@ usage: workbench-agent.py [-h] --api_url API_URL --api_user API_USER
[--delta_only] [--reuse_identifications]
[--identification_reuse_type {any,only_me,specific_project,specific_scan}]
[--specific_code SPECIFIC_CODE]
[--enable_chunk_upload]
[--chunked_upload]
[--scan_number_of_tries SCAN_NUMBER_OF_TRIES]
[--scan_wait_time SCAN_WAIT_TIME] --path PATH
[--log LOG] [--path-result PATH_RESULT]
@@ -181,9 +181,8 @@ optional arguments:
--target_path TARGET_PATH
The path on the Workbench server where the code to be scanned is stored.
No upload is done in this scenario.
--enable_chunk_upload
For files bigger than 8 MB (which is default post_max_size in php.ini) uploading will be done using
the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled.
--chunked_upload      For files bigger than 8 MB (the default post_max_size in php.ini), uploading will be done using
the header Transfer-Encoding: chunked with 5 MB chunks.
--log LOG specify logging level. Allowed values: DEBUG, INFO, WARNING, ERROR
--path-result PATH_RESULT
Save results to specified path
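For orientation, a typical invocation of the renamed flag would look something like `python workbench-agent.py --api_url https://workbench.example.com/api.php --api_user <user> ... --path ./my_project --chunked_upload`, where the URL and the other required options (elided here) are placeholders rather than values taken from this change.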
126 changes: 97 additions & 29 deletions workbench-agent.py
@@ -9,6 +9,7 @@
import argparse
import random
import base64
import io
import os
import subprocess
from argparse import RawTextHelpFormatter
@@ -78,22 +79,80 @@ def _send_request(self, payload: dict) -> dict:
print("Problematic JSON:")
print(response.text)

def _read_in_chunks(self, file_object, chunk_size=5120):
"""Generator to read a file piece by piece."""
def _read_in_chunks(self, file_object: io.BufferedReader, chunk_size=5242880):
"""
Generator to read a file piece by piece.
Args:
file_object (io.BufferedReader): The file object to read from.
chunk_size (int): Size of each chunk in bytes. Default is 5 MB (5242880 bytes).
"""
while True:
data = file_object.read(chunk_size)
if not data:
break
yield data
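A note for readers of this diff: the previous code passed this generator directly to requests as the request body (which makes requests stream it with Transfer-Encoding: chunked), while the new code further down consumes the generator itself and issues one POST per chunk. A minimal standalone sketch of the generator's behaviour, with a hypothetical file name (editorial illustration, not part of the commit):

    # Editorial sketch, not part of the commit.
    def read_in_chunks(file_object, chunk_size=5 * 1024 * 1024):
        """Yield successive chunk_size-byte pieces of an open binary file."""
        while True:
            data = file_object.read(chunk_size)
            if not data:
                break
            yield data

    with open("large_upload.tar.gz", "rb") as fh:           # hypothetical file
        for index, chunk in enumerate(read_in_chunks(fh)):
            print(f"chunk {index}: {len(chunk)} bytes")      # 5 MB each except possibly the last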

def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = True):
def _chunked_upload_request(self, scan_code: str, headers: dict, chunk: bytes):
"""
Upload a single chunk, making sure the Content-Length header added by the Requests library is not sent
(it must not be combined with 'Transfer-Encoding: chunked').
Args:
scan_code (str): The scan code where the file or files will be uploaded.
headers (dict): Headers for the HTTP request.
chunk (bytes): Chunk read from the large file.
"""
try:
req = requests.Request(
'POST',
self.api_url,
headers=headers,
data=chunk,
auth=(self.api_user, self.api_token),
)
s = requests.Session()
prepped = s.prepare_request(req)
# Remove the automatically added 'Content-Length' header; it must not accompany 'Transfer-Encoding: chunked'
if 'Content-Length' in prepped.headers:
del prepped.headers['Content-Length']

# Send HTTP request and retrieve response
response = s.send(prepped)
# print(f"Sent headers: {response.request.headers}")
# print(f"response headers: {response.headers}")
# Retrieve the HTTP status code
status_code = response.status_code
print(f"HTTP Status Code: {status_code}")

# Check if the request was successful (status code 200)
if status_code == 200:
# Parse the JSON response
try:
response.json()
except:
print(f"Failed to decode json {response.text}")
print(traceback.print_exc())
sys.exit(1)
else:
print(f"Request failed with status code {status_code}")
reason = response.reason
print(f"Reason: {reason}")
response_text = response.text
print(f"Response Text: {response_text}")
sys.exit(1)
except IOError:
# Error opening file
print(f"Failed to upload files to the scan {scan_code}.")
print(traceback.print_exc())
sys.exit(1)
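For context on the header handling above (an editorial note, not part of the commit): requests computes a Content-Length header automatically for a bytes body, and a request must not carry both Content-Length and Transfer-Encoding: chunked, which is why the method builds a PreparedRequest and drops the header before sending. The same pattern in isolation, with placeholder URL, credentials and headers:

    # Editorial sketch, not part of the commit; URL, credentials and headers are placeholders.
    import requests

    def post_chunk_without_content_length(url, chunk: bytes, headers: dict, auth: tuple):
        req = requests.Request("POST", url, headers=headers, data=chunk, auth=auth)
        session = requests.Session()
        prepped = session.prepare_request(req)
        prepped.headers.pop("Content-Length", None)   # drop the auto-added header
        return session.send(prepped)

    # resp = post_chunk_without_content_length(
    #     "https://workbench.example.com/api.php", b"...",
    #     {"Transfer-Encoding": "chunked"}, ("user", "token"))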

def upload_files(self, scan_code: str, path: str, chunked_upload: bool = False):
"""
Uploads files to the Workbench using the API's File Upload endpoint.
Args:
scan_code (str): The scan code where the file or files will be uploaded.
path (str): Path to the file or files to upload.
enable_chunk_upload (bool): Enable/disable chunk upload. By default, enabled.
chunked_upload (bool): Enable/disable chunked upload (disabled by default).
"""
file_size = os.path.getsize(path)
size_limit = 8 * 1024 * 1024 # 8MB in bytes. Based on the default value of post_max_size in php.ini
@@ -102,42 +161,36 @@ def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = Tr
filename_base64 = base64.b64encode(filename.encode()).decode("utf-8")
scan_code_base64 = base64.b64encode(scan_code.encode()).decode("utf-8")

if enable_chunk_upload and (file_size > size_limit):
if chunked_upload and (file_size > size_limit):
print(f"Uploading {filename} using 'Transfer-Encoding: chunked' due to file size {file_size}.")
# Use chunked upload for files bigger than size_limit
# First delete possible existing files because chunk uploading works by appending existing file on disk.
self.remove_uploaded_content(filename, scan_code)
print("Uploading using Transfer-encoding: chunked...")
headers = {
"FOSSID-SCAN-CODE": scan_code_base64,
"FOSSID-FILE-NAME": filename_base64,
'Transfer-Encoding': 'chunked'
'Transfer-Encoding': 'chunked',
'Content-Type': 'application/octet-stream'
}
try:
with open(path, "rb") as file:
resp = requests.post(
self.api_url,
headers=headers,
data=self._read_in_chunks(file, 5120),
auth=(self.api_user, self.api_token),
timeout=1800,
)
try:
resp.json()
except:
print(f"Failed to decode json {resp.text}")
print(traceback.print_exc())
sys.exit(1)
for chunk in self._read_in_chunks(file, 5242880):
# Upload each chunk
self._chunked_upload_request(scan_code, headers, chunk)
except IOError:
# Error opening file
print(f"Failed to upload files to the scan {scan_code}.")
print(traceback.print_exc())
sys.exit(1)
print("Finished uploading.")
else:
# Regular upload, no chunk upload
headers = {
"FOSSID-SCAN-CODE": scan_code_base64,
"FOSSID-FILE-NAME": filename_base64
}
print("Uploading...")
try:
with open(path, "rb") as file:
resp = requests.post(
@@ -147,17 +200,32 @@ def upload_files(self, scan_code: str, path: str, enable_chunk_upload: bool = Tr
auth=(self.api_user, self.api_token),
timeout=1800,
)
try:
resp.json()
except:
print(f"Failed to decode json {resp.text}")
print(traceback.print_exc())
# Retrieve the HTTP status code
status_code = resp.status_code
print(f"HTTP Status Code: {status_code}")

# Check if the request was successful (status code 200)
if status_code == 200:
# Parse the JSON response
try:
resp.json()
except:
print(f"Failed to decode json {resp.text}")
print(traceback.print_exc())
sys.exit(1)
else:
print(f"Request failed with status code {status_code}")
reason = resp.reason
print(f"Reason: {reason}")
response_text = resp.text
print(f"Response Text: {response_text}")
sys.exit(1)
except IOError:
# Error opening file
print(f"Failed to upload files to the scan {scan_code}.")
print(traceback.print_exc())
sys.exit(1)
print("Finished uploading.")
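A short usage illustration of the updated signature (editorial sketch; the class name Workbench and its constructor arguments are assumptions and are not shown in this diff):

    # Editorial sketch, not part of the commit; `Workbench` and its constructor are assumed names.
    workbench = Workbench(api_url="https://workbench.example.com/api.php",
                          api_user="scanner", api_token="secret")
    workbench.upload_files("my_scan_code", "/tmp/small.zip")                        # regular upload
    workbench.upload_files("my_scan_code", "/tmp/big.tar.gz", chunked_upload=True)  # opt in to chunked upload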

def _delete_existing_scan(self, scan_code: str):
"""
@@ -1092,11 +1160,11 @@ def non_empty_string(s):
required=False,
)
optional.add_argument(
"--enable_chunk_upload",
"--chunked_upload",
help="For files bigger than 8 MB (the default post_max_size in php.ini), uploading will be done using\n"
"the header Transfer-encoding: chunked with chunks of 5120 bytes. By default, enabled.",
"the header Transfer-Encoding: chunked with 5 MB chunks.",
action="store_true",
default=True,
default=False,
required=False,
)
required.add_argument(
@@ -1303,7 +1371,7 @@ def main():
print(
"Uploading file indicated in --path parameter: {}".format(params.path)
)
workbench.upload_files(params.scan_code, params.path)
workbench.upload_files(params.scan_code, params.path, params.chunked_upload)
else:
# Get all files found at given path (including in subdirectories). Exclude directories
print(
@@ -1317,7 +1385,7 @@
if not os.path.isdir(os.path.join(root, filename)):
counter_files = counter_files + 1
workbench.upload_files(
params.scan_code, os.path.join(root, filename), params.enable_chunk_upload
params.scan_code, os.path.join(root, filename), params.chunked_upload
)
print("A total of {} files uploaded".format(counter_files))
print("Calling API scans->extracting_archives")
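For readers skimming the diff, the surrounding loop (shown only partially in this hunk) walks the directory tree given in --path and uploads every regular file it finds; a standalone sketch with a hypothetical path and a stubbed-out upload call (editorial illustration, not part of the commit):

    # Editorial sketch, not part of the commit; the path and the upload stub are hypothetical.
    import os

    counter_files = 0
    for root, directories, filenames in os.walk("/path/to/code"):
        for filename in filenames:
            full_path = os.path.join(root, filename)
            if not os.path.isdir(full_path):          # mirrors the check in the agent
                counter_files += 1
                print(f"would upload {full_path}")    # stand-in for workbench.upload_files(...)
    print(f"A total of {counter_files} files would be uploaded")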
