From 8662f244dd35fe64d3f4e67d0f58c3aca58392c4 Mon Sep 17 00:00:00 2001 From: ErebusZ Date: Thu, 22 Feb 2024 14:31:39 +0100 Subject: [PATCH 1/2] Increase file size and timeout values for semgrep command --- agent/semgrep_agent.py | 30 ++++++++++++++++++++++++++---- ostorlab.yaml | 4 ++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/agent/semgrep_agent.py b/agent/semgrep_agent.py index babf509..e55824f 100644 --- a/agent/semgrep_agent.py +++ b/agent/semgrep_agent.py @@ -22,7 +22,11 @@ ) logger = logging.getLogger(__name__) -COMMAND_TIMEOUT = 90 +COMMAND_TIMEOUT = 120 +# Number of semgrep rules that can time out on a file before the file is skipped, 0 will have no limit. +TIMEOUT_THRESHOLD = 0 +FILE_SIZE_LIMIT = 500 * 1024 * 1024 +DEFAULT_MEMORY_LIMIT = 2 * 1024 * 1024 * 1024 FILE_TYPE_BLACKLIST = ( ".car", @@ -51,8 +55,25 @@ ) -def _run_analysis(input_file_path: str) -> tuple[bytes, bytes] | None: - command = ["semgrep", "-q", "--config", "auto", "--json", input_file_path] +def _run_analysis( + input_file_path: str, max_memory_limit: int = DEFAULT_MEMORY_LIMIT +) -> tuple[bytes, bytes] | None: + command = [ + "semgrep", + "-q", + "--config", + "auto", + "--timeout", + COMMAND_TIMEOUT, + "--timeout-threshold", + TIMEOUT_THRESHOLD, + "--max-target-bytes", + FILE_SIZE_LIMIT, + "--max-memory", + max_memory_limit, + "--json", + input_file_path, + ] try: output = subprocess.run( command, capture_output=True, check=True, timeout=COMMAND_TIMEOUT @@ -81,6 +102,7 @@ def process(self, message: m.Message) -> None: """ content = message.data.get("content") path = message.data.get("path") + memory_limit = self.args.get("memory_limit", DEFAULT_MEMORY_LIMIT) if content is None: logger.error("Received empty file.") @@ -104,7 +126,7 @@ def process(self, message: m.Message) -> None: infile.write(content) infile.flush() - output = _run_analysis(infile.name) + output = _run_analysis(infile.name, memory_limit) if output is None: logger.error("Subprocess completed with errors.") diff --git a/ostorlab.yaml b/ostorlab.yaml index 0d557f3..7be3d34 100644 --- a/ostorlab.yaml +++ b/ostorlab.yaml @@ -53,3 +53,7 @@ out_selectors: - v3.report.vulnerability docker_file_path : Dockerfile docker_build_root : . +args: + - name: "memory_limit" + description: "Memory limit for semgrep to use on a single file." + type: "int" \ No newline at end of file From d335e8c416620a7490d4351e68e3299e9564d526 Mon Sep 17 00:00:00 2001 From: ErebusZ Date: Thu, 22 Feb 2024 17:24:15 +0100 Subject: [PATCH 2/2] Add test and fixed comments --- agent/semgrep_agent.py | 14 +++++++++----- ostorlab.yaml | 2 +- tests/semgrep_agent_test.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/agent/semgrep_agent.py b/agent/semgrep_agent.py index e55824f..ab3433f 100644 --- a/agent/semgrep_agent.py +++ b/agent/semgrep_agent.py @@ -25,7 +25,9 @@ COMMAND_TIMEOUT = 120 # Number of semgrep rules that can time out on a file before the file is skipped, 0 will have no limit. TIMEOUT_THRESHOLD = 0 +# 500MB FILE_SIZE_LIMIT = 500 * 1024 * 1024 +# 2GB DEFAULT_MEMORY_LIMIT = 2 * 1024 * 1024 * 1024 FILE_TYPE_BLACKLIST = ( @@ -64,13 +66,13 @@ def _run_analysis( "--config", "auto", "--timeout", - COMMAND_TIMEOUT, + str(COMMAND_TIMEOUT), "--timeout-threshold", - TIMEOUT_THRESHOLD, + str(TIMEOUT_THRESHOLD), "--max-target-bytes", - FILE_SIZE_LIMIT, + str(FILE_SIZE_LIMIT), "--max-memory", - max_memory_limit, + str(max_memory_limit), "--json", input_file_path, ] @@ -102,7 +104,9 @@ def process(self, message: m.Message) -> None: """ content = message.data.get("content") path = message.data.get("path") - memory_limit = self.args.get("memory_limit", DEFAULT_MEMORY_LIMIT) + memory_limit = ( + self.args.get("memory_limit", DEFAULT_MEMORY_LIMIT) or DEFAULT_MEMORY_LIMIT + ) if content is None: logger.error("Received empty file.") diff --git a/ostorlab.yaml b/ostorlab.yaml index 7be3d34..e565df4 100644 --- a/ostorlab.yaml +++ b/ostorlab.yaml @@ -56,4 +56,4 @@ docker_build_root : . args: - name: "memory_limit" description: "Memory limit for semgrep to use on a single file." - type: "int" \ No newline at end of file + type: "number" \ No newline at end of file diff --git a/tests/semgrep_agent_test.py b/tests/semgrep_agent_test.py index 5c6d617..f2ce4a1 100644 --- a/tests/semgrep_agent_test.py +++ b/tests/semgrep_agent_test.py @@ -341,3 +341,31 @@ def testAgentSemgrep_whenAnalysisRunsOnJsFile_emitsBackVulnerability( test_agent.process(scan_message_js_file) assert len(agent_mock) > 0 + + +def testAgentSemgrep_whenValidMessage_constructCorrectCommand( + test_agent: semgrep_agent.SemgrepAgent, + scan_message_file: message.Message, + mocker: plugin.MockerFixture, +) -> None: + """Unit test testing semgrep command construction.""" + command_mock = mocker.patch( + "subprocess.run", + side_effect=subprocess.CalledProcessError(cmd="", returncode=2), + ) + + test_agent.process(scan_message_file) + + assert command_mock.call_args.args[0][0] == "semgrep" + assert command_mock.call_args.args[0][1] == "-q" + assert command_mock.call_args.args[0][2] == "--config" + assert command_mock.call_args.args[0][3] == "auto" + assert command_mock.call_args.args[0][4] == "--timeout" + assert command_mock.call_args.args[0][5] == "120" + assert command_mock.call_args.args[0][6] == "--timeout-threshold" + assert command_mock.call_args.args[0][7] == "0" + assert command_mock.call_args.args[0][8] == "--max-target-bytes" + assert command_mock.call_args.args[0][9] == "524288000" + assert command_mock.call_args.args[0][10] == "--max-memory" + assert command_mock.call_args.args[0][11] == "2147483648" + assert command_mock.call_args.args[0][12] == "--json"