Ostorlab · 3asm · Jan 30, 2024 · Jan 30, 2024 · Jan 30, 2024
@@ -30,3 +30,5 @@ ignore_missing_imports = True
 [mypy-ostorlab.assets.*]
 ignore_missing_imports = True
 
+[mypy-jsbeautifier.*]
+ignore_missing_imports = True
@@ -3,6 +3,7 @@
 import logging
 import subprocess
 import tempfile
+import jsbeautifier
 from typing import Any
 
 from ostorlab.agent.message import message as m
@@ -93,7 +94,14 @@ def process(self, message: m.Message) -> None:
             return
 
         with tempfile.NamedTemporaryFile(suffix=file_type) as infile:
-            infile.write(content)
+            if path is not None and path.endswith(".js") is True:
+                # Beautify JavaScript source code to handle minified JS. Beautifying reduces false positives
+                # and produces better reports.
+                infile.write(
+                    jsbeautifier.beautify(content.decode(errors="ignore")).encode()
+                )
+            else:
+                infile.write(content)
             infile.flush()
 
             output = _run_analysis(infile.name)

@@ -10,6 +10,8 @@
 from ostorlab.agent.kb import kb
 from ostorlab.agent.mixins import agent_report_vulnerability_mixin
 
+LINE_SIZE_MAX = 5000
+
 RISK_RATING_MAPPING = {
     "UNKNOWN": agent_report_vulnerability_mixin.RiskRating.POTENTIALLY,
     "LOW": agent_report_vulnerability_mixin.RiskRating.LOW,
@@ -42,7 +44,7 @@ def construct_technical_detail(vulnerability: dict[str, Any], path: str) -> str:
     col = vulnerability.get("start", {}).get("col", "N/A")
     message = vulnerability["extra"].get("message", "N/A")
     path = path or vulnerability.get("path", "N/A")
-    lines = vulnerability["extra"].get("lines", "").strip()
+    lines = vulnerability["extra"].get("lines", "").strip()[:LINE_SIZE_MAX]
     technology = vulnerability["extra"].get("metadata", {}).get("technology", [""])[0]
     title = construct_vulnerability_title(check_id)
 

@@ -1,4 +1,5 @@
 ostorlab[agent]
 rich
 semgrep
-python-magic
+python-magic
+jsbeautifier
@@ -83,6 +83,16 @@ def scan_message_file() -> message.Message:
     return message.Message.from_data(selector, data=msg_data)
 
 
+@pytest.fixture
+def scan_message_js_file() -> message.Message:
+    """Creates a dummy message of type v3.asset.file holding a minified JavaScript file, to be used by the agent for testing purposes."""
+    selector = "v3.asset.file"
+    path = "tests/files/minified.js"
+    with open(path, "rb") as infile:
+        msg_data = {"content": infile.read(), "path": path}
+    return message.Message.from_data(selector, data=msg_data)
+
+
 @pytest.fixture()
 def test_agent(
     agent_persist_mock: dict[str | bytes, str | bytes],

@@ -321,3 +321,23 @@ def testAgentSemgrep_whenAnalysisRunsWithCalledProcessError_doesNotEmitBackVulne
     test_agent.process(scan_message_file)
 
     assert len(agent_mock) == 0
+
+
+def testAgentSemgrep_whenAnalysisRunsOnJsFile_emitsBackVulnerability(
+    test_agent: semgrep_agent.SemgrepAgent,
+    agent_mock: list[message.Message],
+    agent_persist_mock: dict[str | bytes, str | bytes],
+    scan_message_js_file: message.Message,
+    mocker: plugin.MockerFixture,
+) -> None:
+    """Unittest for the full life cycle of the agent:
+    case where the semgrep analysis runs on a minified JavaScript file with a path provided, without errors, and yields vulnerabilities.
+    """
+    mocker.patch(
+        "agent.semgrep_agent._run_analysis",
+        return_value=(JSON_OUTPUT, EMPTY_ERROR_MESSAGE),
+    )
+
+    test_agent.process(scan_message_js_file)
+
+    assert len(agent_mock) > 0