Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add truncation and JS formatting to Semgrep. #17

Merged
merged 2 commits into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@ ignore_missing_imports = True
[mypy-ostorlab.assets.*]
ignore_missing_imports = True

[mypy-jsbeautifier.*]
ignore_missing_imports = True
10 changes: 9 additions & 1 deletion agent/semgrep_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import subprocess
import tempfile
import jsbeautifier
from typing import Any

from ostorlab.agent.message import message as m
Expand Down Expand Up @@ -93,7 +94,14 @@ def process(self, message: m.Message) -> None:
return

with tempfile.NamedTemporaryFile(suffix=file_type) as infile:
3asm marked this conversation as resolved.
Show resolved Hide resolved
infile.write(content)
if path is not None and path.endswith(".js") is True:
# Beautify JavaScript source code to handle minified JS. Beautifying the code reduces false positives
# and produces better reports.
infile.write(
jsbeautifier.beautify(content.decode(errors="ignore")).encode()
)
else:
infile.write(content)
infile.flush()

output = _run_analysis(infile.name)
Expand Down
4 changes: 3 additions & 1 deletion agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from ostorlab.agent.kb import kb
from ostorlab.agent.mixins import agent_report_vulnerability_mixin

LINE_SIZE_MAX = 5000
amine3 marked this conversation as resolved.
Show resolved Hide resolved

RISK_RATING_MAPPING = {
"UNKNOWN": agent_report_vulnerability_mixin.RiskRating.POTENTIALLY,
"LOW": agent_report_vulnerability_mixin.RiskRating.LOW,
Expand Down Expand Up @@ -42,7 +44,7 @@ def construct_technical_detail(vulnerability: dict[str, Any], path: str) -> str:
col = vulnerability.get("start", {}).get("col", "N/A")
message = vulnerability["extra"].get("message", "N/A")
path = path or vulnerability.get("path", "N/A")
lines = vulnerability["extra"].get("lines", "").strip()
lines = vulnerability["extra"].get("lines", "").strip()[:LINE_SIZE_MAX]
technology = vulnerability["extra"].get("metadata", {}).get("technology", [""])[0]
title = construct_vulnerability_title(check_id)

Expand Down
3 changes: 2 additions & 1 deletion requirement.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
ostorlab[agent]
rich
semgrep
python-magic
python-magic
jsbeautifier
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ def scan_message_file() -> message.Message:
return message.Message.from_data(selector, data=msg_data)


@pytest.fixture
def scan_message_js_file() -> message.Message:
    """Build a `v3.asset.file` message from the minified JavaScript test file.

    The message data carries the raw bytes read from `tests/files/minified.js`
    under `content` and that same relative path under `path`.
    """
    js_path = "tests/files/minified.js"
    with open(js_path, "rb") as js_file:
        raw_content = js_file.read()
    return message.Message.from_data(
        "v3.asset.file", data={"content": raw_content, "path": js_path}
    )


@pytest.fixture()
def test_agent(
agent_persist_mock: dict[str | bytes, str | bytes],
Expand Down
3 changes: 3 additions & 0 deletions tests/files/minified.js

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions tests/semgrep_agent_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,23 @@ def testAgentSemgrep_whenAnalysisRunsWithCalledProcessError_doesNotEmitBackVulne
test_agent.process(scan_message_file)

assert len(agent_mock) == 0


def testAgentSemgrep_whenAnalysisRunsOnJsFile_emitsBackVulnerability(
    test_agent: semgrep_agent.SemgrepAgent,
    agent_mock: list[message.Message],
    agent_persist_mock: dict[str | bytes, str | bytes],
    scan_message_js_file: message.Message,
    mocker: plugin.MockerFixture,
) -> None:
    """Unittest for the agent processing a JavaScript file message:
    `_run_analysis` is patched to return `JSON_OUTPUT` with `EMPTY_ERROR_MESSAGE`,
    and the agent is expected to emit at least one message.
    """
    # Replace the real analysis call so the test only exercises the agent logic.
    patched_result = (JSON_OUTPUT, EMPTY_ERROR_MESSAGE)
    mocker.patch("agent.semgrep_agent._run_analysis", return_value=patched_result)

    test_agent.process(scan_message_js_file)

    emitted_count = len(agent_mock)
    assert emitted_count > 0
3asm marked this conversation as resolved.
Show resolved Hide resolved
Loading