🎉 advance NoseyParker to support version 0.22.0 (#11565)
* 🎉 advance NoseyParker to support version 0.22.0

* add unittest file

* 🔨 restructure

* fix version 0.22.0

* ruff

* update docs

* update docs

* cover another use case

* added comment
manuel-sommer authored Jan 28, 2025
1 parent 4bdc4b7 commit 3f40c65
Showing 5 changed files with 169 additions and 60 deletions.
@@ -6,8 +6,7 @@ Input Type:
 -
 This parser takes JSON Lines Output from Nosey Parker: https://github.com/praetorian-inc/noseyparker
 
-Supports version 0.16.0:
-https://github.com/praetorian-inc/noseyparker/releases/tag/v0.16.0
+Supports versions [0.16.0](https://github.com/praetorian-inc/noseyparker/releases/tag/v0.16.0) and [0.22.0](https://github.com/praetorian-inc/noseyparker/releases/tag/v0.22.0)
 
 Things to note about the Nosey Parker Parser:
 -
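Both report formats are JSON Lines: one JSON object per finding, one finding per line. The parser below tells the versions apart by field presence — v0.16.0 lines carry match_content, while v0.22.0 lines carry finding_id instead. A minimal sketch of that dispatch; the two records are invented stand-ins, far smaller than real Nosey Parker output, with field names taken from the parser code below:

# Illustrative only: field names mirror dojo/tools/noseyparker/parser.py below;
# the values are invented and much smaller than a real Nosey Parker report line.
line_v0_16_0 = {"rule_name": "AWS API Key", "match_content": "AKIAXXXXXXXX", "matches": []}
line_v0_22_0 = {"rule_name": "AWS API Key", "finding_id": "9a3fc0ffee", "rule_text_id": "np.aws.1", "matches": []}

for line in (line_v0_16_0, line_v0_22_0):
    if line.get("rule_name") is not None and line.get("match_content") is not None:
        print("dispatch to version_0_16_0")
    elif line.get("rule_name") is not None and line.get("finding_id") is not None:
        print("dispatch to version_0_22_0")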
174 changes: 116 additions & 58 deletions dojo/tools/noseyparker/parser.py
@@ -17,85 +17,143 @@ def get_label_for_scan_types(self, scan_type):

     def get_description_for_scan_types(self, scan_type):
         return "Nosey Parker report file can be imported in JSON Lines format (option --jsonl). " \
-            "Supports v0.16.0 of https://github.com/praetorian-inc/noseyparker"
+            "Supports v0.16.0 and v0.22.0 of https://github.com/praetorian-inc/noseyparker"

     def get_findings(self, file, test):
         """
         Returns findings from jsonlines file and uses filter
         to skip findings and determine severity
         """
-        dupes = {}
-
+        self.dupes = {}
         # Turn JSONL file into DataFrame
         if file is None:
             return None
         if file.name.lower().endswith(".jsonl"):
             # Process JSON lines into Dict
             data = [json.loads(line) for line in file]

             # Check for empty file
             if len(data[0]) == 0:
                 return []

             # Parse through each secret in each JSON line
             for line in data:
-                # Set rule to the current secret type (e.g. AWS S3 Bucket)
-                try:
-                    rule_name = line["rule_name"]
-                    secret = line["match_content"]
-                except Exception:
+                if line.get("rule_name") is not None and line.get("match_content") is not None:
+                    self.version_0_16_0(line, test)
+                elif line.get("rule_name") is not None and line.get("finding_id") is not None:
+                    self.version_0_22_0(line, test)
+                else:
                     msg = "Invalid Nosey Parker data, make sure to use Nosey Parker v0.16.0"
                     raise ValueError(msg)

-                # Set Finding details
-                for match in line["matches"]:
-                    # The following path is to account for the variability in the JSON lines output
-                    num_elements = len(match["provenance"]) - 1
-                    json_path = match["provenance"][num_elements]
-
-                    title = f"Secret(s) Found in Repository with Commit ID {json_path['commit_provenance']['commit_metadata']['commit_id']}"
-                    filepath = json_path["commit_provenance"]["blob_path"]
-                    line_num = match["location"]["source_span"]["start"]["line"]
-                    description = f"Secret found of type: {rule_name} \n" \
-                        f"SECRET starts with: '{secret[:3]}' \n" \
-                        f"Committer Name: {json_path['commit_provenance']['commit_metadata']['committer_name']} \n" \
-                        f"Committer Email: {json_path['commit_provenance']['commit_metadata']['committer_email']} \n" \
-                        f"Commit ID: {json_path['commit_provenance']['commit_metadata']['commit_id']} \n" \
-                        f"Location: {filepath} line #{line_num} \n" \
-                        f"Line #{line_num} \n"
-
-                    # Internal de-duplication
-                    key = hashlib.md5((filepath + "|" + secret + "|" + str(line_num)).encode("utf-8")).hexdigest()
-
-                    # If secret already exists with the same filepath/secret/linenum
-                    if key in dupes:
-                        finding = dupes[key]
-                        finding.nb_occurences += 1
-                        dupes[key] = finding
-                    else:
-                        dupes[key] = True
-                        # Create Finding object
-                        finding = Finding(
-                            test=test,
-                            cwe=798,
-                            title=title,
-                            description=description,
-                            severity="High",
-                            mitigation="Reset the account/token and remove from source code. Store secrets/tokens/passwords in secret managers or secure vaults.",
-                            date=datetime.today().strftime("%Y-%m-%d"),
-                            verified=False,
-                            active=True,
-                            is_mitigated=False,
-                            file_path=filepath,
-                            line=line_num,
-                            static_finding=True,
-                            nb_occurences=1,
-                            dynamic_finding=False,
-                        )
-                        dupes[key] = finding
         else:
             msg = "JSON lines format not recognized (.jsonl file extension). Make sure to use Nosey Parker v0.16.0"
             raise ValueError(msg)

-        return list(dupes.values())
+        return list(self.dupes.values())

+    def version_0_16_0(self, line, test):
+        rule_name = line["rule_name"]
+        secret = line["match_content"]
+        for match in line["matches"]:
+            # The following path is to account for the variability in the JSON lines output
+            num_elements = len(match["provenance"]) - 1
+            json_path = match["provenance"][num_elements]
+
+            title = f"Secret(s) Found in Repository with Commit ID {json_path['commit_provenance']['commit_metadata']['commit_id']}"
+            filepath = json_path["commit_provenance"]["blob_path"]
+            line_num = match["location"]["source_span"]["start"]["line"]
+            description = f"Secret found of type: {rule_name} \n" \
+                f"SECRET starts with: '{secret[:3]}' \n" \
+                f"Committer Name: {json_path['commit_provenance']['commit_metadata']['committer_name']} \n" \
+                f"Committer Email: {json_path['commit_provenance']['commit_metadata']['committer_email']} \n" \
+                f"Commit ID: {json_path['commit_provenance']['commit_metadata']['commit_id']} \n" \
+                f"Location: {filepath} line #{line_num} \n" \
+                f"Line #{line_num} \n"
+
+            # Internal de-duplication
+            key = hashlib.md5((filepath + "|" + secret + "|" + str(line_num)).encode("utf-8")).hexdigest()
+
+            # If secret already exists with the same filepath/secret/linenum
+            if key in self.dupes:
+                finding = self.dupes[key]
+                finding.nb_occurences += 1
+                self.dupes[key] = finding
+            else:
+                self.dupes[key] = True
+                # Create Finding object
+                finding = Finding(
+                    test=test,
+                    cwe=798,
+                    title=title,
+                    description=description,
+                    severity="High",
+                    mitigation="Reset the account/token and remove from source code. Store secrets/tokens/passwords in secret managers or secure vaults.",
+                    date=datetime.today().strftime("%Y-%m-%d"),
+                    verified=False,
+                    active=True,
+                    is_mitigated=False,
+                    file_path=filepath,
+                    line=line_num,
+                    static_finding=True,
+                    nb_occurences=1,
+                    dynamic_finding=False,
+                )
+                self.dupes[key] = finding

+    def version_0_22_0(self, line, test):
+        rule_name = line["rule_name"]
+        rule_text_id = line["rule_text_id"]
+        for match in line["matches"]:
+            # The following path is to account for the variability in the JSON lines output
+            num_elements = len(match["provenance"]) - 1
+            json_path = match["provenance"][num_elements]
+            line_num = match["location"]["source_span"]["start"]["line"]
+            # scanned with git history
+            if json_path.get("first_commit"):
+                title = f"Secret(s) Found in Repository with Commit ID {json_path['first_commit']['commit_metadata']['commit_id']}"
+                filepath = json_path["first_commit"]["blob_path"]
+                description = f"Secret found of type: {rule_name} \n" \
+                    f"SECRET starts with: '{rule_text_id[:3]}' \n" \
+                    f"Committer Name: {json_path['first_commit']['commit_metadata']['committer_name']} \n" \
+                    f"Committer Email: {json_path['first_commit']['commit_metadata']['committer_email']} \n" \
+                    f"Commit ID: {json_path['first_commit']['commit_metadata']['commit_id']} \n" \
+                    f"Location: {filepath} line #{line_num} \n" \
+                    f"Line #{line_num} \n"
+            # scanned without git history
+            else:
+                title = "Secret(s) Found in Repository"
+                filepath = json_path["path"]
+                description = f"Secret found of type: {rule_name} \n" \
+                    f"SECRET starts with: '{rule_text_id[:3]}' \n" \
+                    f"Location: {filepath} line #{line_num} \n" \
+                    f"Line #{line_num} \n"
+
+            # Internal de-duplication
+            key = hashlib.md5((filepath + "|" + rule_text_id + "|" + str(line_num)).encode("utf-8")).hexdigest()
+
+            # If secret already exists with the same filepath/secret/linenum
+            if key in self.dupes:
+                finding = self.dupes[key]
+                finding.nb_occurences += 1
+                self.dupes[key] = finding
+            else:
+                self.dupes[key] = True
+                # Create Finding object
+                finding = Finding(
+                    test=test,
+                    cwe=798,
+                    title=title,
+                    description=description,
+                    severity="High",
+                    mitigation="Reset the account/token and remove from source code. Store secrets/tokens/passwords in secret managers or secure vaults.",
+                    date=datetime.today().strftime("%Y-%m-%d"),
+                    verified=False,
+                    active=True,
+                    is_mitigated=False,
+                    file_path=filepath,
+                    line=line_num,
+                    static_finding=True,
+                    nb_occurences=1,
+                    dynamic_finding=False,
+                )
+                self.dupes[key] = finding
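version_0_22_0 handles two provenance shapes: a scan over a git history nests commit metadata under first_commit, while a plain filesystem scan only carries a path. A standalone sketch of that branch, with invented values (the field names come from the method above):

# Invented minimal examples of the two provenance shapes version_0_22_0 branches on.
with_git_history = {
    "first_commit": {
        "commit_metadata": {
            "commit_id": "deadbeefcafe",
            "committer_name": "Jane Doe",
            "committer_email": "jane@example.com",
        },
        "blob_path": "src/settings.py",
    },
}
without_git_history = {"path": "src/settings.py"}

for json_path in (with_git_history, without_git_history):
    if json_path.get("first_commit"):
        print("git scan, blob:", json_path["first_commit"]["blob_path"])
    else:
        print("filesystem scan, path:", json_path["path"])

Either way, findings are then de-duplicated on an MD5 of filepath, matched value, and line number, so a repeated hit only increments nb_occurences instead of creating a second finding.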
26 changes: 26 additions & 0 deletions unittests/scans/noseyparker/noseyparker_0_22_0.jsonl

Large diffs are not rendered by default.
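The new fixture can be exercised along these lines; this is a hedged sketch only — the test function name and assertion targets are assumptions, since neither the fixture's contents nor the test code are rendered here:

# Sketch of a unit test for the new fixture. DefectDojo tests normally subclass the
# repo's DojoTestCase; a plain assert-based version is shown here to stay self-contained.
from dojo.models import Test
from dojo.tools.noseyparker.parser import NoseyParkerParser


def test_noseyparker_parser_v0_22_0():
    with open("unittests/scans/noseyparker/noseyparker_0_22_0.jsonl", encoding="utf-8") as testfile:
        findings = NoseyParkerParser().get_findings(testfile, Test())
    for finding in findings:
        # every finding this parser emits is a hardcoded-credentials hit
        assert finding.cwe == 798
        assert finding.severity == "High"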

