Skip to content

Commit

Permalink
Change name of npm mismatch, add more fields to analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
jamessteel123 committed Oct 16, 2023
1 parent 5290371 commit b2cf92e
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 0 deletions.
2 changes: 2 additions & 0 deletions guarddog/analyzer/metadata/npm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from guarddog.analyzer.metadata.npm.direct_url_dependency import (
NPMDirectURLDependencyDetector,
)
from guarddog.analyzer.metadata.npm.npm_metadata_mismatch import NPMMetadataMismatch

NPM_METADATA_RULES = {}

Expand All @@ -16,6 +17,7 @@
NPMPotentiallyCompromisedEmailDomainDetector,
NPMTyposquatDetector,
NPMDirectURLDependencyDetector,
NPMMetadataMismatch,
]

for detectorClass in classes:
Expand Down
101 changes: 101 additions & 0 deletions guarddog/analyzer/metadata/npm/npm_metadata_mismatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from typing import Optional, Any, Union, get_args
from pathlib import Path
import json

from guarddog.analyzer.metadata.detector import Detector

# Manifest fields whose value may differ between the shipped package.json and
# the metadata served by NPM; such a mismatch can carry malicious information.
# Maps each field name to the type its value is expected to have.
MANIFEST_FIELDS_CHECKLIST: dict[str, type] = {
    "dependencies": dict,
    "devDependencies": dict,
    "scripts": dict,
    "main": str,
    "repository": dict,
    "bugs": dict,
    "homepage": str,
}

class NPMMetadataMismatch(Detector):
    """Detector that compares a package's ``package.json`` with the NPM metadata for the same version.

    Only the fields listed in ``MANIFEST_FIELDS_CHECKLIST`` are compared.
    """

    def __init__(self):
        super().__init__(
            name="npm_metadata_mismatch",
            description="Identify packages which have mismatches between the npm pacakge manifest and the package info"
        )

    def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
               version: Optional[str] = None) -> tuple[bool, Optional[str]]:
        """Compare the on-disk ``package.json`` with ``package_info`` for one version.

        Args:
            package_info: Mapping read through its ``"dist-tags"`` and
                ``"versions"`` keys (presumably the NPM registry document for
                the package -- confirm against the caller).
            path: Directory containing ``package/package.json``. Required.
            name: Unused by this detector.
            version: Version to compare; when falsy, the ``latest`` dist-tag
                from ``package_info`` is used.

        Returns:
            ``(True, description)`` when at least one checked field differs,
            otherwise ``(False, "No differences found")``.

        Raises:
            ValueError: If ``path`` is ``None``.
            KeyError: If ``version`` (or the ``latest`` dist-tag) cannot be
                looked up in ``package_info``.
        """
        # Get the latest version if not specified
        if not version:
            version = package_info["dist-tags"]["latest"]

        # Load package.json manifest
        if path is None:
            raise ValueError("path is needed to run heuristic " + self.get_name())
        package_json = Path(path) / "package" / "package.json"
        # Hand raw bytes to json.loads so the encoding (UTF-8/16/32, with or
        # without BOM) is detected from the content instead of depending on
        # the platform's locale encoding.
        package_manifest: dict[str, Any] = json.loads(package_json.read_bytes())

        # Get NPM manifest for version
        version_info = package_info["versions"][version]

        # One (possibly empty) list of differences per checked field.
        diff: dict[str, Diff] = {
            field: difference_at_key(version_info, package_manifest, field, field_type)
            for field, field_type in MANIFEST_FIELDS_CHECKLIST.items()
        }
        if not any(diff.values()):
            return False, "No differences found"
        return True, describe_diff(diff)

# One reported difference: (label, value on the NPM side, value in package.json).
# The two value slots are not guaranteed to be strings: they hold whatever the
# JSON documents contain, and ``None`` when a key is missing on one side.
PerItemDiff = tuple[str, Any, Any]
# Every difference found for a single manifest field.
Diff = list[PerItemDiff]

def diff_at_key_dict(version_at_key: dict[str, Any],
                     manifest_at_key: dict[str, Any]) -> list[tuple[str, Any, Any]]:
    """Return the entries on which two dicts disagree.

    Args:
        version_at_key: Dict taken from the NPM version metadata.
        manifest_at_key: Dict taken from ``package.json``.

    Returns:
        A list of ``(key, version_value, manifest_value)`` tuples, one per key
        whose values differ. A key missing on one side is reported with
        ``None`` for that side, so a missing key and an explicit ``None``
        value compare equal. Entries follow first-seen key order
        (``version_at_key`` first, then keys only in ``manifest_at_key``),
        which keeps the result stable between runs.
    """
    differences = []
    # dict.fromkeys de-duplicates while preserving insertion order; a set
    # union would make the order depend on string hashing.
    for key in dict.fromkeys([*version_at_key, *manifest_at_key]):
        version_value = version_at_key.get(key)
        manifest_value = manifest_at_key.get(key)
        if version_value != manifest_value:
            differences.append((key, version_value, manifest_value))
    return differences

def difference_at_key(version_info: dict[str, Any], package_manifest: dict[str, Any], key: str,
                      key_type) -> list[tuple[str, Any, Any]]:
    """Compare one field of the NPM version metadata with the same field of ``package.json``.

    Args:
        version_info: NPM metadata for the version under analysis.
        package_manifest: Parsed ``package.json``.
        key: Name of the field to compare.
        key_type: Type the field is expected to have; it is also called with
            no arguments to build the default used when the field is absent.

    Returns:
        An empty list when both sides hold equal values. Otherwise a list of
        ``(label, version_value, manifest_value)`` tuples: a single entry
        describing the types when either side is not a ``key_type``, the
        per-key differences for dict fields, or a single entry carrying both
        values for any other type.
    """
    version_at_key = version_info.get(key, key_type())
    manifest_at_key = package_manifest.get(key, key_type())
    # Equal values are never a mismatch, even when both sides share an
    # unexpected type (e.g. both null, or the same string where a dict was
    # expected).
    if version_at_key == manifest_at_key:
        return []
    if not (isinstance(version_at_key, key_type) and isinstance(manifest_at_key, key_type)):
        return [(f"Expected type {key_type}", f"{type(version_at_key)}", f"{type(manifest_at_key)}")]
    if key_type == dict:
        return diff_at_key_dict(version_at_key, manifest_at_key)
    # If it is not a dict do a direct comparison of the value at the key,
    # currently the only other type is strings
    return [(key, version_at_key, manifest_at_key)]


def describe_diff(diff: dict[str, list[tuple[str, Any, Any]]]) -> str:
    """Render the per-field differences as a human-readable report.

    Args:
        diff: Maps each checked field name to its list of
            ``(label, manifest_value, package_json_value)`` differences.
            Fields with an empty list are omitted from the report. Every field
            with differences must be a key of ``MANIFEST_FIELDS_CHECKLIST``.

    Returns:
        A string of the form

            Difference between manifest and package.json found:
            dependencies:
             key: Manifest("v4.0.0"), package.json("v3.0.1")
            scripts:
             key: Manifest("a"), package.json("b")
            main:
             Manifest:
             index.js
             package.json:
             malicious.js

        Dict-typed fields get one line per differing key; other fields list
        the manifest values followed by the package.json values.
    """
    parts = ["Difference between manifest and package.json found: \n"]
    for field, differences in diff.items():
        if not differences:
            continue
        parts.append(f"{field}: \n")
        if MANIFEST_FIELDS_CHECKLIST[field] == dict:
            parts.extend(
                f' {label}: Manifest("{manifest_value}"), package.json("{package_value}") \n'
                for label, manifest_value, package_value in differences
            )
        else:
            parts.append(" Manifest:\n")
            parts.extend(f" {manifest_value}\n" for _, manifest_value, _ in differences)
            parts.append(" package.json:\n")
            parts.extend(f" {package_value}\n" for _, _, package_value in differences)
    return "".join(parts)

0 comments on commit b2cf92e

Please sign in to comment.