Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add support for pyproject.toml and env.yml/env.yaml files #550

Closed
wants to merge 11 commits into from
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[build-system]
requires = ["setuptools>=42"]
requires = ["setuptools>=42", "insecure-package"

dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
]
build-backend = "setuptools.build_meta"

3 changes: 2 additions & 1 deletion safety/scan/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ class ScannableEcosystems(Enum):
"""Enum representing scannable ecosystems."""
PYTHON = Ecosystem.PYTHON.value


def process_report(
obj: Any, console: Console, report: ReportModel, output: str,
save_as: Optional[Tuple[str, Path]], **kwargs
Expand Down Expand Up @@ -314,6 +313,8 @@ def scan(ctx: typer.Context,
with console.status(wait_msg, spinner=DEFAULT_SPINNER) as status:
for path, analyzed_file in process_files(paths=file_paths,
config=config):
print("now here", analyzed_file.dependency_results.dependencies)
print("now here", analyzed_file.file_type)
dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
count += len(analyzed_file.dependency_results.dependencies)

# Update exit code if vulnerabilities are found
Expand Down
60 changes: 58 additions & 2 deletions safety/scan/ecosystems/python/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pathlib import Path
import sys
from typing import Generator, List, Optional

import toml
from safety_schemas.models import FileType, PythonDependency
from safety_schemas.models.package import PythonSpecification
from ..base import InspectableFile
Expand Down Expand Up @@ -270,6 +270,59 @@ def read_virtual_environment_dependencies(f: InspectableFile) -> Generator[Pytho
latest_version_without_known_vulnerabilities=None,
more_info_url=None)

def read_pyproject_toml_dependencies(file: Path) -> Generator[PythonDependency, None, None]:
data = toml.load(file)
print(data)
dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
dependencies = []

# Handle 'build-system.requires'
if 'build-system' in data and 'requires' in data['build-system']:
dependencies.extend(data['build-system']['requires'])

# Handle 'project.dependencies'
if 'project' in data and 'dependencies' in data['project']:
dependencies.extend(data['project']['dependencies'])

# Handle 'project.optional-dependencies'
if 'project' in data and 'optional-dependencies' in data['project']:
for opt_deps in data['project']['optional-dependencies'].values():
dependencies.extend(opt_deps)

# Handle 'tool.poetry.dependencies'
if 'tool' in data and 'poetry' in data['tool'] and 'dependencies' in data['tool']['poetry']:
for dep, version in data['tool']['poetry']['dependencies'].items():
if isinstance(version, str):
dependencies.append(f"{dep}=={version}")
else:
dependencies.append(dep)

dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
# Handle 'tool.poetry.dev-dependencies'
if 'tool' in data and 'poetry' in data['tool'] and 'dev-dependencies' in data['tool']['poetry']:
for dep, version in data['tool']['poetry']['dev-dependencies'].items():
if isinstance(version, str):
dependencies.append(f"{dep}=={version}")
else:
dependencies.append(dep)

print("dependencies")
print(dependencies)
for dep in dependencies:
dep_name, dep_version = (dep.split("==") + [None])[:2]
print(dep_name)
print(dep_version)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Improve dependency parsing to handle various version specifiers

The current approach splits the dependency string on '==' to obtain dep_name and dep_version. This may not correctly handle dependencies with different version specifiers (e.g., '>=', '~=', '!=') or additional markers and extras. Consider using a robust parser, such as packaging.requirements.Requirement, to accurately parse dependency strings.

Apply this diff to parse dependencies using the packaging library:

+    from packaging.requirements import Requirement
...
     for dep in dependencies:
-        dep_name, dep_version = (dep.split("==") + [None])[:2]
-        print(dep_name)
-        print(dep_version)
+        req = Requirement(dep)
+        dep_name = req.name
+        dep_version = str(req.specifier) if req.specifier else None
...
         yield PythonDependency(
             name=dep_name,
             version=dep_version,
             specifications=[
-                PythonSpecification(f"{dep_name}=={dep_version}" if dep_version else dep_name, found=file)
+                PythonSpecification(str(req), found=file)
             ],
...
         )

Committable suggestion was skipped due to low confidence.

yield PythonDependency(
name=dep_name,
version=dep_version,
specifications=[
PythonSpecification(f"{dep_name}=={dep_version}" if dep_version else dep_name, found=file)
],
found=file,
insecure_versions=[],
secure_versions=[],
latest_version=None,
latest_version_without_known_vulnerabilities=None,
more_info_url=None
)

def get_dependencies(f: InspectableFile) -> List[PythonDependency]:
"""
Expand All @@ -291,4 +344,7 @@ def get_dependencies(f: InspectableFile) -> List[PythonDependency]:
if f.file_type == FileType.VIRTUAL_ENVIRONMENT:
return list(read_virtual_environment_dependencies(f))

return []
if f.file_type == FileType.PYPROJECT_TOML:
return list(read_pyproject_toml_dependencies(Path(f.file.name)))

return []
10 changes: 10 additions & 0 deletions safety/scan/finder/file_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(
self.target = target
self.include_files = include_files

print("include_files", include_files)
# If no handlers are provided, initialize them from the ecosystem mapping
if not handlers:
handlers = set(ECOSYSTEM_HANDLER_MAPPING[ecosystem]()
Expand Down Expand Up @@ -149,8 +150,17 @@ def process_directory(self, dir_path: str, max_deep: Optional[int] = None) -> Tu
files[file_type.value] = set()
files[file_type.value].add(inspectable_file)
break

special_files = {'pyproject.toml', 'env.yml', 'env.yaml'}
if file_name in special_files:
file_type = FileType(file_name)
inspectable_file = Path(root, file_name)
if file_type.value not in files or not files[file_type.value]:
files[file_type.value] = set()
files[file_type.value].add(inspectable_file)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider refactoring special files handling into a dedicated handler.

The current implementation duplicates logic and reduces maintainability. Consider these improvements:

  1. Create a dedicated SpecialFileHandler class that implements the FileHandler interface
  2. Move special files set to a configuration constant
  3. Add validation of file contents before processing

Example implementation:

# Define in a central configuration
SPECIAL_FILES = frozenset({'pyproject.toml', 'env.yml', 'env.yaml'})

class SpecialFileHandler(FileHandler):
    def can_handle(self, root: str, file_name: str, include_files: Optional[Dict[FileType, List[Path]]]) -> Optional[FileType]:
        return FileType(file_name) if file_name in SPECIAL_FILES else None

This approach would:

  • Improve maintainability by centralizing special file definitions
  • Follow the existing handler pattern
  • Make it easier to add validation logic
  • Reduce code duplication

💡 Codebase verification

Based on the search results, I can now generate a final response.

The ecosystem handlers need to be updated for the new file types

The search results reveal that while pyproject.toml and environment files (env.yml, env.yaml) are added to the special files list in file_finder.py, there are gaps in the implementation:

  • The FileType enum is used in multiple places but pyproject.toml and environment files are not included in the DEFAULT_FILE_TYPES list in scan/command.py
  • While there's a handler for pyproject.toml in ecosystems/python/dependencies.py (read_pyproject_toml_dependencies), there's no handler for environment files
  • The supported file types for fixes (SUPPORTED_FILE_TYPES in safety.py) need to be updated if these files should support remediation

Required updates:

  • Add environment file types to the DEFAULT_FILE_TYPES list if they should be scanned by default
  • Implement dependency reading for environment files
  • Update SUPPORTED_FILE_TYPES if fixes should be supported for these file types
🔗 Analysis chain

Verify related ecosystem handlers are updated.

The implementation adds support for pyproject.toml and environment files, but we should verify that corresponding handlers in the ecosystem mapping can process these files correctly.


Let me try a different search strategy to verify the ecosystem handlers.


Let me try one final search strategy focusing on the FileType enum and its usage.

🏁 Scripts executed

The following scripts were executed for the analysis:

Script:

#!/bin/bash
# Search for handlers that might need updates for the new file types
echo "Searching for related ecosystem handlers..."
rg -l "pyproject.toml|env\.ya?ml" --type python

# Search for dependency processing functions
echo "Searching for dependency processing functions..."
ast-grep --pattern 'def read_$_dependencies($$$)' 

Length of output: 336


Script:

#!/bin/bash
# Search for files containing ecosystem-related code
echo "Searching for ecosystem-related files..."
fd -e py ecosystem

# Search for files mentioning the new file types
echo "Searching for files mentioning the new file types..."
rg "pyproject\.toml|env\.ya?ml|env\.yml" 

# Search for dependency processing related files
echo "Searching for dependency processing related files..."
fd -e py dependency

Length of output: 514


Script:

#!/bin/bash
# Search for FileType enum definition and usage
echo "Searching for FileType enum..."
rg -A 5 "class FileType" 

# Search for files that handle file type processing
echo "Searching for file type processing..."
rg -B 2 -A 2 "FileType\." 

# Search for any dependency or requirement parsing
echo "Searching for dependency parsing..."
rg -l "parse.*requirements|parse.*dependencies"

Length of output: 4851

level += 1


dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
return dir_path, files

def search(self) -> Tuple[str, Dict[str, Set[Path]]]:
Expand Down
12 changes: 5 additions & 7 deletions safety/scan/finder/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os
from pathlib import Path
from types import MappingProxyType
from typing import Dict, List, Optional, Optional, Tuple

from typing import Dict, List, Optional, Set
import toml
dylanpulver marked this conversation as resolved.
Show resolved Hide resolved
from safety_schemas.models import Ecosystem, FileType


Expand Down Expand Up @@ -52,7 +52,7 @@ def can_handle(self, root: str, file_name: str, include_files: Dict[FileType, Li
return None

@abstractmethod
def download_required_assets(self, session) -> Dict[str, str]:
def download_required_assets(self, session):
"""
Abstract method to download required assets for handling files. Should be implemented
by subclasses.
Expand Down Expand Up @@ -109,12 +109,10 @@ def __init__(self) -> None:
super().__init__()
self.ecosystem = Ecosystem.SAFETY_PROJECT

def download_required_assets(self, session) -> None:
"""
No required assets to download for Safety project files.
"""
def download_required_assets(self, session):
pass


# Mapping of ecosystems to their corresponding file handlers
ECOSYSTEM_HANDLER_MAPPING = MappingProxyType({
Ecosystem.PYTHON: PythonFileHandler,
Expand Down
Loading