Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Also check Python scripts with a valid shebang #732

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions prospector/identify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Largely inspired by https://github.com/pre-commit/identify/blob/main/identify/identify.py#L178
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Largely inspied by https://github.com/pre-commit/identify/blob/main/identify/identify.py#L178
# Largely inspired by https://github.com/pre-commit/identify/blob/main/identify/identify.py#L178


import errno
import os
import shlex
import string
from pathlib import Path
from typing import IO

# Frozen set of the characters in string.printable; the shebang parsers below
# use it for membership tests and reject any line with a character outside it.
printable = frozenset(string.printable)


def _shebang_split(line: str) -> tuple[str, ...]:
    """Split a shebang line (without the leading ``#!``) into its tokens.

    Shebangs are not meant to be quoted, but some tools (setuptools, for
    instance) emit quoted ones, so shell-style splitting is attempted first.
    If ``shlex`` rejects the line, plain whitespace splitting is used instead.
    """
    try:
        tokens = shlex.split(line)
    except ValueError:
        # shlex could not parse the line; use the "traditional" shebang
        # parsing, which is a simple split on whitespace.
        tokens = line.split()
    return tuple(tokens)


def _parse_nix_shebang(
    bytes_io: IO[bytes],
    cmd: tuple[str, ...],
) -> tuple[str, ...]:
    """Resolve the interpreter named by the ``#!`` lines after a nix-shell shebang.

    Consecutive lines starting with ``#!`` are read from ``bytes_io``. Whenever
    such a line contains ``-i`` followed by another token, that token becomes
    the command; the last occurrence wins. ``cmd`` as accumulated so far is
    returned immediately if a line is not valid UTF-8 or contains a character
    outside ``printable``, and at the end once a line no longer starts with
    ``#!``.
    """
    while True:
        if bytes_io.read(2) != b"#!":
            break

        try:
            directive = bytes_io.readline().decode("UTF-8")
        except UnicodeDecodeError:
            return cmd

        if any(ch not in printable for ch in directive):
            return cmd

        tokens = _shebang_split(directive.strip())
        # Pair every token with its successor so the argument that follows
        # each "-i" flag can be picked up directly.
        for flag, value in zip(tokens, tokens[1:]):
            if flag == "-i":
                cmd = (value,)

    return cmd


def _parse_shebang(bytes_io: IO[bytes]) -> tuple[str, ...]:
    """Parse the shebang from a file opened for reading binary.

    Returns the interpreter command as a tuple of tokens, with a leading
    ``/usr/bin/env`` (and its ``-S`` flag, if present) removed. An empty tuple
    is returned when the stream does not start with ``#!``, when the first
    line is not valid UTF-8, or when it contains a character outside
    ``printable``. A bare ``nix-shell`` command is resolved further by reading
    the following ``#!`` lines.
    """
    if bytes_io.read(2) != b"#!":
        return ()

    try:
        first_line = bytes_io.readline().decode("UTF-8")
    except UnicodeDecodeError:
        return ()

    # Require only printable ascii
    if any(ch not in printable for ch in first_line):
        return ()

    cmd = _shebang_split(first_line.strip())
    if cmd[:1] == ("/usr/bin/env",):
        # Drop "env" itself, plus the "-S" flag when it directly follows.
        skip = 2 if cmd[1:2] == ("-S",) else 1
        cmd = cmd[skip:]

    return _parse_nix_shebang(bytes_io, cmd) if cmd == ("nix-shell",) else cmd


def parse_shebang_from_file(path: Path) -> tuple[str, ...]:
    """Parse the shebang given a file path.

    Returns the shebang command as a tuple of tokens, or an empty tuple when
    the file is not executable, has no usable shebang, or cannot be read
    because the OS reports ``EINVAL``.

    Raises:
        ValueError: if ``path`` does not exist.
        OSError: any error other than ``EINVAL`` raised while opening or
            reading the file.
    """
    if not path.exists():
        raise ValueError(f"{path} does not exist.")

    executable = os.access(path, os.X_OK)
    if not executable:
        return ()

    try:
        with path.open("rb") as stream:
            return _parse_shebang(stream)
    except OSError as exc:
        # Only EINVAL is treated as "no shebang"; everything else propagates.
        if exc.errno != errno.EINVAL:
            raise
        return ()
18 changes: 16 additions & 2 deletions prospector/pathutils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
import mimetypes
import os
import re
from pathlib import Path

from prospector import identify

# Matches the basename of a Python interpreter named in a shebang: "python",
# "python3", and versioned names such as "python3.11" (which the previous
# single-optional-digit pattern rejected).
_PYTHON_COMMAND_RE = re.compile(r"^python(?:[0-9]+(?:\.[0-9]+)*)?$")


def is_python_package(path: Path) -> bool:
    """Return True when ``path`` is a directory that contains ``__init__.py``."""
    if not path.is_dir():
        return False
    init_file = path / "__init__.py"
    return init_file.exists()


def is_python_module(path: Path) -> bool:
    """Return True when ``path`` looks like a Python source file.

    A path qualifies when any of the following holds:

    * its suffix is ``.py``;
    * ``mimetypes`` guesses ``text/x-python`` for it;
    * it is a regular file whose shebang names an interpreter whose basename
      matches ``_PYTHON_COMMAND_RE``.

    The suffix check is kept ahead of the other checks (rather than being the
    only check, returning unconditionally) so that existing ``.py`` detection
    is unchanged while the mimetype and shebang checks remain reachable.
    """
    if path.suffix == ".py":
        return True

    mimetype, _ = mimetypes.guess_type(path)
    if mimetype == "text/x-python":
        return True

    # The shebang parser raises ValueError for a missing path and tries to
    # open whatever it is handed, so only regular files are passed on to it.
    # Anything else is simply not a module, as it was before shebang
    # detection existed.
    if not path.is_file():
        return False

    executor = identify.parse_shebang_from_file(path)
    if not executor:
        return False

    # Compare only the interpreter's basename, e.g. "/usr/bin/python3" -> "python3".
    return _PYTHON_COMMAND_RE.match(Path(executor[0]).name) is not None


def is_virtualenv(path: Path) -> bool:
Expand Down