Skip to content

Commit

Permalink
importlib fix for zipped pkg; catch stream decode errors; handle /Nam…
Browse files Browse the repository at this point in the history
…es better
  • Loading branch information
ashariyar committed Oct 13, 2022
1 parent 06f28dc commit 262762a
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# NEXT RELEASE

# 1.10.8
* Fix `importlib.resources` usage so the bundled YARA rules can be loaded when the package is installed as a zip file
* `/Names` is an indeterminate reference type
* Catch stream decode exceptions and show error instead of failing.

# 1.10.7
* Improve the handling of ColorSpace and Resources nodes

Expand Down
10 changes: 9 additions & 1 deletion pdfalyzer/decorators/pdf_tree_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from anytree import NodeMixin, SymlinkNode
from PyPDF2.generic import (DictionaryObject, EncodedStreamObject, IndirectObject, NumberObject, PdfObject,
StreamObject)
from PyPDF2.errors import PdfReadError
from rich.markup import escape
from rich.panel import Panel
from rich.table import Table
Expand Down Expand Up @@ -53,7 +54,14 @@ def __init__(self, obj: PdfObject, address: str, idnum: int):
self.all_references_processed = False

if isinstance(obj, StreamObject):
self.stream_data = self.obj.get_data()
try:
self.stream_data = self.obj.get_data()
except PdfReadError as e:
msg = f"Failed to decode stream: {e}"
self.stream_data = msg
log.error(msg)
console.print_exception()

self.stream_length = len(self.stream_data)
else:
self.stream_data = None
Expand Down
23 changes: 19 additions & 4 deletions pdfalyzer/detection/yaralyzer_helper.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,32 @@
"""
Class to help with the pre-configured YARA rules in /yara.
"""
import importlib.resources
from importlib.resources import as_file, files
from typing import Optional, Union

from yaralyzer.yaralyzer import Yaralyzer

# NOTE(review): YARA_RULES_DIR is assigned twice in a row; at runtime only the
# second assignment (the `files(...).joinpath(...)` form) is what later code sees.
# The pair looks like the removed/added lines of a diff -- confirm the first can go.
YARA_RULES_DIR = importlib.resources.path('pdfalyzer', 'yara_rules')
YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')

# Names of the rule files; _build_yaralyzer() joins each onto YARA_RULES_DIR.
YARA_RULES_FILES = [
    'lprat.static_file_analysis.yara',
    'PDF.yara',
    'PDF_binary_stream.yara',
]

def get_file_yaralyzer(file_path_to_scan: str) -> Yaralyzer:
    """Get a yaralyzer for a file path"""
    # NOTE(review): two consecutive `return` statements -- as written only the
    # first one (for_rules_dirs) is reachable. This looks like the removed/added
    # pair of a diff; confirm that the _build_yaralyzer() call is the intended one.
    return Yaralyzer.for_rules_dirs([str(YARA_RULES_DIR)], file_path_to_scan)
    return _build_yaralyzer(file_path_to_scan)


def get_bytes_yaralyzer(scannable: bytes, label: str) -> Yaralyzer:
    """Get a yaralyzer for in-memory bytes; `label` is passed through unchanged."""
    # NOTE(review): two consecutive `return` statements -- as written only the
    # first one (for_rules_dirs) is reachable. This looks like the removed/added
    # pair of a diff; confirm that the _build_yaralyzer() call is the intended one.
    return Yaralyzer.for_rules_dirs([str(YARA_RULES_DIR)], scannable, label)
    return _build_yaralyzer(scannable, label)


def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None) -> Yaralyzer:
    """Build a Yaralyzer for `scannable` from every file in YARA_RULES_FILES.

    Args:
        scannable: Forwarded as-is to `Yaralyzer.for_rules_files` (the public
            wrappers pass either a file path string or raw bytes).
        label: Optional label forwarded as-is to `Yaralyzer.for_rules_files`.

    Returns:
        Whatever `Yaralyzer.for_rules_files` returns for the resolved rule paths.
    """
    # Imported locally so this function does not rely on the module's import block.
    from contextlib import ExitStack

    # as_file() yields a real filesystem path for each packaged resource (needed
    # when the package lives inside a zip). ExitStack keeps all of those paths
    # valid until the Yaralyzer has been built, then releases them in reverse
    # order -- the same lifetime the previous hand-nested `with` blocks gave, but
    # without hard-coding the number of entries in YARA_RULES_FILES.
    with ExitStack() as stack:
        rules_paths = [
            str(stack.enter_context(as_file(YARA_RULES_DIR.joinpath(rules_file))))
            for rules_file in YARA_RULES_FILES
        ]
        return Yaralyzer.for_rules_files(rules_paths, scannable, label)
2 changes: 2 additions & 0 deletions pdfalyzer/util/adobe_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
K = '/K' # Equivalent of /Kids for /StructElem
KIDS = PagesAttributes.KIDS
LAST = '/Last'
NAMES = '/Names'
NEXT = '/Next'
NUMS = '/Nums'
OBJECT_STREAM = '/ObjStm'
Expand Down Expand Up @@ -104,6 +105,7 @@
FIELDS, # At least for /AcroForm
FIRST,
FONT,
NAMES,
OPEN_ACTION,
P, # At least for widgets...
RESOURCES,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pdfalyzer"
version = "1.10.7"
version = "1.10.8"
description = "A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
authors = ["Michel de Cryptadamus <[email protected]>"]
license = "GPL-3.0-or-later"
Expand Down

0 comments on commit 262762a

Please sign in to comment.