diff --git a/tests/pdfalyzer/helpers/test_pdf_object_helper.py b/tests/pdfalyzer/helpers/test_pdf_object_helper.py index 99e7892..03d3129 100644 --- a/tests/pdfalyzer/helpers/test_pdf_object_helper.py +++ b/tests/pdfalyzer/helpers/test_pdf_object_helper.py @@ -1,7 +1,8 @@ from PyPDF2 import PdfReader from PyPDF2.generic import IndirectObject -from pdfalyzer.helpers.pdf_object_helper import PdfObjectRelationship, _sort_pdf_object_refs +from pdfalyzer.helpers.pdf_object_helper import _sort_pdf_object_refs +from pdfalyzer.pdf_object_relationship import PdfObjectRelationship from pdfalyzer.util.adobe_strings import * FONT_IDS = [5, 9, 11, 14, 20, 22, 24] diff --git a/tests/pdfalyzer/lib/binary/test_binary_scanner.py b/tests/pdfalyzer/lib/binary/test_binary_scanner.py deleted file mode 100644 index e959543..0000000 --- a/tests/pdfalyzer/lib/binary/test_binary_scanner.py +++ /dev/null @@ -1,23 +0,0 @@ -def test_quote_extraction_methods(font_info): - _check_matches(font_info.binary_scanner.extract_backtick_quoted_bytes, 163, 52840) - - -def test_front_slash_quoted_bytes_extraction(font_info): - _check_matches(font_info.binary_scanner.extract_front_slash_quoted_bytes, 756, 167814) - - -def test_extract_guillemet(font_info): - _check_matches(font_info.binary_scanner.extract_guillemet_quoted_bytes, 59, 78763) - - -def _check_matches(match_iterator, expected_matches: int, expected_bytes: int) -> None: - quoted_bytes_found = 0 - quoted_sections_found = 0 - - for quoted_bytes, _decoder in match_iterator(): - quoted_bytes_found += quoted_bytes.match_length - quoted_sections_found += 1 - - print(f"sections: {quoted_sections_found}, bytes: {quoted_bytes_found}") - assert quoted_sections_found == expected_matches - assert quoted_bytes_found == expected_bytes diff --git a/tests/pdfalyzer/lib/detection/test_encoding_detector.py b/tests/pdfalyzer/lib/detection/test_encoding_detector.py deleted file mode 100644 index 53d1146..0000000 --- a/tests/pdfalyzer/lib/detection/test_encoding_detector.py +++ /dev/null @@ -1,11 +0,0 @@ -import pytest -from rich.text import Text - - -@pytest.fixture -def hebrew_win_1255(): - return { - 'encoding': 'Windows-1255', - 'language': 'Hebrew', - 'confidence': 0.62538832, - } diff --git a/tests/pdfalyzer/lib/detection/test_javascript_hunter.py b/tests/pdfalyzer/lib/detection/test_javascript_hunter.py deleted file mode 100644 index 5f42202..0000000 --- a/tests/pdfalyzer/lib/detection/test_javascript_hunter.py +++ /dev/null @@ -1,11 +0,0 @@ -from pdfalyzer.detection.javascript_hunter import JavascriptHunter - -TEST_STRING = 'export then gracefully exit before finally rising to the moon' - - -def test_count_js_keywords_in_text(): - assert JavascriptHunter.count_js_keywords_in_text(TEST_STRING) == 3 - - -def test_js_keyword_matches(): - assert JavascriptHunter.js_keyword_matches(TEST_STRING) == ['export', 'for', 'final'] diff --git a/tests/pdfalyzer/lib/helpers/test_pdf_object_helper.py b/tests/pdfalyzer/lib/helpers/test_pdf_object_helper.py deleted file mode 100644 index 03d3129..0000000 --- a/tests/pdfalyzer/lib/helpers/test_pdf_object_helper.py +++ /dev/null @@ -1,50 +0,0 @@ -from PyPDF2 import PdfReader -from PyPDF2.generic import IndirectObject - -from pdfalyzer.helpers.pdf_object_helper import _sort_pdf_object_refs -from pdfalyzer.pdf_object_relationship import PdfObjectRelationship -from pdfalyzer.util.adobe_strings import * - -FONT_IDS = [5, 9, 11, 14, 20, 22, 24] -ANNOTS_IDS = [13, 19] + [i for i in range(26, 54)] -EXT_G_STATE_IDS = [7, 8] - - -def test_get_references(analyzing_malicious_documents_pdf_path): - pdf_file = open(analyzing_malicious_documents_pdf_path, 'rb') - pdf_reader = PdfReader(pdf_file) - pdf_obj = IndirectObject(3, 0, pdf_reader) - - direct_refs = [ - PdfObjectRelationship(pdf_obj, IndirectObject(2, 0, pdf_reader), PARENT, PARENT), - PdfObjectRelationship(pdf_obj, IndirectObject(4, 0, pdf_reader), CONTENTS, CONTENTS), - ] - - ext_g_state_refs = [ - PdfObjectRelationship( - pdf_obj, - IndirectObject(id, 0, pdf_reader), - RESOURCES, - f"{RESOURCES}[{EXT_G_STATE}][/GS{id}]" - ) - for id in EXT_G_STATE_IDS - ] - - font_refs = [ - PdfObjectRelationship( - pdf_obj, - IndirectObject(id, 0, pdf_reader), - RESOURCES, - f"{RESOURCES}[{FONT}][/F{i + 1}]" - ) - for i, id in enumerate(FONT_IDS) - ] - - annots_refs = [ - PdfObjectRelationship(pdf_obj, IndirectObject(id, 0, pdf_reader), ANNOTS, ANNOTS + f"[{i}]") - for i, id in enumerate(ANNOTS_IDS) - ] - - expected_references = _sort_pdf_object_refs(direct_refs + ext_g_state_refs + font_refs + annots_refs) - assert _sort_pdf_object_refs(PdfObjectRelationship.get_references(pdf_obj.get_object())) == expected_references - pdf_file.close diff --git a/tests/pdfalyzer/lib/test_pdf_parser_manager.py b/tests/pdfalyzer/lib/test_pdf_parser_manager.py deleted file mode 100644 index 108413e..0000000 --- a/tests/pdfalyzer/lib/test_pdf_parser_manager.py +++ /dev/null @@ -1,6 +0,0 @@ -from pdfalyzer.util.pdf_parser_manager import PdfParserManager - - -def test_pdf_parser_manager(analyzing_malicious_documents_pdf_path): - pdf_parser_manager = PdfParserManager(analyzing_malicious_documents_pdf_path) - assert pdf_parser_manager.object_ids_containing_stream_data == [4, 71, 411, 412, 416, 419, 421, 423, 424, 426] diff --git a/tests/pdfalyzer/lib/test_pdf_walker.py b/tests/pdfalyzer/lib/test_pdf_walker.py deleted file mode 100644 index 458ea6f..0000000 --- a/tests/pdfalyzer/lib/test_pdf_walker.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from pdfalyzer.util.pdf_parser_manager import PdfParserManager - - -class TestPdfalyzer: - def test_struct_elem_parent(self, analyzing_malicious_documents_pdfalyzer): - struct_elem_node = analyzing_malicious_documents_pdfalyzer.find_node_by_idnum(120) - assert struct_elem_node.parent.idnum == 119 - - def test_all_nodes_in_tree(self, analyzing_malicious_documents_pdfalyzer, analyzing_malicious_documents_pdf_path): - for object_id in PdfParserManager(analyzing_malicious_documents_pdf_path).object_ids: - if object_id == 71: - # 71 is the ID of the object stream holding many of the /StructElem - continue - elif object_id == 67: - # 67 is an object without any references or data - continue - elif object_id == 426: - # 426 is a Cross-reference stream containing the same info as the trailer - continue - - node = analyzing_malicious_documents_pdfalyzer.find_node_by_idnum(object_id) - assert node is not None, f"Expected {object_id} to appear in tree." diff --git a/tests/pdfalyzer/lib/util/test_dict_helper.py b/tests/pdfalyzer/lib/util/test_dict_helper.py deleted file mode 100644 index 43936e0..0000000 --- a/tests/pdfalyzer/lib/util/test_dict_helper.py +++ /dev/null @@ -1,9 +0,0 @@ -from pdfalyzer.helpers.dict_helper import get_dict_key_by_value - - -def test_get_dict_key_by_value(): - arr = [1, 2, 3] - hsh = {'a': 1, 'b': b'BYTES', 1: arr} - assert get_dict_key_by_value(hsh, 1) == 'a' - assert get_dict_key_by_value(hsh, b'BYTES') == 'b' - assert get_dict_key_by_value(hsh, arr) == 1 diff --git a/tests/pdfalyzer/lib/util/test_string_utils.py b/tests/pdfalyzer/lib/util/test_string_utils.py deleted file mode 100644 index e69de29..0000000