diff --git a/fact_extractor/plugins/unpacking/arj/__init__.py b/fact_extractor/plugins/unpacking/arj/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/code/__init__.py b/fact_extractor/plugins/unpacking/arj/code/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/code/arj.py b/fact_extractor/plugins/unpacking/arj/code/arj.py deleted file mode 100644 index 761de2fc..00000000 --- a/fact_extractor/plugins/unpacking/arj/code/arj.py +++ /dev/null @@ -1,30 +0,0 @@ -from os import symlink -from pathlib import Path -from tempfile import TemporaryDirectory - -from common_helper_process import execute_shell_command - -NAME = 'ARJ' -MIME_PATTERNS = ['application/x-arj'] -VERSION = '0.1' - - -def unpack_function(file_path, tmp_dir): - """ - Extract arj files - Since the arj binary only works correct when files end with .arj, this is taken care of - """ - with TemporaryDirectory() as staging_dir: - staged_path = str(Path(staging_dir) / '{}.arj'.format(Path(file_path).name)) - symlink(file_path, staged_path) - output = execute_shell_command( - 'arj x -r -y {} {}'.format(staged_path, tmp_dir), timeout=600 - ) - - return {'output': output} - - -# ----> Do not edit below this line <---- -def setup(unpack_tool): - for item in MIME_PATTERNS: - unpack_tool.register_plugin(item, (unpack_function, NAME, VERSION)) diff --git a/fact_extractor/plugins/unpacking/arj/test/__init__.py b/fact_extractor/plugins/unpacking/arj/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/test/data/test.arj b/fact_extractor/plugins/unpacking/arj/test/data/test.arj deleted file mode 100644 index 5f56bec3..00000000 Binary files a/fact_extractor/plugins/unpacking/arj/test/data/test.arj and /dev/null differ diff --git a/fact_extractor/plugins/unpacking/arj/test/test_arj.py b/fact_extractor/plugins/unpacking/arj/test/test_arj.py deleted file mode 100644 index 38f7223a..00000000 --- a/fact_extractor/plugins/unpacking/arj/test/test_arj.py +++ /dev/null @@ -1,24 +0,0 @@ -from pathlib import Path - -from test.unit.unpacker.test_unpacker import TestUnpackerBase - -TEST_FILE = Path(__file__).parent / 'data' / 'test.arj' - - -class TestArjUnpacker(TestUnpackerBase): - - def test_unpacker_selection_generic(self): - self.check_unpacker_selection('application/x-arj', 'ARJ') - - def test_extraction(self): - files, meta_data = self.unpacker.extract_files_from_file(str(TEST_FILE), self.tmp_dir.name) - - assert len(set(files)) == 2, 'file number incorrect' - - assert all( - any( - Path(extracted).name == file for extracted in files - ) for file in ['testfile1', 'testfile2'] - ) - - assert 'output' in meta_data diff --git a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py index 87fd7753..110f7439 100644 --- a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py @@ -14,6 +14,7 @@ # compressed archives 'application/rar', 'application/x-7z-compressed', + 'application/x-arj', 'application/x-iso9660-image', 'application/x-lzma', 'application/x-rar', @@ -31,7 +32,7 @@ 'filesystem/hfs', 'filesystem/ntfs', ] -VERSION = '0.8.2' +VERSION = '0.9.0' UNPACKER_EXECUTABLE = '7z' diff --git a/fact_extractor/plugins/unpacking/sevenz/install.sh b/fact_extractor/plugins/unpacking/sevenz/install.sh index 6e7d71c7..21f39c89 100755 --- a/fact_extractor/plugins/unpacking/sevenz/install.sh +++ b/fact_extractor/plugins/unpacking/sevenz/install.sh @@ -1,31 +1,29 @@ #!/usr/bin/env bash -set -e +set -euo pipefail cd "$( dirname "${BASH_SOURCE[0]}" )" echo "------------------------------------" -echo " install p7z from source " +echo " install 7z " echo "------------------------------------" - -# install newest version of p7zip -sudo apt-get remove -y p7zip-full +VERSION="2407" +ARCH=$(uname -m) +if [[ $ARCH == "x86_64" ]]; then + ARCH_SUFFIX="x64" +elif [[ $ARCH == "aarch64" ]]; then + ARCH_SUFFIX="arm64" +else + echo "unsupported architecture ${ARCH}" + exit 1 +fi +FILE="7z${VERSION}-linux-${ARCH_SUFFIX}.tar.xz" mkdir -p /tmp/fact_build cd /tmp/fact_build - -wget -O 7zip.tar.bz2 https://sourceforge.net/projects/p7zip/files/latest/download -# remove possible artifacts from previous installation (: == NOP) -rm -rf ./p7zip* || : -tar xvjf 7zip.tar.bz2 -cd p7zip* -# gcc >= 11 has -Wnarrowing as default flag which leads to an error during compilation -# g++ will try to use standard C++17 but the code is not compatible -> use C++14 -sed -i 's/CXXFLAGS=-c -I. \\/CXXFLAGS=-c -I. -Wno-narrowing -std=c++14 \\/g' makefile.glb || echo "Warning: Could not apply makefile patch" -cp makefile.linux_any_cpu makefile.machine -make -j"$(nproc)" all3 -sudo ./install.sh -cd .. -rm -fr p7zip* 7zip.tar.bz2 +wget "https://www.7-zip.org/a/${FILE}" +tar xvf "${FILE}" 7zzs +sudo mv 7zzs /usr/local/bin/7z +rm "${FILE}" exit 0 diff --git a/fact_extractor/plugins/unpacking/sevenz/test/data/test.arj b/fact_extractor/plugins/unpacking/sevenz/test/data/test.arj new file mode 100644 index 00000000..2c0ec026 Binary files /dev/null and b/fact_extractor/plugins/unpacking/sevenz/test/data/test.arj differ diff --git a/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py b/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py index c1b391ae..aca0e9b7 100644 --- a/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py @@ -19,6 +19,7 @@ def test_unpacker_selection_generic(self): [ ('test.7z', 'get_files_test'), ('test.rar', 'get_files_test'), + ('test.arj', 'get_files_test'), ('cramfs.img', ''), ('test.iso', ''), ] diff --git a/fact_extractor/plugins/unpacking/sfx/code/sfx.py b/fact_extractor/plugins/unpacking/sfx/code/sfx.py index c3b0db43..e9ae6553 100644 --- a/fact_extractor/plugins/unpacking/sfx/code/sfx.py +++ b/fact_extractor/plugins/unpacking/sfx/code/sfx.py @@ -1,10 +1,28 @@ +from __future__ import annotations + from pathlib import Path from plugins.unpacking.sevenz.code.sevenz import unpack_function as sevenz NAME = 'SFX' -MIME_PATTERNS = ['application/x-executable', 'application/x-dosexec'] -VERSION = '0.1' +MIME_PATTERNS = [ + 'application/x-dosexec', + 'application/x-executable', + 'application/x-pie-executable', +] +VERSION = '0.2.0' + +EXCLUDED_FILE_NAMES_1 = {'.bss', '.data', '.text'} +EXCLUDED_FILE_NAMES_2 = {str(i) for i in range(20)} + + +def _extraction_result_is_invalid(extraction_dir: Path) -> bool: + extracted_files = [f.name for f in extraction_dir.iterdir()] + if any(f in EXCLUDED_FILE_NAMES_1 for f in extracted_files): + return True + if all(f in EXCLUDED_FILE_NAMES_2 for f in extracted_files): + return True + return False def unpack_function(file_path, tmp_dir): @@ -12,12 +30,12 @@ def unpack_function(file_path, tmp_dir): extraction_dir = Path(tmp_dir) - for child_path in extraction_dir.iterdir(): - if child_path.name in ['.text', '.data']: - clean_directory(extraction_dir) - meta['output'] = 'Normal executable files will not be extracted.' \ - '\n\nPlease report if it\'s a self extracting archive' - break + if _extraction_result_is_invalid(extraction_dir): + clean_directory(extraction_dir) + meta['output'] = ( + 'Normal executable files will not be extracted.' + "\n\nPlease report if it's a self extracting archive" + ) return meta diff --git a/fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf b/fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf new file mode 100755 index 00000000..5efd0dcf Binary files /dev/null and b/fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf differ diff --git a/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py b/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py index abb197a0..bb95b865 100644 --- a/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py +++ b/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py @@ -1,7 +1,8 @@ -import os +from pathlib import Path + from test.unit.unpacker.test_unpacker import TestUnpackerBase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data') +TEST_DATA_DIR = Path(__file__).parent / 'data' class TestSfxUnpacker(TestUnpackerBase): @@ -11,15 +12,21 @@ def test_unpacker_selection_generic(self): self.check_unpacker_selection(mime, 'SFX') def test_normal_elf_is_skipped(self): - files, meta_data = self.unpacker.extract_files_from_file(os.path.join(TEST_DATA_DIR, 'test_elf_normal'), self.tmp_dir.name) - assert not files, 'no file should be extracted' - assert 'will not be extracted' in meta_data['output'] + self._assert_not_unpacked(TEST_DATA_DIR / 'test_elf_normal') def test_normal_pe_with_rsrc_directory(self): - files, meta_data = self.unpacker.extract_files_from_file(os.path.join(TEST_DATA_DIR, 'test_rsrc'), self.tmp_dir.name) + self._assert_not_unpacked(TEST_DATA_DIR / 'test_rsrc') + + def test_no_section_headers(self): + self._assert_not_unpacked(TEST_DATA_DIR / 'no_section_header.elf') + + def _assert_not_unpacked(self, test_file: Path): + files, meta_data = self.unpacker.extract_files_from_file(test_file, self.tmp_dir.name) assert not files, 'no file should be extracted' assert 'will not be extracted' in meta_data['output'] - def test_with_self_extracting_archives(self): - self.check_unpacking_of_standard_unpack_set(os.path.join(TEST_DATA_DIR, 'test_elf_sfx'), additional_prefix_folder='get_files_test', output=True) - self.check_unpacking_of_standard_unpack_set(os.path.join(TEST_DATA_DIR, 'test_pe_sfx'), additional_prefix_folder='get_files_test', output=True) + def test_self_extracting_archives(self): + for file in ['test_elf_sfx', 'test_pe_sfx']: + self.check_unpacking_of_standard_unpack_set( + TEST_DATA_DIR / file, additional_prefix_folder='get_files_test', output=True + ) diff --git a/fact_extractor/test/data/container/broken.zip b/fact_extractor/test/data/container/broken.zip index 9ba473fa..e2067fa3 100644 Binary files a/fact_extractor/test/data/container/broken.zip and b/fact_extractor/test/data/container/broken.zip differ diff --git a/fact_extractor/unpacker/unpackBase.py b/fact_extractor/unpacker/unpackBase.py index fb0d629f..38baa0ff 100644 --- a/fact_extractor/unpacker/unpackBase.py +++ b/fact_extractor/unpacker/unpackBase.py @@ -1,5 +1,8 @@ +from __future__ import annotations + import logging from os import getgid, getuid +from pathlib import Path from subprocess import PIPE, Popen from time import time import fnmatch @@ -49,9 +52,9 @@ def get_unpacker(self, mime_type: str): else: return self.unpacker_plugins['generic/carver'] - def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]: + def extract_files_from_file(self, file_path: str | Path, tmp_dir) -> Tuple[List, Dict]: current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime']) - return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker) + return self._extract_files_from_file_using_specific_unpacker(str(file_path), tmp_dir, current_unpacker) def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]: fallback_plugin = self.unpacker_plugins[fallback_plugin_mime]