From 5ab9be2733abdccf49574d8750255a331deb02fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Mon, 29 Apr 2024 11:22:19 +0200 Subject: [PATCH 1/6] updated 7z installation to v24.03 --- .../plugins/unpacking/sevenz/code/sevenz.py | 4 ++-- .../plugins/unpacking/sevenz/install.sh | 24 +++++-------------- 2 files changed, 8 insertions(+), 20 deletions(-) diff --git a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py index 87fd7753..41a982e4 100644 --- a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py @@ -31,9 +31,9 @@ 'filesystem/hfs', 'filesystem/ntfs', ] -VERSION = '0.8.2' +VERSION = '0.9.0' -UNPACKER_EXECUTABLE = '7z' +UNPACKER_EXECUTABLE = '7zzs' # Empty password must be first in list to correctly detect if archive has no password PW_LIST = [""] diff --git a/fact_extractor/plugins/unpacking/sevenz/install.sh b/fact_extractor/plugins/unpacking/sevenz/install.sh index 736e3ed2..1a248e3f 100755 --- a/fact_extractor/plugins/unpacking/sevenz/install.sh +++ b/fact_extractor/plugins/unpacking/sevenz/install.sh @@ -4,28 +4,16 @@ set -e cd "$( dirname "${BASH_SOURCE[0]}" )" echo "------------------------------------" -echo " install p7z from source " +echo " install 7z " echo "------------------------------------" -# install newest version of p7zip -sudo apt-get remove -y p7zip-full - +# install newest version of 7z mkdir -p /tmp/fact_build cd /tmp/fact_build - -wget -O 7zip.tar.bz2 https://sourceforge.net/projects/p7zip/files/latest/download -# remove possible artifacts from previous installation (: == NOP) -rm -rf ./p7zip* || : -tar xvjf 7zip.tar.bz2 -cd p7zip* -# gcc >= 11 has -Wnarrowing as default flag which leads to an error during compilation -# g++ will try to use standard C++17 but the code is not compatible -> use C++14 -sed -i 's/CXXFLAGS=-c -I. \\/CXXFLAGS=-c -I. -Wno-narrowing -std=c++14 \\/g' makefile.glb || echo "Warning: Could not apply makefile patch" -cp makefile.linux_amd64_asm makefile.machine -make -j"$(nproc)" all3 -sudo ./install.sh -cd .. -rm -fr p7zip* 7zip.tar.bz2 +wget https://www.7-zip.org/a/7z2403-linux-x64.tar.xz +tar xvf 7z2403-linux-x64.tar.xz 7zzs +mv 7zzs /usr/local/bin/ +rm 7z2403-linux-x64.tar.xz exit 0 From 3fb5ad0d4552e16df93bec6dc7e77ecfc9cc1130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Mon, 29 Apr 2024 11:23:33 +0200 Subject: [PATCH 2/6] replaced arj unpacker with 7z --- .../plugins/unpacking/arj/__init__.py | 0 .../plugins/unpacking/arj/code/__init__.py | 0 .../plugins/unpacking/arj/code/arj.py | 30 ------------------ .../plugins/unpacking/arj/test/__init__.py | 0 .../plugins/unpacking/arj/test/data/test.arj | Bin 332 -> 0 bytes .../plugins/unpacking/arj/test/test_arj.py | 24 -------------- .../plugins/unpacking/sevenz/code/sevenz.py | 1 + .../unpacking/sevenz/test/data/test.arj | Bin 0 -> 436 bytes .../sevenz/test/test_plugin_sevenz.py | 1 + 9 files changed, 2 insertions(+), 54 deletions(-) delete mode 100644 fact_extractor/plugins/unpacking/arj/__init__.py delete mode 100644 fact_extractor/plugins/unpacking/arj/code/__init__.py delete mode 100644 fact_extractor/plugins/unpacking/arj/code/arj.py delete mode 100644 fact_extractor/plugins/unpacking/arj/test/__init__.py delete mode 100644 fact_extractor/plugins/unpacking/arj/test/data/test.arj delete mode 100644 fact_extractor/plugins/unpacking/arj/test/test_arj.py create mode 100644 fact_extractor/plugins/unpacking/sevenz/test/data/test.arj diff --git a/fact_extractor/plugins/unpacking/arj/__init__.py b/fact_extractor/plugins/unpacking/arj/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/code/__init__.py b/fact_extractor/plugins/unpacking/arj/code/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/code/arj.py b/fact_extractor/plugins/unpacking/arj/code/arj.py deleted file mode 100644 index 761de2fc..00000000 --- a/fact_extractor/plugins/unpacking/arj/code/arj.py +++ /dev/null @@ -1,30 +0,0 @@ -from os import symlink -from pathlib import Path -from tempfile import TemporaryDirectory - -from common_helper_process import execute_shell_command - -NAME = 'ARJ' -MIME_PATTERNS = ['application/x-arj'] -VERSION = '0.1' - - -def unpack_function(file_path, tmp_dir): - """ - Extract arj files - Since the arj binary only works correct when files end with .arj, this is taken care of - """ - with TemporaryDirectory() as staging_dir: - staged_path = str(Path(staging_dir) / '{}.arj'.format(Path(file_path).name)) - symlink(file_path, staged_path) - output = execute_shell_command( - 'arj x -r -y {} {}'.format(staged_path, tmp_dir), timeout=600 - ) - - return {'output': output} - - -# ----> Do not edit below this line <---- -def setup(unpack_tool): - for item in MIME_PATTERNS: - unpack_tool.register_plugin(item, (unpack_function, NAME, VERSION)) diff --git a/fact_extractor/plugins/unpacking/arj/test/__init__.py b/fact_extractor/plugins/unpacking/arj/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fact_extractor/plugins/unpacking/arj/test/data/test.arj b/fact_extractor/plugins/unpacking/arj/test/data/test.arj deleted file mode 100644 index 5f56bec32687f6780663e17fb465f2751fbd87ad..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 332 zcmYdzrNf}a&B!Fc!1SqNEmtfEGhl#{)Z!Ao#G)()hWWP@CowQ2yb5B_1L^`Qtk4ck zjFAB{K={Xzb9IsoTLd8n6rVg41Cj;tKqly?B$g!Vr>B<0r)B1(7RQ6dKm>?u#K7?W zmyJ3DLr6wuu>ug4WTYw-rzYp;r6_>ZD1a29+iM3j8HA4!BEdg&zLJLwWv5VKhH`bpeR4RC^1*TKo1PL5?(Pd F004xYUS|LR diff --git a/fact_extractor/plugins/unpacking/arj/test/test_arj.py b/fact_extractor/plugins/unpacking/arj/test/test_arj.py deleted file mode 100644 index 38f7223a..00000000 --- a/fact_extractor/plugins/unpacking/arj/test/test_arj.py +++ /dev/null @@ -1,24 +0,0 @@ -from pathlib import Path - -from test.unit.unpacker.test_unpacker import TestUnpackerBase - -TEST_FILE = Path(__file__).parent / 'data' / 'test.arj' - - -class TestArjUnpacker(TestUnpackerBase): - - def test_unpacker_selection_generic(self): - self.check_unpacker_selection('application/x-arj', 'ARJ') - - def test_extraction(self): - files, meta_data = self.unpacker.extract_files_from_file(str(TEST_FILE), self.tmp_dir.name) - - assert len(set(files)) == 2, 'file number incorrect' - - assert all( - any( - Path(extracted).name == file for extracted in files - ) for file in ['testfile1', 'testfile2'] - ) - - assert 'output' in meta_data diff --git a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py index 41a982e4..6181d813 100644 --- a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py @@ -14,6 +14,7 @@ # compressed archives 'application/rar', 'application/x-7z-compressed', + 'application/x-arj', 'application/x-iso9660-image', 'application/x-lzma', 'application/x-rar', diff --git a/fact_extractor/plugins/unpacking/sevenz/test/data/test.arj b/fact_extractor/plugins/unpacking/sevenz/test/data/test.arj new file mode 100644 index 0000000000000000000000000000000000000000..2c0ec0266f9279fccf18a9efb3c17a96b598294c GIT binary patch literal 436 zcmYdzrNf}a&B!Fcz{I&YQ9liY88AReYH^8PVo??YL*|wCRSXOXui_Z=fVzMR7ytP1 zJyZn90AcpRLo?+VmIy)&CvVkQc=Rd@Krb>lz|~6BUPa!BeN()0j@$Z;gtu9-F86BKv>%R(o250-37Sq z1{npi%aDPg>)(n#1_rpKECFUv^=z$>@x_e}RZUNyRN6yue Date: Mon, 29 Apr 2024 13:30:51 +0200 Subject: [PATCH 3/6] sfx unpacker: don't unpack binaries without section headers --- .../plugins/unpacking/sfx/code/sfx.py | 34 +++++++++++++----- .../sfx/test/data/no_section_header.elf | Bin 0 -> 2816 bytes .../plugins/unpacking/sfx/test/test_sfx.py | 25 ++++++++----- fact_extractor/unpacker/unpackBase.py | 7 ++-- 4 files changed, 47 insertions(+), 19 deletions(-) create mode 100755 fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf diff --git a/fact_extractor/plugins/unpacking/sfx/code/sfx.py b/fact_extractor/plugins/unpacking/sfx/code/sfx.py index c3b0db43..e9ae6553 100644 --- a/fact_extractor/plugins/unpacking/sfx/code/sfx.py +++ b/fact_extractor/plugins/unpacking/sfx/code/sfx.py @@ -1,10 +1,28 @@ +from __future__ import annotations + from pathlib import Path from plugins.unpacking.sevenz.code.sevenz import unpack_function as sevenz NAME = 'SFX' -MIME_PATTERNS = ['application/x-executable', 'application/x-dosexec'] -VERSION = '0.1' +MIME_PATTERNS = [ + 'application/x-dosexec', + 'application/x-executable', + 'application/x-pie-executable', +] +VERSION = '0.2.0' + +EXCLUDED_FILE_NAMES_1 = {'.bss', '.data', '.text'} +EXCLUDED_FILE_NAMES_2 = {str(i) for i in range(20)} + + +def _extraction_result_is_invalid(extraction_dir: Path) -> bool: + extracted_files = [f.name for f in extraction_dir.iterdir()] + if any(f in EXCLUDED_FILE_NAMES_1 for f in extracted_files): + return True + if all(f in EXCLUDED_FILE_NAMES_2 for f in extracted_files): + return True + return False def unpack_function(file_path, tmp_dir): @@ -12,12 +30,12 @@ def unpack_function(file_path, tmp_dir): extraction_dir = Path(tmp_dir) - for child_path in extraction_dir.iterdir(): - if child_path.name in ['.text', '.data']: - clean_directory(extraction_dir) - meta['output'] = 'Normal executable files will not be extracted.' \ - '\n\nPlease report if it\'s a self extracting archive' - break + if _extraction_result_is_invalid(extraction_dir): + clean_directory(extraction_dir) + meta['output'] = ( + 'Normal executable files will not be extracted.' + "\n\nPlease report if it's a self extracting archive" + ) return meta diff --git a/fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf b/fact_extractor/plugins/unpacking/sfx/test/data/no_section_header.elf new file mode 100755 index 0000000000000000000000000000000000000000..5efd0dcfbf03ed1480c9cfa94ba80bb0f9fb9b50 GIT binary patch literal 2816 zcmcImUrbw77(e%x(hjyuFe-IZ@ zT-}lzhLV^VAB=xq+)R8R(JXr~Qc^=P`T)sHNc3SbE-Awq%y1!*QNQo>oYD}M?8T4V z^E==i&#<)FZ_c6Udjn_d%Jv7_OnA^M>L z!?m}=gJ+5My8V}=&kp~4V(!}OM;ez3zxd`S{Vj|Y4@@6hioo-UO}tAVq2K)oeGTYU zcs5EtC^pMapdX#M{c7U%`a}JlwYA<*XM3QB3`D{qG7$6!!aZO@H*YB19t?Z?+Wi60 zTxda^hBhF)|-@gs5O3XXj65>9gNI8vWBwN3EV*mC(zv zur+xCGSTP{_INsD>Oc#aQe~|$7L;l*L;e~X7jCCO%8nz)k{4zfuSbrhF2oo=h#X5? zm|(mTIhMBYIq?1Wn$5ZM)cl=B|5!TN+MI|Hcw=Z{YCIPqal@*SUt}vN#XM~Nk;A4{B_vYV27$={6y}+>4uVR$AIJVo&cX+VMvsz+ zb@4-p%Ov|715&e|g^Lh_bLhNd(g~YQUSCV4CTn5FoRePxrDMi2HK!+=Guk*hxzYVi zm?exA++D(0#m+B2Git2RkExkR?gEN|-b~=D;2t$rq$v1~8q2lD>f3Wa#<7CaP=b@1 z!$}$)PLha?Ex%ZWOajI-@1~;$*pm__PeAF^jB}NYs&7#x&BKjxo`oKb>o4nONvzK{F6qeX}IKELZ}RHIJ=JfA||OQ_Hl#})b5wp@7_R!-;X zLOs-bCfjaAEwY=rpqAyh3RnSt-KeNn93RMr;(Xs?DC$^_q3pS8rAoz>T&mPWZ7(WX zuTrBc6}qygUQz26C-~PZcDDap5dI3}NuLZRTbFTozflG_G6Dl$J`{ml)0AjA8k@mK zmProWbCf69h`$efY*x%UY?%rEKSG(|cGi2~!v4=%svqq>S%Y!?H;TiO=6LofBeDh~ zA$e?s?+eF#j!up{I0~@v$prrni(U+Iy*PIp=Q}yh@b!g$f^$)~SZ@pW7aZEn_A|o$ n;W{!dala+b1KiHl|2v4^T^kn~3gY7zJw literal 0 HcmV?d00001 diff --git a/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py b/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py index abb197a0..bb95b865 100644 --- a/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py +++ b/fact_extractor/plugins/unpacking/sfx/test/test_sfx.py @@ -1,7 +1,8 @@ -import os +from pathlib import Path + from test.unit.unpacker.test_unpacker import TestUnpackerBase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data') +TEST_DATA_DIR = Path(__file__).parent / 'data' class TestSfxUnpacker(TestUnpackerBase): @@ -11,15 +12,21 @@ def test_unpacker_selection_generic(self): self.check_unpacker_selection(mime, 'SFX') def test_normal_elf_is_skipped(self): - files, meta_data = self.unpacker.extract_files_from_file(os.path.join(TEST_DATA_DIR, 'test_elf_normal'), self.tmp_dir.name) - assert not files, 'no file should be extracted' - assert 'will not be extracted' in meta_data['output'] + self._assert_not_unpacked(TEST_DATA_DIR / 'test_elf_normal') def test_normal_pe_with_rsrc_directory(self): - files, meta_data = self.unpacker.extract_files_from_file(os.path.join(TEST_DATA_DIR, 'test_rsrc'), self.tmp_dir.name) + self._assert_not_unpacked(TEST_DATA_DIR / 'test_rsrc') + + def test_no_section_headers(self): + self._assert_not_unpacked(TEST_DATA_DIR / 'no_section_header.elf') + + def _assert_not_unpacked(self, test_file: Path): + files, meta_data = self.unpacker.extract_files_from_file(test_file, self.tmp_dir.name) assert not files, 'no file should be extracted' assert 'will not be extracted' in meta_data['output'] - def test_with_self_extracting_archives(self): - self.check_unpacking_of_standard_unpack_set(os.path.join(TEST_DATA_DIR, 'test_elf_sfx'), additional_prefix_folder='get_files_test', output=True) - self.check_unpacking_of_standard_unpack_set(os.path.join(TEST_DATA_DIR, 'test_pe_sfx'), additional_prefix_folder='get_files_test', output=True) + def test_self_extracting_archives(self): + for file in ['test_elf_sfx', 'test_pe_sfx']: + self.check_unpacking_of_standard_unpack_set( + TEST_DATA_DIR / file, additional_prefix_folder='get_files_test', output=True + ) diff --git a/fact_extractor/unpacker/unpackBase.py b/fact_extractor/unpacker/unpackBase.py index fb0d629f..38baa0ff 100644 --- a/fact_extractor/unpacker/unpackBase.py +++ b/fact_extractor/unpacker/unpackBase.py @@ -1,5 +1,8 @@ +from __future__ import annotations + import logging from os import getgid, getuid +from pathlib import Path from subprocess import PIPE, Popen from time import time import fnmatch @@ -49,9 +52,9 @@ def get_unpacker(self, mime_type: str): else: return self.unpacker_plugins['generic/carver'] - def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]: + def extract_files_from_file(self, file_path: str | Path, tmp_dir) -> Tuple[List, Dict]: current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime']) - return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker) + return self._extract_files_from_file_using_specific_unpacker(str(file_path), tmp_dir, current_unpacker) def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]: fallback_plugin = self.unpacker_plugins[fallback_plugin_mime] From e1382e533c357145620c33cd30f44dca3e260376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Mon, 29 Apr 2024 13:45:52 +0200 Subject: [PATCH 4/6] 7z installation bug fix --- fact_extractor/plugins/unpacking/sevenz/install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fact_extractor/plugins/unpacking/sevenz/install.sh b/fact_extractor/plugins/unpacking/sevenz/install.sh index 1a248e3f..d41fb283 100755 --- a/fact_extractor/plugins/unpacking/sevenz/install.sh +++ b/fact_extractor/plugins/unpacking/sevenz/install.sh @@ -13,7 +13,7 @@ mkdir -p /tmp/fact_build cd /tmp/fact_build wget https://www.7-zip.org/a/7z2403-linux-x64.tar.xz tar xvf 7z2403-linux-x64.tar.xz 7zzs -mv 7zzs /usr/local/bin/ +sudo mv 7zzs /usr/local/bin/ rm 7z2403-linux-x64.tar.xz exit 0 From 1829afd8ccb1745256622effa4169bb40efd4957 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Fri, 3 May 2024 16:13:15 +0200 Subject: [PATCH 5/6] fixed generic carver 7z filters + fallback test --- .../generic_carver/code/generic_carver.py | 4 ++-- fact_extractor/test/data/container/broken.zip | Bin 788 -> 50 bytes 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index a11b7eee..80653f6d 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -62,7 +62,7 @@ def remove_false_positive_archives(self) -> str: 'application/zip', 'application/zlib', ]: - self._remove_invalid_archives(file_path, '7z l {}', 'ERROR') + self._remove_invalid_archives(file_path, '7zzs l {}', 'ERROR') if file_path.is_file(): self._remove_trailing_data(file_type, file_path) @@ -116,7 +116,7 @@ def _output_is_empty(output): def _find_trailing_data_index_zip(file_path: Path) -> int | None: '''Archives carved by binwalk often have trailing data at the end. 7z can determine the actual file size.''' - output = execute_shell_command(f'7z l {file_path}') + output = execute_shell_command(f'7zzs l {file_path}') if 'There are data after the end of archive' in output: match = REAL_SIZE_REGEX.search(output) if match: diff --git a/fact_extractor/test/data/container/broken.zip b/fact_extractor/test/data/container/broken.zip index 9ba473faa695eb63c1fcb58a1f5f9ec16997cd97..e2067fa32b9b6fc15cbf61e775818f16a0cc23e8 100644 GIT binary patch delta 5 McmbQjW;DSF00ij)j{pDw literal 788 zcmWIWW@Zs#W&naIN&4;}8V2}*jP%r!__WNN)Z+M()Z!BT0Gx{DFcqh#=A{;8CM%@n z=cJ?-f%S61En+V`G*bkq2ZZ$q>IGS@0CIJa)Tt*S*HV&{y zq|GnAv<8}M2gDMX=7Y3>%rn&X)YS|0IDN)*>m#po=e0f0YHtksu$G6hi*4$&C0Y`% zBwtDvw=NYEO@H=0Qj2h(^XIc_38WqJEKo%lA^yA36IvJovAdJUCqmYcuVg(>7 z0eYc0H90>I$qxbEj7;{-xMKroG!O_hymbW87;%GJGbnZ-0Hj$L$VAhO5mz9y5S~Mi zEr?kP4NDp`37Q3uA!ytoJc&J~5N4mmG#ego5L3V|oY9e_@6muB{s?2N7%{vHGsY;u To0Scuk`)LGf$DmIn1KNRe0a+G From c050a78b12971497d5fe6f1c8e48909be8732613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Stucke?= Date: Thu, 8 Aug 2024 15:17:42 +0200 Subject: [PATCH 6/6] sevenz plugin: update 7z ... again --- fact_extractor/plugins/unpacking/sevenz/install.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fact_extractor/plugins/unpacking/sevenz/install.sh b/fact_extractor/plugins/unpacking/sevenz/install.sh index d41fb283..9c0d47dd 100755 --- a/fact_extractor/plugins/unpacking/sevenz/install.sh +++ b/fact_extractor/plugins/unpacking/sevenz/install.sh @@ -7,13 +7,15 @@ echo "------------------------------------" echo " install 7z " echo "------------------------------------" +VERSION="2407" +FILE="7z${VERSION}-linux-x64.tar.xz" # install newest version of 7z mkdir -p /tmp/fact_build cd /tmp/fact_build -wget https://www.7-zip.org/a/7z2403-linux-x64.tar.xz -tar xvf 7z2403-linux-x64.tar.xz 7zzs +wget "https://www.7-zip.org/a/${FILE}" +tar xvf "${FILE}" 7zzs sudo mv 7zzs /usr/local/bin/ -rm 7z2403-linux-x64.tar.xz +rm "${FILE}" exit 0