diff --git a/.github/workflows/build_ci.yml b/.github/workflows/build_ci.yml index d4e28805..99d4cebe 100644 --- a/.github/workflows/build_ci.yml +++ b/.github/workflows/build_ci.yml @@ -9,7 +9,7 @@ on: jobs: build-ci: - runs-on: [ self-hosted, linux, x64, focal ] + runs-on: [ self-hosted, linux, x64, jammy ] timeout-minutes: 45 steps: - name: Add Masks diff --git a/Dockerfile b/Dockerfile index 0995c821..ce0a40eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM phusion/baseimage:jammy-1.0.1 +FROM phusion/baseimage:jammy-1.0.3 WORKDIR /opt/app diff --git a/README.md b/README.md index 62738f54..b04eb0a2 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ fact_extractor/install/pre_install.sh fact_extractor/install.py ``` -:warning: **We no longer support Ubuntu 16.04 and Python <3.7** +:warning: **We no longer support Ubuntu 18.04 and Python <3.8** (It may still work with a bit of tinkering, though) :warning: For the `generic_fs` unpacker plugin to work with all file system types, you may need to install extra kernel modules diff --git a/extract.py b/extract.py index 9ef11333..dd065af6 100755 --- a/extract.py +++ b/extract.py @@ -82,7 +82,7 @@ def call_docker(input_file, container, target, report_file, memory_limit, tmpdir shutil.copy(input_file, str(Path(tmpdir.name, 'input', Path(input_file).name))) - command = f'docker run --rm -m {memory_limit}m -v {tmpdir.name}:/tmp/extractor -v /dev:/dev --privileged {container} {arguments}' + command = f'docker run --rm --ulimit nofile=20000:50000 -m {memory_limit}m -v {tmpdir.name}:/tmp/extractor -v /dev:/dev --privileged {container} {arguments}' subprocess.run(command, shell=True) with suppress(shutil.Error): diff --git a/fact_extractor/docker_extraction.py b/fact_extractor/docker_extraction.py index 24724290..f38946d4 100755 --- a/fact_extractor/docker_extraction.py +++ b/fact_extractor/docker_extraction.py @@ -22,7 +22,7 @@ from helperFunctions.config import get_config_dir from 
helperFunctions.file_system import change_owner_of_output_files -from helperFunctions.program_setup import load_config, setup_logging +from helperFunctions.program_setup import check_ulimits, load_config, setup_logging from unpacker.unpack import unpack @@ -43,6 +43,7 @@ def _parse_args(): def main(args): config = load_config(f'{get_config_dir()}/main.cfg') setup_logging(debug=False) + check_ulimits() input_dir = Path(config.get('unpack', 'data_folder'), 'input') input_file = list(input_dir.iterdir())[0] diff --git a/fact_extractor/helperFunctions/install.py b/fact_extractor/helperFunctions/install.py index d4c4bc82..cc0194e7 100644 --- a/fact_extractor/helperFunctions/install.py +++ b/fact_extractor/helperFunctions/install.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import configparser import logging import os @@ -104,7 +106,9 @@ def pip_install_packages(*packages): for packet in packages: try: run_shell_command_raise_on_return_code( - f'{pip_command} install --upgrade {packet}', f'Error in installation of python package {packet}', True + f'{pip_command} install --upgrade "{packet}"', + f'Error in installation of python package {packet}', + True ) except InstallationError as installation_error: if 'is a distutils installed project' in str(installation_error): @@ -113,6 +117,13 @@ def pip_install_packages(*packages): raise installation_error +def load_requirements_file(path: Path) -> list[str]: + return [ + line for line in path.read_text().splitlines() + if line and not line.startswith('#') + ] + + def check_if_command_in_path(command): _, return_code = execute_shell_command_get_return_code(f'command -v {command}') if return_code != 0: diff --git a/fact_extractor/helperFunctions/program_setup.py b/fact_extractor/helperFunctions/program_setup.py index c5e2e0cc..994bdcec 100644 --- a/fact_extractor/helperFunctions/program_setup.py +++ b/fact_extractor/helperFunctions/program_setup.py @@ -1,6 +1,7 @@ import argparse import configparser import logging 
+import resource from common_helper_files import create_dir_for_file @@ -39,6 +40,16 @@ def setup_logging(debug, log_file=None, log_level=None): logger.addHandler(console_log) +def check_ulimits(): + # Get number of openable files + soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + if soft < 1024: + resource.setrlimit(resource.RLIMIT_NOFILE, (min(1024, hard), hard)) + logging.info(f'The number of openable files has been raised from {soft} to {min(1024, hard)}.') + elif soft == resource.RLIM_INFINITY or soft > 100000: + logging.warning('Warning: A very high (or no) nofile limit will slow down fakeroot and cause other problems.') + + def load_config(config_file): config = configparser.ConfigParser() config.read(config_file) diff --git a/fact_extractor/install/common.py b/fact_extractor/install/common.py index b687e277..7bd026ee 100644 --- a/fact_extractor/install/common.py +++ b/fact_extractor/install/common.py @@ -4,47 +4,37 @@ from pathlib import Path from helperFunctions.config import load_config -from helperFunctions.install import apt_install_packages, apt_update_sources, pip_install_packages +from helperFunctions.install import ( + apt_install_packages, apt_update_sources, pip_install_packages, load_requirements_file +) - -DEPENDENCIES = { +APT_DEPENDENCIES = { # Ubuntu - 'bionic': {}, - 'focal': {}, - 'jammy': {}, + 'bionic': [], + 'focal': [], + 'jammy': [], # Debian - 'buster': {}, - 'bullseye': {}, + 'buster': [], + 'bullseye': [], # Packages common to all platforms - 'common': { - 'apt': [ - # Non python dependencies - 'build-essential', - 'automake', - 'autoconf', - 'libtool', - # Python dependencies - 'python3', - 'python3-dev', - 'python-wheel-common', - ], - 'pip3': [ - 'flask', - 'flask_restful', - 'gunicorn', - 'pytest', - 'pytest-cov', - 'testresources', - ], - }, + 'common': [ + # Non python dependencies + 'build-essential', + 'automake', + 'autoconf', + 'libtool', + # Python dependencies + 'python3', + 'python3-dev', + 
'python-wheel-common', + ], } +PIP_DEPENDENCY_FILE = Path(__file__).parent.parent.parent / 'requirements-common.txt' -def install_dependencies(dependencies): - apt = dependencies.get('apt', []) - pip3 = dependencies.get('pip3', []) - apt_install_packages(*apt) - pip_install_packages(*pip3) +def install_apt_dependencies(distribution: str): + apt_install_packages(*APT_DEPENDENCIES['common']) + apt_install_packages(*APT_DEPENDENCIES[distribution]) def main(distribution): @@ -52,8 +42,8 @@ def main(distribution): apt_update_sources() # install dependencies - install_dependencies(DEPENDENCIES['common']) - install_dependencies(DEPENDENCIES[distribution]) + install_apt_dependencies(distribution) + pip_install_packages(*load_requirements_file(PIP_DEPENDENCY_FILE)) # make bin dir with suppress(FileExistsError): diff --git a/fact_extractor/install/unpacker.py b/fact_extractor/install/unpacker.py index 6b2006b6..63730bb2 100644 --- a/fact_extractor/install/unpacker.py +++ b/fact_extractor/install/unpacker.py @@ -1,18 +1,22 @@ +import hashlib import logging import os from getpass import getuser from pathlib import Path +from shlex import split +from subprocess import CalledProcessError, run from tempfile import TemporaryDirectory from common_helper_process import execute_shell_command_get_return_code from helperFunctions.install import ( - apt_install_packages, - install_github_project, InstallationError, OperateInDirectory, - pip_install_packages, + apt_install_packages, apt_remove_packages, + install_github_project, + pip_install_packages, + load_requirements_file, ) BIN_DIR = Path(__file__).parent.parent / 'bin' @@ -102,31 +106,32 @@ 'liblzo2-dev', 'xvfb', 'libcapstone-dev', - # patool and unpacking backends - 'lrzip', + # patool + 'arj', + 'cabextract', 'cpio', - 'unadf', - 'rpm2cpio', - 'lzop', + 'flac', + 'gzip', 'lhasa', - 'cabextract', - 'zpaq', 'libchm-dev', - 'arj', - 'xdms', - 'rzip', + 'liblz4-tool', + 'lrzip', 'lzip', - 'unalz', - 'unrar', - 'gzip', + 'lzop', + 
'ncompress', 'nomarch', - 'flac', - 'unace', + 'p7zip-full', + 'rpm2cpio', + 'rzip', 'sharutils', + 'unace', + 'unadf', + 'unalz', 'unar', + 'unrar', + 'xdms', + 'zpaq', 'zstd', - 'liblz4-tool', - 'p7zip-full', # Freetz 'autoconf', 'automake', @@ -153,51 +158,18 @@ # 7z 'yasm', ], - 'pip3': [ - 'pluginbase', - 'git+https://github.com/armbues/python-entropy', # To be checked. Original dependency was deleted. - 'git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git', - 'git+https://github.com/fkie-cad/fact_helper_file.git', - 'git+https://github.com/wummel/patool.git', - 'archmage', - # jefferson + deps - 'git+https://github.com/sviehb/jefferson.git', - 'cstruct==2.1', - 'python-lzo', - 'git+https://github.com/jrspruitt/ubi_reader@v0.6.3-master', # pinned as broken currently - # dji / dlink_shrs - 'pycryptodome', - # hp / raw - 'git+https://github.com/fkie-cad/common_helper_extraction.git', - # intel_hex - 'intelhex', - # linuxkernel - 'lz4', - 'git+https://github.com/marin-m/vmlinux-to-elf', - # mikrotik - 'npkPy', - # sevenz - 'git+https://github.com/fkie-cad/common_helper_passwords.git', - # srec - 'bincopy', - # uboot - 'extract-dtb', - # uefi - 'git+https://github.com/theopolis/uefi-firmware-parser@v1.10', - # unblob - 'unblob', - ], 'github': [ ( 'rampageX/firmware-mod-kit', [ '(cd src && make untrx && make -C tpl-tool/src && make -C yaffs2utils)', - 'cp src/untrx src/yaffs2utils/unyaffs2 src/tpl-tool/src/tpl-tool ../../bin/' + 'cp src/untrx src/yaffs2utils/unyaffs2 src/tpl-tool/src/tpl-tool ../../bin/', ], ), ], }, } +PIP_DEPENDENCY_FILE = Path(__file__).parent.parent.parent / 'requirements-unpackers.txt' def check_mod_kit_installed() -> bool: @@ -209,10 +181,9 @@ def check_mod_kit_installed() -> bool: def install_dependencies(dependencies): apt = dependencies.get('apt', []) - pip3 = dependencies.get('pip3', []) github = dependencies.get('github', []) apt_install_packages(*apt) - pip_install_packages(*pip3) + 
pip_install_packages(*load_requirements_file(PIP_DEPENDENCY_FILE)) for repo in github: if repo[0].endswith('firmware-mod-kit') and check_mod_kit_installed(): logging.info('Skipping firmware-mod-kit since it is already installed') @@ -236,6 +207,7 @@ def main(distribution): # install plug-in dependencies _install_plugins() + _install_patool_deps() # configure environment _edit_sudoers() @@ -270,6 +242,25 @@ def _edit_sudoers(): raise InstallationError('Editing sudoers file did not succeed\n{chown_output}\n{mv_output}') +def _install_patool_deps(): + '''install additional dependencies of patool''' + with TemporaryDirectory(prefix='patool') as build_directory: + with OperateInDirectory(build_directory): + # install zoo unpacker + file_name = 'zoo_2.10-28_amd64.deb' + try: + run(split(f'wget http://launchpadlibrarian.net/230277773/{file_name}'), capture_output=True, check=True) + expected_sha = '953f4f94095ef3813dfd30c8977475c834363aaabce15ab85ac5195e52fd816a' + assert _sha256_hash_file(Path(file_name)) == expected_sha + run(split(f'sudo dpkg -i {file_name}'), capture_output=True, check=True) + except (AssertionError, CalledProcessError) as error: + raise InstallationError('Error during zoo unpacker installation') from error + + +def _sha256_hash_file(file_path: Path) -> str: + return hashlib.sha256(file_path.read_bytes()).hexdigest() + + def _install_freetz(): if all( (Path(__file__).parent.parent / 'bin' / tool).exists() diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index 1e56b9c5..acbbb3f4 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -48,24 +48,15 @@ def unpack_function(file_path, tmp_dir): } -def drop_underscore_directory(tmp_dir): - extracted_contents = list(Path(tmp_dir).iterdir()) - if not extracted_contents: - return - if not 
len(extracted_contents) == 1 or not extracted_contents[0].name.endswith('_extract'): - return - for result in extracted_contents[0].iterdir(): - shutil.move(str(result), str(result.parent.parent)) - shutil.rmtree(str(extracted_contents[0])) - - class ArchivesFilter: def __init__(self, unpack_directory): self.unpack_directory = Path(unpack_directory) self.screening_logs = [] def remove_false_positive_archives(self) -> str: - for file_path in self.unpack_directory.iterdir(): + for file_path in self.unpack_directory.glob('**/*'): + if not file_path.is_file(): + continue file_type = get_file_type_from_path(file_path)['mime'] if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path): @@ -90,7 +81,6 @@ def remove_false_positive_archives(self) -> str: return '\n'.join(self.screening_logs) - @staticmethod def _is_possible_tar(file_type: str, file_path: Path) -> bool: # broken tar archives may be identified as octet-stream by newer versions of libmagic @@ -158,6 +148,17 @@ def _find_trailing_data_index_bz2(file_path: Path) -> int | None: return None +def drop_underscore_directory(tmp_dir): + extracted_contents = list(Path(tmp_dir).iterdir()) + if not extracted_contents: + return + if not len(extracted_contents) == 1 or not extracted_contents[0].name.endswith('_extract'): + return + for result in extracted_contents[0].iterdir(): + shutil.move(str(result), str(result.parent.parent)) + shutil.rmtree(str(extracted_contents[0])) + + # ----> Do not edit below this line <---- def setup(unpack_tool): for item in MIME_PATTERNS: diff --git a/fact_extractor/plugins/unpacking/patool/code/patool.py b/fact_extractor/plugins/unpacking/patool/code/patool.py index 530378b0..961edf81 100644 --- a/fact_extractor/plugins/unpacking/patool/code/patool.py +++ b/fact_extractor/plugins/unpacking/patool/code/patool.py @@ -7,7 +7,6 @@ MIME_PATTERNS = [ 'application/gzip', 'application/java-archive', - 'application/rar', 'application/vnd.debian.binary-package', 
'application/vnd.ms-cab-compressed', 'application/x-ace', @@ -23,15 +22,15 @@ 'application/x-debian-package', 'application/x-dms', 'application/x-gzip', - 'application/x-iso9660-image', 'application/x-lha', 'application/x-lrzip', + 'application/x-lz4', 'application/x-lzh', + 'application/x-lzh-compressed', 'application/x-lzip', + 'application/x-lzo', 'application/x-lzop', - 'application/x-rar', 'application/x-redhat-package-manager', - 'application/x-rpm', 'application/x-rzip', 'application/x-shar', 'application/x-tar', @@ -40,7 +39,7 @@ 'application/zpaq', 'audio/flac', ] -VERSION = '0.5.3' +VERSION = '0.6.0' TOOL_PATH = execute_shell_command('which patool').strip() diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.a b/fact_extractor/plugins/unpacking/patool/test/data/test.a new file mode 100644 index 00000000..059deb11 --- /dev/null +++ b/fact_extractor/plugins/unpacking/patool/test/data/test.a @@ -0,0 +1,3 @@ +! +test.data/ 0 0 0 644 128 ` +d34bb4669ac6e2b15b07f2489a22c289974a3dcfddeec68fc003d0109186e34dbc4c419546211e534c4be6ae657397959ae8b4a5e2c095c5bba3e0bca38f0b51 \ No newline at end of file diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.arc b/fact_extractor/plugins/unpacking/patool/test/data/test.arc new file mode 100644 index 00000000..5e870a7c Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.arc differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.bz2 b/fact_extractor/plugins/unpacking/patool/test/data/test.bz2 new file mode 100644 index 00000000..9120d425 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.bz2 differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.cpio b/fact_extractor/plugins/unpacking/patool/test/data/test.cpio new file mode 100644 index 00000000..2a09192e Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.cpio differ diff --git 
a/fact_extractor/plugins/unpacking/patool/test/data/test.gz b/fact_extractor/plugins/unpacking/patool/test/data/test.gz new file mode 100644 index 00000000..ca11f907 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.gz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.jar b/fact_extractor/plugins/unpacking/patool/test/data/test.jar new file mode 100644 index 00000000..1123f329 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.jar differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.lha b/fact_extractor/plugins/unpacking/patool/test/data/test.lha new file mode 100644 index 00000000..27dcb85a Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.lha differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.lrz b/fact_extractor/plugins/unpacking/patool/test/data/test.lrz new file mode 100644 index 00000000..f87a8a11 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.lrz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.lz b/fact_extractor/plugins/unpacking/patool/test/data/test.lz new file mode 100644 index 00000000..092153e4 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.lz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.lz4 b/fact_extractor/plugins/unpacking/patool/test/data/test.lz4 new file mode 100644 index 00000000..68458a0e Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.lz4 differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.lzo b/fact_extractor/plugins/unpacking/patool/test/data/test.lzo new file mode 100644 index 00000000..60564316 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.lzo differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.rz 
b/fact_extractor/plugins/unpacking/patool/test/data/test.rz new file mode 100644 index 00000000..61e90425 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.rz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.shar b/fact_extractor/plugins/unpacking/patool/test/data/test.shar new file mode 100644 index 00000000..16048bb4 --- /dev/null +++ b/fact_extractor/plugins/unpacking/patool/test/data/test.shar @@ -0,0 +1,227 @@ +#!/bin/sh +# This is a shell archive (produced by GNU sharutils 4.15.2). +# To extract the files from this archive, save it to some FILE, remove +# everything before the '#!/bin/sh' line above, then type 'sh FILE'. +# +lock_dir=_sh99321 +# Made on 2023-02-10 12:53 CET by . +# Source directory was '/foo/bar'. +# +# Existing files will *not* be overwritten, unless '-c' is specified. +# +# This shar contains: +# length mode name +# ------ ---------- ------------------------------------------ +# 20 -rw-r--r-- get_files_test/generic folder/test file 3_.txt +# 62 -rw-r--r-- get_files_test/testfile1 +# 28 -rw-r--r-- get_files_test/testfile2 +# +MD5SUM=${MD5SUM-md5sum} +f=`${MD5SUM} --version | egrep '^md5sum .*(core|text)utils'` +test -n "${f}" && md5check=true || md5check=false +${md5check} || \ + echo 'Note: not verifying md5sums. Consider installing GNU coreutils.' 
+if test "X$1" = "X-c" +then keep_file='' +else keep_file=true +fi +echo=echo +save_IFS="${IFS}" +IFS="${IFS}:" +gettext_dir= +locale_dir= +set_echo=false + +for dir in $PATH +do + if test -f $dir/gettext \ + && ($dir/gettext --version >/dev/null 2>&1) + then + case `$dir/gettext --version 2>&1 | sed 1q` in + *GNU*) gettext_dir=$dir + set_echo=true + break ;; + esac + fi +done + +if ${set_echo} +then + set_echo=false + for dir in $PATH + do + if test -f $dir/shar \ + && ($dir/shar --print-text-domain-dir >/dev/null 2>&1) + then + locale_dir=`$dir/shar --print-text-domain-dir` + set_echo=true + break + fi + done + + if ${set_echo} + then + TEXTDOMAINDIR=$locale_dir + export TEXTDOMAINDIR + TEXTDOMAIN=sharutils + export TEXTDOMAIN + echo="$gettext_dir/gettext -s" + fi +fi +IFS="$save_IFS" +if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null +then if (echo -n test; echo 1,2,3) | grep n >/dev/null + then shar_n= shar_c=' +' + else shar_n=-n shar_c= ; fi +else shar_n= shar_c='\c' ; fi +f=shar-touch.$$ +st1=200112312359.59 +st2=123123592001.59 +st2tr=123123592001.5 # old SysV 14-char limit +st3=1231235901 + +if touch -am -t ${st1} ${f} >/dev/null 2>&1 && \ + test ! -f ${st1} && test -f ${f}; then + shar_touch='touch -am -t $1$2$3$4$5$6.$7 "$8"' + +elif touch -am ${st2} ${f} >/dev/null 2>&1 && \ + test ! -f ${st2} && test ! -f ${st2tr} && test -f ${f}; then + shar_touch='touch -am $3$4$5$6$1$2.$7 "$8"' + +elif touch -am ${st3} ${f} >/dev/null 2>&1 && \ + test ! -f ${st3} && test -f ${f}; then + shar_touch='touch -am $3$4$5$6$2 "$8"' + +else + shar_touch=: + echo + ${echo} 'WARNING: not restoring timestamps. Consider getting and +installing GNU '\''touch'\'', distributed in GNU coreutils...' + echo +fi +rm -f ${st1} ${st2} ${st2tr} ${st3} ${f} +# +if test ! -d ${lock_dir} ; then : +else ${echo} "lock directory ${lock_dir} exists" + exit 1 +fi +if mkdir ${lock_dir} +then ${echo} "x - created lock directory ${lock_dir}." 
+else ${echo} "x - failed to create lock directory ${lock_dir}." + exit 1 +fi +# ============= get_files_test/generic folder/test file 3_.txt ============== +if test ! -d 'get_files_test'; then + mkdir 'get_files_test' +if test $? -eq 0 +then ${echo} "x - created directory get_files_test." +else ${echo} "x - failed to create directory get_files_test." + exit 1 +fi +fi +if test ! -d 'get_files_test/generic folder'; then + mkdir 'get_files_test/generic folder' +if test $? -eq 0 +then ${echo} "x - created directory get_files_test/generic folder." +else ${echo} "x - failed to create directory get_files_test/generic folder." + exit 1 +fi +fi +if test -n "${keep_file}" && test -f 'get_files_test/generic folder/test file 3_.txt' +then +${echo} "x - SKIPPING get_files_test/generic folder/test file 3_.txt (file already exists)" + +else +${echo} "x - extracting get_files_test/generic folder/test file 3_.txt (Text)" + sed 's/^X//' << 'SHAR_EOF' | uudecode && +begin 600 get_files_test/generic folder/test file 3_.txt +45&AE('1H:7)D('1E/dev/null 2>&1 || ${echo} 'get_files_test/generic folder/test file 3_.txt': 'MD5 check failed' + ) << \SHAR_EOF +dfb79b49698fbae3d6eaca69b211b79d get_files_test/generic folder/test file 3_.txt +SHAR_EOF + +else +test `LC_ALL=C wc -c < 'get_files_test/generic folder/test file 3_.txt'` -ne 20 && \ + ${echo} "restoration warning: size of 'get_files_test/generic folder/test file 3_.txt' is not 20" + fi +fi +# ============= get_files_test/testfile1 ============== +if test -n "${keep_file}" && test -f 'get_files_test/testfile1' +then +${echo} "x - SKIPPING get_files_test/testfile1 (file already exists)" + +else +${echo} "x - extracting get_files_test/testfile1 (text)" + sed 's/^X//' << 'SHAR_EOF' > 'get_files_test/testfile1' && +test file: +content: MyTestRule 1.2.3 +Version: Program 0.0.0.0 +SHAR_EOF + (set 20 15 09 15 12 20 40 'get_files_test/testfile1' + eval "${shar_touch}") && \ + chmod 0644 'get_files_test/testfile1' +if test $? 
-ne 0 +then ${echo} "restore of get_files_test/testfile1 failed" +fi + if ${md5check} + then ( + ${MD5SUM} -c >/dev/null 2>&1 || ${echo} 'get_files_test/testfile1': 'MD5 check failed' + ) << \SHAR_EOF +e802ca22f6cd2d9357cf3da1d191879e get_files_test/testfile1 +SHAR_EOF + +else +test `LC_ALL=C wc -c < 'get_files_test/testfile1'` -ne 62 && \ + ${echo} "restoration warning: size of 'get_files_test/testfile1' is not 62" + fi +fi +# ============= get_files_test/testfile2 ============== +if test -n "${keep_file}" && test -f 'get_files_test/testfile2' +then +${echo} "x - SKIPPING get_files_test/testfile2 (file already exists)" + +else +${echo} "x - extracting get_files_test/testfile2 (Text)" + sed 's/^X//' << 'SHAR_EOF' | uudecode && +begin 600 get_files_test/testfile2 +<5&AI/dev/null 2>&1 || ${echo} 'get_files_test/testfile2': 'MD5 check failed' + ) << \SHAR_EOF +08dd892caa120da5985fb0c0ec739295 get_files_test/testfile2 +SHAR_EOF + +else +test `LC_ALL=C wc -c < 'get_files_test/testfile2'` -ne 28 && \ + ${echo} "restoration warning: size of 'get_files_test/testfile2' is not 28" + fi +fi +if rm -fr ${lock_dir} +then ${echo} "x - removed lock directory ${lock_dir}." +else ${echo} "x - failed to remove lock directory ${lock_dir}." 
+ exit 1 +fi +exit 0 diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.tar.Z b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.Z new file mode 100644 index 00000000..6a6fddd3 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.Z differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.tar.gz b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.gz new file mode 100644 index 00000000..d71a1158 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.gz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.tar.lz b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.lz new file mode 100644 index 00000000..dc0a5616 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.lz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.tar.xz b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.xz new file mode 100644 index 00000000..1c9b049a Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.tar.xz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.xz b/fact_extractor/plugins/unpacking/patool/test/data/test.xz new file mode 100644 index 00000000..b4c81db9 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.xz differ diff --git a/fact_extractor/plugins/unpacking/patool/test/data/test.zpaq b/fact_extractor/plugins/unpacking/patool/test/data/test.zpaq new file mode 100644 index 00000000..41c8fa28 Binary files /dev/null and b/fact_extractor/plugins/unpacking/patool/test/data/test.zpaq differ diff --git a/fact_extractor/plugins/unpacking/patool/test/test_plugin_patool.py b/fact_extractor/plugins/unpacking/patool/test/test_plugin_patool.py index 65812388..5600db7e 100644 --- a/fact_extractor/plugins/unpacking/patool/test/test_plugin_patool.py +++ 
b/fact_extractor/plugins/unpacking/patool/test/test_plugin_patool.py @@ -1,34 +1,80 @@ -import os +from pathlib import Path +from tempfile import TemporaryDirectory import pytest +from helperFunctions.hash import get_sha256 from test.unit.unpacker.test_unpacker import TestUnpackerBase -TEST_DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data') +TEST_DATA_DIR = Path(__file__).parent / 'data' class TestPaToolUnpacker(TestUnpackerBase): def test_unpacker_selection_generic(self): self.check_unpacker_selection('application/vnd.ms-cab-compressed', 'PaTool') + self.check_unpacker_selection('application/x-lzh-compressed', 'PaTool') @pytest.mark.parametrize( 'in_file, ignore', [ ('test.cab', None), + ('test.cpio', None), + ('test.jar', {'MANIFEST.MF'}), + ('test.lha', None), + ('test.shar', None), + ('test.tar.Z', None), ('test.tar.bz2', None), + ('test.tar.gz', None), + ('test.tar.lz', None), + ('test.tar.xz', None), ('test.tar.zip', None), + ('test.zoo', None), + ('test.zpaq', None), ], ) - def test_extraction(self, in_file, ignore): + def test_archive_extraction(self, in_file, ignore): self.check_unpacking_of_standard_unpack_set( - os.path.join(TEST_DATA_DIR, in_file), + TEST_DATA_DIR / in_file, additional_prefix_folder='get_files_test', output=False, ignore=ignore, ) + @pytest.mark.parametrize( + 'in_file', + [ + 'test.a', + 'test.bz2', + 'test.gz', + 'test.lrz', + 'test.lz', + 'test.lz4', + 'test.lzo', + 'test.rz', + 'test.xz', + ], + ) + def test_file_extraction(self, in_file): + files, meta = self.unpacker.extract_files_from_file(TEST_DATA_DIR / in_file, self.tmp_dir.name) + assert len(files) == 1, f'unpacking of {in_file} unsuccessful: {meta}' + assert meta['plugin_used'] == 'PaTool' + assert get_sha256(Path(files[0]).read_bytes()).startswith('deadc0de') + + def test_extraction_arc(self): + ''' + special case arc: arguments (i.e. paths) and names of packed files must not be too long. 
+ Unfortunately, the name of the third test file as well as the path of the test folder are too long. + ''' + with TemporaryDirectory() as tmp_dir: + target_file = Path(tmp_dir) / 'test.arc' + target_file.write_bytes((TEST_DATA_DIR / 'test.arc').read_bytes()) + files, _ = self.unpacker.extract_files_from_file(target_file, self.tmp_dir.name) + assert len(files) == 2 + unpacked_files = sorted(Path(f).name for f in files) + assert unpacked_files == ['testfile1', 'testfile2'] + def test_extract_deb(self): - in_file = os.path.join(TEST_DATA_DIR, 'test.deb') - files, meta_data = self.unpacker.extract_files_from_file(in_file, self.tmp_dir.name) + test_file = TEST_DATA_DIR / 'test.deb' + files, meta_data = self.unpacker.extract_files_from_file(test_file, self.tmp_dir.name) assert len(files) == 3, f'file number incorrect: {meta_data}' assert 'extracted to' in meta_data['output'] diff --git a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py index 4fb7fcd8..87fd7753 100644 --- a/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/code/sevenz.py @@ -1,20 +1,27 @@ ''' This plugin uses 7z to extract several formats ''' -import os import logging +import os from common_helper_passwords import get_merged_password_set from common_helper_process import execute_shell_command + from helperFunctions.file_system import get_src_dir NAME = '7z' MIME_PATTERNS = [ # compressed archives - 'application/x-lzma', + 'application/rar', 'application/x-7z-compressed', - 'application/zip', + 'application/x-iso9660-image', + 'application/x-lzma', + 'application/x-rar', + 'application/x-rpm', + 'application/x-vhd', + 'application/x-vhdx', 'application/x-zip-compressed', + 'application/zip', # file systems 'filesystem/cramfs', 'filesystem/ext2', @@ -24,10 +31,13 @@ 'filesystem/hfs', 'filesystem/ntfs', ] -VERSION = '0.8' +VERSION = '0.8.2' UNPACKER_EXECUTABLE = '7z' -PW_LIST = 
get_merged_password_set(os.path.join(get_src_dir(), 'unpacker/passwords')) + +# Empty password must be first in list to correctly detect if archive has no password +PW_LIST = [""] +PW_LIST.extend(get_merged_password_set(os.path.join(get_src_dir(), 'unpacker/passwords'))) def unpack_function(file_path, tmp_dir): @@ -42,11 +52,11 @@ def unpack_function(file_path, tmp_dir): meta['output'] = output if 'Wrong password' not in output: - if 'AES' in output: + if password: meta['password'] = password break - # Inform the user if not correct password was found + # Inform the user if no correct password was found if 'Wrong password' in meta['output']: logging.warning(f'Password for {file_path} not found in fact_extractor/unpacker/passwords directory') diff --git a/fact_extractor/plugins/unpacking/sevenz/test/data/test.iso b/fact_extractor/plugins/unpacking/sevenz/test/data/test.iso new file mode 100644 index 00000000..e87023d2 Binary files /dev/null and b/fact_extractor/plugins/unpacking/sevenz/test/data/test.iso differ diff --git a/fact_extractor/plugins/unpacking/sevenz/test/data/test.rar b/fact_extractor/plugins/unpacking/sevenz/test/data/test.rar new file mode 100644 index 00000000..451855ef Binary files /dev/null and b/fact_extractor/plugins/unpacking/sevenz/test/data/test.rar differ diff --git a/fact_extractor/plugins/unpacking/sevenz/test/data/test_password.zip b/fact_extractor/plugins/unpacking/sevenz/test/data/test_password.zip new file mode 100644 index 00000000..d48705e0 Binary files /dev/null and b/fact_extractor/plugins/unpacking/sevenz/test/data/test_password.zip differ diff --git a/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py b/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py index a776e47d..c1b391ae 100644 --- a/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py +++ b/fact_extractor/plugins/unpacking/sevenz/test/test_plugin_sevenz.py @@ -18,15 +18,18 @@ def test_unpacker_selection_generic(self): 
'test_file, prefix', [ ('test.7z', 'get_files_test'), + ('test.rar', 'get_files_test'), ('cramfs.img', ''), - ], + ('test.iso', ''), + ] ) def test_extraction(self, test_file, prefix): - self.check_unpacking_of_standard_unpack_set( + meta = self.check_unpacking_of_standard_unpack_set( TEST_DATA_DIR / test_file, additional_prefix_folder=prefix, output=True, ) + assert 'password' not in meta, 'password incorrectly set' @pytest.mark.parametrize( 'test_file, prefix, ignore', @@ -37,16 +40,24 @@ def test_extraction(self, test_file, prefix): ('ext2.img.xz', 'get_files_test', {'Journal'}), ('ext3.img.xz', 'get_files_test', {'Journal'}), ('ext4.img.xz', 'get_files_test', {'Journal'}), - ], + ] ) def test_extraction_compressed(self, test_file, prefix, ignore): with decompress_test_file(TEST_DATA_DIR / test_file) as file: - self.check_unpacking_of_standard_unpack_set( + meta = self.check_unpacking_of_standard_unpack_set( file, output=True, additional_prefix_folder=prefix, ignore=ignore ) + assert 'password' not in meta, 'password incorrectly set' - def test_extraction_password(self): + @pytest.mark.parametrize( + 'test_file', + [ + 'test_password.7z', + 'test_password.zip' + ] + ) + def test_extraction_password(self, test_file): meta = self.check_unpacking_of_standard_unpack_set( - TEST_DATA_DIR / 'test_password.7z', additional_prefix_folder='get_files_test', output=True + TEST_DATA_DIR / test_file, additional_prefix_folder='get_files_test', output=True ) assert meta['password'] == 'test', 'password info not set' diff --git a/fact_extractor/plugins/unpacking/squashFS/code/squash_fs.py b/fact_extractor/plugins/unpacking/squashFS/code/squash_fs.py index 0976fad1..c5489d99 100755 --- a/fact_extractor/plugins/unpacking/squashFS/code/squash_fs.py +++ b/fact_extractor/plugins/unpacking/squashFS/code/squash_fs.py @@ -30,7 +30,10 @@ def unpack_function(file_path, tmp_dir): ''' unpack_result = {} for unpacker, parameter in SQUASH_UNPACKER: - output = execute_shell_command(f'fakeroot 
{unpacker} {parameter} -d {tmp_dir}/fact_extracted {file_path}') + # We need to force here since "-dest" does not allow existing directories + output = execute_shell_command( + f"fakeroot {unpacker} {parameter} -dest {tmp_dir} -force {file_path}", + ) if _unpack_success(tmp_dir): unpack_result['unpacking_tool'] = unpacker.name unpack_result['output'] = output diff --git a/fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py b/fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py index 7fc28d3e..ab0bf5c8 100755 --- a/fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py +++ b/fact_extractor/plugins/unpacking/squashFS/test/test_plugin_squashfs.py @@ -39,5 +39,5 @@ def test_unpacker_selection_generic(self): def test_extraction_sqfs(self): self.check_unpacking_of_standard_unpack_set( - TEST_DATA_DIR / 'sqfs.img', additional_prefix_folder='fact_extracted' + TEST_DATA_DIR / 'sqfs.img', ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..decee8b0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[tool.black] +line-length = 120 +skip-string-normalization = true +target-version = ['py38'] + +[tool.isort] +line_length=120 +default_section = "THIRDPARTY" +known_first_party = ["analysis", "compare", "helperFunctions", "install", "intercom", "objects", "plugins", "scheduler", + "statistic", "storage", "test", "unpacker", "version", "web_interface"] +known_third_party = "docker" +profile = "black" diff --git a/requirements-common.txt b/requirements-common.txt new file mode 100644 index 00000000..2f593086 --- /dev/null +++ b/requirements-common.txt @@ -0,0 +1,6 @@ +flask~=3.0.3 +flask-restful~=0.3.10 +gunicorn~=21.2.0 +pytest<8.1.1 +pytest-cov~=5.0.0 +testresources~=2.0.1 diff --git a/requirements-unpackers.txt b/requirements-unpackers.txt new file mode 100644 index 00000000..666752dc --- /dev/null +++ b/requirements-unpackers.txt @@ -0,0 +1,33 @@ +# FixMe: deprecated +pluginbase~=1.0.1 
+git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git +git+https://github.com/fkie-cad/fact_helper_file.git +patool~=2.2.0 +# jffs2: jefferson + deps +git+https://github.com/sviehb/jefferson.git@v0.4.1 +cstruct==2.1 +python-lzo==1.14 +# ubi +ubi-reader~=0.8.9 +# dji / dlink_shrs +pycryptodome~=3.20.0 +# hp / raw +git+https://github.com/fkie-cad/common_helper_extraction.git +# intel_hex +intelhex~=2.3.0 +# linuxkernel +lz4~=4.3.3 +# FixMe: this also depends on a package called python-lzo (name conflict with python-lzo from jefferson) +git+https://github.com/marin-m/vmlinux-to-elf +# mikrotik +npkPy~=2021.10.22.15.58 +# sevenz +git+https://github.com/fkie-cad/common_helper_passwords.git +# srec +bincopy~=20.0.0 +# uboot +extract-dtb~=1.2.3 +# uefi +uefi-firmware~=1.11 +# unblob +unblob