diff --git a/fact_extractor/__init__.py b/fact_extractor/__init__.py new file mode 100644 index 00000000..cff4742f --- /dev/null +++ b/fact_extractor/__init__.py @@ -0,0 +1,6 @@ +import os +import pathlib as pl + +firmware_magic_path = pl.Path(__file__).parent.parent / "bin" / "firmware" + +os.environ["MAGIC"] = f'/usr/lib/file/magic.mgc:{firmware_magic_path}' diff --git a/fact_extractor/helperFunctions/statistics.py b/fact_extractor/helperFunctions/statistics.py index 1333e70d..61443c40 100644 --- a/fact_extractor/helperFunctions/statistics.py +++ b/fact_extractor/helperFunctions/statistics.py @@ -1,3 +1,4 @@ +import magic from configparser import ConfigParser from contextlib import suppress from pathlib import Path @@ -7,7 +8,6 @@ from common_helper_unpacking_classifier import ( avg_entropy, get_binary_size_without_padding, is_compressed ) -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config @@ -28,7 +28,7 @@ def get_unpack_status(file_path: str, binary: bytes, extracted_files: List[Path] meta_data['entropy'] = avg_entropy(binary) if not extracted_files and meta_data.get('number_of_excluded_files', 0) == 0: - if get_file_type_from_path(file_path)['mime'] in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ + if magic.from_file(file_path, mime=True) in read_list_from_config(config, 'ExpertSettings', 'compressed_file_types')\ or not is_compressed(binary, compress_entropy_threshold=config.getfloat('ExpertSettings', 'unpack_threshold'), classifier=avg_entropy): meta_data['summary'] = ['unpacked'] else: diff --git a/fact_extractor/install/common.py b/fact_extractor/install/common.py index 7bd026ee..6d4c0fff 100644 --- a/fact_extractor/install/common.py +++ b/fact_extractor/install/common.py @@ -1,4 +1,5 @@ import logging +import subprocess as sp import os from contextlib import suppress from pathlib import Path @@ -49,6 +50,25 @@ def main(distribution): with suppress(FileExistsError): 
os.mkdir('../bin') + sp.run( + [ + "wget", + "--output-document", + "../bin/firmware.xz", + "https://github.com/fkie-cad/firmware-magic-database/releases/download/v0.2.1/firmware.xz", + ], + check=True, + ) + # Abort the install if decompression fails, as is already done for the download. + sp.run( + [ + "unxz", + "--force", + "../bin/firmware.xz", + ], + check=True, + ) + config = load_config('main.cfg') data_folder = config.get('unpack', 'data_folder') os.makedirs(str(Path(data_folder, 'files')), exist_ok=True) diff --git a/fact_extractor/install/pre_install.sh b/fact_extractor/install/pre_install.sh index c6035c55..d5b41ce0 100755 --- a/fact_extractor/install/pre_install.sh +++ b/fact_extractor/install/pre_install.sh @@ -4,7 +4,7 @@ echo "Install Pre-Install Requirements" (apt-get update && apt-get install sudo) || true sudo apt-get update -sudo apt-get -y install git apt-transport-https ca-certificates curl software-properties-common wget libmagic-dev +sudo apt-get -y install git apt-transport-https ca-certificates curl software-properties-common wget libmagic-dev xz-utils IS_VENV=$(python3 -c 'import sys; print(sys.exec_prefix!=sys.base_prefix)') if [[ $IS_VENV == "False" ]] diff --git a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py index a11b7eee..9f21867d 100644 --- a/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py +++ b/fact_extractor/plugins/unpacking/generic_carver/code/generic_carver.py @@ -3,13 +3,13 @@ ''' from __future__ import annotations +import magic import logging import re import shutil from pathlib import Path from common_helper_process import execute_shell_command -from fact_helper_file import get_file_type_from_path NAME = 'generic_carver' MIME_PATTERNS = ['generic/carver'] @@ -45,7 +45,7 @@ def remove_false_positive_archives(self) -> str: for file_path in self.unpack_directory.glob('**/*'): if not file_path.is_file(): continue - file_type = get_file_type_from_path(file_path)['mime'] + file_type = 
magic.from_file(file_path, mime=True) if file_type == 'application/x-tar' or self._is_possible_tar(file_type, file_path): self._remove_invalid_archives(file_path, 'tar -tvf {}', 'does not look like a tar archive') diff --git a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py index f35fd9f4..fdc213b0 100644 --- a/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py +++ b/fact_extractor/plugins/unpacking/generic_fs/code/generic_fs.py @@ -2,13 +2,12 @@ This plugin mounts filesystem images and extracts their content ''' import re +import magic from shlex import split from subprocess import run, PIPE, STDOUT from tempfile import TemporaryDirectory from time import sleep -from fact_helper_file import get_file_type_from_path - NAME = 'genericFS' MIME_PATTERNS = [ 'filesystem/btrfs', 'filesystem/dosmbr', 'filesystem/f2fs', 'filesystem/jfs', 'filesystem/minix', @@ -28,7 +27,7 @@ def unpack_function(file_path, tmp_dir): - mime_type = get_file_type_from_path(file_path)['mime'] + mime_type = magic.from_file(file_path, mime=True) if mime_type == 'filesystem/dosmbr': output = _mount_from_boot_record(file_path, tmp_dir) else: diff --git a/fact_extractor/test/data/ros_header b/fact_extractor/test/data/ros_header new file mode 100644 index 00000000..286fc793 Binary files /dev/null and b/fact_extractor/test/data/ros_header differ diff --git a/fact_extractor/test/unit/test_mime.py b/fact_extractor/test/unit/test_mime.py new file mode 100644 index 00000000..824206e3 --- /dev/null +++ b/fact_extractor/test/unit/test_mime.py @@ -0,0 +1,9 @@ +import magic +from helperFunctions.file_system import get_test_data_dir + +def test_magic(): + # Ensures that all submodules of the extractor use the custom mime types + import fact_extractor + assert magic.from_file(f"{get_test_data_dir()}/ros_header", mime=True) == "firmware/ros", "firmware-magic-database is not loaded" + + assert 
magic.from_file(f"{get_test_data_dir()}/container/test.zip", mime=True) == "application/zip" diff --git a/fact_extractor/unpacker/unpackBase.py b/fact_extractor/unpacker/unpackBase.py index fb0d629f..ea5c011e 100644 --- a/fact_extractor/unpacker/unpackBase.py +++ b/fact_extractor/unpacker/unpackBase.py @@ -4,9 +4,9 @@ from time import time import fnmatch from typing import Callable, Dict, List, Tuple +import magic from common_helper_files import get_files_in_dir -from fact_helper_file import get_file_type_from_path from helperFunctions.config import read_list_from_config from helperFunctions.plugin import import_plugins @@ -50,7 +50,7 @@ def get_unpacker(self, mime_type: str): return self.unpacker_plugins['generic/carver'] def extract_files_from_file(self, file_path: str, tmp_dir) -> Tuple[List, Dict]: - current_unpacker = self.get_unpacker(get_file_type_from_path(file_path)['mime']) + current_unpacker = self.get_unpacker(magic.from_file(file_path, mime=True)) return self._extract_files_from_file_using_specific_unpacker(file_path, tmp_dir, current_unpacker) def unpacking_fallback(self, file_path, tmp_dir, old_meta, fallback_plugin_mime) -> Tuple[List, Dict]: diff --git a/requirements-unpackers.txt b/requirements-unpackers.txt index 85a3b75d..8956be14 100644 --- a/requirements-unpackers.txt +++ b/requirements-unpackers.txt @@ -1,7 +1,7 @@ # FixMe: deprecated pluginbase~=1.0.1 git+https://github.com/fkie-cad/common_helper_unpacking_classifier.git -git+https://github.com/fkie-cad/fact_helper_file.git +python-magic patool~=2.2.0 # jffs2: jefferson + deps git+https://github.com/sviehb/jefferson.git@v0.4.1