From 395e7d33c9c68898dc84d4a2b1186fa1a2d8ec0d Mon Sep 17 00:00:00 2001 From: John Lugton Date: Tue, 24 Sep 2024 12:39:22 +0100 Subject: [PATCH 1/4] Move some class fields to constants/locals --- varc_core/systems/linux.py | 41 +++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/varc_core/systems/linux.py b/varc_core/systems/linux.py index f143956..037fdc7 100644 --- a/varc_core/systems/linux.py +++ b/varc_core/systems/linux.py @@ -18,6 +18,22 @@ class IOVec(ctypes.Structure): ("iov_len", ctypes.c_size_t) ] + +_process_vm_readv = ctypes.CDLL("libc.so.6").process_vm_readv +_process_vm_readv.args = [ # type: ignore + ctypes.c_int, + ctypes.POINTER(IOVec), + ctypes.c_ulong, + ctypes.POINTER(IOVec), + ctypes.c_ulong, + ctypes.c_ulong +] +_process_vm_readv.restype = ctypes.c_ssize_t + + +_MAX_VIRTUAL_PAGE_CHUNK = 256 * 1000**2 # max number of megabytes that will be read at a time + + class LinuxSystem(BaseSystem): def __init__( @@ -29,23 +45,11 @@ def __init__( **kwargs: Any ) -> None: super().__init__(include_memory=include_memory, include_open=include_open, extract_dumps=extract_dumps, yara_file=yara_file, **kwargs) - self.libc = ctypes.CDLL("libc.so.6") - self.process_vm_readv = self.libc.process_vm_readv - self.process_vm_readv.args = [ # type: ignore - ctypes.c_int, - ctypes.POINTER(IOVec), - ctypes.c_ulong, - ctypes.POINTER(IOVec), - ctypes.c_ulong, - ctypes.c_ulong - ] - self.process_vm_readv.restype = ctypes.c_ssize_t if self.include_memory: - self._MAX_VIRTUAL_PAGE_CHUNK = 256 * 1000**2 # set max number of megabytes that will be read at a time - self.own_pid = getpid() if self.yara_file: self.yara_scan() self.dump_processes() + if self.extract_dumps: from varc_core.utils import dumpfile_extraction dumpfile_extraction.extract_dumps(Path(self.output_path)) @@ -90,7 +94,7 @@ def read_bytes(self, pid: int, address: int, byte: int) -> Optional[bytes]: io_dst = IOVec(ctypes.cast(ctypes.byref(buff), ctypes.c_void_p), byte) io_src = IOVec(ctypes.c_void_p(address), byte) - linux_syscall = self.process_vm_readv(pid, ctypes.byref(io_dst), 1, ctypes.byref(io_src), 1, 0) + linux_syscall = _process_vm_readv(pid, ctypes.byref(io_dst), 1, ctypes.byref(io_src), 1, 0) if linux_syscall == -1: return None @@ -100,12 +104,13 @@ def read_bytes(self, pid: int, address: int, byte: int) -> Optional[bytes]: def dump_processes(self) -> None: """Dumps all processes to temp files, adds temp file to output archive then removes the temp file""" archive_out = self.output_path + own_pid = getpid() with zipfile.ZipFile(archive_out, "a", compression=zipfile.ZIP_DEFLATED) as zip_file: try: for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"): # If scanning with YARA, only dump processes if they triggered a rule if self.yara_hit_pids: - if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == self.own_pid: + if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == own_pid: continue pid = proc["Process ID"] p_name = proc["Name"] @@ -117,14 +122,14 @@ def dump_processes(self) -> None: for map in maps: page_start = map[0] page_len = map[1] - map[0] - if page_len > self._MAX_VIRTUAL_PAGE_CHUNK: - sub_chunk_count, final_chunk_size = divmod(page_len, self._MAX_VIRTUAL_PAGE_CHUNK) + if page_len > _MAX_VIRTUAL_PAGE_CHUNK: + sub_chunk_count, final_chunk_size = divmod(page_len, _MAX_VIRTUAL_PAGE_CHUNK) page_len = int(page_len / sub_chunk_count) for sc in range(0, sub_chunk_count): mem_page_content = self.read_bytes(pid, page_start, page_len) if mem_page_content: tmpfile.write(mem_page_content) - page_start = page_start + self._MAX_VIRTUAL_PAGE_CHUNK + page_start = page_start + _MAX_VIRTUAL_PAGE_CHUNK mem_page_content = self.read_bytes(pid, page_start, final_chunk_size) if mem_page_content: tmpfile.write(mem_page_content) From 0e49eff80db10371cc6a12767f90e3da44ab0e4a Mon Sep 17 00:00:00 2001 From: John Lugton Date: Tue, 24 Sep 2024 12:43:47 +0100 Subject: [PATCH 2/4] Dedup yara logic between windows/linux --- varc_core/systems/base_system.py | 12 ++++++++++++ varc_core/systems/linux.py | 18 ------------------ varc_core/systems/windows.py | 21 +-------------------- 3 files changed, 13 insertions(+), 38 deletions(-) diff --git a/varc_core/systems/base_system.py b/varc_core/systems/base_system.py index a9b7a92..cafe0b5 100644 --- a/varc_core/systems/base_system.py +++ b/varc_core/systems/base_system.py @@ -12,6 +12,7 @@ import logging import os import os.path +from pathlib import Path import socket import tarfile import time @@ -113,6 +114,15 @@ def __init__( if self.yara_file and not self.include_memory and _YARA_AVAILABLE: logging.info("YARA hits will be recorded only since include_memory is not selected.") + if self.include_memory: + if self.yara_file: + self.yara_scan() + self.dump_processes() + + if self.extract_dumps: + from varc_core.utils import dumpfile_extraction + dumpfile_extraction.extract_dumps(Path(self.output_path)) + def get_network(self) -> List[str]: """Get active network connections @@ -381,3 +391,5 @@ def yara_hit_callback(hit: dict) -> Any: else: logging.info("No YARA rules were triggered. Nothing will be written to the output archive.") + def dump_processes(self) -> None: + raise NotImplementedError() diff --git a/varc_core/systems/linux.py b/varc_core/systems/linux.py index 037fdc7..ebddf7c 100644 --- a/varc_core/systems/linux.py +++ b/varc_core/systems/linux.py @@ -35,24 +35,6 @@ class IOVec(ctypes.Structure): class LinuxSystem(BaseSystem): - - def __init__( - self, - include_memory: bool, - include_open: bool, - extract_dumps: bool, - yara_file: Optional[str], - **kwargs: Any - ) -> None: - super().__init__(include_memory=include_memory, include_open=include_open, extract_dumps=extract_dumps, yara_file=yara_file, **kwargs) - if self.include_memory: - if self.yara_file: - self.yara_scan() - self.dump_processes() - - if self.extract_dumps: - from varc_core.utils import dumpfile_extraction - dumpfile_extraction.extract_dumps(Path(self.output_path)) def parse_mem_map(self, pid: int, p_name: str) -> List[Tuple[int, int]]: """Returns a list of (start address, end address) tuples of the regions of process memory that are mapped diff --git a/varc_core/systems/windows.py b/varc_core/systems/windows.py index a858a35..5cbd455 100644 --- a/varc_core/systems/windows.py +++ b/varc_core/systems/windows.py @@ -15,27 +15,8 @@ import pymem -class WindowsSystem(BaseSystem): - """ - """ - - def __init__( - self, - include_memory: bool, - include_open: bool, - extract_dumps: bool, - yara_file: Optional[str], - **kwargs: Any - ) -> None: - super().__init__(include_memory=include_memory, include_open=include_open, extract_dumps=extract_dumps, yara_file=yara_file, **kwargs) - if self.include_memory: - if self.yara_file: - self.yara_scan() - self.dump_processes() - if self.extract_dumps: - from varc_core.utils import dumpfile_extraction - dumpfile_extraction.extract_dumps(Path(self.output_path)) +class WindowsSystem(BaseSystem): def read_process(self, handle: int, address: int) -> Tuple[Optional[bytes], int]: """ Read a process. Based on pymems pattern module From 3c0e91d498f597185acfcc7b3a51ad63e4c86dce Mon Sep 17 00:00:00 2001 From: John Lugton Date: Tue, 24 Sep 2024 13:33:27 +0100 Subject: [PATCH 3/4] Fix bug where we assume output_file is zip --- varc_core/systems/base_system.py | 76 +++++++++++++++--------------- varc_core/systems/linux.py | 80 ++++++++++++++++---------------- varc_core/systems/windows.py | 22 ++++----- 3 files changed, 88 insertions(+), 90 deletions(-) diff --git a/varc_core/systems/base_system.py b/varc_core/systems/base_system.py index cafe0b5..643f0d6 100644 --- a/varc_core/systems/base_system.py +++ b/varc_core/systems/base_system.py @@ -99,8 +99,6 @@ def __init__( raise ValueError( "Only one of Process name or Process ID (PID) can be used. Please re-run using one or the other.") - self.acquire_volatile() - if self.yara_file: if not _YARA_AVAILABLE: logging.error("YARA not available. yara-python is required and is either not installed or not functioning correctly.") @@ -113,15 +111,21 @@ def __init__( if self.yara_file and not self.include_memory and _YARA_AVAILABLE: logging.info("YARA hits will be recorded only since include_memory is not selected.") + + with self._open_output() as output: + + self.acquire_volatile(output) - if self.include_memory: - if self.yara_file: - self.yara_scan() - self.dump_processes() + if self.include_memory: + if self.yara_file: + self.yara_scan(output) + self.dump_processes(output) - if self.extract_dumps: - from varc_core.utils import dumpfile_extraction - dumpfile_extraction.extract_dumps(Path(self.output_path)) + if self.extract_dumps: + if not self.output_path.endswith('.zip'): + logging.warning('extract_dumps only supported with zip output') + from varc_core.utils import dumpfile_extraction + dumpfile_extraction.extract_dumps(Path(self.output_path)) def get_network(self) -> List[str]: """Get active network connections @@ -311,7 +315,7 @@ def take_screenshot(self) -> Optional[bytes]: logging.error("Unable to take screenshot") return None - def acquire_volatile(self) -> None: + def acquire_volatile(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None: """Acquire volatile data into a zip file This is called by all OS's """ @@ -327,27 +331,26 @@ def acquire_volatile(self) -> None: else: screenshot_image = None - with self._open_output() as output_file: - if screenshot_image: - output_file.writestr(f"{self.get_machine_name()}-{self.timestamp}.png", screenshot_image) - for key, value in table_data.items(): - output_file.writestr(f"{key}.json", value.encode()) - if self.network_log: - logging.info("Adding Netstat Data") - output_file.writestr("netstat.log", "\r\n".join(self.network_log).encode()) - if self.include_open and self.dumped_files: - for file_path in self.dumped_files: - logging.info(f"Adding open file {file_path}") - try: - if os.path.getsize(file_path) > _MAX_OPEN_FILE_SIZE: - logging.warning(f"Skipping file as too large {file_path}") - else: - try: - output_file.write(file_path, strip_drive(f"./collected_files/{file_path}")) - except PermissionError: - logging.warn(f"Permission denied copying {file_path}") - except FileNotFoundError: - logging.warning(f"Could not open {file_path} for reading") + if screenshot_image: + output_file.writestr(f"{self.get_machine_name()}-{self.timestamp}.png", screenshot_image) + for key, value in table_data.items(): + output_file.writestr(f"{key}.json", value.encode()) + if self.network_log: + logging.info("Adding Netstat Data") + output_file.writestr("netstat.log", "\r\n".join(self.network_log).encode()) + if self.include_open and self.dumped_files: + for file_path in self.dumped_files: + logging.info(f"Adding open file {file_path}") + try: + if os.path.getsize(file_path) > _MAX_OPEN_FILE_SIZE: + logging.warning(f"Skipping file as too large {file_path}") + else: + try: + output_file.write(file_path, strip_drive(f"./collected_files/{file_path}")) + except PermissionError: + logging.warn(f"Permission denied copying {file_path}") + except FileNotFoundError: + logging.warning(f"Could not open {file_path} for reading") def _open_output(self) -> Union[zipfile.ZipFile, _TarLz4Wrapper]: if self.output_path.endswith('.tar.lz4'): @@ -355,7 +358,7 @@ def _open_output(self) -> Union[zipfile.ZipFile, _TarLz4Wrapper]: else: return zipfile.ZipFile(self.output_path, 'a', compression=zipfile.ZIP_DEFLATED) - def yara_scan(self) -> None: + def yara_scan(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None: def yara_hit_callback(hit: dict) -> Any: self.yara_results.append(hit) if self.include_memory: @@ -367,7 +370,6 @@ def yara_hit_callback(hit: dict) -> Any: if not _YARA_AVAILABLE: return None - archive_out = self.output_path for proc in tqdm(self.process_info, desc="YARA scan progess", unit=" procs"): pid = proc["Process ID"] p_name = proc["Name"] @@ -385,11 +387,11 @@ def yara_hit_callback(hit: dict) -> Any: combined_yara_results = [] for yara_hit in self.yara_results: combined_yara_results.append(self.yara_hit_readable(yara_hit)) - with zipfile.ZipFile(archive_out, 'a', compression=zipfile.ZIP_DEFLATED) as zip_file: - zip_file.writestr("yara_results.json", self.dict_to_json(combined_yara_results)) - logging.info("YARA scan results written to yara_results.json in output archive.") + + output_file.writestr("yara_results.json", self.dict_to_json(combined_yara_results)) + logging.info("YARA scan results written to yara_results.json in output archive.") else: logging.info("No YARA rules were triggered. Nothing will be written to the output archive.") - def dump_processes(self) -> None: + def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None: raise NotImplementedError() diff --git a/varc_core/systems/linux.py b/varc_core/systems/linux.py index ebddf7c..3848e2f 100644 --- a/varc_core/systems/linux.py +++ b/varc_core/systems/linux.py @@ -5,10 +5,10 @@ from os import getpid, sep from pathlib import Path from tempfile import NamedTemporaryFile -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Tuple, Union from tqdm import tqdm -from varc_core.systems.base_system import BaseSystem +from varc_core.systems.base_system import _TarLz4Wrapper, BaseSystem # based on https://stackoverflow.com/questions/48897687/why-does-the-syscall-process-vm-readv-sets-errno-to-success and PymemLinux library @@ -83,51 +83,49 @@ def read_bytes(self, pid: int, address: int, byte: int) -> Optional[bytes]: return buff.raw - def dump_processes(self) -> None: + def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None: """Dumps all processes to temp files, adds temp file to output archive then removes the temp file""" - archive_out = self.output_path own_pid = getpid() - with zipfile.ZipFile(archive_out, "a", compression=zipfile.ZIP_DEFLATED) as zip_file: - try: - for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"): - # If scanning with YARA, only dump processes if they triggered a rule - if self.yara_hit_pids: - if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == own_pid: - continue - pid = proc["Process ID"] - p_name = proc["Name"] - maps = self.parse_mem_map(pid, p_name) - if not maps: + try: + for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"): + # If scanning with YARA, only dump processes if they triggered a rule + if self.yara_hit_pids: + if proc["Process ID"] not in self.yara_hit_pids or proc["Process ID"] == own_pid: continue - with NamedTemporaryFile(mode="w+b", buffering=0, delete=True) as tmpfile: - try: - for map in maps: - page_start = map[0] - page_len = map[1] - map[0] - if page_len > _MAX_VIRTUAL_PAGE_CHUNK: - sub_chunk_count, final_chunk_size = divmod(page_len, _MAX_VIRTUAL_PAGE_CHUNK) - page_len = int(page_len / sub_chunk_count) - for sc in range(0, sub_chunk_count): - mem_page_content = self.read_bytes(pid, page_start, page_len) - if mem_page_content: - tmpfile.write(mem_page_content) - page_start = page_start + _MAX_VIRTUAL_PAGE_CHUNK - mem_page_content = self.read_bytes(pid, page_start, final_chunk_size) - if mem_page_content: - tmpfile.write(mem_page_content) - else: + pid = proc["Process ID"] + p_name = proc["Name"] + maps = self.parse_mem_map(pid, p_name) + if not maps: + continue + with NamedTemporaryFile(mode="w+b", buffering=0, delete=True) as tmpfile: + try: + for map in maps: + page_start = map[0] + page_len = map[1] - map[0] + if page_len > _MAX_VIRTUAL_PAGE_CHUNK: + sub_chunk_count, final_chunk_size = divmod(page_len, _MAX_VIRTUAL_PAGE_CHUNK) + page_len = int(page_len / sub_chunk_count) + for sc in range(0, sub_chunk_count): mem_page_content = self.read_bytes(pid, page_start, page_len) if mem_page_content: tmpfile.write(mem_page_content) - zip_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem") - except PermissionError: - logging.warning(f"Permission denied opening process memory for {p_name} (pid {pid}). Cannot dump this process.") - continue - except OSError as oserror: - logging.warning(f"Error opening process memory page for {p_name} (pid {pid}). Error was {oserror}. Dump may be incomplete.") - pass - except MemoryError: - logging.warning("Exceeded available memory, skipping further memory collection") + page_start = page_start + _MAX_VIRTUAL_PAGE_CHUNK + mem_page_content = self.read_bytes(pid, page_start, final_chunk_size) + if mem_page_content: + tmpfile.write(mem_page_content) + else: + mem_page_content = self.read_bytes(pid, page_start, page_len) + if mem_page_content: + tmpfile.write(mem_page_content) + output_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem") + except PermissionError: + logging.warning(f"Permission denied opening process memory for {p_name} (pid {pid}). Cannot dump this process.") + continue + except OSError as oserror: + logging.warning(f"Error opening process memory page for {p_name} (pid {pid}). Error was {oserror}. Dump may be incomplete.") + pass + except MemoryError: + logging.warning("Exceeded available memory, skipping further memory collection") logging.info(f"Dumping processing has completed. Output file is located: {archive_out}") \ No newline at end of file diff --git a/varc_core/systems/windows.py b/varc_core/systems/windows.py index 5cbd455..43e35ff 100644 --- a/varc_core/systems/windows.py +++ b/varc_core/systems/windows.py @@ -5,10 +5,10 @@ from os import sep from pathlib import Path from sys import platform -from typing import Any, Optional, Tuple +from typing import Any, Optional, Tuple, Union from tqdm import tqdm -from varc_core.systems.base_system import BaseSystem +from varc_core.systems.base_system import _TarLz4Wrapper, BaseSystem if platform == "win32": # dont try to import on linux from sys import maxsize @@ -45,11 +45,10 @@ def read_process(self, handle: int, address: int) -> Tuple[Optional[bytes], int] logging.warning("Failed to read a memory page") return page_bytes, next_region - def dump_processes(self) -> None: + def dump_processes(self, output_file: Union[zipfile.ZipFile, _TarLz4Wrapper]) -> None: """ Based on pymem's 'Pattern' module """ - archive_out = self.output_path for proc in tqdm(self.process_info, desc="Process dump progess", unit=" procs"): # If scanning with YARA, only dump processes if they triggered a rule if self.yara_hit_pids: @@ -74,12 +73,11 @@ def dump_processes(self) -> None: # Dump all pages the process virtual address space next_region = 0 - with zipfile.ZipFile(archive_out, 'a', compression=zipfile.ZIP_DEFLATED) as zip_file: - with tempfile.NamedTemporaryFile(mode="w+b", buffering=0, delete=False) as tmpfile: - while next_region < user_space_limit: - proc_page_bytes, next_region = self.read_process(p.process_handle, next_region) - if proc_page_bytes: - tmpfile.write(proc_page_bytes) - zip_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem") - del_file(tmpfile.name) + with tempfile.NamedTemporaryFile(mode="w+b", buffering=0, delete=False) as tmpfile: + while next_region < user_space_limit: + proc_page_bytes, next_region = self.read_process(p.process_handle, next_region) + if proc_page_bytes: + tmpfile.write(proc_page_bytes) + output_file.write(tmpfile.name, f"process_dumps{sep}{p_name}_{pid}.mem") + del_file(tmpfile.name) logging.info(f"Dumping processing has completed. Output file is located: {archive_out}") From 75635602a23b71a0761c40b8c422b140f042aba1 Mon Sep 17 00:00:00 2001 From: John Lugton Date: Tue, 24 Sep 2024 14:20:09 +0100 Subject: [PATCH 4/4] Put inits back --- varc_core/systems/linux.py | 10 ++++++++++ varc_core/systems/windows.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/varc_core/systems/linux.py b/varc_core/systems/linux.py index 3848e2f..1192ba6 100644 --- a/varc_core/systems/linux.py +++ b/varc_core/systems/linux.py @@ -36,6 +36,16 @@ class IOVec(ctypes.Structure): class LinuxSystem(BaseSystem): + def __init__( + self, + include_memory: bool, + include_open: bool, + extract_dumps: bool, + yara_file: Optional[str], + **kwargs: Any + ) -> None: + super().__init__(include_memory=include_memory, include_open=include_open, extract_dumps=extract_dumps, yara_file=yara_file, **kwargs) + def parse_mem_map(self, pid: int, p_name: str) -> List[Tuple[int, int]]: """Returns a list of (start address, end address) tuples of the regions of process memory that are mapped diff --git a/varc_core/systems/windows.py b/varc_core/systems/windows.py index 43e35ff..719dd99 100644 --- a/varc_core/systems/windows.py +++ b/varc_core/systems/windows.py @@ -18,6 +18,16 @@ class WindowsSystem(BaseSystem): + def __init__( + self, + include_memory: bool, + include_open: bool, + extract_dumps: bool, + yara_file: Optional[str], + **kwargs: Any + ) -> None: + super().__init__(include_memory=include_memory, include_open=include_open, extract_dumps=extract_dumps, yara_file=yara_file, **kwargs) + def read_process(self, handle: int, address: int) -> Tuple[Optional[bytes], int]: """ Read a process. Based on pymems pattern module