Skip to content

Commit

Permalink
Added artifacts filter support for files and Windows Registry #1313
Browse files Browse the repository at this point in the history
  • Loading branch information
jnettesheim committed Jun 9, 2018
1 parent 12454e4 commit 134a34c
Show file tree
Hide file tree
Showing 25 changed files with 372 additions and 823 deletions.
5 changes: 5 additions & 0 deletions config/end-to-end.ini
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ tagging_file=data/tag_windows.txt
case=image_export
source=test_data/image.qcow2

[image_export_with_artifact_filters]
case=image_export
artifact_filters=data/windows_artifact_filters
source=test_data/image.qcow2

[image_export_with_filter_file]
case=image_export
filter_file=data/filter_windows.txt
Expand Down
2 changes: 2 additions & 0 deletions plaso/cli/extraction_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def _CreateProcessingConfiguration(self, knowledge_base):
"""
# TODO: pass preferred_encoding.
configuration = configurations.ProcessingConfiguration()
configuration.artifact_filters = self._artifact_filters
configuration.artifacts_registry = self._artifacts_registry
configuration.credentials = self._credential_configurations
configuration.debug_output = self._debug_mode
configuration.event_extraction.text_prepend = self._text_prepend
Expand Down
1 change: 1 addition & 0 deletions plaso/cli/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from plaso.cli.helpers import analysis_plugins
from plaso.cli.helpers import artifact_definitions
from plaso.cli.helpers import artifact_filters
from plaso.cli.helpers import data_location
from plaso.cli.helpers import date_filters
from plaso.cli.helpers import dynamic_output
Expand Down
17 changes: 17 additions & 0 deletions plaso/cli/helpers/artifact_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ def AddArguments(cls, argument_group):
'quickly collect data of interest, such as specific files or '
'Windows Registry keys.'))

argument_group.add_argument(
'--custom_artifact_definitions', '--custom-artifact-definitions',
dest='custom_artifact_definitions_path', type=str, metavar='PATH',
action='store', help=(
'Path to a file containing custom artifact definitions, which are '
'.yaml files. Artifact definitions can be used to describe and '
'quickly collect data of interest, such as specific files or '
'Windows Registry keys.'))

@classmethod
def ParseOptions(cls, options, configuration_object):
"""Parses and validates options.
Expand Down Expand Up @@ -86,11 +95,19 @@ def ParseOptions(cls, options, configuration_object):
raise errors.BadConfigOption(
'Unable to determine path to artifact definitions.')

custom_artifacts_path = getattr(
options, 'custom_artifact_definitions_path', None)

registry = artifacts_registry.ArtifactDefinitionsRegistry()
reader = artifacts_reader.YamlArtifactsReader()

try:
registry.ReadFromDirectory(reader, artifacts_path)
if custom_artifacts_path and os.path.isfile(custom_artifacts_path):
registry.ReadFromFile(reader, custom_artifacts_path)
elif custom_artifacts_path and not os.path.isfile(custom_artifacts_path):
raise errors.BadConfigOption(
'No such artifacts filter file: {0:s}.'.format(custom_artifacts_path))

except (KeyError, artifacts_errors.FormatError) as exception:
raise errors.BadConfigOption((
Expand Down
77 changes: 77 additions & 0 deletions plaso/cli/helpers/artifact_filters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
"""The artifacts filter file CLI arguments helper."""

from __future__ import unicode_literals

import os

from plaso.cli import tools
from plaso.cli.helpers import interface
from plaso.cli.helpers import manager
from plaso.lib import errors


class ArtifactFiltersArgumentsHelper(interface.ArgumentsHelper):
  """Artifacts filter file CLI arguments helper."""

  NAME = 'artifact_filters'
  DESCRIPTION = 'Artifact filters command line arguments.'

  @classmethod
  def AddArguments(cls, argument_group):
    """Adds command line arguments to an argument group.

    This function takes an argument parser or an argument group object and adds
    to it all the command line arguments this helper supports.

    Args:
      argument_group (argparse._ArgumentGroup|argparse.ArgumentParser):
          argparse group.
    """
    # Every fragment below ends with a space (or the next one starts with
    # one) so that the implicitly concatenated help text reads correctly.
    argument_group.add_argument(
        '--artifact_filters', '--artifact-filters',
        dest='artifact_filters', type=str, default=None,
        action='store', help=(
            'Names of forensic artifact definitions, provided in one of the '
            'following formats: (1) directly on the command line (comma '
            'separated), (2) in a file with one artifact name per line, or '
            '(3) one operating system specific keyword which will process '
            'all artifacts supporting that OS (windows, linux, darwin). '
            'Forensic artifacts are stored in .yaml files that are directly '
            'pulled from the artifact definitions project. You can also '
            'specify a custom artifacts yaml file (see '
            '--custom_artifact_definitions). Artifact definitions can be '
            'used to describe and quickly collect data of interest, such as '
            'specific files or Windows Registry keys.'))

  @classmethod
  def ParseOptions(cls, options, configuration_object):
    """Parses and validates options.

    The parsed value is stored in the _artifact_filters attribute of the
    configuration object as a list of artifact definition names, or None
    when the option was not provided.

    Args:
      options (argparse.Namespace): parser options.
      configuration_object (CLITool): object to be configured by the argument
          helper.

    Raises:
      BadConfigObject: when the configuration object is of the wrong type.
    """
    if not isinstance(configuration_object, tools.CLITool):
      raise errors.BadConfigObject(
          'Configuration object is not an instance of CLITool')

    # The option defaults to None, so the value must be checked before any
    # string method is called on it.
    # NOTE(review): assumes _ParseStringOption passes an unset option through
    # as None - confirm against the ArgumentsHelper interface.
    artifact_filters = cls._ParseStringOption(options, 'artifact_filters')

    if not artifact_filters:
      artifact_filters = None

    else:
      # Test for a file using the path exactly as provided; lowercasing it
      # first would break the lookup on case-sensitive file systems.
      if os.path.isfile(artifact_filters):
        with open(artifact_filters) as file_object:
          names = file_object.read().splitlines()
      else:
        # Names given on the command line are lowercased; names read from a
        # file are kept as written.
        names = artifact_filters.lower().split(',')

      # Drop surrounding whitespace and empty entries, such as blank lines
      # in the file or a trailing comma on the command line.
      names = [name.strip() for name in names]
      artifact_filters = [name for name in names if name]

    setattr(configuration_object, '_artifact_filters', artifact_filters)


# Register this helper with the argument helper manager at import time.
manager.ArgumentHelperManager.RegisterHelper(ArtifactFiltersArgumentsHelper)
28 changes: 17 additions & 11 deletions plaso/cli/image_export_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import textwrap


from dfvfs.helpers import file_system_searcher
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.path import factory as path_spec_factory
Expand All @@ -16,9 +17,9 @@
from plaso.analyzers.hashers import manager as hashers_manager
from plaso.cli import logger
from plaso.cli import storage_media_tool
from plaso.cli import tools
from plaso.cli.helpers import manager as helpers_manager
from plaso.engine import extractors
from plaso.engine import filter_file
from plaso.engine import knowledge_base
from plaso.engine import path_helper
from plaso.filters import file_entry as file_entry_filters
Expand Down Expand Up @@ -73,6 +74,7 @@ def __init__(self, input_reader=None, output_writer=None):
super(ImageExportTool, self).__init__(
input_reader=input_reader, output_writer=output_writer)
self._abort = False
self._artifact_filters = None
self._artifacts_registry = None
self._destination_path = None
self._digests = {}
Expand Down Expand Up @@ -287,7 +289,8 @@ def _ExtractFileEntry(
# TODO: merge with collector and/or engine.
def _ExtractWithFilter(
self, source_path_specs, destination_path, output_writer,
filter_file_path, skip_duplicates=True):
artifacts_registry, artifact_filters_path, filter_file_path,
skip_duplicates=True):
"""Extracts files using a filter expression.
This method runs the file extraction process on the image and
Expand All @@ -297,8 +300,12 @@ def _ExtractWithFilter(
source_path_specs (list[dfvfs.PathSpec]): path specifications to extract.
destination_path (str): path where the extracted files should be stored.
output_writer (CLIOutputWriter): output writer.
artifacts_registry (ArtifactRegistry): Artifacts registry object.
artifact_filters_path (str): path of the file that contains the
names of the artifacts filter definitions or definitions directly
listed comma separated.
filter_file_path (str): path of the file that contains the filter
expressions.
expressions or artifact definitions.
skip_duplicates (Optional[bool]): True if files with duplicate content
should be skipped.
"""
Expand All @@ -314,10 +321,8 @@ def _ExtractWithFilter(
output_writer.Write(
'Extracting file entries from: {0:s}\n'.format(display_name))

environment_variables = self._knowledge_base.GetEnvironmentVariables()
filter_file_object = filter_file.FilterFile(filter_file_path)
find_specs = filter_file_object.BuildFindSpecs(
environment_variables=environment_variables)
find_specs = tools.FindSpecsGetter().GetFindSpecs(artifacts_registry,
artifact_filters_path, filter_file_path, self._knowledge_base)

searcher = file_system_searcher.FileSystemSearcher(
file_system, mount_point)
Expand Down Expand Up @@ -399,7 +404,7 @@ def _ParseFilterOptions(self, options):
Raises:
BadConfigOption: if the options are invalid.
"""
names = ['date_filters', 'filter_file']
names = ['artifact_filters', 'date_filters', 'filter_file']
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=names)

Expand All @@ -416,7 +421,7 @@ def _ParseFilterOptions(self, options):
except (IOError, ValueError) as exception:
raise errors.BadConfigOption(exception)

if self._filter_file:
if self._artifact_filters or self._filter_file:
self.has_filters = True
else:
self.has_filters = self._filter_collection.HasFilters()
Expand Down Expand Up @@ -559,7 +564,7 @@ def AddFilterOptions(self, argument_group):
Args:
argument_group (argparse._ArgumentGroup): argparse argument group.
"""
names = ['date_filters', 'filter_file']
names = ['artifact_filters', 'date_filters', 'filter_file']
helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
argument_group, names=names)

Expand Down Expand Up @@ -749,9 +754,10 @@ def ProcessSources(self):
if not os.path.isdir(self._destination_path):
os.makedirs(self._destination_path)

if self._filter_file:
if self._artifact_filters or self._filter_file:
self._ExtractWithFilter(
self._source_path_specs, self._destination_path, self._output_writer,
self._artifacts_registry, self._artifact_filters,
self._filter_file, skip_duplicates=self._skip_duplicates)
else:
self._Extract(
Expand Down
24 changes: 12 additions & 12 deletions plaso/cli/log2timeline_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from plaso.cli import views
from plaso.cli.helpers import manager as helpers_manager
from plaso.engine import engine
from plaso.engine import filter_file
from plaso.engine import single_process as single_process_engine
from plaso.lib import definitions
from plaso.lib import errors
Expand Down Expand Up @@ -167,7 +166,8 @@ def ParseArguments(self):
'extraction arguments')

argument_helper_names = [
'extraction', 'filter_file', 'hashers', 'parsers', 'yara_rules']
'artifact_filters', 'extraction', 'filter_file', 'hashers',
'parsers', 'yara_rules']
helpers_manager.ArgumentHelperManager.AddCommandLineArguments(
extraction_group, names=argument_helper_names)

Expand Down Expand Up @@ -317,8 +317,9 @@ def ParseOptions(self, options):
self._ParseInformationalOptions(options)

argument_helper_names = [
'artifact_definitions', 'extraction', 'filter_file', 'status_view',
'storage_file', 'storage_format', 'text_prepend', 'yara_rules']
'artifact_definitions', 'artifact_filters', 'extraction',
'filter_file', 'status_view', 'storage_file', 'storage_format',
'text_prepend', 'yara_rules']
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=argument_helper_names)

Expand Down Expand Up @@ -370,7 +371,9 @@ def ExtractEventsFromSources(self):

self._status_view.SetMode(self._status_view_mode)
self._status_view.SetSourceInformation(
self._source_path, self._source_type, filter_file=self._filter_file)
self._source_path, self._source_type,
artifact_filters=self._artifact_filters,
filter_file=self._filter_file)

status_update_callback = (
self._status_view.GetExtractionStatusUpdateCallback())
Expand All @@ -380,6 +383,7 @@ def ExtractEventsFromSources(self):
self._output_writer.Write('Processing started.\n')

session = engine.BaseEngine.CreateSession(
artifact_filters=self._artifact_filters,
command_line_arguments=self._command_line_arguments,
debug_mode=self._debug_mode,
filter_file=self._filter_file,
Expand Down Expand Up @@ -415,13 +419,9 @@ def ExtractEventsFromSources(self):
self._SetExtractionParsersAndPlugins(configuration, session)
self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

filter_find_specs = None
if configuration.filter_file:
environment_variables = (
extraction_engine.knowledge_base.GetEnvironmentVariables())
filter_file_object = filter_file.FilterFile(configuration.filter_file)
filter_find_specs = filter_file_object.BuildFindSpecs(
environment_variables=environment_variables)
filter_find_specs = tools.FindSpecsGetter().GetFindSpecs(
configuration.artifacts_registry, configuration.artifact_filters,
configuration.filter_file, extraction_engine.knowledge_base)

processing_status = None
if single_process_mode:
Expand Down
2 changes: 2 additions & 0 deletions plaso/cli/pinfo_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ def _PrintSessionsDetails(self, storage):
command_line_arguments = session.command_line_arguments or 'N/A'
parser_filter_expression = session.parser_filter_expression or 'N/A'
preferred_encoding = session.preferred_encoding or 'N/A'
artifact_filters = session.artifact_filters or 'N/A'
filter_file = session.filter_file or 'N/A'

title = 'Session: {0!s}'.format(session_identifier)
Expand All @@ -368,6 +369,7 @@ def _PrintSessionsDetails(self, storage):
table_view.AddRow(['Enabled parser and plugins', enabled_parser_names])
table_view.AddRow(['Preferred encoding', preferred_encoding])
table_view.AddRow(['Debug mode', session.debug_mode])
table_view.AddRow(['Artifact filters', artifact_filters])
table_view.AddRow(['Filter file', filter_file])

table_view.Write(self._output_writer)
Expand Down
20 changes: 10 additions & 10 deletions plaso/cli/psteal_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
from plaso.cli import logger
from plaso.cli import status_view
from plaso.cli import tool_options
from plaso.cli import tools
from plaso.cli import views
from plaso.cli.helpers import manager as helpers_manager
from plaso.engine import engine
from plaso.engine import filter_file
from plaso.engine import knowledge_base
from plaso.engine import single_process as single_process_engine
from plaso.lib import errors
Expand Down Expand Up @@ -273,7 +273,9 @@ def ExtractEventsFromSources(self):

self._status_view.SetMode(self._status_view_mode)
self._status_view.SetSourceInformation(
self._source_path, source_type, filter_file=self._filter_file)
self._source_path, source_type,
artifact_filters=self._artifact_filters,
filter_file=self._filter_file)

status_update_callback = (
self._status_view.GetExtractionStatusUpdateCallback())
Expand All @@ -283,6 +285,7 @@ def ExtractEventsFromSources(self):
self._output_writer.Write('Processing started.\n')

session = engine.BaseEngine.CreateSession(
artifact_filters=self._artifact_filters,
command_line_arguments=self._command_line_arguments,
filter_file=self._filter_file,
preferred_encoding=self.preferred_encoding,
Expand Down Expand Up @@ -317,13 +320,9 @@ def ExtractEventsFromSources(self):
self._SetExtractionParsersAndPlugins(configuration, session)
self._SetExtractionPreferredTimeZone(extraction_engine.knowledge_base)

filter_find_specs = None
if configuration.filter_file:
environment_variables = (
extraction_engine.knowledge_base.GetEnvironmentVariables())
filter_file_object = filter_file.FilterFile(configuration.filter_file)
filter_find_specs = filter_file_object.BuildFindSpecs(
environment_variables=environment_variables)
filter_find_specs = tools.FindSpecsGetter().GetFindSpecs(
configuration.artifacts_registry, configuration.artifact_filters,
configuration.filter_file, extraction_engine.knowledge_base)

processing_status = None
if single_process_mode:
Expand Down Expand Up @@ -450,7 +449,8 @@ def ParseOptions(self, options):
self._ParseTimezoneOption(options)

argument_helper_names = [
'artifact_definitions', 'hashers', 'language', 'parsers']
'artifact_definitions', 'custom_artifact_definitions',
'hashers', 'language', 'parsers']
helpers_manager.ArgumentHelperManager.ParseOptions(
options, self, names=argument_helper_names)

Expand Down
Loading

0 comments on commit 134a34c

Please sign in to comment.