diff --git a/config/dpkg/changelog b/config/dpkg/changelog
index 5726dcadad..94449aee62 100644
--- a/config/dpkg/changelog
+++ b/config/dpkg/changelog
@@ -2,4 +2,4 @@ python-plaso (1.3.1-1) unstable; urgency=low
 
   * Auto-generated
 
- -- Log2Timeline Tue, 22 Sep 2015 23:05:20 +0200
+ -- Log2Timeline Wed, 23 Sep 2015 22:56:35 +0200
diff --git a/plaso/__init__.py b/plaso/__init__.py
index 9040bc8fc5..dbe404b1cc 100644
--- a/plaso/__init__.py
+++ b/plaso/__init__.py
@@ -3,7 +3,7 @@
 
 __version__ = '1.3.1'
 VERSION_DEV = True
-VERSION_DATE = '20150922'
+VERSION_DATE = '20150923'
 
 
 def GetVersion():
diff --git a/plaso/dependencies.py b/plaso/dependencies.py
index b9f6bd0f5e..75d3534575 100644
--- a/plaso/dependencies.py
+++ b/plaso/dependencies.py
@@ -16,6 +16,7 @@
     u'pyevt': 20120410,
     u'pyevtx': 20141112,
     u'pyewf': 20131210,
+    u'pyfsntfs': 20150829,
     u'pyfwsi': 20150606,
     u'pylnk': 20150830,
     u'pymsiecf': 20150314,
@@ -38,7 +39,7 @@
     (u'binplist', u'__version__', u'0.1.4', None),
     (u'construct', u'__version__', u'2.5.2', None),
     (u'dateutil', u'__version__', u'1.5', None),
-    (u'dfvfs', u'__version__', u'20150630', None),
+    (u'dfvfs', u'__version__', u'20150829', None),
     (u'dpkt', u'__version__', u'1.8', None),
     # The protobuf module does not appear to have version information.
     (u'google.protobuf', u'', u'', None),
diff --git a/plaso/engine/collector.py b/plaso/engine/collector.py
index 3a64d0b999..dfc1e20894 100644
--- a/plaso/engine/collector.py
+++ b/plaso/engine/collector.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Generic collector that supports both file system and image files."""
 
+import copy
 import hashlib
 import logging
 
@@ -24,11 +25,11 @@ def __init__(self, path_spec_queue, resolver_context=None):
        as a path specification (instance of dfvfs.PathSpec).
 
     Args:
-      path_spec_queue: The path specification queue (instance of Queue).
+      path_spec_queue: the path specification queue (instance of Queue).
                        This queue contains path specifications (instances of
                        dfvfs.PathSpec) of the file entries that need to be
                        processed.
-      resolver_context: Optional resolver context (instance of dfvfs.Context).
+      resolver_context: optional resolver context (instance of dfvfs.Context).
                         The default is None.
     """
     super(Collector, self).__init__(path_spec_queue)
@@ -43,8 +44,8 @@ def _ProcessFileSystem(self, path_spec, find_specs=None):
     """Processes a file system within a storage media image.
 
     Args:
-      path_spec: The path specification of the root of the file system.
-      find_specs: Optional list of find specifications (instances of
+      path_spec: the path specification of the root of the file system.
+      find_specs: optional list of find specifications (instances of
                   dfvfs.FindSpec). The default is None.
     """
     try:
@@ -141,7 +142,7 @@ def SetCollectDirectoryMetadata(self, collect_directory_metadata):
     """Sets the collect directory metadata flag.
 
     Args:
-      collect_directory_metadata: Boolean value to indicate to collect
+      collect_directory_metadata: boolean value to indicate to collect
                                   directory metadata.
     """
     self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)
@@ -150,7 +151,7 @@ def SetFilter(self, filter_find_specs):
     """Sets the collection filter find specifications.
 
     Args:
-      filter_find_specs: List of filter find specifications (instances of
+      filter_find_specs: list of filter find specifications (instances of
                          dfvfs.FindSpec).
     """
     self._filter_find_specs = filter_find_specs
@@ -172,7 +173,7 @@ def __init__(self, path_spec_queue):
        as a path specification (instance of dfvfs.PathSpec).
 
     Args:
-      path_spec_queue: The path specification queue (instance of Queue).
+      path_spec_queue: the path specification queue (instance of Queue).
                        This queue contains path specifications (instances of
                        dfvfs.PathSpec) of the file entries that need to be
                        processed.
@@ -182,21 +183,11 @@ def __init__(self, path_spec_queue):
     self._duplicate_file_check = False
     self._hashlist = {}
-    self.number_of_file_entries = 0
-
-  def __enter__(self):
-    """Enters a with statement."""
-    return self
-
-  def __exit__(self, unused_type, unused_value, unused_traceback):
-    """Exits a with statement."""
-    return
-
   def _CalculateNTFSTimeHash(self, file_entry):
     """Return a hash value calculated from a NTFS file's metadata.
 
     Args:
-      file_entry: The file entry (instance of TSKFileEntry).
+      file_entry: the file entry (instance of TSKFileEntry).
 
     Returns:
       A hash value (string) that can be used to determine if a file's timestamp
@@ -205,26 +196,53 @@ def _CalculateNTFSTimeHash(self, file_entry):
     stat_object = file_entry.GetStat()
     ret_hash = hashlib.md5()
 
-    ret_hash.update('atime:{0:d}.{1:d}'.format(
-        getattr(stat_object, 'atime', 0),
-        getattr(stat_object, 'atime_nano', 0)))
+    ret_hash.update(b'atime:{0:d}.{1:d}'.format(
+        getattr(stat_object, u'atime', 0),
+        getattr(stat_object, u'atime_nano', 0)))
 
-    ret_hash.update('crtime:{0:d}.{1:d}'.format(
-        getattr(stat_object, 'crtime', 0),
-        getattr(stat_object, 'crtime_nano', 0)))
+    ret_hash.update(b'crtime:{0:d}.{1:d}'.format(
+        getattr(stat_object, u'crtime', 0),
+        getattr(stat_object, u'crtime_nano', 0)))
 
-    ret_hash.update('mtime:{0:d}.{1:d}'.format(
-        getattr(stat_object, 'mtime', 0),
-        getattr(stat_object, 'mtime_nano', 0)))
+    ret_hash.update(b'mtime:{0:d}.{1:d}'.format(
+        getattr(stat_object, u'mtime', 0),
+        getattr(stat_object, u'mtime_nano', 0)))
 
-    ret_hash.update('ctime:{0:d}.{1:d}'.format(
-        getattr(stat_object, 'ctime', 0),
-        getattr(stat_object, 'ctime_nano', 0)))
+    ret_hash.update(b'ctime:{0:d}.{1:d}'.format(
+        getattr(stat_object, u'ctime', 0),
+        getattr(stat_object, u'ctime_nano', 0)))
 
     return ret_hash.hexdigest()
 
+  def _ProcessDataStreams(self, file_entry):
+    """Processes the data streams in a file entry.
+
+    Args:
+      file_entry: a file entry (instance of dfvfs.FileEntry).
+    """
+    produced_main_path_spec = False
+    for data_stream in file_entry.data_streams:
+      # Make a copy so we don't make the changes on a path specification
+      # directly. Otherwise already produced path specifications can be
+      # altered in the process.
+      path_spec = copy.deepcopy(file_entry.path_spec)
+      setattr(path_spec, u'data_stream', data_stream.name)
+      self.ProduceItem(path_spec)
+
+      if not data_stream.name:
+        produced_main_path_spec = True
+
+    if (not produced_main_path_spec and (
+        not file_entry.IsDirectory() or self._collect_directory_metadata)):
+      self.ProduceItem(file_entry.path_spec)
+
   def _ProcessDirectory(self, file_entry):
-    """Processes a directory and extract its metadata if necessary."""
+    """Processes a directory and extract its metadata if necessary.
+
+    Args:
+      file_entry: a file entry (instance of dfvfs.FileEntry) that refers
+                  to the directory to process.
+    """
     # Need to do a breadth-first search otherwise we'll hit the Python
     # maximum recursion depth.
     sub_directories = []
@@ -250,12 +268,6 @@ def _ProcessDirectory(self, file_entry):
         continue
 
       if sub_file_entry.IsDirectory():
-        # This check is here to improve performance by not producing
-        # path specifications that don't get processed.
-        if self._collect_directory_metadata:
-          self.ProduceItem(sub_file_entry.path_spec)
-          self.number_of_file_entries += 1
-
         sub_directories.append(sub_file_entry)
 
       elif sub_file_entry.IsFile():
@@ -266,15 +278,14 @@ def _ProcessDirectory(self, file_entry):
         if self._duplicate_file_check:
           hash_value = self._CalculateNTFSTimeHash(sub_file_entry)
-          inode = getattr(sub_file_entry.path_spec, 'inode', 0)
+          inode = getattr(sub_file_entry.path_spec, u'inode', 0)
 
           if inode in self._hashlist:
             if hash_value in self._hashlist[inode]:
               continue
 
           self._hashlist.setdefault(inode, []).append(hash_value)
 
-        self.ProduceItem(sub_file_entry.path_spec)
-        self.number_of_file_entries += 1
+        self._ProcessDataStreams(sub_file_entry)
 
     for sub_file_entry in sub_directories:
       if self._abort:
@@ -291,9 +302,9 @@ def Collect(self, file_system, path_spec, find_specs=None):
     """Collects files from the file system.
 
     Args:
-      file_system: The file system (instance of dfvfs.FileSystem).
-      path_spec: The path specification (instance of dfvfs.PathSpec).
-      find_specs: Optional list of find specifications (instances of
+      file_system: the file system (instance of dfvfs.FileSystem).
+      path_spec: the path specification (instance of dfvfs.PathSpec).
+      find_specs: optional list of find specifications (instances of
                   dfvfs.FindSpec). The default is None.
     """
     if find_specs:
@@ -304,7 +315,6 @@ def Collect(self, file_system, path_spec, find_specs=None):
          return
 
        self.ProduceItem(path_spec)
-        self.number_of_file_entries += 1
 
     else:
       file_entry = file_system.GetFileEntryByPathSpec(path_spec)
@@ -315,7 +325,7 @@ def SetCollectDirectoryMetadata(self, collect_directory_metadata):
     """Sets the collect directory metadata flag.
 
     Args:
-      collect_directory_metadata: Boolean value to indicate to collect
+      collect_directory_metadata: boolean value to indicate to collect
                                   directory metadata.
     """
     self._collect_directory_metadata = collect_directory_metadata
diff --git a/plaso/engine/worker.py b/plaso/engine/worker.py
index 616a77b1e6..f9222ae693 100644
--- a/plaso/engine/worker.py
+++ b/plaso/engine/worker.py
@@ -25,15 +25,35 @@ class BaseEventExtractionWorker(queue.ItemQueueConsumer):
   """Class that defines the event extraction worker base.
 
   This class is designed to watch a queue for path specifications of files
-  and directories (file entries) for which events need to be extracted.
+  and directories (file entries) and data streams for which events need to
+  be extracted.
 
   The event extraction worker needs to determine if a parser suitable
-  for parsing a particular file is available. All extracted event objects
-  are pushed on a storage queue for further processing.
+  for parsing a particular file entry or data stream is available. All
+  extracted event objects are pushed on a storage queue for further processing.
   """
 
   _DEFAULT_HASH_READ_SIZE = 4096
 
+  # TSK metadata files that need special handling.
+  _METADATA_FILE_LOCATIONS_TSK = frozenset([
+      u'/$AttrDef',
+      u'/$BadClus',
+      u'/$Bitmap',
+      u'/$Boot',
+      u'/$Extend/$ObjId',
+      u'/$Extend/$Quota',
+      u'/$Extend/$Reparse',
+      u'/$Extend/$RmMetadata/$Repair',
+      u'/$Extend/$RmMetadata/$TxfLog/$Tops',
+      u'/$LogFile',
+      u'/$MFT',
+      u'/$MFTMirr',
+      u'/$Secure',
+      u'/$UpCase',
+      u'/$Volume',
+  ])
+
   def __init__(
       self, identifier, path_spec_queue, event_queue_producer,
       parse_error_queue_producer, parser_mediator, resolver_context=None):
@@ -102,33 +122,20 @@ def _ConsumeItem(self, path_spec, **unused_kwargs):
       self._ProcessPathSpec(self._compressed_stream_path_spec)
       self._compressed_stream_path_spec = None
 
-  def _GetSignatureMatchParserNames(self, file_entry):
-    """Determines if a file matches one of the known signatures.
+  def _GetSignatureMatchParserNames(self, file_object):
+    """Determines if a file-like object matches one of the known signatures.
 
     Args:
-      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      file_object: the file-like object whose contents will be checked
+                   for known signatures.
 
     Returns:
       A list of parser names for which the file entry matches their known
       signatures.
-
-    Raises:
-      IOError: if scanning for signatures failed.
     """
     parser_name_list = []
     scan_state = pysigscan.scan_state()
-
-    file_object = file_entry.GetFileObject()
-    try:
-      self._file_scanner.scan_file_object(scan_state, file_object)
-    except IOError as exception:
-      raise IOError(
-          u'Unable to scan for signatures with error: {0:s}'.format(exception))
-    finally:
-      file_object.close()
-
-    # Make sure frame.f_locals does not keep a reference to file_entry.
-    file_entry = None
+    self._file_scanner.scan_file_object(scan_state, file_object)
 
     for scan_result in scan_state.scan_results:
       format_specification = (
@@ -140,66 +147,81 @@ def _GetSignatureMatchParserNames(self, file_entry):
 
     return parser_name_list
 
-  def _HashFileEntry(self, file_entry):
-    """Hashes the contents of a file entry.
+  def _HashDataStream(self, file_entry, data_stream_name=u''):
+    """Hashes the contents of a specific data stream of a file entry.
 
-    The resulting digest hashes are set in the parser mediator as attribute
-    to add to event objects.
+    The resulting digest hashes are set in the parser mediator as attributes
+    that are added to produced event objects. Note that some file systems
+    allow directories to have data streams, e.g. NTFS.
 
     Args:
-      file_entry: The file entry object to be hashed (instance of
+      file_entry: the file entry relating to the data to be hashed (instance of
                   dfvfs.FileEntry)
+      data_stream_name: optional data stream name. The default is
+                        an empty string which represents the default
+                        data stream.
     """
-    if not file_entry.IsFile() or not self._hasher_names:
+    if not self._hasher_names:
       return
 
-    logging.debug(u'[HashFileEntry] hashing file: {0:s}'.format(
+    logging.debug(u'[HashDataStream] hashing file: {0:s}'.format(
        self._current_display_name))
 
-    hasher_objects = hashers_manager.HashersManager.GetHasherObjects(
-        self._hasher_names)
-
-    file_object = file_entry.GetFileObject()
-    try:
-      file_object.seek(0, os.SEEK_SET)
+    file_object = file_entry.GetFileObject(data_stream_name=data_stream_name)
+    if not file_object:
+      return
 
-      # We only do one read, then pass it to each of the hashers in turn.
-      data = file_object.read(self._DEFAULT_HASH_READ_SIZE)
-      while data:
-        for hasher in hasher_objects:
-          hasher.Update(data)
-        data = file_object.read(self._DEFAULT_HASH_READ_SIZE)
+    # Make sure frame.f_locals does not keep a reference to file_entry.
+    file_entry = None
+    try:
+      digest_hashes = hashers_manager.HashersManager.HashFileObject(
+          self._hasher_names, file_object,
+          buffer_size=self._DEFAULT_HASH_READ_SIZE)
     finally:
       file_object.close()
 
-    # Make sure frame.f_locals does not keep a reference to file_entry.
-    file_entry = None
-
-    # Get the digest values for every active hasher.
-    digests = {}
-    for hasher in hasher_objects:
-      digests[hasher.NAME] = hasher.GetStringDigest()
-      logging.debug(
-          u'[HashFileEntry] digest {0:s} calculated for file: {1:s}.'.format(
-              hasher.GetStringDigest(), self._current_display_name))
-
     if self._enable_profiling:
       self._ProfilingSampleMemory()
 
-    for hash_name, digest in iter(digests.items()):
+    for hash_name, digest_hash_string in iter(digest_hashes.items()):
       attribute_name = u'{0:s}_hash'.format(hash_name)
-      self._parser_mediator.AddEventAttribute(attribute_name, digest)
+      self._parser_mediator.AddEventAttribute(
+          attribute_name, digest_hash_string)
 
-    logging.debug(u'[HashFileEntry] completed hashing file: {0:s}'.format(
-        self._current_display_name))
+      logging.debug(
+          u'[HashDataStream] digest {0:s} calculated for file: {1:s}.'.format(
+              digest_hash_string, self._current_display_name))
+
+    logging.debug(
+        u'[HashDataStream] completed hashing file: {0:s}'.format(
+            self._current_display_name))
+
+  def _IsMetadataFile(self, file_entry):
+    """Determines if the file entry is a metadata file.
+
+    Args:
+      file_entry: a file entry object (instance of dfvfs.FileEntry).
+
+    Returns:
+      A boolean value indicating if the file entry is a metadata file.
+    """
+    if (file_entry.type_indicator == dfvfs_definitions.TYPE_INDICATOR_TSK and
+        file_entry.path_spec.location in self._METADATA_FILE_LOCATIONS_TSK):
+      return True
 
-  def _ParseFileEntryWithParser(self, parser_object, file_entry):
+    return False
+
+  def _ParseFileEntryWithParser(
+      self, parser_object, file_entry, file_object=None):
     """Parses a file entry with a specific parser.
 
     Args:
-      parser_object: A parser object (instance of BaseParser).
-      file_entry: A file entry object (instance of dfvfs.FileEntry).
+      parser_object: a parser object (instance of BaseParser).
+      file_entry: a file entry object (instance of dfvfs.FileEntry).
+      file_object: optional file-like object to parse. If not set the parser
+                   will use the parser mediator to open the file entry's
+                   default data stream as a file-like object
     """
     self._parser_mediator.ClearParserChain()
@@ -210,7 +232,8 @@ def _ParseFileEntryWithParser(self, parser_object, file_entry):
       self._parsers_profiler.StartTiming(parser_object.NAME)
 
     try:
-      parser_object.UpdateChainAndParse(self._parser_mediator)
+      parser_object.UpdateChainAndParse(
+          self._parser_mediator, file_object=file_object)
 
     # We catch the IOError so we can determine the parser that generated
     # the error.
@@ -375,11 +398,51 @@ def _ProcessCompressedStreamFile(self, file_entry):
 
     return True
 
-  def _ProcessFileEntry(self, file_entry):
-    """Processes a file entry.
+  def _ProcessDataStream(self, file_entry, data_stream_name=u''):
+    """Processes a specific data stream of a file entry.
 
     Args:
       file_entry: A file entry object (instance of dfvfs.FileEntry).
+      data_stream_name: optional data stream name. The default is
+                        an empty string which represents the default
+                        data stream.
+ """ + file_object = file_entry.GetFileObject(data_stream_name=data_stream_name) + if not file_object: + return + + try: + parser_name_list = self._GetSignatureMatchParserNames(file_object) + if not parser_name_list: + parser_name_list = self._non_sigscan_parser_names + + for parser_name in parser_name_list: + parser_object = self._parser_objects.get(parser_name, None) + if not parser_object: + logging.warning(u'No such parser: {0:s}'.format(parser_name)) + continue + + logging.debug(( + u'[ProcessDataStream] parsing file: {0:s} with parser: ' + u'{1:s}').format(self._current_display_name, parser_name)) + + self._ParseFileEntryWithParser( + parser_object, file_entry, file_object=file_object) + + finally: + file_object.close() + + # Make sure frame.f_locals does not keep a reference to file_entry. + file_entry = None + + def _ProcessFileEntry(self, file_entry, data_stream_name=u''): + """Processes a specific data stream of a file entry. + + Args: + file_entry: A file entry object (instance of dfvfs.FileEntry). + data_stream_name: optional data stream name. The default is + an empty string which represents the default + data stream. Raises: RuntimeError: if the parser object is missing. @@ -393,41 +456,36 @@ def _ProcessFileEntry(self, file_entry): self._parser_mediator.SetFileEntry(file_entry) - try: - self._HashFileEntry(file_entry) + logging.debug(u'[ProcessFileEntry] parsing file: {0:s}'.format( + self._current_display_name)) - logging.debug(u'[ProcessFileEntry] parsing file: {0:s}'.format( - self._current_display_name)) + is_metadata_file = self._IsMetadataFile(file_entry) - # We always want to use the filestat parser. - if self._filestat_parser_object: + # Not every file entry has a data stream. In such cases we want to + # extract the metadata only. + has_data_stream = file_entry.HasDataStream(data_stream_name) + + try: + if has_data_stream: + self._HashDataStream(file_entry, data_stream_name=data_stream_name) + + # We always want to use the filestat parser if set but we only want + # to invoke it once per file entry, so we only use it if we are + # processing the default (nameless) data stream. 
+      if not data_stream_name and self._filestat_parser_object:
         self._ParseFileEntryWithParser(self._filestat_parser_object, file_entry)
 
       is_archive = False
       is_compressed_stream = False
-      is_file = file_entry.IsFile()
 
-      if is_file:
+      if not is_metadata_file and file_entry.IsFile():
         is_compressed_stream = self._ProcessCompressedStreamFile(file_entry)
         if not is_compressed_stream:
           is_archive = self._ProcessArchiveFile(file_entry)
 
-      if is_file and not is_archive and not is_compressed_stream:
-        parser_name_list = self._GetSignatureMatchParserNames(file_entry)
-        if not parser_name_list:
-          parser_name_list = self._non_sigscan_parser_names
-
-        for parser_name in parser_name_list:
-          parser_object = self._parser_objects.get(parser_name, None)
-          if not parser_object:
-            self._parser_mediator.ResetFileEntry()
-            raise RuntimeError(u'No such parser: {0:s}'.format(parser_name))
-
-          logging.debug((
-              u'[ProcessFileEntry] parsing file: {0:s} with parser: '
-              u'{1:s}').format(self._current_display_name, parser_name))
-
-          self._ParseFileEntryWithParser(parser_object, file_entry)
+      if (has_data_stream and not is_archive and not is_compressed_stream and
+          not is_metadata_file):
+        self._ProcessDataStream(file_entry, data_stream_name=data_stream_name)
 
     finally:
       if reference_count != self._resolver_context.GetFileObjectReferenceCount(
@@ -468,7 +526,13 @@ def _ProcessPathSpec(self, path_spec):
             path_spec.comparable))
         return
 
-      self._ProcessFileEntry(file_entry)
+      # Note that data stream can be set but contain None, we'll set it
+      # to an empty string here.
+      data_stream_name = getattr(path_spec, u'data_stream', None)
+      if not data_stream_name:
+        data_stream_name = u''
+
+      self._ProcessFileEntry(file_entry, data_stream_name=data_stream_name)
 
     except IOError as exception:
       logging.warning(
diff --git a/plaso/frontend/image_export.py b/plaso/frontend/image_export.py
index b3be31d7de..1c981606db 100644
--- a/plaso/frontend/image_export.py
+++ b/plaso/frontend/image_export.py
@@ -334,10 +334,12 @@ def Matches(self, file_entry):
     if not self._file_scanner or not file_entry.IsFile():
       return
 
-    scan_state = pysigscan.scan_state()
+    file_object = file_entry.GetFileObject()
+    if not file_object:
+      return False
 
     try:
-      file_object = file_entry.GetFileObject()
+      scan_state = pysigscan.scan_state()
       self._file_scanner.scan_file_object(scan_state, file_object)
     finally:
       file_object.close()
@@ -507,8 +509,11 @@ def WriteFile(self, source_path_spec, destination_path, filename_prefix=u''):
         os.makedirs(target_directory)
 
     if self._skip_duplicates and file_entry.IsFile():
+      file_object = file_entry.GetFileObject()
+      if not file_object:
+        return
+
       try:
-        file_object = file_entry.GetFileObject()
         digest_hash = self._CalculateHash(file_object)
       except IOError as exception:
         logging.error((
@@ -527,8 +532,11 @@ def WriteFile(self, source_path_spec, destination_path, filename_prefix=u''):
       else:
        self._digest_hashes[inode] = [digest_hash]
 
+    file_object = file_entry.GetFileObject()
+    if not file_object:
+      return
+
     try:
-      file_object = file_entry.GetFileObject()
       target_path = os.path.join(target_directory, target_filename)
       self._CopyFileObject(file_object, target_path)
     except IOError as exception:
diff --git a/plaso/hashers/manager.py b/plaso/hashers/manager.py
index 1395ceef91..7596d09cd6 100644
--- a/plaso/hashers/manager.py
+++ b/plaso/hashers/manager.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 """This file contains a class for managing digest hashers for Plaso."""
 
+import os
+
 
 class HashersManager(object):
   """Class that implements the hashers manager."""
 
@@ -35,7 +37,7 @@ def GetHasherNamesFromString(cls, hasher_names_string):
     hasher names.
 
     Args:
-      hasher_names_string: Comma separated string of names of
+      hasher_names_string: comma separated string of names of
                            hashers to enable enable, or the string 'all', to
                            enable all hashers.
 
@@ -90,7 +92,7 @@ def GetHasherObject(cls, hasher_name):
     """Retrieves an instance of a specific hasher.
 
     Args:
-      hasher_name: The name of the hasher to retrieve.
+      hasher_name: the name of the hasher to retrieve.
 
     Returns:
       A hasher object (instance of BaseHasher).
@@ -111,7 +113,7 @@ def GetHasherObjects(cls, hasher_names):
     """Retrieves instances for all the specified hashers.
 
     Args:
-      hasher_names: List of the names of the hashers to retrieve.
+      hasher_names: list of the names of the hashers to retrieve.
 
     Returns:
       A list of hasher objects (instances of BaseHasher).
@@ -134,6 +136,41 @@ def GetHashers(cls):
     for hasher_name, hasher_class in cls._hasher_classes.iteritems():
       yield hasher_name, hasher_class
 
+  @classmethod
+  def HashFileObject(cls, hasher_names_string, file_object, buffer_size=4096):
+    """Hashes the contents of a file-like object.
+
+    Args:
+      hasher_names_string: comma separated string of names of
+                           hashers to enable enable, or the string 'all', to
+                           enable all hashers.
+      file_object: the file-like object to be hashed.
+      buffer_size: optional read buffer size.
+
+    Returns:
+      A dictionary of digest hashes, where the key contains digest hash name
+      and value contains the digest hash calculated from the file contents.
+    """
+    hasher_objects = cls.GetHasherObjects(hasher_names_string)
+    if not hasher_objects:
+      return {}
+
+    file_object.seek(0, os.SEEK_SET)
+
+    # We only do one read, then pass it to each of the hashers in turn.
+    data = file_object.read(buffer_size)
+    while data:
+      for hasher_object in hasher_objects:
+        hasher_object.Update(data)
+      data = file_object.read(buffer_size)
+
+    # Get the digest hash value of every active hasher.
+    digest_hashes = {}
+    for hasher_object in hasher_objects:
+      digest_hashes[hasher_object.NAME] = hasher_object.GetStringDigest()
+
+    return digest_hashes
+
   @classmethod
   def RegisterHasher(cls, hasher_class):
     """Registers a hasher class.
diff --git a/plaso/parsers/interface.py b/plaso/parsers/interface.py
index 9bd715eb80..e002d9ae86 100644
--- a/plaso/parsers/interface.py
+++ b/plaso/parsers/interface.py
@@ -7,6 +7,7 @@
 """
 
 import abc
+import os
 
 from plaso.parsers import manager
 
@@ -201,22 +202,32 @@ def UpdateChainAndParse(self, parser_mediator, **kwargs):
 class SingleFileBaseParser(BaseParser):
   """Class that implements the single file parser base."""
 
-  # The initial file offset set to None if not set.
+  # The initial file offset. Set this value to None if no initial
+  # file offset seek needs to be performed.
   _INITIAL_FILE_OFFSET = 0
 
-  def Parse(self, parser_mediator, **kwargs):
+  def Parse(self, parser_mediator, file_object=None, **kwargs):
     """Parses a single file.
-
     Args:
-      parser_mediator: A parser mediator object (instance of ParserMediator).
+      parser_mediator: a parser mediator object (instance of ParserMediator).
+      file_object: optional file-like object to parse. If not set the parser
+                   will use the parser mediator to open the file-like object.
     """
     # TODO: Merge with UpdateChainAndParse for less overhead.
-    file_object = parser_mediator.GetFileObject(
-        offset=self._INITIAL_FILE_OFFSET)
+    close_file_object = False
+    if not file_object:
+      file_object = parser_mediator.GetFileObject(
+          offset=self._INITIAL_FILE_OFFSET)
+      close_file_object = True
+
+    elif self._INITIAL_FILE_OFFSET is not None:
+      file_object.seek(self._INITIAL_FILE_OFFSET, os.SEEK_SET)
+
     try:
       self.ParseFileObject(parser_mediator, file_object, **kwargs)
     finally:
-      file_object.close()
+      if close_file_object:
+        file_object.close()
 
   @abc.abstractmethod
   def ParseFileObject(self, parser_mediator, file_object, **kwargs):
diff --git a/plaso/parsers/mediator.py b/plaso/parsers/mediator.py
index e1550f36c2..80b1b9dc94 100644
--- a/plaso/parsers/mediator.py
+++ b/plaso/parsers/mediator.py
@@ -98,6 +98,10 @@ def _GetRelativePath(self, path_spec):
     if not location:
       return
 
+    data_stream = getattr(path_spec, u'data_stream', None)
+    if data_stream:
+      location = u'{0:s}:{1:s}'.format(location, data_stream)
+
     if path_spec.type_indicator != dfvfs_definitions.TYPE_INDICATOR_OS:
       return location
 
@@ -142,7 +146,7 @@ def ClearParserChain(self):
     self._parser_chain_components = []
 
   def GetDisplayName(self, file_entry=None):
-    """Retrieves the display name for the file entry.
+    """Retrieves the display name for a file entry.
 
     Args:
       file_entry: optional file entry object (instance of dfvfs.FileEntry).
@@ -164,17 +168,15 @@ def GetDisplayName(self, file_entry=None):
       path_spec = getattr(file_entry, u'path_spec', None)
       relative_path = self._GetRelativePath(path_spec)
 
-    if self._text_prepend:
-      text_prepend = self._text_prepend
-    else:
-      text_prepend = u''
-
     if not relative_path:
       relative_path = file_entry.name
 
     if not relative_path:
       return file_entry.path_spec.type_indicator
 
+    if self._text_prepend:
+      relative_path = u'{0:s}{1:s}'.format(self._text_prepend, relative_path)
+
     parent_path_spec = path_spec.parent
     if parent_path_spec and path_spec.type_indicator in [
         dfvfs_definitions.TYPE_INDICATOR_BZIP2,
@@ -185,19 +187,24 @@ def GetDisplayName(self, file_entry=None):
         dfvfs_definitions.TYPE_INDICATOR_VSHADOW]:
       store_index = getattr(path_spec.parent, u'store_index', None)
       if store_index is not None:
-        return u'VSS{0:d}:{1:s}:{2:s}{3:s}'.format(
-            store_index + 1, file_entry.path_spec.type_indicator, text_prepend,
-            relative_path)
+        return u'VSS{0:d}:{1:s}:{2:s}'.format(
+            store_index + 1, file_entry.path_spec.type_indicator, relative_path)
 
-    return u'{0:s}:{1:s}{2:s}'.format(
-        file_entry.path_spec.type_indicator, text_prepend, relative_path)
+    return u'{0:s}:{1:s}'.format(
+        file_entry.path_spec.type_indicator, relative_path)
 
   def GetFileEntry(self):
-    """The dfVFS FileEntry object for the file being parsed."""
+    """Retrieves the active file entry.
+
+    Returns:
+      A file entry (instance of dfvfs.FileEntry).
+    """
     return self._file_entry
 
   def GetFileObject(self, offset=0):
-    """Provides a dfVFS FileObject for the file being parsed.
+    """Provides a file-like object for the active file entry.
+
+    This will retrieve the file-object of the default (nameless) data stream.
 
     Args:
       offset: the offset to seek within the file-like object. The offset is
@@ -211,7 +218,7 @@ def GetFileObject(self, offset=0):
       ValueError: If no file entry is set in the mediator.
""" if not self._file_entry: - raise ValueError(u'Missing file entry') + raise ValueError(u'Missing active file entry') file_object = self._file_entry.GetFileObject() if offset is not None: @@ -356,11 +363,11 @@ def ResetCounters(self): self.number_of_parse_errors = 0 def ResetFileEntry(self): - """Resets the file entry.""" + """Resets the active file entry.""" self._file_entry = None def SetFileEntry(self, file_entry): - """Sets the current file entry and clears the parser chain. + """Sets the active file entry. Args: file_entry: the file entry (instance of dfvfs.FileEntry). diff --git a/tests/engine/collector.py b/tests/engine/collector.py index 2d00103915..17a2395d8a 100644 --- a/tests/engine/collector.py +++ b/tests/engine/collector.py @@ -51,9 +51,13 @@ def GetFilePaths(self): """Retrieves a list of file paths from the path specifications.""" file_paths = [] for path_spec in self.path_specs: - location = getattr(path_spec, 'location', None) + data_stream = getattr(path_spec, u'data_stream', None) + location = getattr(path_spec, u'location', None) if location is not None: + if data_stream: + location = u'{0:s}:{1:s}'.format(location, data_stream) file_paths.append(location) + return file_paths @@ -265,10 +269,14 @@ def testImageWithFilterCollection(self): # image_offset: 0 self.assertEqual(paths[1], u'/passwords.txt') - def testImageWithPartitionsCollections(self): - """Test collection on a storage media image file with multiple partitions. + def _TestImageWithPartitionsCollections(self, collect_directory_metadata): + """Test collection on a storage media image with multiple partitions. The image contains 2 partitions (p1 and p2) with NFTS file systems. + + Args: + collect_directory_metadata: boolean value to indicate to collect + directory metadata. 
""" test_file = self._GetTestFilePath([u'multi_partition_image.vmdk']) @@ -293,6 +301,8 @@ def testImageWithPartitionsCollections(self): resolver_context = context.Context() test_collector = collector.Collector( test_path_spec_queue, resolver_context=resolver_context) + test_collector.SetCollectDirectoryMetadata(collect_directory_metadata) + test_collector.Collect([p1_file_system_path_spec, p2_file_system_path_spec]) test_collector_queue_consumer = TestCollectorQueueConsumer( @@ -304,54 +314,77 @@ def testImageWithPartitionsCollections(self): expected_paths_p1 = [ u'/$AttrDef', u'/$BadClus', + u'/$BadClus:$Bad', u'/$Bitmap', u'/$Boot', - u'/$Extend', u'/$Extend/$ObjId', u'/$Extend/$Quota', u'/$Extend/$Reparse', - u'/$Extend/$RmMetadata', u'/$Extend/$RmMetadata/$Repair', - u'/$Extend/$RmMetadata/$TxfLog', + u'/$Extend/$RmMetadata/$Repair:$Config', u'/$LogFile', u'/$MFT', u'/$MFTMirr', u'/$Secure', + u'/$Secure:$SDS', u'/$UpCase', u'/$Volume', u'/file1.txt', u'/file2.txt'] + if collect_directory_metadata: + expected_directory_metadata_paths_p1 = [ + u'/$Extend', + u'/$Extend/$RmMetadata', + u'/$Extend/$RmMetadata/$TxfLog', + ] + expected_paths_p2 = [ u'/$AttrDef', u'/$BadClus', + u'/$BadClus:$Bad', u'/$Bitmap', u'/$Boot', - u'/$Extend', u'/$Extend/$ObjId', u'/$Extend/$Quota', u'/$Extend/$Reparse', - u'/$Extend/$RmMetadata', u'/$Extend/$RmMetadata/$Repair', - u'/$Extend/$RmMetadata/$TxfLog', + u'/$Extend/$RmMetadata/$Repair:$Config', u'/$LogFile', u'/$MFT', u'/$MFTMirr', u'/$Secure', + u'/$Secure:$SDS', u'/$UpCase', u'/$Volume', u'/file1_on_part_2.txt', u'/file2_on_part_2.txt'] + if collect_directory_metadata: + expected_directory_metadata_paths_p2 = [ + u'/$Extend', + u'/$Extend/$RmMetadata', + u'/$Extend/$RmMetadata/$TxfLog', + ] + expected_paths = [] expected_paths.extend(expected_paths_p1) expected_paths.extend(expected_paths_p2) + if collect_directory_metadata: + expected_paths.extend(expected_directory_metadata_paths_p1) + expected_paths.extend(expected_directory_metadata_paths_p2) + self.assertEqual( test_collector_queue_consumer.number_of_path_specs, len(expected_paths)) self.assertEqual(sorted(paths), sorted(expected_paths)) + def testImageWithPartitionsCollections(self): + """Test collection on a storage media image with multiple partitions.""" + self._TestImageWithPartitionsCollections(True) + self._TestImageWithPartitionsCollections(False) + class BuildFindSpecsFromFileTest(unittest.TestCase): """Tests for the BuildFindSpecsFromFile function.""" diff --git a/utils/check_dependencies.py b/utils/check_dependencies.py index 497b749ca7..806e5d5743 100755 --- a/utils/check_dependencies.py +++ b/utils/check_dependencies.py @@ -14,8 +14,7 @@ if __name__ == u'__main__': if not plaso.dependencies.CheckDependencies(latest_version_check=True): build_instructions_url = ( - u'https://sites.google.com/a/kiddaland.net/plaso/developer' - u'/building-the-tool') + u'https://github.com/log2timeline/plaso/wiki/Users-Guide') print(u'See: {0:s} on how to set up plaso.'.format(build_instructions_url)) print(u'')