Skip to content

Commit

Permalink
Code review: 258650043: Data stream support and special metadata file…
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Dec 31, 2015
1 parent c3617f8 commit 29ac9ae
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 175 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ python-plaso (1.3.1-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <[email protected]> Tue, 22 Sep 2015 23:05:20 +0200
-- Log2Timeline <[email protected]> Wed, 23 Sep 2015 22:56:35 +0200
2 changes: 1 addition & 1 deletion plaso/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__version__ = '1.3.1'

VERSION_DEV = True
VERSION_DATE = '20150922'
VERSION_DATE = '20150923'


def GetVersion():
Expand Down
3 changes: 2 additions & 1 deletion plaso/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
u'pyevt': 20120410,
u'pyevtx': 20141112,
u'pyewf': 20131210,
u'pyfsntfs': 20150829,
u'pyfwsi': 20150606,
u'pylnk': 20150830,
u'pymsiecf': 20150314,
Expand All @@ -38,7 +39,7 @@
(u'binplist', u'__version__', u'0.1.4', None),
(u'construct', u'__version__', u'2.5.2', None),
(u'dateutil', u'__version__', u'1.5', None),
(u'dfvfs', u'__version__', u'20150630', None),
(u'dfvfs', u'__version__', u'20150829', None),
(u'dpkt', u'__version__', u'1.8', None),
# The protobuf module does not appear to have version information.
(u'google.protobuf', u'', u'', None),
Expand Down
100 changes: 55 additions & 45 deletions plaso/engine/collector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""Generic collector that supports both file system and image files."""

import copy
import hashlib
import logging

Expand All @@ -24,11 +25,11 @@ def __init__(self, path_spec_queue, resolver_context=None):
as a path specification (instance of dfvfs.PathSpec).
Args:
path_spec_queue: The path specification queue (instance of Queue).
path_spec_queue: the path specification queue (instance of Queue).
This queue contains path specifications (instances
of dfvfs.PathSpec) of the file entries that need
to be processed.
resolver_context: Optional resolver context (instance of dfvfs.Context).
resolver_context: optional resolver context (instance of dfvfs.Context).
The default is None.
"""
super(Collector, self).__init__(path_spec_queue)
Expand All @@ -43,8 +44,8 @@ def _ProcessFileSystem(self, path_spec, find_specs=None):
"""Processes a file system within a storage media image.
Args:
path_spec: The path specification of the root of the file system.
find_specs: Optional list of find specifications (instances of
path_spec: the path specification of the root of the file system.
find_specs: optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
try:
Expand Down Expand Up @@ -141,7 +142,7 @@ def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
collect_directory_metadata: boolean value to indicate to collect
directory metadata.
"""
self._fs_collector.SetCollectDirectoryMetadata(collect_directory_metadata)
Expand All @@ -150,7 +151,7 @@ def SetFilter(self, filter_find_specs):
"""Sets the collection filter find specifications.
Args:
filter_find_specs: List of filter find specifications (instances of
filter_find_specs: list of filter find specifications (instances of
dfvfs.FindSpec).
"""
self._filter_find_specs = filter_find_specs
Expand All @@ -172,7 +173,7 @@ def __init__(self, path_spec_queue):
as a path specification (instance of dfvfs.PathSpec).
Args:
path_spec_queue: The path specification queue (instance of Queue).
path_spec_queue: the path specification queue (instance of Queue).
This queue contains path specifications (instances
of dfvfs.PathSpec) of the file entries that need
to be processed.
Expand All @@ -182,21 +183,11 @@ def __init__(self, path_spec_queue):
self._duplicate_file_check = False
self._hashlist = {}

self.number_of_file_entries = 0

def __enter__(self):
"""Enters a with statement."""
return self

def __exit__(self, unused_type, unused_value, unused_traceback):
"""Exits a with statement."""
return

def _CalculateNTFSTimeHash(self, file_entry):
"""Return a hash value calculated from a NTFS file's metadata.
Args:
file_entry: The file entry (instance of TSKFileEntry).
file_entry: the file entry (instance of TSKFileEntry).
Returns:
A hash value (string) that can be used to determine if a file's timestamp
Expand All @@ -205,26 +196,53 @@ def _CalculateNTFSTimeHash(self, file_entry):
stat_object = file_entry.GetStat()
ret_hash = hashlib.md5()

ret_hash.update('atime:{0:d}.{1:d}'.format(
getattr(stat_object, 'atime', 0),
getattr(stat_object, 'atime_nano', 0)))
ret_hash.update(b'atime:{0:d}.{1:d}'.format(
getattr(stat_object, u'atime', 0),
getattr(stat_object, u'atime_nano', 0)))

ret_hash.update('crtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'crtime', 0),
getattr(stat_object, 'crtime_nano', 0)))
ret_hash.update(b'crtime:{0:d}.{1:d}'.format(
getattr(stat_object, u'crtime', 0),
getattr(stat_object, u'crtime_nano', 0)))

ret_hash.update('mtime:{0:d}.{1:d}'.format(
getattr(stat_object, 'mtime', 0),
getattr(stat_object, 'mtime_nano', 0)))
ret_hash.update(b'mtime:{0:d}.{1:d}'.format(
getattr(stat_object, u'mtime', 0),
getattr(stat_object, u'mtime_nano', 0)))

ret_hash.update('ctime:{0:d}.{1:d}'.format(
getattr(stat_object, 'ctime', 0),
getattr(stat_object, 'ctime_nano', 0)))
ret_hash.update(b'ctime:{0:d}.{1:d}'.format(
getattr(stat_object, u'ctime', 0),
getattr(stat_object, u'ctime_nano', 0)))

return ret_hash.hexdigest()

def _ProcessDataStreams(self, file_entry):
"""Processes the data streams in a file entry.
Args:
file_entry: a file entry (instance of dfvfs.FileEntry).
"""
produced_main_path_spec = False
for data_stream in file_entry.data_streams:
# Make a copy so we don't make the changes on a path specification
# directly. Otherwise already produced path specifications can be
# altered in the process.
path_spec = copy.deepcopy(file_entry.path_spec)
setattr(path_spec, u'data_stream', data_stream.name)
self.ProduceItem(path_spec)

if not data_stream.name:
produced_main_path_spec = True

if (not produced_main_path_spec and (
not file_entry.IsDirectory() or self._collect_directory_metadata)):
self.ProduceItem(file_entry.path_spec)

def _ProcessDirectory(self, file_entry):
"""Processes a directory and extract its metadata if necessary."""
"""Processes a directory and extract its metadata if necessary.
Args:
file_entry: a file entry (instance of dfvfs.FileEntry) that refers
to the directory to process.
"""
# Need to do a breadth-first search, otherwise we'll hit the Python
# maximum recursion depth.
sub_directories = []
Expand All @@ -250,12 +268,6 @@ def _ProcessDirectory(self, file_entry):
continue

if sub_file_entry.IsDirectory():
# This check is here to improve performance by not producing
# path specifications that don't get processed.
if self._collect_directory_metadata:
self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1

sub_directories.append(sub_file_entry)

elif sub_file_entry.IsFile():
Expand All @@ -266,15 +278,14 @@ def _ProcessDirectory(self, file_entry):
if self._duplicate_file_check:
hash_value = self._CalculateNTFSTimeHash(sub_file_entry)

inode = getattr(sub_file_entry.path_spec, 'inode', 0)
inode = getattr(sub_file_entry.path_spec, u'inode', 0)
if inode in self._hashlist:
if hash_value in self._hashlist[inode]:
continue

self._hashlist.setdefault(inode, []).append(hash_value)

self.ProduceItem(sub_file_entry.path_spec)
self.number_of_file_entries += 1
self._ProcessDataStreams(sub_file_entry)

for sub_file_entry in sub_directories:
if self._abort:
Expand All @@ -291,9 +302,9 @@ def Collect(self, file_system, path_spec, find_specs=None):
"""Collects files from the file system.
Args:
file_system: The file system (instance of dfvfs.FileSystem).
path_spec: The path specification (instance of dfvfs.PathSpec).
find_specs: Optional list of find specifications (instances of
file_system: the file system (instance of dfvfs.FileSystem).
path_spec: the path specification (instance of dfvfs.PathSpec).
find_specs: optional list of find specifications (instances of
dfvfs.FindSpec). The default is None.
"""
if find_specs:
Expand All @@ -304,7 +315,6 @@ def Collect(self, file_system, path_spec, find_specs=None):
return

self.ProduceItem(path_spec)
self.number_of_file_entries += 1

else:
file_entry = file_system.GetFileEntryByPathSpec(path_spec)
Expand All @@ -315,7 +325,7 @@ def SetCollectDirectoryMetadata(self, collect_directory_metadata):
"""Sets the collect directory metadata flag.
Args:
collect_directory_metadata: Boolean value to indicate to collect
collect_directory_metadata: boolean value to indicate to collect
directory metadata.
"""
self._collect_directory_metadata = collect_directory_metadata
Loading

0 comments on commit 29ac9ae

Please sign in to comment.