From d0052c6fa9bd139c7dd38b827bb3b2fa0e01e38d Mon Sep 17 00:00:00 2001 From: Daniel White Date: Wed, 3 Jun 2015 15:01:56 +0200 Subject: [PATCH] Code review: 239300045: Add VirusTotal analysis plugin. --- config/dpkg/changelog | 2 +- docs/plaso.analysis.rst | 16 + docs/plaso.cli.helpers.rst | 8 + plaso/analysis/__init__.py | 1 + plaso/analysis/interface.py | 382 ++++++++++++++++++++++- plaso/analysis/virustotal.py | 180 +++++++++++ plaso/analysis/virustotal_test.py | 122 ++++++++ plaso/cli/helpers/__init__.py | 1 + plaso/cli/helpers/virustotal_analysis.py | 63 ++++ plaso/dependencies.py | 1 + plaso/frontend/psort.py | 21 +- plaso/lib/errors.py | 3 + 12 files changed, 775 insertions(+), 25 deletions(-) create mode 100644 plaso/analysis/virustotal.py create mode 100644 plaso/analysis/virustotal_test.py create mode 100644 plaso/cli/helpers/virustotal_analysis.py diff --git a/config/dpkg/changelog b/config/dpkg/changelog index 06f55bcc1d..94e3761fe8 100644 --- a/config/dpkg/changelog +++ b/config/dpkg/changelog @@ -2,4 +2,4 @@ python-plaso (1.2.1-1) unstable; urgency=low * Auto-generated - -- Log2Timeline + -- Log2Timeline Wed, 03 Jun 2015 15:01:50 +0200 diff --git a/docs/plaso.analysis.rst b/docs/plaso.analysis.rst index 44427f7c18..075168f9ce 100644 --- a/docs/plaso.analysis.rst +++ b/docs/plaso.analysis.rst @@ -116,6 +116,22 @@ plaso.analysis.test_lib module :undoc-members: :show-inheritance: +plaso.analysis.virustotal module +-------------------------------- + +.. automodule:: plaso.analysis.virustotal + :members: + :undoc-members: + :show-inheritance: + +plaso.analysis.virustotal_test module +------------------------------------- + +.. automodule:: plaso.analysis.virustotal_test + :members: + :undoc-members: + :show-inheritance: + plaso.analysis.windows_services module -------------------------------------- diff --git a/docs/plaso.cli.helpers.rst b/docs/plaso.cli.helpers.rst index cb3d3884b2..8b6245f5ea 100644 --- a/docs/plaso.cli.helpers.rst +++ b/docs/plaso.cli.helpers.rst @@ -44,6 +44,14 @@ plaso.cli.helpers.tagging_analysis module :undoc-members: :show-inheritance: +plaso.cli.helpers.virustotal_analysis module +-------------------------------------------- + +.. automodule:: plaso.cli.helpers.virustotal_analysis + :members: + :undoc-members: + :show-inheritance: + plaso.cli.helpers.windows_services_analysis module -------------------------------------------------- diff --git a/plaso/analysis/__init__.py b/plaso/analysis/__init__.py index 812cd4d1ac..51c04a3070 100644 --- a/plaso/analysis/__init__.py +++ b/plaso/analysis/__init__.py @@ -5,4 +5,5 @@ from plaso.analysis import chrome_extension from plaso.analysis import file_hashes from plaso.analysis import tagging +from plaso.analysis import virustotal from plaso.analysis import windows_services diff --git a/plaso/analysis/interface.py b/plaso/analysis/interface.py index ca1fc09e7c..7d9e6957f5 100644 --- a/plaso/analysis/interface.py +++ b/plaso/analysis/interface.py @@ -1,10 +1,17 @@ # -*- coding: utf-8 -*- -"""This file contains basic interface for analysis plugins.""" +"""This file contains the interface for analysis plugins.""" import abc +import logging +import Queue +import threading +import time + +from collections import defaultdict from plaso.engine import queue from plaso.lib import timelib +from plaso.lib import event class AnalysisPlugin(queue.ItemQueueConsumer): @@ -36,21 +43,9 @@ class AnalysisPlugin(queue.ItemQueueConsumer): # annotating or tagging them. TYPE_REPORT = 4 # Inspecting events to provide a summary information. - # Optional arguments to be added to the argument parser. - # An example would be: - # ARGUMENTS = [('--myparameter', { - # 'action': 'store', - # 'help': 'This is my parameter help', - # 'dest': 'myparameter', - # 'default': '', - # 'type': 'unicode'})] - # - # Where all arguments into the dict object have a direct translation - # into the argparse parser. - ARGUMENTS = [] - - # We need to implement the interface for analysis plugins, but we don't use - # command line options here, so disable checking for unused args. + # A flag to indicate that this plugin takes a long time to compile a report. + LONG_RUNNING_PLUGIN = False + def __init__(self, incoming_queue): """Initializes an analysis plugin. @@ -115,8 +110,361 @@ def RunPlugin(self, analysis_mediator): analysis_report = self.CompileReport(analysis_mediator) if analysis_report: - # TODO: move this into the plugins? analysis_report.time_compiled = timelib.Timestamp.GetNow() analysis_mediator.ProduceAnalysisReport( analysis_report, plugin_name=self.plugin_name) analysis_mediator.ReportingComplete() + + +class HashTaggingAnalysisPlugin(AnalysisPlugin): + """An interface for plugins that tag events based on the source file hash. + + An implementation of this class should be paired with an implementation of + the HashAnalyzer interface. + + Attributes: + hash_analysis_queue: A queue (instance of Queue.Queue) that contains + the results of analysis of file hashes. + hash_queue: A queue (instance of Queue.Queue) that contains file hashes. + """ + # The event data types the plugin will collect hashes from. Subclasses + # must override this attribute. + DATA_TYPES = [] + # The plugin will select hashes from the event objects from these attributes, + # in priority order. More collision-resistant hashing algorithms should be + # preferred over less resistant algorithms. + REQUIRED_HASH_ATTRIBUTES = frozenset( + [u'sha256_hash', u'sha1_hash', u'md5_hash']) + # The default number of seconds for the plugin to wait for analysis results + # to be added to the hash_analysis_queue by the analyzer thread. + DEFAULT_QUEUE_TIMEOUT = 4 + SECONDS_BETWEEN_STATUS_LOG_MESSAGES = 30 + # Hashing analysis plugins can take a long time to run, so set this by + # default. + LONG_RUNNING_PLUGIN = True + + def __init__(self, incoming_queue, analyzer_class): + """Initializes a hash tagging analysis plugin. + + Args: + incoming_queue: A queue that is used to listen for incoming events. + analyzer_class: A subclass of HashAnalyzer that will be instantiated + by the plugin. + """ + super(HashTaggingAnalysisPlugin, self).__init__(incoming_queue) + self._analysis_queue_timeout = self.DEFAULT_QUEUE_TIMEOUT + self._analyzer_started = False + self._event_uuids_by_pathspec = defaultdict(list) + self._hash_pathspecs = defaultdict(list) + self._requester_class = None + self._time_of_last_status_log = time.time() + self.hash_analysis_queue = Queue.Queue() + self.hash_queue = Queue.Queue() + + self._analyzer = analyzer_class(self.hash_queue, self.hash_analysis_queue) + + def _GenerateTextLine(self, analysis_mediator, pathspec, tag_string): + """Generates a line of text regarding the plugins findings. + + Args: + analysis_mediator: The analysis mediator object (instance of + AnalysisMediator). + pathspec: The pathspec (instance of dfvfs.PathSpec) whose hash was + looked up by the plugin. + tag_string: A string describing the plugin's results for a given pathspec. + """ + display_name = analysis_mediator.GetDisplayName(pathspec) + return u'{0:s}: {1:s}'.format(display_name, tag_string) + + @abc.abstractmethod + def GenerateTagString(self, hash_information): + """Generates a string to tag events with. + + Args: + hash_information: An object that encapsulates the result of the + analysis of a hash, as returned by the Analyze() method + of the analyzer class associated with this plugin. + """ + + def _CreateTag(self, event_uuid, tag_string): + """Creates an event tag. + + Args: + event_uuid: The UUID of the event that should be tagged. + tag_string: The string that the event should be tagged with. + """ + event_tag = event.EventTag() + event_tag.event_uuid = event_uuid + event_tag.comment = u'Tag applied by {0:s} analysis plugin'.format( + self.NAME) + event_tag.tags = [tag_string] + return event_tag + + def _HandleHashAnalysis(self, hash_analysis): + """Deals with a the results of the analysis of a hash. + + This method ensures that a tag string is generated for the hash, + then tags all events derived from files with that hash. + + Args: + hash_analysis: The the hash analysis plugin's results for a given hash + (instance of HashAnalysis). + + Returns: + A tuple of: + pathspecs: A list of pathspecs that had the hash value looked up. + tag_string: The string that corresponds to the hash value that was + looked up. + tags: A list of EventTags for the events that were extracted from the + pathspecs. + """ + tags = [] + event_uuids = [] + tag_string = self.GenerateTagString(hash_analysis.hash_information) + pathspecs = self._hash_pathspecs[hash_analysis.subject_hash] + for pathspec in pathspecs: + event_uuids.extend(self._event_uuids_by_pathspec[pathspec]) + for event_uuid in event_uuids: + tag = self._CreateTag(event_uuid, tag_string) + tags.append(tag) + return pathspecs, tag_string, tags + + def _EnsureRequesterStarted(self): + """Checks if the analyzer is running and starts it if not.""" + if not self._analyzer_started: + self._analyzer.start() + self._analyzer_started = True + + def ExamineEvent(self, analysis_mediator, event_object, **kwargs): + """Evaluates whether an event contains the appropriate data for a hash + lookup. + + Args: + analysis_mediator: The analysis mediator object (instance of + AnalysisMediator). + event_object: An event object (instance of EventObject). + """ + self._EnsureRequesterStarted() + pathspec = event_object.pathspec + event_uuids = self._event_uuids_by_pathspec[pathspec] + event_uuids.append(event_object.uuid) + if event_object.data_type in self.DATA_TYPES: + for attribute in self.REQUIRED_HASH_ATTRIBUTES: + hash_for_lookup = getattr(event_object, attribute, None) + if not hash_for_lookup: + continue + pathspecs = self._hash_pathspecs[hash_for_lookup] + pathspecs.append(pathspec) + # There may be multiple pathspecs that have the same hash. We only + # want to look them up once. + if len(pathspecs) == 1: + self.hash_queue.put(hash_for_lookup) + return + warning_message = ( + u'Event with ID {0:s} had none of the required attributes ' + u'{1:s}.').format( + event_object.uuid, self.REQUIRED_HASH_ATTRIBUTES) + logging.warning(warning_message) + + def _ContinueReportCompilation(self): + """Determines if the plugin should continue trying to compile the report. + + Returns: + True if the plugin should continue, False otherwise. + """ + analyzer_alive = self._analyzer.is_alive() + hash_queue_has_tasks = self.hash_queue.unfinished_tasks > 0 + analysis_queue = not self.hash_analysis_queue.empty() + return (analyzer_alive and hash_queue_has_tasks) or analysis_queue + + def EstimateTimeRemaining(self): + """Estimate how long until all hashes have been analyzed. + + Returns: + The estimated number of seconds until all hashes have been analyzed. + """ + number_of_hashes = self.hash_queue.qsize() + hashes_per_batch = self._analyzer.hashes_per_batch + wait_time_per_batch = self._analyzer.wait_after_analysis + average_analysis_time = ( + self._analyzer.seconds_spent_analyzing / + self._analyzer.analyses_performed) + batches_remaining = number_of_hashes / hashes_per_batch + estimated_seconds_per_batch = average_analysis_time + wait_time_per_batch + return batches_remaining / estimated_seconds_per_batch + + # TODO: Refactor to do this more elegantly, perhaps via callback. + def _LogProgressUpdateIfReasonable(self): + """Prints a progress update if enough time has passed.""" + next_log_time = ( + self._time_of_last_status_log + + self.SECONDS_BETWEEN_STATUS_LOG_MESSAGES) + next_log_time = time.ctime(next_log_time) + current_time = time.time() + if current_time < next_log_time: + return + completion_time = current_time + self.EstimateTimeRemaining() + log_message = ( + u'{0:s} hash analysis plugin running. {1:d} hashes in queue, ' + u'estimated completion time {2:s}.'.format( + self.NAME, self.hash_queue.qsize(), completion_time)) + logging.info(log_message) + self._time_of_last_status_log = current_time + + def CompileReport(self, analysis_mediator): + """Compiles a report of the analysis. + + Args: + analysis_mediator: The analysis mediator object (instance of + AnalysisMediator). + + Returns: + The analysis report (instance of AnalysisReport). + """ + tags = [] + lines_of_text = [u'{0:s} hash tagging Results'.format(self.NAME)] + while self._ContinueReportCompilation(): + try: + self._LogProgressUpdateIfReasonable() + hash_analysis = self.hash_analysis_queue.get( + timeout=self._analysis_queue_timeout) + except Queue.Empty: + # The result queue is empty, but there could still be items that need + # to be processed by the analyzer. + continue + pathspecs, tag_string, new_tags = self._HandleHashAnalysis( + hash_analysis) + tags.extend(new_tags) + for pathspec in pathspecs: + text_line = self._GenerateTextLine( + analysis_mediator, pathspec, tag_string) + lines_of_text.append(text_line) + self._analyzer.SignalAbort() + + report = event.AnalysisReport(self.NAME) + report.SetText(lines_of_text) + report.SetTags(tags) + return report + + +class HashAnalyzer(threading.Thread): + """Class that defines the interfaces for hash analyzer threads. + + This interface should be implemented once for each hash analysis plugin. + + Attributes: + analyses_performed: The number of analysis batches completed by this + analyzer. + hashes_per_batch: The maximum number of hashes to analyze at once. + seconds_spent_analyzing: The number of seconds this analyzer has spent + performing analysis (as opposed to waiting on + queues, etc.) + wait_after_analysis: How long the analyzer will sleep for after analyzing a + batch of hashes. + """ + # How long to wait for new items to be added to the the input queue. + EMPTY_QUEUE_WAIT_TIME = 4 + + def __init__( + self, hash_queue, hash_analysis_queue, hashes_per_batch=1, + wait_after_analysis=0): + """Initializes a hash analyzer. + + Args: + hash_queue: A queue (instance of Queue.queue) that contains hashes to + be analyzed. + hash_analysis_queue: A queue (instance of Queue.queue) that the analyzer + will append HashAnalysis objects to. + hashes_per_batch: The number of hashes to analyze at once. The default + is 1. + wait_after_analysis: The number of seconds to wait after each batch is + analyzed. The default is 0. + """ + super(HashAnalyzer, self).__init__() + self._abort = False + self._hash_queue = hash_queue + self._hash_analysis_queue = hash_analysis_queue + self.analyses_performed = 0 + self.hashes_per_batch = hashes_per_batch + self.seconds_spent_analyzing = 0 + self.wait_after_analysis = wait_after_analysis + # Indicate that this is a daemon thread. The program will exit if only + # daemon threads are running. This thread should never block program exit. + self.daemon = True + + @abc.abstractmethod + def Analyze(self, hashes): + """Analyzes a list of hashes. + + Args: + hashes: A list of hashes (strings) to look up. + + Returns: + A list of hash analysis objects (instances of HashAnalysis). + """ + + def _GetHashes(self, target_queue, max_hashes): + """Retrieves a list of items from a queue. + + Args: + target_queue: The target_queue to retrieve items from. + max_hashes: The maximum number of items to retrieve from the target_queue. + + Returns: + A list of at most max_hashes elements from the target_queue. The list + may have no elements if the target_queue is empty. + """ + hashes = [] + for _ in range(0, max_hashes): + try: + item = target_queue.get_nowait() + except Queue.Empty: + continue + hashes.append(item) + return hashes + + def SignalAbort(self): + """Instructs this analyzer to stop running.""" + self._abort = True + + # This method is part of the threading.Thread interface, hence its name does + # not follow the style guide. + def run(self): + """The method called by the threading library to start the thread.""" + while not self._abort: + hashes = self._GetHashes(self._hash_queue, self.hashes_per_batch) + if hashes: + time_before_analysis = time.time() + hash_analyses = self.Analyze(hashes) + current_time = time.time() + self.seconds_spent_analyzing += current_time - time_before_analysis + self.analyses_performed += 1 + for hash_analysis in hash_analyses: + self._hash_analysis_queue.put(hash_analysis) + self._hash_queue.task_done() + time.sleep(self.wait_after_analysis) + else: + # Wait for some more hashes to be added to the queue. + time.sleep(self.EMPTY_QUEUE_WAIT_TIME) + + +class HashAnalysis(object): + """A class that holds information about a hash. + + Attributes: + hash_information: An object containing information about the hash. + subject_hash: The hash that was analyzed. + """ + + def __init__(self, subject_hash, hash_information): + """Initializes a HashAnalysis object. + + Args: + subject_hash: The hash that the hash_information relates to. + hash_information: An object containing information about the hash. + This object will be used by the _GenerateTagString + method in the HashTaggingAnalysisPlugin to tag events + that relate to the hash. + """ + self.hash_information = hash_information + self.subject_hash = subject_hash diff --git a/plaso/analysis/virustotal.py b/plaso/analysis/virustotal.py new file mode 100644 index 0000000000..81cfe02dc0 --- /dev/null +++ b/plaso/analysis/virustotal.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +"""Look up files in VirusTotal and tag events derived from them.""" + +import logging + +import requests + +from plaso.analysis import interface +from plaso.analysis import manager +from plaso.lib import errors + +class VirusTotalAnalyzer(interface.HashAnalyzer): + """Class that analyzes file hashes by consulting VirusTotal.""" + VIRUSTOTAL_API_REPORT_URL = u'https://www.virustotal.com/vtapi/v2/file/report' + + + def __init__(self, hash_queue, hash_analysis_queue, **kwargs): + """Initializes a VirusTotal Analyzer thread. + + Args: + hash_queue: A queue (instance of Queue.queue) that contains hashes to + be analyzed. + hash_analysis_queue: A queue (instance of Queue.queue) that the analyzer + will append HashAnalysis objects to. + """ + super(VirusTotalAnalyzer, self).__init__( + hash_queue, hash_analysis_queue, **kwargs) + self._api_key = None + + def SetAPIKey(self, api_key): + """Sets the VirusTotal API key to use in queries. + + Args: + _api_key: The VirusTotal API key + """ + self._api_key = api_key + + def Analyze(self, hashes): + """Looks up hashes in VirusTotal using the VirusTotal HTTP API. + + The API is documented here: + https://www.virustotal.com/en/documentation/public-api/ + + Args: + hashes: A list of hashes (strings) to look up. + + Returns: + A list of HashAnalysis objects. + """ + if not self._api_key: + raise RuntimeError(u'No API key specified for VirusTotal lookup.') + + hash_analyses = [] + try: + json_response = self._GetVirusTotalJSONResponse(hashes) + except errors.ConnectionError as exception: + logging.error( + u'Error communicating with VirusTotal {0:s}. VirusTotal plugin is ' + u'aborting.'.format(exception)) + self.SignalAbort() + return hash_analyses + # The content of the response from VirusTotal has a different structure if + # one or more than once hash is looked up at once. + if isinstance(json_response, dict): + # Only one result. + resource = json_response[u'resource'] + hash_analysis = interface.HashAnalysis(resource, json_response) + hash_analyses.append(hash_analysis) + else: + for result in json_response: + resource = result[u'resource'] + hash_analysis = interface.HashAnalysis(resource, result) + hash_analyses.append(hash_analysis) + return hash_analyses + + def _GetVirusTotalJSONResponse(self, hashes): + """Makes a request to VirusTotal for information about hashes. + + Args: + A list of file hashes (strings). + + Returns: + The decoded JSON response from the VirusTotal API for the hashes. + + Raises: + requests.ConnectionError: If it was not possible to connect to + VirusTotal. + requests.exceptions.HTTPError: If the VirusTotal server returned an + error code. + """ + resource_string = u', '.join(hashes) + params = {u'apikey': self._api_key, u'resource': resource_string} + try: + response = requests.get(self.VIRUSTOTAL_API_REPORT_URL, params=params) + response.raise_for_status() + except requests.ConnectionError as exception: + error_string = u'Unable to connect to VirusTotal: {0:s}'.format( + exception) + raise errors.ConnectionError(error_string) + except requests.HTTPError as exception: + error_string = u'VirusTotal returned a HTTP error: {0:s}'.format( + exception) + raise errors.ConnectionError(error_string) + json_response = response.json() + return json_response + + +class VirusTotalAnalysisPlugin(interface.HashTaggingAnalysisPlugin): + """An analysis plugin for looking up hashes in VirusTotal.""" + # VirusTotal allows lookups using any of these hash algorithms. + REQUIRED_HASH_ATTRIBUTES = [u'sha256_hash', u'sha1_hash', u'md5_hash'] + + # TODO: Check if there are other file types worth checking VirusTotal for. + DATA_TYPES = [u'pe:compilation:compilation_time'] + + URLS = [u'https://virustotal.com'] + + NAME = u'virustotal' + + _VIRUSTOTAL_NOT_PRESENT_RESPONSE_CODE = 0 + _VIRUSTOTAL_PRESENT_RESPONSE_CODE = 1 + _VIRUSTOTAL_ANALYSIS_PENDING_RESPONSE_CODE = -2 + + def __init__(self, event_queue): + """Initializes a VirusTotal analysis plugin. + + Args: + event_queue: A queue that is used to listen for incoming events. + """ + super(VirusTotalAnalysisPlugin, self).__init__( + event_queue, VirusTotalAnalyzer) + self._api_key = None + + def SetAPIKey(self, api_key): + """Sets the VirusTotal API key to use in queries. + + Args: + _api_key: The VirusTotal API key + """ + self._analyzer.SetAPIKey(api_key) + + def EnableFreeAPIKeyRateLimit(self, rate_limit): + """Configures Rate limiting for queries to VirusTotal. + + The default rate limit for free VirusTotal API keys is 4 requests per + minute. + + Args: + rate_limit: Whether not to apply the free API key rate limit. + """ + if rate_limit: + self._analyzer.hashes_per_batch = 4 + self._analyzer.wait_after_analysis = 60 + self._analysis_queue_timeout = self._analyzer.wait_after_analysis + 1 + + def GenerateTagString(self, hash_information): + """Generates a string that will be used in the event tag. + + Args: + hash_information: A dictionary containing the JSON decoded contents of the + result of a VirusTotal lookup, as produced by the + VirusTotalAnalyzer. + """ + response_code = hash_information[u'response_code'] + if response_code == self._VIRUSTOTAL_NOT_PRESENT_RESPONSE_CODE: + return u'Unknown to VirusTotal' + elif response_code == self._VIRUSTOTAL_PRESENT_RESPONSE_CODE: + positives = hash_information[u'positives'] + if positives > 0: + return u'VirusTotal Detections {0:d}'.format(positives) + return u'No VirusTotal Detections' + elif response_code == self._VIRUSTOTAL_ANALYSIS_PENDING_RESPONSE_CODE: + return u'VirusTotal Analysis Pending' + else: + logging.error(u'VirusTotal returned unknown response code ' + u'{0!s}'.format(response_code)) + return u'VirusTotal Unknown Response code {0!s}'.format(response_code) + + +manager.AnalysisPluginManager.RegisterPlugin(VirusTotalAnalysisPlugin) diff --git a/plaso/analysis/virustotal_test.py b/plaso/analysis/virustotal_test.py new file mode 100644 index 0000000000..ae42a10d73 --- /dev/null +++ b/plaso/analysis/virustotal_test.py @@ -0,0 +1,122 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +"""Tests for the VirusTotal analysis plugin.""" + +import mock +import unittest + +from dfvfs.path import fake_path_spec + +from plaso.analysis import test_lib +from plaso.analysis import virustotal +from plaso.engine import queue +from plaso.engine import single_process +from plaso.lib import timelib +from plaso.parsers import pe + + +class VirusTotalTest(test_lib.AnalysisPluginTestCase): + """Tests for the VirusTotal analysis plugin.""" + EVENT_1_HASH = u'90' + FAKE_API_KEY = u'4' + TEST_EVENTS = [ + {u'timestamp': timelib.Timestamp.CopyFromString('2015-01-01 17:00:00'), + u'sha256_hash': EVENT_1_HASH, + u'uuid': u'8' + } + ] + + def setUp(self): + """Sets up required objects prior running the tests.""" + self.requests_patcher = mock.patch('requests.get', self._MockGet) + self.requests_patcher.start() + + def tearDown(self): + """Resets required object after testing.""" + self.requests_patcher.stop() + + def _MockGet(self, url, params): + """A mock method to simulate making an API request to VirusTotal. + + Args: + url: The URL (string) being requested. + params: HTTP parameters (instance of dict) for the VirusTotal API request. + + Returns: + A mocked response object (instance of MockResponse) that + simulates a real response object returned by the requests library from the + VirusTotal API. + """ + self.assertEqual( + url, virustotal.VirusTotalAnalyzer.VIRUSTOTAL_API_REPORT_URL) + if params[u'resource'] == self.EVENT_1_HASH: + response = MockResponse() + response[u'resource'] = self.EVENT_1_HASH + response[u'response_code'] = 1 + response[u'positives'] = 10 + return response + self.fail(u'Unexpected parameters to request.get()') + + def _CreateTestEventObject(self, pe_event): + """Create a test event object with a particular path. + + Args: + service_event: A hash containing attributes of an event to add to the + queue. + + Returns: + An event object (instance of EventObject) that contains the necessary + attributes for testing. + """ + test_pathspec = fake_path_spec.FakePathSpec( + location=u'C:\\WINDOWS\\system32\\evil.exe') + event_object = pe.PECompilationEvent( + timestamp=pe_event[u'timestamp'], pe_type=u'Executable (EXE)', + section_names=[], imphash='') + event_object.pathspec = test_pathspec + event_object.sha256_hash = pe_event[u'sha256_hash'] + event_object.uuid = pe_event[u'uuid'] + return event_object + + def testVirusTotalLookup(self): + """Tests for the VirusTotal analysis plugin.""" + event_queue = single_process.SingleProcessQueue() + knowledge_base = self._SetUpKnowledgeBase() + + # Fill the incoming queue with events. + test_queue_producer = queue.ItemQueueProducer(event_queue) + events = [self._CreateTestEventObject(test_event) + for test_event + in self.TEST_EVENTS] + test_queue_producer.ProduceItems(events) + analysis_plugin = virustotal.VirusTotalAnalysisPlugin(event_queue) + analysis_plugin.SetAPIKey(self.FAKE_API_KEY) + + # Run the analysis plugin. + analysis_report_queue_consumer = self._RunAnalysisPlugin( + analysis_plugin, knowledge_base) + analysis_reports = self._GetAnalysisReportsFromQueue( + analysis_report_queue_consumer) + + self.assertEqual(len(analysis_reports), 1) + report = analysis_reports[0] + tags = report.GetTags() + self.assertEqual(len(tags), 1) + tag = tags[0] + self.assertEqual(tag.event_uuid, u'8') + self.assertEqual(tag.tags[0], u'VirusTotal Detections 10') + + +class MockResponse(dict): + """An object to simulate a response object from the requests library.""" + def json(self): + """Provided for compatibility with the requests library.""" + return self + + def raise_for_status(self): + """Provided for compatibility with the requests library.""" + return + + +if __name__ == '__main__': + unittest.main() diff --git a/plaso/cli/helpers/__init__.py b/plaso/cli/helpers/__init__.py index 5c59546a2e..0ab2f19db7 100644 --- a/plaso/cli/helpers/__init__.py +++ b/plaso/cli/helpers/__init__.py @@ -2,4 +2,5 @@ """This file contains an import statement for each argument helper.""" from plaso.cli.helpers import elastic_output +from plaso.cli.helpers import virustotal_analysis from plaso.cli.helpers import windows_services_analysis diff --git a/plaso/cli/helpers/virustotal_analysis.py b/plaso/cli/helpers/virustotal_analysis.py new file mode 100644 index 0000000000..0fcf91cd93 --- /dev/null +++ b/plaso/cli/helpers/virustotal_analysis.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +"""The arguments helper for the VirusTotal analysis plugin.""" + +from plaso.lib import errors +from plaso.cli.helpers import interface +from plaso.cli.helpers import manager +from plaso.analysis import virustotal + + +class VirusTotalAnalysisHelper(interface.ArgumentsHelper): + """CLI arguments helper class for the VirusTotal analysis plugin.""" + + NAME = u'virustotal_analysis' + CATEGORY = u'analysis' + DESCRIPTION = u'Argument helper for the VirusTotal analysis plugin.' + + @classmethod + def AddArguments(cls, argument_group): + """Add command line arguments the helper supports to an argument group. + + This function takes an argument parser or an argument group object and adds + to it all the command line arguments this helper supports. + + Args: + argument_group: the argparse group (instance of argparse._ArgumentGroup or + or argparse.ArgumentParser). + """ + argument_group.add_argument( + u'--virustotal-api-key', dest=u'virustotal-api-key', + type=unicode, action='store', default=u'text', help=u'Specify the API ' + u'key for use with VirusTotal.') + argument_group.add_argument( + u'--virustotal-free-rate-limit', dest=u'virustotal-rate-limit', + type=bool, action='store', default=True, help=u'Limit Virustotal ' + u'requests to the default free API key rate of 4 requests per minute. ' + u'Set this to false if you have an key for the private API.') + + @classmethod + def ParseOptions(cls, options, analysis_plugin): + """Parses and validates options. + + Args: + options: the parser option object (instance of argparse.Namespace). + analysis_plugin: an analysis plugin (instance of AnalysisPlugin). + + Raises: + BadConfigObject: when the output module object is of the wrong type. + BadConfigOption: when a configuration parameter fails validation. + """ + if not isinstance(analysis_plugin, virustotal.VirusTotalAnalysisPlugin): + raise errors.BadConfigObject( + u'Analysis plugin is not an instance of VirusTotalAnalysisPlugin') + + api_key = getattr(options, u'virustotal-api-key', None) + rate_limit = getattr(options, u'virustotal-rate-limit', None) + if api_key is None: + raise errors.BadConfigOption(u'VirusTotal API key not set.') + + analysis_plugin.SetAPIKey(api_key) + analysis_plugin.EnableFreeAPIKeyRateLimit(rate_limit) + + +manager.ArgumentHelperManager.RegisterHelper(VirusTotalAnalysisHelper) diff --git a/plaso/dependencies.py b/plaso/dependencies.py index 991e9c10a6..d55cf90a3d 100644 --- a/plaso/dependencies.py +++ b/plaso/dependencies.py @@ -50,6 +50,7 @@ # TODO: determine the version of pytz. # pytz uses __version__ but has a different version indicator e.g. 2012d (u'pytz', u'', u'', None), + (u'requests', u'__version__', u'2.2.1', None), (u'six', u'__version__', u'1.1.0', None), (u'sqlite3', u'sqlite_version', u'3.7.8', None), (u'yaml', u'__version__', u'3.10', None)] diff --git a/plaso/frontend/psort.py b/plaso/frontend/psort.py index 4a83343cff..460834c9d6 100644 --- a/plaso/frontend/psort.py +++ b/plaso/frontend/psort.py @@ -146,19 +146,26 @@ def _ProcessAnalysisPlugins( # Wait for all analysis plugins to complete. for analysis_process_info in self._analysis_process_info: - name = analysis_process_info.plugin_name + name = analysis_process_info.plugin.name + if analysis_process_info.plugin.LONG_RUNNING_PLUGIN: + logging.warning( + u'{0:s} may take a long time to run. It will not be automatically ' + u'terminated.'.format(name)) + report_wait = None + else: + report_wait = self.MAX_ANALYSIS_PLUGIN_REPORT_WAIT completion_event = analysis_process_info.completion_event process = analysis_process_info.process logging.info( u'Waiting for analysis plugin: {0:s} to complete.'.format(name)) - if completion_event.wait(self.MAX_ANALYSIS_PLUGIN_REPORT_WAIT): + if completion_event.wait(report_wait): logging.info(u'Plugin {0:s} has completed.'.format(name)) else: logging.warning( u'Analysis process {0:s} failed to compile its report in a ' u'reasonable time. No report will be displayed or stored.'.format( name)) - process.Terminate() + process.terminate() logging.info(u'All analysis plugins are now completed.') @@ -360,7 +367,7 @@ def ProcessStorage( target=analysis_plugin.RunPlugin, args=(analysis_mediator_object,)) process_info = PsortAnalysisProcess( - completion_event, analysis_plugin.plugin_name, analysis_process) + completion_event, analysis_plugin, analysis_process) self._analysis_process_info.append(process_info) analysis_process.start() @@ -539,12 +546,12 @@ class PsortAnalysisProcess(object): completion_event: An optional Event object (instance of Multiprocessing.Event, Queue.Event or similar) that will be set when the analysis plugin is complete. - plugin_name: The name of the plugin running in the process. + plugin: The plugin running in the process (instance of AnalysisProcess). process: The process (instance of Multiprocessing.Process) that encapsulates the analysis process. """ - def __init__(self, completion_event, plugin_name, process): + def __init__(self, completion_event, plugin, process): super(PsortAnalysisProcess, self).__init__() self.completion_event = completion_event - self.plugin_name = plugin_name + self.plugin = plugin self.process = process diff --git a/plaso/lib/errors.py b/plaso/lib/errors.py index 23f4024a5e..89e89cc2c6 100644 --- a/plaso/lib/errors.py +++ b/plaso/lib/errors.py @@ -17,6 +17,9 @@ class CollectorError(Error): """Class that defines collector errors.""" +class ConnectionError(Error): + """Class that defines errors encountered connecting to a service.""" + class ForemanAbort(Error): """Class that defines a foreman initiated abort exception."""