Skip to content

Commit

Permalink
Changed text plugins for line continuation
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Sep 24, 2023
1 parent 61f7c41 commit 917fcd0
Show file tree
Hide file tree
Showing 8 changed files with 169 additions and 103 deletions.
17 changes: 7 additions & 10 deletions plaso/parsers/text_plugins/bash_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self):
self.written_time = None


class BashHistoryTextPlugin(interface.TextPlugin):
class BashHistoryTextPlugin(interface.TextPluginWithLineContinuation):
"""Text parser plugin for bash history files."""

NAME = 'bash_history'
Expand All @@ -45,10 +45,7 @@ class BashHistoryTextPlugin(interface.TextPlugin):
_COMMAND_LINE = (
pyparsing.restOfLine().setResultsName('command') + _END_OF_LINE)

# Note that timestamp line must be defined before command line.
_LINE_STRUCTURES = [
('timestamp_line', _TIMESTAMP_LINE),
('command_line', _COMMAND_LINE)]
_LINE_STRUCTURES = [('timestamp_line', _TIMESTAMP_LINE)]

# A desynchronized bash history file will start with the command line
# instead of the timestamp.
Expand All @@ -70,7 +67,7 @@ def _ParseFinalize(self, parser_mediator):
and other components, such as storage and dfVFS.
"""
if self._event_data:
self._event_data.command = '\n'.join(self._command_lines)
self._event_data.command = ' '.join(self._command_lines)
self._command_lines = []

parser_mediator.ProduceEventData(self._event_data)
Expand All @@ -88,19 +85,18 @@ def _ParseRecord(self, parser_mediator, key, structure):
Raises:
ParseError: if the structure cannot be parsed.
"""
if key == 'command_line':
if key == '_line_continuation':
# A desynchronized bash history file will start with the command line
# instead of the timestamp.
if not self._event_data:
self._event_data = BashHistoryEventData()

command = self._GetValueFromStructure(structure, 'command')
command = structure.replace('\n', ' ').strip()
self._command_lines.append(command)

else:
if self._event_data:
self._event_data.command = '\n'.join(self._command_lines)
self._command_lines = []
self._event_data.command = ' '.join(self._command_lines)

parser_mediator.ProduceEventData(self._event_data)

Expand All @@ -109,6 +105,7 @@ def _ParseRecord(self, parser_mediator, key, structure):
self._event_data = BashHistoryEventData()
self._event_data.written_time = dfdatetime_posix_time.PosixTime(
timestamp=timestamp)
self._command_lines = []

def _ResetState(self):
"""Resets stored values."""
Expand Down
38 changes: 20 additions & 18 deletions plaso/parsers/text_plugins/gdrive_synclog.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self):
self.thread = None


class GoogleDriveSyncLogTextPlugin(interface.TextPlugin):
class GoogleDriveSyncLogTextPlugin(interface.TextPluginWithLineContinuation):
"""Text parser plugin for Google Drive Sync log files."""

NAME = 'gdrive_synclog'
Expand Down Expand Up @@ -93,13 +93,7 @@ class GoogleDriveSyncLogTextPlugin(interface.TextPlugin):
_LOG_LINE_START + pyparsing.restOfLine().setResultsName('body') +
_END_OF_LINE)

_SUCCESSIVE_LOG_LINE = (
pyparsing.NotAny(_LOG_LINE_START) +
pyparsing.restOfLine().setResultsName('body') + _END_OF_LINE)

_LINE_STRUCTURES = [
('log_line', _LOG_LINE),
('successive_log_line', _SUCCESSIVE_LOG_LINE)]
_LINE_STRUCTURES = [('log_line', _LOG_LINE)]

# Using a regular expression here is faster on non-match than the log line
# grammar.
Expand All @@ -113,6 +107,7 @@ class GoogleDriveSyncLogTextPlugin(interface.TextPlugin):
def __init__(self):
  """Sets up the plugin with no log entry pending."""
  super(GoogleDriveSyncLogTextPlugin, self).__init__()
  # Event data of the log entry that is currently being assembled.
  self._event_data = None
  # Body lines of that log entry; they are joined into its message once the
  # entry is complete.
  self._body_lines = None

def _ParseFinalize(self, parser_mediator):
Expand All @@ -123,6 +118,9 @@ def _ParseFinalize(self, parser_mediator):
and other components, such as storage and dfVFS.
"""
if self._event_data:
self._event_data.message = ' '.join(self._body_lines)
self._body_lines = None

parser_mediator.ProduceEventData(self._event_data)
self._event_data = None

Expand All @@ -147,9 +145,9 @@ def _ParseLogline(self, structure):
event_data.thread = self._GetValueFromStructure(structure, 'thread')
event_data.source_code = self._GetValueFromStructure(
structure, 'source_code')
event_data.message = body

self._event_data = event_data
self._body_lines = [body]

def _ParseRecord(self, parser_mediator, key, structure):
"""Parses a pyparsing structure.
Expand All @@ -163,19 +161,18 @@ def _ParseRecord(self, parser_mediator, key, structure):
Raises:
ParseError: if the structure cannot be parsed.
"""
if key == 'log_line':
if key == '_line_continuation':
body = structure.replace('\n', ' ').strip()
self._body_lines.append(body)

else:
if self._event_data:
self._event_data.message = ' '.join(self._body_lines)

parser_mediator.ProduceEventData(self._event_data)
self._event_data = None

self._ParseLogline(structure)

elif key == 'successive_log_line':
body = self._GetValueFromStructure(structure, 'body', default_value='')
body = body.strip()

self._event_data.message = ' '.join([self._event_data.message, body])

def _ParseTimeElements(self, time_elements_structure):
"""Parses date and time elements of a log line.
Expand Down Expand Up @@ -215,6 +212,11 @@ def _ParseTimeElements(self, time_elements_structure):
except (TypeError, ValueError):
return None

def _ResetState(self):
"""Resets stored values."""
self._body_lines = None
self._event_data = None

def CheckRequiredFormat(self, parser_mediator, text_reader):
"""Check if the log record has the minimal structure required by the parser.
Expand All @@ -240,7 +242,7 @@ def CheckRequiredFormat(self, parser_mediator, text_reader):
except errors.ParseError:
return False

self._event_data = None
self._ResetState()

return True

Expand Down
54 changes: 54 additions & 0 deletions plaso/parsers/text_plugins/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,3 +386,57 @@ def Process(self, parser_mediator, file_object=None, **kwargs):

finally:
self._parser_mediator = None


class TextPluginWithLineContinuation(TextPlugin):
  """The interface for text plugins with line continuation.

  Text that does not match the grammar, or that precedes the first match in
  the string, is returned with the key "_line_continuation" instead of
  raising an error, so that a plugin can append it to the previous record.
  """

  # pylint: disable=abstract-method

  def __init__(self):
    """Initializes a text parser plugin."""
    super(TextPluginWithLineContinuation, self).__init__()
    # Match that was found behind unmatched text by the previous call to
    # _ParseString, kept so the next call can return it without rescanning.
    self._last_string_match = None

  def _ParseString(self, string):
    """Parses a string for known grammar.

    Args:
      string (str): string.

    Returns:
      tuple[str, pyparsing.ParseResults|str, int, int]: key, parsed tokens,
          start and end offset. For the key "_line_continuation" the second
          value is the unmatched text (str) instead of parsed tokens.

    Raises:
      ParseError: when the string cannot be parsed by the grammar.
    """
    # A match found behind continuation text by the previous call is returned
    # first, and only once.
    # NOTE(review): the cached match is returned without looking at string;
    # this presumably relies on the caller advancing past the continuation
    # text before calling again - confirm against the caller.
    cached_match = self._last_string_match
    if cached_match is not None:
      self._last_string_match = None
      return cached_match

    try:
      # self._pyparsing_grammar is not set in this class; presumably the base
      # class provides it.
      structure_generator = self._pyparsing_grammar.scanString(
          string, maxMatches=1)
      structure, start, end = next(structure_generator)

    except StopIteration:
      # No match anywhere in the string.
      structure = None

    except pyparsing.ParseException as exception:
      raise errors.ParseError(exception)

    if not structure:
      # The entire string is treated as a continuation of the previous line.
      return '_line_continuation', string, 0, len(string)

    # Unwrap the line structure and retrieve its name (key).
    keys = list(structure.keys())
    if len(keys) != 1:
      raise errors.ParseError('Missing key of line structure.')

    key = keys[0]
    if start == 0:
      return key, structure[0], start, end

    # The match starts after unmatched text: return that text as a line
    # continuation first and keep the match, with its offsets made relative
    # to the start of the match, for the next call.
    self._last_string_match = (key, structure[0], 0, end - start)
    return '_line_continuation', string[:start], 0, start
39 changes: 20 additions & 19 deletions plaso/parsers/text_plugins/ios_lockdownd.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self):
self.written_time = None


class IOSLockdowndLogTextPlugin(interface.TextPlugin):
class IOSLockdowndLogTextPlugin(interface.TextPluginWithLineContinuation):
"""Text parser plugin for iOS lockdown daemon log files."""

NAME = 'ios_lockdownd'
Expand Down Expand Up @@ -67,19 +67,14 @@ class IOSLockdowndLogTextPlugin(interface.TextPlugin):
_LOG_LINE_START + pyparsing.restOfLine().setResultsName('body') +
_END_OF_LINE)

_SUCCESSIVE_LOG_LINE = (
pyparsing.NotAny(_LOG_LINE_START) +
pyparsing.restOfLine().setResultsName('body') + _END_OF_LINE)

_LINE_STRUCTURES = [
('log_line', _LOG_LINE),
('successive_log_line', _SUCCESSIVE_LOG_LINE)]
_LINE_STRUCTURES = [('log_line', _LOG_LINE)]

VERIFICATION_GRAMMAR = _LOG_LINE

def __init__(self):
  """Sets up the plugin with no log entry pending."""
  super(IOSLockdowndLogTextPlugin, self).__init__()
  # Event data of the log entry that is currently being assembled.
  self._event_data = None
  # Body lines of that log entry; they are joined into its body once the
  # entry is complete.
  self._body_lines = None

def _ParseFinalize(self, parser_mediator):
Expand All @@ -90,6 +85,9 @@ def _ParseFinalize(self, parser_mediator):
and other components, such as storage and dfVFS.
"""
if self._event_data:
self._event_data.body = ' '.join(self._body_lines)
self._body_lines = None

parser_mediator.ProduceEventData(self._event_data)
self._event_data = None

Expand All @@ -104,15 +102,14 @@ def _ParseLogline(self, structure):
structure, 'date_time')

body = self._GetValueFromStructure(structure, 'body', default_value='')
body = body.strip()

event_data = IOSLockdowndLogData()
event_data.body = body
event_data.process_identifier = self._GetValueFromStructure(
structure, 'process_identifier')
event_data.written_time = self._ParseTimeElements(time_elements_structure)

self._event_data = event_data
self._body_lines = [body.strip()]

def _ParseRecord(self, parser_mediator, key, structure):
"""Parses a pyparsing structure.
Expand All @@ -126,19 +123,18 @@ def _ParseRecord(self, parser_mediator, key, structure):
Raises:
ParseError: if the structure cannot be parsed.
"""
if key == 'log_line':
if key == '_line_continuation':
body = structure.replace('\n', ' ').strip()
self._body_lines.append(body)

else:
if self._event_data:
self._event_data.body = ' '.join(self._body_lines)

parser_mediator.ProduceEventData(self._event_data)
self._event_data = None

self._ParseLogline(structure)

elif key == 'successive_log_line':
body = self._GetValueFromStructure(structure, 'body', default_value='')
body = body.strip()

self._event_data.body = ' '.join([self._event_data.body, body])

def _ParseTimeElements(self, time_elements_structure):
"""Parses date and time elements of a log line.
Expand Down Expand Up @@ -168,6 +164,11 @@ def _ParseTimeElements(self, time_elements_structure):
raise errors.ParseError(
'Unable to parse time elements with error: {0!s}'.format(exception))

def _ResetState(self):
"""Resets stored values."""
self._body_lines = None
self._event_data = None

def CheckRequiredFormat(self, parser_mediator, text_reader):
"""Check if the log record has the minimal structure required by the parser.
Expand All @@ -192,7 +193,7 @@ def CheckRequiredFormat(self, parser_mediator, text_reader):
except errors.ParseError:
return False

self._event_data = None
self._ResetState()

return True

Expand Down
Loading

0 comments on commit 917fcd0

Please sign in to comment.