Skip to content

Commit

Permalink
kdump-hdr: implement kdump flattened file format support
Browse files Browse the repository at this point in the history
The code now properly identifies the flattened format and parses
its structure correctly.

The code now uses struct.unpack() to read integers from the binary
file.

Added unit tests to cover both file formats.

Signed-off-by: Mustafa Kemal Gilor <[email protected]>
  • Loading branch information
xmkg committed Jul 30, 2024
1 parent 6fb333e commit c8906b0
Show file tree
Hide file tree
Showing 4 changed files with 649 additions and 277 deletions.
263 changes: 212 additions & 51 deletions hotkdump/core/kdumpfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import os
import logging
import struct
from dataclasses import dataclass, field
from hotkdump.core.exceptions import NotAKernelCrashDumpException

Expand Down Expand Up @@ -45,18 +46,18 @@ def seek_to_first_non_nul(f):
f.seek(pos)

@staticmethod
def read_int(f, fmt, off=None):
    """Read a single integer value from a binary file stream.

    Args:
        f: Binary file-like object supporting ``seek()`` and ``read()``.
        fmt: ``struct`` format string describing exactly one value,
            including its byte order (e.g. ``"<i"`` or ``">q"``).
        off: Optional absolute offset to seek to before reading. When
            ``None``, the value is read from the current position.

    Returns:
        The decoded value, or ``None`` when fewer than
        ``struct.calcsize(fmt)`` bytes could be read (e.g. at end of file).
    """
    # Compare against None explicitly: offset 0 is a valid position and
    # must not be mistaken for "no offset given".
    if off is not None:
        f.seek(off, os.SEEK_SET)

    byte_cnt = struct.calcsize(fmt)
    raw_bytes = f.read(byte_cnt)

    # A short (or empty) read means the value is not fully present.
    if not raw_bytes or len(raw_bytes) != byte_cnt:
        return None

    return struct.unpack(fmt, raw_bytes)[0]

@staticmethod
def read_str(f, ln):
Expand Down Expand Up @@ -102,6 +103,9 @@ def __post_init__(self):
self.normalized_version = self.version.split("-", maxsplit=1)[0].lstrip("#")


# TODO: Parse each header with a single struct.unpack() call instead of field-by-field reads?


@dataclass()
# pylint: disable-next=too-many-instance-attributes
class DiskDumpHeader:
Expand Down Expand Up @@ -143,10 +147,10 @@ def from_fd(fd):
machine=BinaryFileReader.read_cstr(fd),
domain=BinaryFileReader.read_cstr(fd),
),
timestamp_sec=BinaryFileReader.read_int64(fd, timestamp_offset),
timestamp_usec=BinaryFileReader.read_int64(fd),
status=BinaryFileReader.read_int32(fd),
block_size=BinaryFileReader.read_int32(fd),
timestamp_sec=BinaryFileReader.read_int(fd, "<q", timestamp_offset),
timestamp_usec=BinaryFileReader.read_int(fd, "<q"),
status=BinaryFileReader.read_int(fd, "<i"),
block_size=BinaryFileReader.read_int(fd, "<i"),
)


Expand All @@ -171,32 +175,29 @@ class KdumpSubHeader:
max_mapnr_64: int

@staticmethod
def from_fd(fd):
    """Build a KdumpSubHeader from the file's current position.

    The caller must position ``fd`` at the start of the sub header
    beforehand. The fields are decoded back-to-back as little-endian
    signed integers, in the order given by the table below.
    """
    # (field name, struct format) pairs in the order they are read.
    layout = (
        ("phys_base", "<q"),
        ("dump_level", "<i"),
        ("split", "<i"),
        ("start_pfn", "<q"),
        ("end_pfn", "<q"),
        ("offset_vmcoreinfo", "<q"),
        ("size_vmcoreinfo", "<q"),
        ("offset_note", "<q"),
        ("size_note", "<q"),
        ("offset_eraseinfo", "<q"),
        ("size_eraseinfo", "<q"),
        ("start_pfn_64", "<q"),
        ("end_pfn_64", "<q"),
        ("max_mapnr_64", "<q"),
    )

    values = {}
    for name, fmt in layout:
        values[name] = BinaryFileReader.read_int(fd, fmt)
    return KdumpSubHeader(**values)


class VMCoreInfo:
"""Class for parsing VMCoreInfo section text into a dict."""

def __init__(self, raw):
self.data = {}
for line in raw.split("\n"):
Expand All @@ -212,20 +213,88 @@ def from_fd(fd, vmcoreinfo_offset, vmcoreinfo_size):
fd.seek(vmcoreinfo_offset, os.SEEK_SET)
return VMCoreInfo(BinaryFileReader.read_str(fd, vmcoreinfo_size))

def get(self, key, default=None):
    """Retrieve a VMCoreInfo key's value.

    Args:
        key: Name of the VMCoreInfo entry to look up.
        default: Value to return when ``key`` is not present.

    Returns:
        The stored value for ``key``, or ``default`` if the key is absent.
    """
    return self.data.get(key, default)

def __repr__(self):
    """Return the parsed key/value mapping rendered as a string."""
    return f"{self.data}"


@dataclass()
class MakeDumpFileHeader:
    """Python counterpart of the makedumpfile_header struct.

    Plain value container; it performs no parsing or validation itself.
    """

    # Signature string of the header.
    # NOTE(review): presumably the "makedumpfile" magic -- confirm.
    signature: str
    # Header type field, as an integer.
    vtype: int
    # Header version field, as an integer.
    version: int


@dataclass
class MakeDumpFileDataHeader:
    """Python counterpart of the makedumpfile_data_header struct.

    Each instance describes one data chunk of a flattened dump: where the
    header itself sits in the flattened file, and the offset/size pair it
    carries.
    """

    # Position of this header within the file it was read from.
    self_offset: int
    # Offset value stored in the header (big-endian signed 64-bit).
    offset: int
    # Size value stored in the header (big-endian signed 64-bit); the
    # chunk's data of this many bytes follows the header.
    buf_size: int
    # Size of the header itself in bytes (two 8-byte integers).
    sizeof: int = 16

    @staticmethod
    def from_fd(fd):
        """Read a MakeDumpFileDataHeader at the file's current position."""
        # Remember where the header starts before consuming its fields.
        position = fd.tell()
        target_offset = BinaryFileReader.read_int(fd, ">q")
        payload_size = BinaryFileReader.read_int(fd, ">q")
        return MakeDumpFileDataHeader(
            self_offset=position,
            offset=target_offset,
            buf_size=payload_size,
        )

    def next(self, fd):
        """Seek past this chunk's data and read the header that follows."""
        following = self.self_offset + self.sizeof + self.buf_size
        fd.seek(following, os.SEEK_SET)
        return MakeDumpFileDataHeader.from_fd(fd)

    def in_range(self, offset):
        """Tell whether ``offset`` falls inside this chunk's
        ``[offset, offset + buf_size)`` interval."""
        start = self.offset
        return start <= offset < (start + self.buf_size)

    @property
    def data_offset(self):
        """Position in the file where this chunk's data begins."""
        return self.self_offset + self.sizeof

    def __bool__(self):
        """A header is valid when both fields were read and are
        non-negative."""
        return all(
            value is not None and value >= 0
            for value in (self.offset, self.buf_size)
        )


class KdumpFile:
"""Helper class for parsing headers from kernel crash
dumps generated with kdump.
"""

# pylint: disable=too-many-instance-attributes
@classmethod
def is_flattened_kdump_file(cls, fd):
    """Tell whether the file starts with the makedumpfile (flattened
    format) signature.

    ``fd`` must provide ``peek()``; the stream position is not advanced.
    """
    expected = b"makedumpfile\0\0\0\0"
    # peek() may hand back more or fewer bytes than requested, so only
    # the leading bytes are compared.
    head = fd.peek(len(expected))
    return head.startswith(expected)

@classmethod
def is_regular_kdump_file(cls, fd):
    """Tell whether the file starts with the regular kdump signature.

    ``fd`` must provide ``peek()``, which is used so that the stream
    position is not advanced by the check.
    """
    expected = b"KDUMP "
    # peek() may hand back more or fewer bytes than requested, so only
    # the leading bytes are compared.
    head = fd.peek(len(expected))
    return head.startswith(expected)

def __init__(self, kdump_file_path) -> None:
"""Parse kdump file header and expose
them as member variables
Expand All @@ -237,28 +306,28 @@ def __init__(self, kdump_file_path) -> None:
Exception: If the kdump_file_path is not recognized as a kdump file
"""

with open(kdump_file_path, "rb") as fd:
self._ddhdr = None
self._ksubhdr = None
self._vmcoreinfo = None

# Let's be more forgiving about locating
# the KDUMP signature:
blob = fd.read(1024 * 8)
expected_magic = b"KDUMP "
offset = blob.find(expected_magic)
if offset == -1:
with open(kdump_file_path, "rb") as fd:
# First check if it's flattened format
# https://github.com/makedumpfile/makedumpfile/blob/bad2a7c4fa75d37a41578441468584963028bdda/IMPLEMENTATION#L285
if self.is_flattened_kdump_file(fd):
logging.debug("the file is in flattened format")
# Skip the first 4096 bytes. It contains the makedumpfile_header
# and it's always 4096 bytes in size.
fd.seek(4096)
self.parse_flattened(fd)
elif self.is_regular_kdump_file(fd):
self.parse_compressed(fd)
else:
raise NotAKernelCrashDumpException(
f"{kdump_file_path} is not a kernel crash dump file"
)

# Seek to the KDUMP signature offset
fd.seek(offset, os.SEEK_SET)
self._ddhdr = DiskDumpHeader.from_fd(fd)
self._ksubhdr = KdumpSubHeader.from_fd(fd, self.ddhdr.block_size)
self._vmcoreinfo = VMCoreInfo.from_fd(
fd, self.ksubhdr.offset_vmcoreinfo, self.ksubhdr.size_vmcoreinfo
)

logging.debug("kdump_hdr: %s", str(self.ddhdr))
logging.debug("kdump_subhdr: %s", str(self.ddhdr))
logging.debug("kdump_subhdr: %s", str(self.ksubhdr))
logging.debug("vmcore-info: %s", str(self.vmcoreinfo))

@property
Expand Down Expand Up @@ -290,3 +359,95 @@ def __str__(self) -> str:
]
]
)

def parse_flattened(self, fd):
    """Parse the disk_dump_header, kdump_sub_header and vmcoreinfo from a
    flattened makedumpfile.

    Args:
        fd: Binary file object positioned at the first
            makedumpfile_data_header of the flattened file.

    Raises:
        NotAKernelCrashDumpException: If the first data chunk does not
            have the size of a disk_dump_header.
    """

    # Flattened format consists of a list of data chunks, each starting
    # with a makedumpfile_data_header followed by (buf_size) bytes of data.
    # This format allows splitting a contiguous blob of N bytes into M
    # chunks in arbitrary order. The original offset of each chunk is
    # recorded in its makedumpfile_data_header, so reading code can
    # reconstruct the original file. The format exists to make it possible
    # to write a file that requires random-access writes to a remote
    # endpoint via SSH, etc.

    # The first header is guaranteed to be disk_dump_header. Validate with
    # an explicit exception rather than `assert`, which is stripped when
    # Python runs with -O.
    mdhdr = MakeDumpFileDataHeader.from_fd(fd)
    if mdhdr.buf_size != DiskDumpHeader.sizeof:
        raise NotAKernelCrashDumpException(
            "unexpected size for the first flattened data chunk: "
            f"expected {DiskDumpHeader.sizeof}, got {mdhdr.buf_size}"
        )
    self._ddhdr = DiskDumpHeader.from_fd(fd)

    # The next header is kdump_sub_header, which is located at the
    # page offset. We will walk over the data chunks and try to locate
    # the data chunk that starts at the `kdump_sub_header_off` offset.
    # This code currently does not support reading data that spans across
    # multiple data chunks.
    disk_dump_header_blocks = 1
    kdump_sub_header_off = disk_dump_header_blocks * self.ddhdr.block_size

    # List of chunks for back-referencing. The chunks may appear in random
    # order so we need to keep track of them.
    chunks = []
    # Index of the first chunk not yet tested against the current
    # vmcoreinfo offset; avoids rescanning the whole list per iteration.
    unchecked_from = 0

    # Fetch the next chunk.
    mdhdr = mdhdr.next(fd)
    while mdhdr:
        # If the current chunk offset is the offset for kdump_sub_header,
        # parse it.
        if mdhdr.offset == kdump_sub_header_off:
            logging.debug("found the chunk for ksubhdr: %s", mdhdr)
            self._ksubhdr = KdumpSubHeader.from_fd(fd)
            # The vmcoreinfo offset is (re)defined by this header, so all
            # chunks seen so far must be examined against it.
            unchecked_from = 0

        # Append it to the list of chunks we've seen
        chunks.append(mdhdr)

        # The kdump_sub_header contains the vmcoreinfo offset, so in order
        # to parse that, we must've parsed the kdump_sub_header already.
        if self.ksubhdr:
            # Search for the chunk that contains the vmcoreinfo
            for flat_block in chunks[unchecked_from:]:
                if flat_block.in_range(self.ksubhdr.offset_vmcoreinfo):
                    logging.debug("found the chunk for vmcore: %s", flat_block)
                    # The offset is relative to the original file so we
                    # need to translate it to the current file.
                    translated_offset = (
                        self.ksubhdr.offset_vmcoreinfo - flat_block.offset
                    )
                    self._vmcoreinfo = VMCoreInfo.from_fd(
                        fd,
                        flat_block.data_offset + translated_offset,
                        self.ksubhdr.size_vmcoreinfo,
                    )
                    # We got what we need so there's no point walking on
                    # the list any further.
                    return
            unchecked_from = len(chunks)

        # Move to the next chunk
        mdhdr = mdhdr.next(fd)

def parse_compressed(self, fd):
    """Parse the disk_dump_header, kdump_sub_header and vmcoreinfo from a
    kdump compressed file.

    Unlike the flat format, this layout is contiguous and carries no
    extra per-chunk headers, so offsets can be used as-is.
    """

    # The disk_dump_header comes first.
    self._ddhdr = DiskDumpHeader.from_fd(fd)

    # disk_dump_header is always written as a block-sized blob, so the
    # kdump_sub_header begins right after that block.
    header_blocks = 1
    sub_header_pos = header_blocks * self.ddhdr.block_size
    fd.seek(sub_header_pos, os.SEEK_SET)
    self._ksubhdr = KdumpSubHeader.from_fd(fd)

    # The sub header records where vmcoreinfo lives and how long it is.
    info_offset = self.ksubhdr.offset_vmcoreinfo
    info_size = self.ksubhdr.size_vmcoreinfo
    self._vmcoreinfo = VMCoreInfo.from_fd(fd, info_offset, info_size)
Loading

0 comments on commit c8906b0

Please sign in to comment.