From 8c25309c3e4a8519948c498c14c102e7b5c97426 Mon Sep 17 00:00:00 2001 From: Joachim Metz Date: Mon, 1 Jan 2024 19:21:37 +0100 Subject: [PATCH] Worked on LevelDB database format support --- ...tlight store database file format.asciidoc | 23 +- .../LevelDB database format.asciidoc | 604 ++++++++++++++++++ dtformats/leveldb.debug.yaml | 37 ++ dtformats/leveldb.py | 327 ++++++++++ dtformats/leveldb.yaml | 65 ++ dtformats/vhdx.yaml | 10 +- scripts/leveldb.py | 105 +++ tests/leveldb.py | 33 + 8 files changed, 1189 insertions(+), 15 deletions(-) create mode 100644 documentation/LevelDB database format.asciidoc create mode 100644 dtformats/leveldb.debug.yaml create mode 100644 dtformats/leveldb.py create mode 100644 dtformats/leveldb.yaml create mode 100755 scripts/leveldb.py create mode 100644 tests/leveldb.py diff --git a/documentation/Apple Spotlight store database file format.asciidoc b/documentation/Apple Spotlight store database file format.asciidoc index 68fccf9..e151430 100644 --- a/documentation/Apple Spotlight store database file format.asciidoc +++ b/documentation/Apple Spotlight store database file format.asciidoc @@ -29,7 +29,7 @@ store file formats specification. == License .... -Copyright (C) 2020-2023, Joachim Metz . +Copyright (C) 2020-2024, Joachim Metz . Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or any later version published by the Free Software Foundation; with no Invariant Sections, no @@ -45,6 +45,7 @@ in the section entitled "GNU Free Documentation License". | Version | Author | Date | Comments | 0.0.1 | J.B. Metz | June 2020 | Initial version based on earlier notes, with thanks to Everest Munro-Zeisberger | 0.0.2 | J.B. Metz | June 2023 | Additional changes based on format analysis +| 0.0.3 | J.B. Metz | January 2024 | Small textual changes. |=== :numbered: @@ -437,21 +438,21 @@ The metadata attributes are stored using several property tables. 
| Value | Identifier | Description | 0x00 | | Boolean + Contains a variable size integer + -See section: <> + +See section: <> + | 0x01 | | [yellow-background]*Unknown* | 0x02 | | [yellow-background]*Unknown* + Contains a variable size integer + -See section: <> + +See section: <> + Seen in combination with "_kMDItemGroupId" | 0x03 | | [yellow-background]*Unknown* | 0x04 | | [yellow-background]*Unknown* | 0x05 | | [yellow-background]*Unknown* | 0x06 | | [yellow-background]*Unknown* + Contains a variable size integer + -See section: <> + +See section: <> + Seen in combination with "_kStoreMetadataVersion" -| 0x07 | | Variable size integer + -See section: <> + +| 0x07 | | Variable-size integer + +See section: <> + If the 2nd LSB of the property type is set the value data contains a multi-value. See section <> | 0x08 | | [yellow-background]*Unknown (Byte or 8-bit integer)* | 0x09 | | Floating-point 32-bit + @@ -471,7 +472,9 @@ See section: <> [yellow-background]*The 2 LSB of the property type indicate if the value references a localized string (0x3), list (0x2) or value (0x1) type* |=== -==== [[variable_size_integer]]Variable size integer +==== [[variable_size_integer]]Variable-size integer + +TODO: add description ==== [[multi_value]]Multi-value @@ -834,7 +837,7 @@ The dbStr-1.map.data file contains metadata attribute types that consist of: |=== | Offset | Size | Value | Description | 0 | ... | | [yellow-background]*Unknown (stream value data size)* + -See section: <> +See section: <> | ... | 1 | | Value type + See section: <> | ... | 1 | | Property type @@ -850,7 +853,7 @@ The dbStr-2.map.data file contains metadata attribute values that consist of: |=== | Offset | Size | Value | Description | 0 | ... | | [yellow-background]*Unknown (stream value data size)* + -See section: <> +See section: <> | ... | ... 
| | Metadata attribute value name + Contains an UTF-8 encoded string with an end-of-string character |=== @@ -864,7 +867,7 @@ metadata attribute values that consist of: |=== | Offset | Size | Value | Description | 0 | ... | | [yellow-background]*Unknown* + -See section: <> +See section: <> | ... | ... | | Index data size + Contains a variable size integer | ... | ... | | Index array + diff --git a/documentation/LevelDB database format.asciidoc b/documentation/LevelDB database format.asciidoc new file mode 100644 index 0000000..ecc4b0f --- /dev/null +++ b/documentation/LevelDB database format.asciidoc @@ -0,0 +1,604 @@ += LevelDB database format + +:toc: +:toclevels: 4 + +:numbered!: +[abstract] +== Summary + +[preface] +== Document information + +The LevelDB database format is used to store various kinds of application-level +information such as the Google Chrome local storage. + +[cols="1,5"] +|=== +| Author(s): | Joachim Metz +| Abstract: | This document contains information about the LevelDB database format +| Classification: | Public +| Keywords: | LevelDB +|=== + +[preface] +== License + +.... +Copyright (C) 2024, Joachim Metz . +Permission is granted to copy, distribute and/or modify this document under the +terms of the GNU Free Documentation License, Version 1.3 or any later version +published by the Free Software Foundation; with no Invariant Sections, no +Front-Cover Texts, and no Back-Cover Texts. A copy of the license is included +in the section entitled "GNU Free Documentation License". +.... + +[preface] +== Revision history + +[cols="1,1,1,5",options="header"] +|=== +| Version | Author | Date | Comments +| 0.0.1 | J.B. Metz | January 2024 | Initial version. +|=== + +:numbered: +== Overview + +The LevelDB database format is used to store various kinds of application-level +information such as the Google Chrome local storage. 
+ +A LevelDB database directory contains the following files: + +* `[0-9]{6}.ldb` (sorted tables file) +* `[0-9]{6}.log` (write ahead log file) +* CURRENT (contains the name of the current MANIFEST file) +* LOCK +* LOG, LOG.old (log with informational messages) +* MANIFEST-000001 (information about the sorted tables that make up the database) + +== Current file + +.... +00000000 4d 41 4e 49 46 45 53 54 2d 30 30 30 30 30 31 0a |MANIFEST-000001.| +.... + +== Ldb file + +An ldb file consists of: + +* one or more data blocks +* one or more metadata blocks +* metaindex block +* index block +* footer + +[cols="1,5",options="header"] +|=== +| Characteristics | Description +| Byte order | +| Date and time values | +| Character strings | +|=== + +=== [[ldb_varint64]]Variable-size integer + +The variable-size integer (varint64) allows encoding an unsigned 64-bit integer +using 1 up to 10 bytes, where small integer values use fewer bytes. + +The MSB of each byte in the variable-size integer is a continuation bit that +indicates if the next byte is part of the integer. + +For example: + +* integer value 1 is stored as the bytes "01" +* integer value 150 is stored as the bytes "96 01" + +=== [[ldb_block_handle]]Block handle + +[cols="1,1,1,5",options="header"] +|=== +| Offset | Size | Value | Description +| 0 | ... | | Block offset + +Contains a <<ldb_varint64,variable-size integer>> +| ... | ... 
| | Block size + +Contains a <<ldb_varint64,variable-size integer>> +|=== + +=== Key prefix + +Each key is prefixed with: + +* database identifier (database ID) +* object store identifier +* index identifier + +The key prefix (KeyPrefix) is of variable size and consists of: + +[cols="1,1,1,5",options="header"] +|=== +| Offset | Size | Value | Description +| 0.0 | 2 bits | | Size of the index identifier + +Where size is stored as size - 1, hence 0 represents 1 byte +| 0.2 | 3 bits | | Size of the object store identifier + +Where size is stored as size - 1, hence 0 represents 1 byte +| 0.5 | 3 bits | | Size of the database identifier + +Where size is stored as size - 1, hence 0 represents 1 byte +| 1.0 | ... | | Database identifier + +Contains a little-endian integer +| ... | ... | | Object store identifier + +Contains a little-endian integer +| ... | ... | | Index identifier + +Contains a little-endian integer +|=== + +=== Data block + +A data block contains a sequence of key/value pairs. + +=== Metadata block + +A metadata block contains information about the data blocks. + +=== Metaindex block + +A metaindex block contains keyed references to metadata blocks. + +The key of global metadata has the prefix: 0, 0, 0 followed by a byte value +that contains the metadata type: + +[cols="1,1,1,3",options="header"] +|=== +| Metadata type | Value type | Identifier | Description +| 0 | Int | SchemaVersionKey | Backing store schema version +| 1 | Int | MaxDatabaseIdKey | Maximum allocated database identifier +| 2 | Int | DataVersionKey | Data format version +| 3 | Int | RecoveryBlobJournalKey | Recovery BLOB journal +| 4 | Int | ActiveBlobJournalKey | Active BLOB journal +| 5 | Int | EarliestSweepKey | Earliest sweep time in microseconds +| 100, database ID (VarInt) | | DatabaseFreeListKey | Database is in the free list (no longer used) +| 201, origin (StringWithLength), database name (StringWithLength) | Int | DatabaseNameKey | Database identifier +|=== + +=== Index block + +An index block contains keyed references to data blocks. 
+ +=== Footer + +[cols="1,1,1,5",options="header"] +|=== +| Offset | Size | Value | Description +| 0 | ... | | Metaindex block handle + +See section: <<ldb_block_handle,Block handle>> +| ... | ... | | Index block handle + +See section: <<ldb_block_handle,Block handle>> +| ... | ... | 0 | Padding + +The size of the padding is 40 bytes - size of the metaindex and index block handles +| 40 | 8 | "\x57\xfb\x80\x8b\x24\x75\x47\xdb" | Signature +|=== + +:numbered!: +[appendix] +== References + +[cols="1,5",options="header"] +|=== +| Title: | Chromium - LevelDB Coding Scheme +| URL: | https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/docs/leveldb_coding_scheme.md +|=== + +[cols="1,5",options="header"] +|=== +| Title: | Protocol Buffers Documentation - Encoding - Base 128 Varints +| URL: | https://protobuf.dev/programming-guides/encoding/#varints +|=== + +[cols="1,5",options="header"] +|=== +| Title: | LevelDB - Files +| URL: | https://github.com/google/leveldb/blob/main/doc/impl.md +|=== + +[cols="1,5",options="header"] +|=== +| Title: | LevelDB - leveldb File format +| URL: | https://github.com/google/leveldb/blob/main/doc/table_format.md +|=== + +[cols="1,5",options="header"] +|=== +| Title: | LevelDB - leveldb Log format +| URL: | https://github.com/google/leveldb/blob/main/doc/log_format.md +|=== + +[appendix] +== GNU Free Documentation License + +Version 1.3, 3 November 2008 +Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc. + + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +=== 0. PREAMBLE + +The purpose of this License is to make a manual, textbook, or other functional +and useful document "free" in the sense of freedom: to assure everyone the +effective freedom to copy and redistribute it, with or without modifying it, +either commercially or noncommercially. 
Secondarily, this License preserves for +the author and publisher a way to get credit for their work, while not being +considered responsible for modifications made by others. + +This License is a kind of "copyleft", which means that derivative works of the +document must themselves be free in the same sense. It complements the GNU +General Public License, which is a copyleft license designed for free software. + +We have designed this License in order to use it for manuals for free software, +because free software needs free documentation: a free program should come with +manuals providing the same freedoms that the software does. But this License is +not limited to software manuals; it can be used for any textual work, +regardless of subject matter or whether it is published as a printed book. We +recommend this License principally for works whose purpose is instruction or +reference. + +=== 1. APPLICABILITY AND DEFINITIONS + +This License applies to any manual or other work, in any medium, that contains +a notice placed by the copyright holder saying it can be distributed under the +terms of this License. Such a notice grants a world-wide, royalty-free license, +unlimited in duration, to use that work under the conditions stated herein. The +"Document", below, refers to any such manual or work. Any member of the public +is a licensee, and is addressed as "you". You accept the license if you copy, +modify or distribute the work in a way requiring permission under copyright law. + +A "Modified Version" of the Document means any work containing the Document or +a portion of it, either copied verbatim, or with modifications and/or +translated into another language. 
+ +A "Secondary Section" is a named appendix or a front-matter section of the +Document that deals exclusively with the relationship of the publishers or +authors of the Document to the Document's overall subject (or to related +matters) and contains nothing that could fall directly within that overall +subject. (Thus, if the Document is in part a textbook of mathematics, a +Secondary Section may not explain any mathematics.) The relationship could be a +matter of historical connection with the subject or with related matters, or of +legal, commercial, philosophical, ethical or political position regarding them. + +The "Invariant Sections" are certain Secondary Sections whose titles are +designated, as being those of Invariant Sections, in the notice that says that +the Document is released under this License. If a section does not fit the +above definition of Secondary then it is not allowed to be designated as +Invariant. The Document may contain zero Invariant Sections. If the Document +does not identify any Invariant Sections then there are none. + +The "Cover Texts" are certain short passages of text that are listed, as +Front-Cover Texts or Back-Cover Texts, in the notice that says that the +Document is released under this License. A Front-Cover Text may be at most 5 +words, and a Back-Cover Text may be at most 25 words. + +A "Transparent" copy of the Document means a machine-readable copy, represented +in a format whose specification is available to the general public, that is +suitable for revising the document straightforwardly with generic text editors +or (for images composed of pixels) generic paint programs or (for drawings) +some widely available drawing editor, and that is suitable for input to text +formatters or for automatic translation to a variety of formats suitable for +input to text formatters. 
A copy made in an otherwise Transparent file format +whose markup, or absence of markup, has been arranged to thwart or discourage +subsequent modification by readers is not Transparent. An image format is not +Transparent if used for any substantial amount of text. A copy that is not +"Transparent" is called "Opaque". + +Examples of suitable formats for Transparent copies include plain ASCII without +markup, Texinfo input format, LaTeX input format, SGML or XML using a publicly +available DTD, and standard-conforming simple HTML, PostScript or PDF designed +for human modification. Examples of transparent image formats include PNG, XCF +and JPG. Opaque formats include proprietary formats that can be read and edited +only by proprietary word processors, SGML or XML for which the DTD and/or +processing tools are not generally available, and the machine-generated HTML, +PostScript or PDF produced by some word processors for output purposes only. + +The "Title Page" means, for a printed book, the title page itself, plus such +following pages as are needed to hold, legibly, the material this License +requires to appear in the title page. For works in formats which do not have +any title page as such, "Title Page" means the text near the most prominent +appearance of the work's title, preceding the beginning of the body of the text. + +The "publisher" means any person or entity that distributes copies of the +Document to the public. + +A section "Entitled XYZ" means a named subunit of the Document whose title +either is precisely XYZ or contains XYZ in parentheses following text that +translates XYZ in another language. (Here XYZ stands for a specific section +name mentioned below, such as "Acknowledgements", "Dedications", +"Endorsements", or "History".) To "Preserve the Title" of such a section when +you modify the Document means that it remains a section "Entitled XYZ" +according to this definition. 
+ +The Document may include Warranty Disclaimers next to the notice which states +that this License applies to the Document. These Warranty Disclaimers are +considered to be included by reference in this License, but only as regards +disclaiming warranties: any other implication that these Warranty Disclaimers +may have is void and has no effect on the meaning of this License. + +=== 2. VERBATIM COPYING + +You may copy and distribute the Document in any medium, either commercially or +noncommercially, provided that this License, the copyright notices, and the +license notice saying this License applies to the Document are reproduced in +all copies, and that you add no other conditions whatsoever to those of this +License. You may not use technical measures to obstruct or control the reading +or further copying of the copies you make or distribute. However, you may +accept compensation in exchange for copies. If you distribute a large enough +number of copies you must also follow the conditions in section 3. + +You may also lend copies, under the same conditions stated above, and you may +publicly display copies. + +=== 3. COPYING IN QUANTITY + +If you publish printed copies (or copies in media that commonly have printed +covers) of the Document, numbering more than 100, and the Document's license +notice requires Cover Texts, you must enclose the copies in covers that carry, +clearly and legibly, all these Cover Texts: Front-Cover Texts on the front +cover, and Back-Cover Texts on the back cover. Both covers must also clearly +and legibly identify you as the publisher of these copies. The front cover must +present the full title with all words of the title equally prominent and +visible. You may add other material on the covers in addition. Copying with +changes limited to the covers, as long as they preserve the title of the +Document and satisfy these conditions, can be treated as verbatim copying in +other respects. 
+ +If the required texts for either cover are too voluminous to fit legibly, you +should put the first ones listed (as many as fit reasonably) on the actual +cover, and continue the rest onto adjacent pages. + +If you publish or distribute Opaque copies of the Document numbering more than +100, you must either include a machine-readable Transparent copy along with +each Opaque copy, or state in or with each Opaque copy a computer-network +location from which the general network-using public has access to download +using public-standard network protocols a complete Transparent copy of the +Document, free of added material. If you use the latter option, you must take +reasonably prudent steps, when you begin distribution of Opaque copies in +quantity, to ensure that this Transparent copy will remain thus accessible at +the stated location until at least one year after the last time you distribute +an Opaque copy (directly or through your agents or retailers) of that edition +to the public. + +It is requested, but not required, that you contact the authors of the Document +well before redistributing any large number of copies, to give them a chance to +provide you with an updated version of the Document. + +=== 4. MODIFICATIONS + +You may copy and distribute a Modified Version of the Document under the +conditions of sections 2 and 3 above, provided that you release the Modified +Version under precisely this License, with the Modified Version filling the +role of the Document, thus licensing distribution and modification of the +Modified Version to whoever possesses a copy of it. In addition, you must do +these things in the Modified Version: + +A. Use in the Title Page (and on the covers, if any) a title distinct from that +of the Document, and from those of previous versions (which should, if there +were any, be listed in the History section of the Document). 
You may use the +same title as a previous version if the original publisher of that version +gives permission. + +B. List on the Title Page, as authors, one or more persons or entities +responsible for authorship of the modifications in the Modified Version, +together with at least five of the principal authors of the Document (all of +its principal authors, if it has fewer than five), unless they release you from +this requirement. + +C. State on the Title page the name of the publisher of the Modified Version, +as the publisher. + +D. Preserve all the copyright notices of the Document. + +E. Add an appropriate copyright notice for your modifications adjacent to the +other copyright notices. + +F. Include, immediately after the copyright notices, a license notice giving +the public permission to use the Modified Version under the terms of this +License, in the form shown in the Addendum below. + +G. Preserve in that license notice the full lists of Invariant Sections and +required Cover Texts given in the Document's license notice. + +H. Include an unaltered copy of this License. + +I. Preserve the section Entitled "History", Preserve its Title, and add to it +an item stating at least the title, year, new authors, and publisher of the +Modified Version as given on the Title Page. If there is no section Entitled +"History" in the Document, create one stating the title, year, authors, and +publisher of the Document as given on its Title Page, then add an item +describing the Modified Version as stated in the previous sentence. + +J. Preserve the network location, if any, given in the Document for public +access to a Transparent copy of the Document, and likewise the network +locations given in the Document for previous versions it was based on. These +may be placed in the "History" section. 
You may omit a network location for a +work that was published at least four years before the Document itself, or if +the original publisher of the version it refers to gives permission. + +K. For any section Entitled "Acknowledgements" or "Dedications", Preserve the +Title of the section, and preserve in the section all the substance and tone of +each of the contributor acknowledgements and/or dedications given therein. + +L. Preserve all the Invariant Sections of the Document, unaltered in their text +and in their titles. Section numbers or the equivalent are not considered part +of the section titles. + +M. Delete any section Entitled "Endorsements". Such a section may not be +included in the Modified Version. + +N. Do not retitle any existing section to be Entitled "Endorsements" or to +conflict in title with any Invariant Section. + +O. Preserve any Warranty Disclaimers. + +If the Modified Version includes new front-matter sections or appendices that +qualify as Secondary Sections and contain no material copied from the Document, +you may at your option designate some or all of these sections as invariant. To +do this, add their titles to the list of Invariant Sections in the Modified +Version's license notice. These titles must be distinct from any other section +titles. + +You may add a section Entitled "Endorsements", provided it contains nothing but +endorsements of your Modified Version by various parties—for example, +statements of peer review or that the text has been approved by an organization +as the authoritative definition of a standard. + +You may add a passage of up to five words as a Front-Cover Text, and a passage +of up to 25 words as a Back-Cover Text, to the end of the list of Cover Texts +in the Modified Version. Only one passage of Front-Cover Text and one of +Back-Cover Text may be added by (or through arrangements made by) any one +entity. 
If the Document already includes a cover text for the same cover, +previously added by you or by arrangement made by the same entity you are +acting on behalf of, you may not add another; but you may replace the old one, +on explicit permission from the previous publisher that added the old one. + +The author(s) and publisher(s) of the Document do not by this License give +permission to use their names for publicity for or to assert or imply +endorsement of any Modified Version. + +=== 5. COMBINING DOCUMENTS + +You may combine the Document with other documents released under this License, +under the terms defined in section 4 above for modified versions, provided that +you include in the combination all of the Invariant Sections of all of the +original documents, unmodified, and list them all as Invariant Sections of your +combined work in its license notice, and that you preserve all their Warranty +Disclaimers. + +The combined work need only contain one copy of this License, and multiple +identical Invariant Sections may be replaced with a single copy. If there are +multiple Invariant Sections with the same name but different contents, make the +title of each such section unique by adding at the end of it, in parentheses, +the name of the original author or publisher of that section if known, or else +a unique number. Make the same adjustment to the section titles in the list of +Invariant Sections in the license notice of the combined work. + +In the combination, you must combine any sections Entitled "History" in the +various original documents, forming one section Entitled "History"; likewise +combine any sections Entitled "Acknowledgements", and any sections Entitled +"Dedications". You must delete all sections Entitled "Endorsements". + +=== 6. 
COLLECTIONS OF DOCUMENTS + +You may make a collection consisting of the Document and other documents +released under this License, and replace the individual copies of this License +in the various documents with a single copy that is included in the collection, +provided that you follow the rules of this License for verbatim copying of each +of the documents in all other respects. + +You may extract a single document from such a collection, and distribute it +individually under this License, provided you insert a copy of this License +into the extracted document, and follow this License in all other respects +regarding verbatim copying of that document. + +=== 7. AGGREGATION WITH INDEPENDENT WORKS + +A compilation of the Document or its derivatives with other separate and +independent documents or works, in or on a volume of a storage or distribution +medium, is called an "aggregate" if the copyright resulting from the +compilation is not used to limit the legal rights of the compilation's users +beyond what the individual works permit. When the Document is included in an +aggregate, this License does not apply to the other works in the aggregate +which are not themselves derivative works of the Document. + +If the Cover Text requirement of section 3 is applicable to these copies of the +Document, then if the Document is less than one half of the entire aggregate, +the Document's Cover Texts may be placed on covers that bracket the Document +within the aggregate, or the electronic equivalent of covers if the Document is +in electronic form. Otherwise they must appear on printed covers that bracket +the whole aggregate. + +=== 8. TRANSLATION + +Translation is considered a kind of modification, so you may distribute +translations of the Document under the terms of section 4. 
Replacing Invariant +Sections with translations requires special permission from their copyright +holders, but you may include translations of some or all Invariant Sections in +addition to the original versions of these Invariant Sections. You may include +a translation of this License, and all the license notices in the Document, and +any Warranty Disclaimers, provided that you also include the original English +version of this License and the original versions of those notices and +disclaimers. In case of a disagreement between the translation and the original +version of this License or a notice or disclaimer, the original version will +prevail. + +If a section in the Document is Entitled "Acknowledgements", "Dedications", or +"History", the requirement (section 4) to Preserve its Title (section 1) will +typically require changing the actual title. + +=== 9. TERMINATION + +You may not copy, modify, sublicense, or distribute the Document except as +expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense, or distribute it is void, and will automatically terminate your +rights under this License. + +However, if you cease all violation of this License, then your license from a +particular copyright holder is reinstated (a) provisionally, unless and until +the copyright holder explicitly and finally terminates your license, and (b) +permanently, if the copyright holder fails to notify you of the violation by +some reasonable means prior to 60 days after the cessation. + +Moreover, your license from a particular copyright holder is reinstated +permanently if the copyright holder notifies you of the violation by some +reasonable means, this is the first time you have received notice of violation +of this License (for any work) from that copyright holder, and you cure the +violation prior to 30 days after your receipt of the notice. 
+ +Termination of your rights under this section does not terminate the licenses +of parties who have received copies or rights from you under this License. If +your rights have been terminated and not permanently reinstated, receipt of a +copy of some or all of the same material does not give you any rights to use it. + +=== 10. FUTURE REVISIONS OF THIS LICENSE + +The Free Software Foundation may publish new, revised versions of the GNU Free +Documentation License from time to time. Such new versions will be similar in +spirit to the present version, but may differ in detail to address new problems +or concerns. See http://www.gnu.org/copyleft/. + +Each version of the License is given a distinguishing version number. If the +Document specifies that a particular numbered version of this License "or any +later version" applies to it, you have the option of following the terms and +conditions either of that specified version or of any later version that has +been published (not as a draft) by the Free Software Foundation. If the +Document does not specify a version number of this License, you may choose any +version ever published (not as a draft) by the Free Software Foundation. If the +Document specifies that a proxy can decide which future versions of this +License can be used, that proxy's public statement of acceptance of a version +permanently authorizes you to choose that version for the Document. + +=== 11. RELICENSING + +"Massive Multiauthor Collaboration Site" (or "MMC Site") means any World Wide +Web server that publishes copyrightable works and also provides prominent +facilities for anybody to edit those works. A public wiki that anybody can edit +is an example of such a server. A "Massive Multiauthor Collaboration" (or +"MMC") contained in the site means any set of copyrightable works thus +published on the MMC site. 
+ +"CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 license +published by Creative Commons Corporation, a not-for-profit corporation with a +principal place of business in San Francisco, California, as well as future +copyleft versions of that license published by that same organization. + +"Incorporate" means to publish or republish a Document, in whole or in part, as +part of another Document. + +An MMC is "eligible for relicensing" if it is licensed under this License, and +if all works that were first published under this License somewhere other than +this MMC, and subsequently incorporated in whole or in part into the MMC, (1) +had no cover texts or invariant sections, and (2) were thus incorporated prior +to November 1, 2008. + +The operator of an MMC Site may republish an MMC contained in the site under +CC-BY-SA on the same site at any time before August 1, 2009, provided the MMC +is eligible for relicensing. + diff --git a/dtformats/leveldb.debug.yaml b/dtformats/leveldb.debug.yaml new file mode 100644 index 0000000..e9dda61 --- /dev/null +++ b/dtformats/leveldb.debug.yaml @@ -0,0 +1,37 @@ +# dtFormats debug specification. 
---
data_type_map: leveldb_log_block
attributes:
- name: checksum
  description: "Checksum"
  format: hexadecimal_8digits
# The attribute name has to match the member name declared for the
# leveldb_log_block structure in leveldb.yaml, which is record_data_size.
- name: record_data_size
  description: "Record data size"
  format: decimal
- name: record_type
  description: "Record type"
  format: decimal
- name: record_data
  description: "Record data"
  format: binary_data
def ReadFileObject(self, file_object):
  """Reads a LevelDB write ahead log file-like object.

  Blocks are read back to back starting at offset 0 until the end of the
  file is reached. The number of bytes left in the current 32 KiB page is
  tracked; once 6 or fewer bytes remain, which is less than the 7 bytes of
  a block header (checksum, record data size and record type), the remainder
  of the page is skipped and a new page is started.

  Args:
    file_object (file): file-like object.

  Raises:
    ParseError: if the file cannot be read.
  """
  full_page_size = 32 * 1024
  largest_trailer_size = 6

  current_offset = 0
  remaining_in_page = full_page_size

  while current_offset < self._file_size:
    _block, block_size = self._ReadBlock(file_object, current_offset)

    current_offset += block_size
    remaining_in_page -= block_size

    if remaining_in_page > largest_trailer_size:
      continue

    # Skip the page trailer and continue at the start of the next page.
    current_offset += remaining_in_page
    remaining_in_page = full_page_size
+ + Args: + debug (Optional[bool]): True if debug information should be written. + file_system_helper (Optional[FileSystemHelper]): file system helper. + output_writer (Optional[OutputWriter]): output writer. + """ + super(LevelDBDatabaseStoredTablesFile, self).__init__( + debug=debug, file_system_helper=file_system_helper, + output_writer=output_writer) + + def _DebugPrintKeyPrefix(self, key_prefix): + """Prints key prefix information. + + Args: + key_prefix (tuple[int, int, int]): key prefix. + """ + value_string, _ = self._FormatIntegerAsDecimal(key_prefix[0]) + self._DebugPrintValue('Database identifier', value_string) + + value_string, _ = self._FormatIntegerAsDecimal(key_prefix[1]) + self._DebugPrintValue('Object store identifier', value_string) + + value_string, _ = self._FormatIntegerAsDecimal(key_prefix[2]) + self._DebugPrintValue('Index identifier', value_string) + + def _ReadKeyPrefix(self, data): + """Reads a key prefix. + + Args: + data (bytes): data. + + Returns: + tuple[tuple[int, int, int], int]: key prefix and number of bytes read. + """ + byte_value = data[0] + bytes_read = 1 + + database_identifier = 0 + object_store_identifier = 0 + index_identifier = 0 + + bit_shift = 0 + for _ in range((byte_value >> 5) + 1): + database_identifier |= data[bytes_read] << bit_shift + bytes_read += 1 + bit_shift += 8 + + bit_shift = 0 + for _ in range(((byte_value & 0x1f) >> 2) + 1): + object_store_identifier |= data[bytes_read] << bit_shift + bytes_read += 1 + bit_shift += 8 + + bit_shift = 0 + for _ in range((byte_value & 0x03) + 1): + index_identifier |= data[bytes_read] << bit_shift + bytes_read += 1 + bit_shift += 8 + + key_prefix = ( + database_identifier, object_store_identifier, index_identifier) + + return key_prefix, bytes_read + + def _ReadVariableSizeInteger(self, data): + """Reads a variable size integer. + + Args: + data (bytes): data. + + Returns: + tuple[int, int]: integer value and number of bytes read. 
+ """ + data_size = len(data) + + byte_value = data[0] + bytes_read = 1 + bit_shift = 0 + + integer_value = int(byte_value) & 0x7f + + while bytes_read < data_size and byte_value & 0x80: + byte_value = data[bytes_read] + bytes_read += 1 + bit_shift += 7 + + integer_value |= (int(byte_value) & 0x7f) << bit_shift + + # TODO: check maximum size + + return integer_value, bytes_read + + def _ReadFileFooter(self, file_object): + """Reads the file footer. + + Args: + file_object (file): file-like object. + + Returns: + leveldb_ldb_footer: file footer. + + Raises: + ParseError: if the file footer cannot be read. + """ + data_type_map = self._GetDataTypeMap('leveldb_ldb_footer') + file_offset = self._file_size - 48 + + file_footer, _ = self._ReadStructureFromFileObject( + file_object, file_offset, data_type_map, 'file footer') + + file_footer.metaindex_block_offset, data_offset = ( + self._ReadVariableSizeInteger(file_footer.data)) + + file_footer.metaindex_block_size, bytes_read = ( + self._ReadVariableSizeInteger(file_footer.data[data_offset:])) + data_offset += bytes_read + + file_footer.index_block_offset, bytes_read = ( + self._ReadVariableSizeInteger(file_footer.data[data_offset:])) + data_offset += bytes_read + + file_footer.index_block_size, bytes_read = ( + self._ReadVariableSizeInteger(file_footer.data[data_offset:])) + data_offset += bytes_read + + if self._debug: + file_footer.padding = file_footer.data[data_offset:] + + debug_info = self._DEBUG_INFORMATION.get('leveldb_ldb_footer', None) + self._DebugPrintStructureObject(file_footer, debug_info) + + return file_footer + + def _ReadIndexBlock(self, file_object, file_footer): + """Reads the index block. + + Args: + file_object (file): file-like object. + file_footer (leveldb_ldb_footer): file footer. + + Raises: + ParseError: if the index block cannot be read. 
+ """ + data = self._ReadData( + file_object, file_footer.index_block_offset, + file_footer.index_block_size, 'index block') + + if self._debug: + self._DebugPrintData('Index block data', data) + + data_offset = 0 + + unknown1, bytes_read = ( + self._ReadVariableSizeInteger(data[data_offset:])) + data_offset += bytes_read + + unknown2, bytes_read = ( + self._ReadVariableSizeInteger(data[data_offset:])) + data_offset += bytes_read + + unknown3, bytes_read = ( + self._ReadVariableSizeInteger(data[data_offset:])) + data_offset += bytes_read + + if self._debug: + value_string, _ = self._FormatIntegerAsHexadecimal8(unknown1) + self._DebugPrintValue('Unknown1', value_string) + + value_string, _ = self._FormatIntegerAsHexadecimal8(unknown2) + self._DebugPrintValue('Unknown2', value_string) + + value_string, _ = self._FormatIntegerAsHexadecimal8(unknown3) + self._DebugPrintValue('Unknown3', value_string) + + def _ReadMetaindexBlock(self, file_object, file_footer): + """Reads the metaindex block. + + Args: + file_object (file): file-like object. + file_footer (leveldb_ldb_footer): file footer. + + Raises: + ParseError: if the metaindex block cannot be read. 
+ """ + data = self._ReadData( + file_object, file_footer.metaindex_block_offset, + file_footer.metaindex_block_size, 'metaindex block') + + if self._debug: + self._DebugPrintData('Metaindex block data', data) + + data_offset = 0 + data_size = len(data) + + while data_offset < data_size: + key_prefix, bytes_read = self._ReadKeyPrefix(data[data_offset:]) + data_offset += bytes_read + + metadata_type = int(data[data_offset]) + data_offset += 1 + + if self._debug: + self._DebugPrintKeyPrefix(key_prefix) + + value_string, _ = self._FormatIntegerAsDecimal(metadata_type) + self._DebugPrintValue('Metadata type', value_string) + + if key_prefix != (0, 0, 0): + raise errors.ParseError(f'Unsupported key prefix: {key_prefix!s}') + + if metadata_type not in (0, 1, 2, 3, 4, 5, 201): + raise errors.ParseError(f'Unsupported metadata type: {metadata_type:d}') + + if self._debug: + self._DebugPrintData('Metadata', data[data_offset:]) + + break + + def ReadFileObject(self, file_object): + """Reads a LevelDB database stored tables file-like object. + + Args: + file_object (file): file-like object. + + Raises: + ParseError: if the file cannot be read. + """ + file_footer = self._ReadFileFooter(file_object) + + self._ReadMetaindexBlock(file_object, file_footer) + self._ReadIndexBlock(file_object, file_footer) diff --git a/dtformats/leveldb.yaml b/dtformats/leveldb.yaml new file mode 100644 index 0000000..5683d79 --- /dev/null +++ b/dtformats/leveldb.yaml @@ -0,0 +1,65 @@ +# dtFabric format specification. 
+--- +name: leveldb +type: format +description: LevelDB database formats +urls: ["https://github.com/libyal/dtformats/blob/main/documentation/LevelDB%20database%20format.asciidoc"] +--- +name: byte +type: integer +attributes: + format: unsigned + size: 1 + units: bytes +--- +name: uint8 +type: integer +attributes: + format: unsigned + size: 1 + units: bytes +--- +name: uint16 +type: integer +attributes: + format: unsigned + size: 2 + units: bytes +--- +name: uint32 +type: integer +attributes: + format: unsigned + size: 4 + units: bytes +--- +name: leveldb_ldb_footer +type: structure +attributes: + byte_order: little-endian +members: +- name: data + type: stream + element_data_type: byte + elements_data_size: 40 +- name: signature + type: stream + element_data_type: byte + elements_data_size: 8 + value: "\x57\xfb\x80\x8b\x24\x75\x47\xdb" +--- +name: leveldb_log_block +type: structure +attributes: + byte_order: little-endian +members: +- name: checksum + data_type: uint32 +- name: record_data_size + data_type: uint16 +- name: record_type + data_type: uint8 +- name: record_data + type: stream + element_data_type: byte + elements_data_size: leveldb_log_block.record_data_size diff --git a/dtformats/vhdx.yaml b/dtformats/vhdx.yaml index 998f3db..48ce281 100644 --- a/dtformats/vhdx.yaml +++ b/dtformats/vhdx.yaml @@ -46,7 +46,7 @@ members: - name: signature type: stream element_data_type: byte - number_of_elements: 8 + elements_data_size: 8 value: "vhdxfile" - name: creator type: string @@ -56,7 +56,7 @@ members: - name: unknown1 type: stream element_data_type: byte - number_of_elements: 65016 + elements_data_size: 65016 --- name: header type: structure @@ -66,7 +66,7 @@ members: - name: signature type: stream element_data_type: byte - number_of_elements: 4 + elements_data_size: 4 value: "head" - name: checksum data_type: uint32 @@ -85,7 +85,7 @@ members: - name: unknown1 type: stream element_data_type: byte - number_of_elements: 4016 + elements_data_size: 4016 --- name: 
def Main():
  """The main program function.

  Parses the command line arguments, determines the type of LevelDB file
  from the signature stored in the last 8 bytes of the file and opens the
  file with the matching parser.

  Returns:
    bool: True if successful or False if not.
  """
  import os  # pylint: disable=import-outside-toplevel

  argument_parser = argparse.ArgumentParser(description=(
      'Extracts information from LevelDB database files.'))

  argument_parser.add_argument(
      '-d', '--debug', dest='debug', action='store_true', default=False,
      help='enable debug output.')

  if dfvfs_helpers:
    dfvfs_helpers.AddDFVFSCLIArguments(argument_parser)

  argument_parser.add_argument(
      'source', nargs='?', action='store', metavar='PATH',
      default=None, help='path of the LevelDB database file.')

  options = argument_parser.parse_args()

  logging.basicConfig(
      level=logging.INFO, format='[%(levelname)s] %(message)s')

  if dfvfs_helpers and getattr(options, 'image', None):
    file_system_helper = dfvfs_helpers.ParseDFVFSCLIArguments(options)
    if not file_system_helper:
      print('No supported file system found in storage media image.')
      print('')
      return False

  else:
    if not options.source:
      print('Source file missing.')
      print('')
      argument_parser.print_help()
      print('')
      return False

    file_system_helper = file_system.NativeFileSystemHelper()

  output_writer = output_writers.StdoutWriter()

  try:
    output_writer.Open()
  except IOError as exception:
    print(f'Unable to open output writer with error: {exception!s}')
    print('')
    return False

  # Ensure the output writer is closed on every exit path from here on.
  try:
    file_object = file_system_helper.OpenFileByPath(options.source)
    if not file_object:
      print('Unable to open source file.')
      print('')
      return False

    # The stored tables file signature is the last 8 bytes of the file
    # footer, hence it is read relative to the end of the file.
    try:
      file_object.seek(-8, os.SEEK_END)
      file_signature = file_object.read(8)
    except (IOError, ValueError):
      # The file is too small to contain a footer signature.
      file_signature = b''
    finally:
      file_object.close()

    # Pass the file system helper so the file is opened through the same
    # helper that was used for the signature check.
    if file_signature == b'\x57\xfb\x80\x8b\x24\x75\x47\xdb':
      leveldb_file = leveldb.LevelDBDatabaseStoredTablesFile(
          debug=options.debug, file_system_helper=file_system_helper,
          output_writer=output_writer)
    else:
      leveldb_file = leveldb.LevelDBDatabaseLogFile(
          debug=options.debug, file_system_helper=file_system_helper,
          output_writer=output_writer)

    leveldb_file.Open(options.source)

    try:
      print('LevelDB database file information:')

      print('')

    finally:
      leveldb_file.Close()

  finally:
    output_writer.Close()

  return True