Skip to content

Commit

Permalink
feat: basic RNTuple writing (#1356)
Browse files Browse the repository at this point in the history
* Writing RNTuple with no data now works

* Re-enabled existing writing tests and fixed a few things

* It works now for flat arrays of native types

* Fix test

* Some cleanup

* Added test for basic writing

* Fixed UTF-8 string encoding

* Fixed typo
  • Loading branch information
ariostas authored Feb 17, 2025
1 parent 806e6e3 commit 8a25c5a
Show file tree
Hide file tree
Showing 7 changed files with 432 additions and 439 deletions.
3 changes: 3 additions & 0 deletions src/uproot/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@
kStreamedMemberWise = numpy.uint16(1 << 14)

############ RNTuple https://github.com/root-project/root/blob/0b9cdbcfd326ba50ee6c2f202675656129eafbe7/tree/ntuple/v7/doc/BinaryFormatSpecification.md

rntuple_version_for_writing = (1, 0, 0, 1)

rntuple_col_num_to_dtype_dict = {
0x00: "bit",
0x01: "uint8", # uninterpreted byte
Expand Down
2 changes: 1 addition & 1 deletion src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def read_members(self, chunk, cursor, context, file):
-_rntuple_anchor_format.size
- _rntuple_anchor_checksum_format.size : -_rntuple_anchor_checksum_format.size
]
)
), "Anchor checksum does not match! File is corrupted or incompatible."
cursor.skip(-_rntuple_anchor_checksum_format.size)

self._header_chunk_ready = False
Expand Down
59 changes: 14 additions & 45 deletions src/uproot/writing/_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -1727,63 +1727,32 @@ def add_tree(
tree.write_anew(sink)
return tree

def add_rntuple(self, sink, name, title, akform):
def add_rntuple(self, sink, name, title, akform, description=""):
import uproot.writing._cascadentuple

anchor = uproot.writing._cascadentuple.NTuple_Anchor(
None, 0, 0, 48, None, None, None, None, None, None, 0
None,
*uproot.const.rntuple_version_for_writing,
None,
None,
None,
None,
None,
None,
0, # TODO: Fix this
)

header = uproot.writing._cascadentuple.NTuple_Header(None, name, "", akform)

footer = uproot.writing._cascadentuple.NTuple_Footer(
None, 0, header._crc32, akform
header = uproot.writing._cascadentuple.NTuple_Header(
None, name, description, akform
)

# the empty page list is hard-coded as bytes that represent:
# 0 1 2 3
# 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | Envelope Version | Minimum Version |
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | Size |T|
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | Number of Items (for list frames) |Reserv.|
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | FRAME PAYLOAD |
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# | CRC32 |
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
#
# - Envelope Version = 1 (0x0100)
# - Minimum Version = 1 (0x0100)
# - Size = -8 (0xf8ffffff) [value is negative because this is a list]
# - Number of Items = 0 (0x00000000) [empty list]
# - FRAME PAYLOAD = empty [because number of items is 0]
# - CRC32 = 2678769841
# manually calculate CRC32:

# In [1]: zlib.crc32(b'\x01\x00\x01\x00\xf8\xff\xff\xff\00\00\00\00')
# Out[1]: 2678769841
# In [2]: np.array([177, 200, 170, 159], dtype=np.uint8).view("uint32")
# Out[2]: array([2678769841], dtype=uint32)

empty_page_list_bytes = numpy.array(
[1, 0, 1, 0, 248, 255, 255, 255, 0, 0, 0, 0, 177, 200, 170, 159],
dtype=numpy.uint8,
)
offset = self._freesegments.allocate(16)
footer.cluster_group_record_frames[0].page_list_envlink.locator = (
uproot.writing._cascadentuple.NTuple_Locator(16, offset)
)
footer = uproot.writing._cascadentuple.NTuple_Footer(None, header._checksum)

ntuple = uproot.writing._cascadentuple.NTuple(
self, name, title, akform, self._freesegments, header, footer, [], anchor
self, akform, self._freesegments, header, footer, [], anchor
)

sink.write(offset, empty_page_list_bytes)
ntuple.write(sink)
sink.flush()
return ntuple


Expand Down
Loading

0 comments on commit 8a25c5a

Please sign in to comment.