Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: write RNTuples with strings, structs, and nested lists #1395

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
21 changes: 9 additions & 12 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import struct
import sys
from collections import defaultdict
from itertools import accumulate

import numpy
import xxhash
Expand Down Expand Up @@ -743,10 +742,8 @@ def read_pagedesc(self, destination, desc, dtype_str, dtype, nbits, split):
content = res.view(dtype)

if isbit:
content = (
numpy.unpackbits(content.view(dtype=numpy.uint8))
.reshape(-1, 8)[:, ::-1]
.reshape(-1)
content = numpy.unpackbits(
content.view(dtype=numpy.uint8), bitorder="little"
)
elif dtype_str in ("real32trunc", "real32quant"):
if nbits == 32:
Expand Down Expand Up @@ -778,18 +775,21 @@ def read_col_pages(
if dtype_byte in uproot.const.rntuple_delta_types:
# Extract the last offset values:
last_elements = [
arr[-1] for arr in arrays[:-1]
(arr[-1] if len(arr) > 0 else numpy.zeros((), dtype=arr.dtype))
for arr in arrays[:-1]
] # First value always zero, therefore skip first arr.
# Compute the running (cumulative) sum of the last offsets:
last_offsets = list(accumulate(last_elements))
last_offsets = numpy.cumsum(last_elements)
# Add the offsets to each array
for i in range(1, len(arrays)):
arrays[i] += last_offsets[i - 1]
# Remove the first element from every sub-array except for the first one:
arrays = [arrays[0]] + [arr[1:] for arr in arrays[1:]]

res = numpy.concatenate(arrays, axis=0)

dtype_byte = self.column_records[ncol].type
if dtype_byte in uproot.const.rntuple_index_types:
res = numpy.insert(res, 0, 0) # for offsets

if pad_missing_element:
first_element_index = self.column_records[ncol].first_element_index
res = numpy.pad(res, (first_element_index, 0))
Expand Down Expand Up @@ -817,7 +817,6 @@ def read_col_page(self, ncol, cluster_i):
split = dtype_byte in uproot.const.rntuple_split_types
zigzag = dtype_byte in uproot.const.rntuple_zigzag_types
delta = dtype_byte in uproot.const.rntuple_delta_types
index = dtype_byte in uproot.const.rntuple_index_types
nbits = (
self.column_records[ncol].nbits
if ncol < len(self.column_records)
Expand All @@ -836,8 +835,6 @@ def read_col_page(self, ncol, cluster_i):
cumsum += numpy.sum(res[tracker:tracker_end])
tracker = tracker_end

if index:
res = numpy.insert(res, 0, 0) # for offsets
if zigzag:
res = _from_zigzag(res)
elif delta:
Expand Down
Loading
Loading