Skip to content

Commit

Permalink
[RNTuple] refactor display and parse column flags 0x08 (#316)
Browse files Browse the repository at this point in the history
* test schema is slim

* bump version
  • Loading branch information
Moelf authored Mar 15, 2024
1 parent f6d62f5 commit 2d2acf9
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "UnROOT"
uuid = "3cd96dde-e98d-4713-81e9-a4a1b0235ce9"
authors = ["Tamas Gal", "Jerry Ling", "Johannes Schumann", "Nick Amin"]
version = "0.10.24"
version = "0.10.25"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
36 changes: 30 additions & 6 deletions src/RNTuple/displays.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
"""
    _showwithkw(io, k)

Print `k` to `io` in keyword style: the type of `k`, an opening parenthesis, one
`name=repr(value), ` entry per field of `typeof(k)`, and finally `)` followed by a
newline.
"""
function _showwithkw(io, @nospecialize(k))
    record_type = typeof(k)
    print(io, record_type, "(")
    foreach(fieldnames(record_type)) do name
        print(io, name, "=", repr(getfield(k, name)), ", ")
    end
    return println(io, ")")
end

# Display a `FieldRecord` in keyword style by delegating to `_showwithkw`.
Base.show(io::IO, f::FieldRecord) = _showwithkw(io, f)

# Display a `ColumnRecord` in keyword style by delegating to `_showwithkw`.
Base.show(io::IO, f::ColumnRecord) = _showwithkw(io, f)

# Display an `AliasRecord` in keyword style by delegating to `_showwithkw`.
# The hand-written `print` of `physical_id`/`field_id` is dropped: together with the
# helper call it rendered the same record twice.
function Base.show(io::IO, f::AliasRecord)
    return _showwithkw(io, f)
end
# Display a `Locator` in keyword style by delegating to `_showwithkw`.
Base.show(io::IO, f::Locator) = _showwithkw(io, f)

function Base.show(io::IO, lf::StringField)
Expand Down Expand Up @@ -36,10 +58,11 @@ function Base.show(io::IO, header::RNTupleHeader, indent=0, short=false)
l1 = maximum(length, [f.field_name for f in header.field_records])
l2 = maximum(length, [f.type_name for f in header.field_records])
println(io, "$ind field_records: ")
for f in header.field_records
for (fidx, f) in enumerate(header.field_records)
print(io, "$ind ")
print(io, "(implicit idx=$(lpad(fidx-1, 2, "0"))), ")
print(io, "parent=$(lpad(Int(f.parent_field_id), 2, "0")), ")
print(io, "role=$(Int(f.struct_role)), ")
print(io, "struct_role=$(Int(f.struct_role)), ")
print(io, "name=$(rpad(f.field_name, l1+1, " ")), ")
print(io, "type=$(rpad(f.type_name, l2+1, " "))")
println(io, "repetition=$(f.repetition)")
Expand All @@ -52,7 +75,8 @@ function Base.show(io::IO, header::RNTupleHeader, indent=0, short=false)
print(io, "type=$(lpad(Int(g.type), 2, "0")), ")
print(io, "nbits=$(lpad(Int(g.nbits), 2, "0")), ")
print(io, "field_id=$(lpad(Int(g.field_id), 3, "0")), ")
println(io, "flags=$(g.flags)")
print(io, "flags=$(g.flags), ")
println(io, "first_ele_index=$(g.first_ele_idx)")
end
end
end
Expand All @@ -77,13 +101,13 @@ function Base.show(io::IO, rn::RNTuple)
print(io, " └─ ")
println(io, "Schema: ")
_io = IOBuffer()
print_tree(_io, rn.schema; maxdepth=1, indicate_truncation=false)
print_tree(_io, rn.schema; maxdepth=3, indicate_truncation=true)
for l in split(String(take!(_io)), '\n')
print(io, " ")
println(io, l)
end
end
# Show a schema as a tree, descending at most 10 levels. Only this definition is
# kept: a second method with the identical signature would silently overwrite the first.
Base.show(io::IO, s::RNTupleSchema) = print_tree(io, s; maxdepth=10)
# Text printed for an `RNTupleSchema` node: its number of top-level fields.
printnode(io::IO, s::RNTupleSchema) = print(io, "RNTupleSchema with $(length(s)) top fields")
# Children of a schema: a `Dict` built from the name => value pairs of the wrapped
# NamedTuple (read with `getfield` to reach the raw struct field).
children(s::RNTupleSchema) = Dict(pairs(getfield(s, :namedtuple)))

Expand Down
29 changes: 20 additions & 9 deletions src/RNTuple/fieldcolumn_reading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ end

_field_output_type(::Type{StringField{O, T}}) where {O, T} = Vector{String}
function read_field(io, field::StringField{O, T}, page_list) where {O, T}
nbits = field.content_col.nbits
nbits = field.content_col.columnrecord.nbits
pages = page_list[field.content_col.content_col_idx]

offset = read_field(io, field.offset_col, page_list)
Expand All @@ -65,9 +65,9 @@ end

_field_output_type(::Type{RNTupleCardinality{T}}) where {T} = CardinalityVector{T}
function read_field(io, field::RNTupleCardinality{T}, page_list) where T
nbits = field.leaf_field.nbits
nbits = field.leaf_field.columnrecord.nbits
pages = page_list[field.leaf_field.content_col_idx]
typenum = field.leaf_field.type
typenum = field.leaf_field.columnrecord.type
split = 14 <= typenum <= 21 || 26 <= typenum <= 28
delta = 14 <= typenum <= 15
bytes = read_pagedesc(io, pages, nbits; split)
Expand All @@ -81,6 +81,19 @@ end

# Decode a zigzag-encoded integer: the lowest bit carries the sign and the remaining
# bits the magnitude, so 0, 1, 2, 3, 4, ... decode to 0, -1, 1, -2, 2, ...
# `xor` is written as a function call so the operator cannot be lost by
# tooling that drops non-ASCII characters.
_from_zigzag(n) = xor(n >> 1, -(n & 1))
# Zigzag-encode a signed integer so that small magnitudes of either sign become small
# non-negative codes: 0, -1, 1, -2, 2, ... encode to 0, 1, 2, 3, 4, ...
# For signed integers `n >> 63` is an arithmetic shift: all ones when `n` is negative,
# zero otherwise. `xor` is written as a function call so the operator cannot be lost
# by tooling that drops non-ASCII characters.
_to_zigzag(n) = xor(n << 1, n >> 63)
# Replace every element of `res` with its zigzag-decoded value, in place, and
# return `res`.
function _from_zigzag!(res::AbstractVector)
    res .= _from_zigzag.(res)
    return res
end

# Replace every element of `res` with its zigzag-encoded value, in place, and
# return `res`.
function _to_zigzag!(res::AbstractVector)
    res .= _to_zigzag.(res)
    return res
end

function _reset_to_incremental(res::AbstractVector, pages, ::Type{T}) where T
endpoint = 0
Expand All @@ -92,19 +105,17 @@ end

_field_output_type(::Type{LeafField{T}}) where {T} = Vector{T}
function read_field(io, field::LeafField{T}, page_list) where T
nbits = field.nbits
nbits = field.columnrecord.nbits
pages = page_list[field.content_col_idx]
# handle split encoding within page
typenum = field.type
typenum = field.columnrecord.type
split = 14 <= typenum <= 21 || 26 <= typenum <= 28
zigzag = 26 <= typenum <= 28
delta = 14 <= typenum <= 15
bytes = read_pagedesc(io, pages, nbits; split = split)
res = collect(reinterpret(T, bytes))
if zigzag
@simd for i in eachindex(res)
res[i] = _from_zigzag(res[i])
end
_from_zigzag!(res)
elseif delta
# the Index32/64 resets to absolute offset page-by-page
# https://github.com/JuliaHEP/UnROOT.jl/issues/312#issuecomment-1999875348
Expand All @@ -118,7 +129,7 @@ end

_field_output_type(::Type{LeafField{Bool}}) = BitVector
function read_field(io, field::LeafField{Bool}, page_list)
nbits = field.nbits
nbits = field.columnrecord.nbits
pages = page_list[field.content_col_idx]
total_num_elements = sum(p.num_elements for p in pages)

Expand Down
13 changes: 6 additions & 7 deletions src/RNTuple/fieldcolumn_schema.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ isvoid(::Type{<:StringField}) = false
"""
struct LeafField{T}
content_col_idx::Int
type::Int
nbits::Int
columnrecord::ColumnRecord
end
Base case of field nesting, this links to a column in the RNTuple by 0-based index.
Expand All @@ -68,8 +67,7 @@ The `type` field is the RNTuple spec type number, used to record split encoding.
"""
struct LeafField{T}
    # index of the column holding this field's content (used to look up its pages)
    content_col_idx::Int
    # full column record; callers read `columnrecord.type` and `columnrecord.nbits`
    # from it, so separate `type`/`nbits` fields are no longer stored here
    columnrecord::ColumnRecord
end
# The element type of a `LeafField{T}` is its type parameter `T`.
Base.eltype(::Type{LeafField{T}}) where {T} = T
# `isvoid` is always `false` for leaf field types.
isvoid(::Type{<:LeafField}) = false
Expand All @@ -92,16 +90,17 @@ isvoid(::Type{<:RNTupleCardinality}) = false
function _search_col_type(field_id, column_records, col_id::Int...)
if length(col_id) == 2 && column_records[col_id[2]].type == 5
index_record = column_records[col_id[1]]
char_record = column_records[col_id[2]]
index_typenum = index_record.type
LeafType = rntuple_col_type_dict[index_typenum]
return StringField(
LeafField{LeafType}(col_id[1], index_typenum, index_record.nbits),
LeafField{Char}(col_id[2], 5, 8)
LeafField{LeafType}(col_id[1],index_record),
LeafField{Char}(col_id[2], char_record)
)
elseif length(col_id) == 1
record = column_records[only(col_id)]
LeafType = rntuple_col_type_dict[record.type]
return LeafField{LeafType}(only(col_id), record.type, record.nbits)
return LeafField{LeafType}(only(col_id), record)
else
error("un-handled RNTuple case, report issue to UnROOT.jl")
end
Expand Down
4 changes: 3 additions & 1 deletion src/RNTuple/footer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ function split8_reinterpret!(dst, src::Vector{UInt8})
end

"""
read_pagedesc(io, pagedesc::Vector{PageDescription}, nbits::Integer)
read_pagedesc(io, pagedescs::AbstractVector{PageDescription}, nbits::Integer; split=false)
Read the decompressed raw bytes described by the given page descriptions.
`nbits` needs to be provided according to the element type of the
column, since a page description only contains `num_elements` information.
`split` is true when split encoding is needed; this is done per page.
!!! note
Boolean values are always stored as bits in RNTuple, so `nbits = 1`.
Expand Down
18 changes: 16 additions & 2 deletions src/RNTuple/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function _rntuple_read(io, ::Type{FieldRecord})
parent_field_id = read(io, UInt32)
struct_role = read(io, UInt16)
flags = read(io, UInt16)
repetition = if flags == 0x0001
repetition = if flags == 0x01
read(io, Int64)
else
0
Expand All @@ -26,12 +26,26 @@ function _rntuple_read(io, ::Type{FieldRecord})
struct_role, flags, repetition, field_name, type_name, type_alias, field_desc)
end

# One column record of the RNTuple header. `first_ele_idx` is only present on disk
# when bit 0x08 of `flags` is set; otherwise it is stored here as 0.
struct ColumnRecord
    type::UInt16
    nbits::UInt16
    field_id::UInt32
    flags::UInt32
    first_ele_idx::Int64
end

# Read a `ColumnRecord` from `io`: two UInt16 (type, nbits), two UInt32 (field_id,
# flags) and, when the 0x08 flag bit is set, an Int64 first element index.
# `flags` is a bit field, so the bit is tested with a mask; an equality test would
# skip the index whenever any other flag bit is set alongside 0x08.
function _rntuple_read(io, ::Type{ColumnRecord})
    type = read(io, UInt16)
    nbits = read(io, UInt16)
    field_id = read(io, UInt32)
    flags = read(io, UInt32)
    first_ele_idx = iszero(flags & 0x08) ? zero(Int64) : read(io, Int64)
    return ColumnRecord(type, nbits, field_id, flags, first_ele_idx)
end


@SimpleStruct struct AliasRecord
physical_id::UInt32
Expand Down
13 changes: 9 additions & 4 deletions src/RNTuple/highlevel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,14 @@ RNTupleSchema with 13 top fields
```
"""
struct RNTupleSchema
    # The wrapped NamedTuple. Declared once, as `NamedTuple`: a second declaration of
    # the same field name is an error. Read it with `getfield(s, :namedtuple)`.
    namedtuple::NamedTuple
end
# Property names of a schema are those of the wrapped NamedTuple. `getfield` is used
# to reach the raw struct field because `getproperty` is overloaded just below.
Base.propertynames(s::RNTupleSchema) = propertynames(getfield(s, :namedtuple))
# `s.name` looks `name` up in the wrapped NamedTuple rather than in the struct itself.
Base.getproperty(s::RNTupleSchema, sym::Symbol) = getproperty(getfield(s, :namedtuple), sym)
# Length of a schema is the length of the wrapped NamedTuple.
Base.length(s::RNTupleSchema) = length(getfield(s, :namedtuple))
# Index into the wrapped NamedTuple with `idx` and wrap the result in a new
# `RNTupleSchema`.
Base.getindex(s::RNTupleSchema, idx) = RNTupleSchema(getfield(s, :namedtuple)[idx])

function Base.getindex(rf::RNTupleField, idx::Int)
tid = Threads.threadid()
Expand Down Expand Up @@ -174,8 +177,8 @@ struct RNTuple{O}
header::RNTupleHeader
footer::RNTupleFooter
pagelinks::Dict{Int, PageLink}
schema::Any
function RNTuple(io::O, header, footer, schema::S) where {O, S}
schema::RNTupleSchema
function RNTuple(io::O, header, footer, schema) where {O}
new{O}(
io,
header,
Expand Down Expand Up @@ -213,7 +216,9 @@ function LazyTree(rn::RNTuple, selection)
end

N = Tuple(Symbol.(filtered_names))
T = Tuple(RNTupleField(rn, getproperty(rn.schema, k)) for k in N)
skim_schema = getfield(rn.schema, :namedtuple)[N]
new_rn = RNTuple(rn.io, rn.header, rn.footer, skim_schema)
T = Tuple(RNTupleField(new_rn, getproperty(new_rn.schema, k)) for k in N)

return LazyTree(NamedTuple{N}(T))
end
9 changes: 9 additions & 0 deletions test/rntuple_tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,15 @@ end
@test length(names(df4)) == 1
end

# Selecting a subset of fields must also slim the schema carried by the resulting tree.
@testset "Skim the schema" begin
    f1 = UnROOT.samplefile("RNTuple/DAOD_TRUTH3_RC2.root")
    df_full = LazyTree(f1, "RNT:CollectionTree")
    df1 = LazyTree(f1, "RNT:CollectionTree", r"AntiKt4TruthDressedWZ")
    @test 0 < length(names(df1)) < length(names(df_full))
    # membership test written with `in`; the infix operator was missing from this line
    @test "AntiKt4TruthDressedWZJetsAux:" in names(df1)
    @test length(df1[!, 1].rn.schema) < length(df_full[!, 1].rn.schema)
end

@testset "Skip Recursively Empty Structs" begin
f1 = UnROOT.samplefile("RNTuple/DAOD_TRUTH3_RC2.root")
df = LazyTree(f1, "RNT:CollectionTree", r"AntiKt4TruthDressedWZ")
Expand Down

2 comments on commit 2d2acf9

@Moelf
Copy link
Member Author

@Moelf Moelf commented on 2d2acf9 Mar 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/102963

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.10.25 -m "<description of version>" 2d2acf9ed1fa2f8b6d556b7df10faf9311552bc2
git push origin v0.10.25

Please sign in to comment.