From 5aac865a9203f9532d0ceed873f0636109b6b9d8 Mon Sep 17 00:00:00 2001
From: Adam Beckmeyer
Date: Thu, 27 Aug 2020 23:25:02 -0400
Subject: [PATCH] Store first value in Dict directly in innerjoin

This avoids allocating a Vector for the case where l does not have
multiple indices with the same value. For the smoke-test benchmark in ,
this reduces allocations by half and overall runtime by 10%.
---
 src/innerjoin.jl | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/innerjoin.jl b/src/innerjoin.jl
index b8c2e61..2cbe3e1 100644
--- a/src/innerjoin.jl
+++ b/src/innerjoin.jl
@@ -146,9 +146,25 @@ function _innerjoin!(out, l::AbstractArray, r::AbstractArray, v::AbstractArray,
     else
         lkeys = keys(l)
         V = eltype(lkeys)
-        dict = Dict{eltype(l), Vector{V}}()
+        # For bitstypes i_l::V we can avoid allocating a vector by storing
+        # i_l directly in the Dict until a second matching i_l comes along
+        dict = Dict{eltype(l), Union{V,Vector{V}}}()
         @inbounds for i_l ∈ lkeys
-            push!(get!(Vector{V}, dict, l[i_l]), i_l)
+            # NOTE: Base.ht_keyindex2!, Base._setindex! and the age/keys/vals fields are
+            # unexported Dict internals and may change between Julia versions.
+            # One hash lookup: a positive result is the slot of an existing key,
+            # a negative result is the negated slot where the key can be inserted
+            dict_index = Base.ht_keyindex2!(dict, l[i_l])
+            if dict_index > 0
+                old = dict.vals[dict_index]
+                # Second match promotes the bare index to a vector; later ones append
+                new = old isa V ? [old, i_l] : push!(old, i_l)
+                dict.age += 1
+                dict.keys[dict_index] = l[i_l]
+                dict.vals[dict_index] = new
+            else
+                Base._setindex!(dict, i_l, l[i_l], -dict_index)
+            end
         end
 
         @inbounds for i_r in keys(r)