Skip to content

Commit

Permalink
Remove precompilation
Browse files Browse the repository at this point in the history
  • Loading branch information
chriselrod committed Apr 18, 2023
1 parent 07121c7 commit 0195916
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 32 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "VectorizedRNG"
uuid = "33b4df10-0173-11e9-2a0c-851a7edac40e"
authors = ["Chris Elrod <[email protected]>"]
version = "0.2.23"
version = "0.2.24"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand Down
14 changes: 0 additions & 14 deletions src/VectorizedRNG.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,18 +123,4 @@ function __init__()
__init()
end

@static if VERSION >= v"1.8.0-beta1"
let
while false
end
__init()
x64 = Vector{Float64}(undef, 16)
rand!(local_rng(), x64)
randn!(local_rng(), x64)
x32 = Vector{Float32}(undef, 16)
rand!(local_rng(), x32)
randn!(local_rng(), x32)
end
end

end # module
37 changes: 20 additions & 17 deletions src/api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,14 @@ end
) where {K} = vload(ptr, args...)
@inline _vload(x::Number, args::Vararg{Any,K}) where {K} = x

function random_sample_u2!(
@inline function samplevector!(
f::F,
rng::AbstractVRNG{P},
x::AbstractArray{T},
α,
β,
γ,
g::G = identity
g::G
) where {F,P,T,G}
state = getstate(rng, Val{2}(), pick_vector_width(UInt64))
GC.@preserve x begin
Expand Down Expand Up @@ -290,14 +290,14 @@ function random_sample_u2!(
end # GC preserve
x
end
function random_sample_u2!(
@inline function samplevector!(
f::F,
rng::AbstractVRNG{P},
x::AbstractArray{T},
::StaticInt{0},
β,
γ,
g::G = identity
g::G
) where {F,P,T,G}
state = getstate(rng, Val{2}(), pick_vector_width(UInt64))
GC.@preserve x begin
Expand Down Expand Up @@ -353,7 +353,7 @@ function Random.rand!(
β = StaticInt{0}(),
γ = StaticInt{1}()
) where {T<:Union{Float32,Float64},F}
random_sample_u2!(random_uniform, rng, x, α, β, γ, f)
samplevector!(random_uniform, rng, x, α, β, γ, f)
end

function Random.rand!(
Expand All @@ -363,7 +363,7 @@ function Random.rand!(
β = StaticInt{0}(),
γ = StaticInt{1}()
) where {T<:Union{Float32,Float64}}
random_sample_u2!(random_uniform, rng, x, α, β, γ, identity)
samplevector!(random_uniform, rng, x, α, β, γ, identity)
end

function Random.randn!(
Expand All @@ -373,7 +373,7 @@ function Random.randn!(
β = StaticInt{0}(),
γ = StaticInt{1}()
) where {T<:Union{Float32,Float64}}
random_sample_u2!(random_normal, rng, x, α, β, γ)
samplevector!(random_normal, rng, x, α, β, γ, identity)
end
@inline function random_unsigned(
state::AbstractState,
Expand All @@ -383,13 +383,14 @@ end
nextstate(state, Val{N}())
end
function Random.rand!(rng::AbstractVRNG, x::AbstractArray{UInt64})
random_sample_u2!(
samplevector!(
random_unsigned,
rng,
x,
StaticInt{0}(),
StaticInt{0}(),
StaticInt{1}()
StaticInt{1}(),
identity
)
end

Expand All @@ -399,7 +400,7 @@ function Random.rand!(
x::StaticArraysCore.MArray{<:Tuple,T}
) where {T<:Union{Float32,Float64}}
GC.@preserve x begin
random_sample_u2!(random_uniform, rng, PtrArray(x), α, β, γ)
samplevector!(random_uniform, rng, PtrArray(x), α, β, γ, identity)
end
return x
end
Expand All @@ -413,7 +414,7 @@ function Random.rand!(
}
a = MArray{S,UInt64}(undef)
GC.@preserve a begin
random_sample_u2!(random_uniform, rng, PtrArray(a), α, β, γ)
samplevector!(random_uniform, rng, PtrArray(a), α, β, γ, identity)
end
x .= a
end
Expand All @@ -422,7 +423,7 @@ function Random.randn!(
x::StaticArraysCore.MArray{<:Tuple,T}
) where {T<:Union{Float32,Float64}}
GC.@preserve x begin
random_sample_u2!(random_normal, rng, PtrArray(x), α, β, γ)
samplevector!(random_normal, rng, PtrArray(x), α, β, γ, identity)
end
return x
end
Expand All @@ -436,7 +437,7 @@ function Random.randn!(
}
a = MArray{S,UInt64}(undef)
GC.@preserve a begin
random_sample_u2!(random_normal, rng, PtrArray(a), α, β, γ)
samplevector!(random_normal, rng, PtrArray(a), α, β, γ, identity)
end
x .= a
end
Expand All @@ -445,13 +446,14 @@ function Random.rand!(
rng::AbstractVRNG,
x::StaticArraysCore.MArray{<:Tuple,UInt64}
)
random_sample_u2!(
samplevector!(
random_unsigned,
rng,
x,
StaticInt{0}(),
StaticInt{0}(),
StaticInt{1}()
StaticInt{1}(),
identity
)
end
function Random.rand!(
Expand All @@ -460,13 +462,14 @@ function Random.rand!(
) where {S<:Tuple,SA<:StaticArraysCore.StaticArray{S,UInt64}}
a = MArray{S,UInt64}(undef)
GC.@preserve a begin
random_sample_u2!(
samplevector!(
random_unsigned,
rng,
PtrArray(a),
StaticInt{0}(),
StaticInt{0}(),
StaticInt{1}()
StaticInt{1}(),
identity
)
end
x .= a
Expand Down

5 comments on commit 0195916

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/81842

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.24 -m "<description of version>" 0195916663a8f8dbfc9afa87a5155d6aa52380bf
git push origin v0.2.24

@timholy
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here you reported that you removed this because it caused allocation. I tried restoring it and got

julia> @btime rand!($rng, $x64)
  8.054 ns (0 allocations: 0 bytes)

which doesn't reproduce the problem. I'm just guessing, though. If you have a reproducer I'd be very grateful.

@chriselrod
Copy link
Member Author

@chriselrod chriselrod commented on 0195916 Jul 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@timholy, I can't reproduce the allocation at the moment, but I do get a >8x performance regression from precompilation.
Using:

julia> versioninfo()
Julia Version 1.11.0-DEV.193
Commit 4825a0cda8 (2023-07-28 23:57 UTC)
Platform Info:
  OS: Linux (x86_64-redhat-linux)
  CPU: 8 × 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, tigerlake)
  Threads: 11 on 8 virtual cores

with

$ git checkout 07121c744d13765516e56426b1a39c9e26eb1ef5
julia> using VectorizedRNG, Random

julia> drng = Random.default_rng(); lrng = local_rng();

julia> @benchmark rand!($drng, $x64)
BenchmarkTools.Trial: 7651 samples with 651 evaluations.
 Range (min … max):  189.528 ns …   1.022 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     195.982 ns              ┊ GC (median):    0.00%
 Time  (mean ± σ):   196.846 ns ± 10.616 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

          ▃  ▁ ▇  █▄  ▆                                         
  ▂▂▁▁▇▃▂▅█▆▄█▇█▅▆██▆▇█▆▅▄▆▄▄▃▆▄▃▃▄▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▃
  190 ns          Histogram: frequency by time          212 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 232 evaluations.
 Range (min … max):  321.810 ns … 690.384 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     350.211 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   351.910 ns ±  14.740 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

               █▇  ▆▃  ▇▂▁▁█▂▂ ▁▁ ▁▁  ▂▅▂  ▁                     
  ▁▁▁▁▁▁▃▅▂▂▆▅▃██▅▇██▇███████████████████▇▇█▆▄▅▆▄▃▅▅▃▃▂▁▂▂▁▂▂▁▁ ▄
  322 ns           Histogram: frequency by time          386 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.


julia> x64 = Vector{Float64}(undef, 16);

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 993 evaluations.
 Range (min … max):  36.073 ns … 179.427 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     43.142 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   42.534 ns ±   3.070 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

                                   ▄▅█    ▄                     
  ▂▂▁▂▂▂▃▂▄▃▃█▃▇▃▄▄▃▄▃▃▃▂▃▂▂▂▂▃▃▃▂▂███▅▃███▄▃▅▅▆▃▂▃▃▃▃▂▂▂▂▂▂▂▂ ▃
  36.1 ns         Histogram: frequency by time         47.9 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

Versus the master branch

julia> using VectorizedRNG, Random
Precompiling VectorizedRNG
  1 dependency successfully precompiled in 2 seconds. 30 already precompiled.

julia> drng = Random.default_rng(); lrng = local_rng();

julia> x64 = Vector{Float64}(undef, 255);

julia> @benchmark rand!($drng, $x64)
BenchmarkTools.Trial: 7735 samples with 641 evaluations.
 Range (min … max):  192.515 ns … 437.150 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     198.988 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   200.141 ns ±   7.497 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

              █                                                  
  ▂▄▂▆▅▃▄▃▃█▅▄█▇▅▇▄▄▅▅▄█▆▄▄▃▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▂
  193 ns           Histogram: frequency by time          221 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 991 evaluations.
 Range (min … max):  40.011 ns … 162.111 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     40.981 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   41.033 ns ±   1.627 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  ▇▆        ▆█▃    ▁▁ ▁▅▃                        ▁▁            ▂
  ██▄▃▁▁▁▄█▅████▆▅▇██▇████▆▆▅█▇▇▆▇▇▇▆▅▆█▇▆▆▄▅▃▆▅▅███▇▆▆▆▆▆▅▇▇▇ █
  40 ns         Histogram: log(frequency) by time      45.2 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> x64 = Vector{Float64}(undef, 16);

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
 Range (min … max):  4.759 ns … 52.906 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     7.022 ns              ┊ GC (median):    0.00%
 Time  (mean ± σ):   6.820 ns ±  1.551 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

   ▁▃▄▄▅▄▄▄▄▃▂▂▁▁▁▁▂  █▇▅▅▁▂▂▂▂▃▃▂▂▁                         ▂
  ████████████████████████████████████▆▇▅▆▇▆▄▄▆▄▃▃▄▂▄▂▃▂▄▄▄▄ █
  4.76 ns      Histogram: log(frequency) by time     11.2 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

No allocations, but a mean time of 352 ns (with precompilation, at 07121c7) vs 41 ns (without, on master).

The fact that it does not allocate makes it harder to detect and realize that something is wrong.
Allocations are easier to test than runtime in unit tests. Perhaps looking at the `code_llvm` output would work, assuming we don't get lied to. The current state of affairs is that it's safer to just not precompile, so few of my libraries do.

@chriselrod
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot reproduce on Julia 1.9.3:

julia> using VectorizedRNG, Random
[ Info: Precompiling VectorizedRNG [33b4df10-0173-11e9-2a0c-851a7edac40e]

julia> drng = Random.default_rng(); lrng = local_rng();

julia> x64 = Vector{Float64}(undef, 255);

julia> @benchmark rand!($drng, $x64)
BenchmarkTools.Trial: 9698 samples with 937 evaluations.
 Range (min … max):  104.732 ns … 229.638 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     109.067 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   109.176 ns ±   3.211 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

            ▇▅         █                                         
  ▂▂▁▁▂▂▂▂▂▂██▂▂▂▃▃▆▅▄▅█▃▃▂▃▃▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂ ▃
  105 ns           Histogram: frequency by time          119 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 990 evaluations.
 Range (min … max):  44.895 ns … 72.201 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     46.005 ns              ┊ GC (median):    0.00%
 Time  (mean ± σ):   45.661 ns ±  0.753 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  ▁▆▇█▇▅▂                  ▃▆▇█▆▄▁                  ▁▁▂▂▁     ▃
  ███████▇▃▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▆███████▇▆▇▇▇▇▇▆▇█▇▇▆█▇▇▇█████████▇ █
  44.9 ns      Histogram: log(frequency) by time      47.4 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> x64 = Vector{Float64}(undef, 256);

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 991 evaluations.
 Range (min … max):  41.219 ns … 107.426 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     44.272 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   44.761 ns ±   1.031 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

                       ▁          ██▄        ▁█▇▃         ▃▃▁  ▂
  ▆▄▁▁▁▁▁▁▁▁▄▁▃▁▁▃▁▁▁▃▅█▇▄▁▁▁▁▃▁▁▄███▁▃▁▁▃▁▁▁████▆▇███████████ █
  41.2 ns       Histogram: log(frequency) by time      46.5 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> x64 = Vector{Float64}(undef, 16);

julia> @benchmark rand!($lrng, $x64)
BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
 Range (min … max):  5.288 ns … 61.774 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     5.884 ns              ┊ GC (median):    0.00%
 Time  (mean ± σ):   5.913 ns ±  0.640 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

                     ▁█    ▆                                  
  ▂▂▃▂▁▂▂▂▁▁▁▁▂▂▃▅▃▂▂██▅▂▂██▅▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▁▂▁▁▁▁▂▂ ▃
  5.29 ns        Histogram: frequency by time        6.95 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

julia> versioninfo()
Julia Version 1.9.3-DEV.0
Commit 6fc1be04ee* (2023-07-06 14:55 UTC)
Platform Info:
  OS: Linux (x86_64-redhat-linux)
  CPU: 8 × 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-14.0.6 (ORCJIT, tigerlake)
  Threads: 8 on 8 virtual cores

Note that Base is also nearly 2x faster here.
I've noticed that Julia master is slower than the release version in general, but perhaps that's to be expected for an unreleased version.

Please sign in to comment.