Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Density geometry revamp #1157

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Prev Previous commit
Next Next commit
[WIP] prelim version of the density geometry revamp
working on #1152.

Note: This is a WIP. It currently breaks `Geom.density` completely, and
`Geom.violin` has several regressions.
  • Loading branch information
tlnagy committed Jun 3, 2018
commit 944fc295efb82afd7bff585e631520ad07cd6c92
71 changes: 71 additions & 0 deletions src/aesthetics.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using IterTools

# Union of `Void` and the three array types (`Vector`, `DataArray`, `IndirectArray`).
# NOTE(review): presumably `Void` stands for "aesthetic not set" -- confirm against callers.
const NumericalOrCategoricalAesthetic = Union{Void, Vector, DataArray, IndirectArray}

@@ -413,3 +415,72 @@ function inherit!(a::Aesthetics, b::Aesthetics;
end
nothing
end

"""
Given aesthetics to group with, `by`, and an aesthetic to group `togroupvar`
this function constructs a dictionary that maps each given combination of the
`by` aesthetics to the positions which they apply to. Thus the output is a
dictionary of tuples of each unique combination of `by` mapped to a boolean
array of length `n` where `n` is the length of the aesthetics (they have to all
have the same length). If one of the provided `by` aesthetics is missing, a placeholder
`nothing` is returned in its place instead of a unique value.

## Examples

```jldoctest
aes = Gadfly.Aesthetics()
aes.x = repeat([1, 2], inner=3)
aes.y = collect(1:6)

groupby(aes, [:x, :color], :y)

# output

Dict((2, nothing)=>Bool[false, false, false, true, true, true],(1, nothing)=>Bool[true, true, true, false, false, false])
```

```jldoctest
aes = Gadfly.Aesthetics()
aes.x = repeat([:a, :b], inner=2)
aes.y = collect(1:4)
aes.color = repeat([colorant"red", colorant"blue"], inner=2)

groupby(aes, [:x, :color], :y)

# output

Dict((:a, RGB{N0f8}(1.0,0.0,0.0))=>Bool[true, true, false, false],(:b, RGB{N0f8}(0.0,0.0,1.0))=>Bool[false, false, true, true])
```

"""
function groupby(aes::Gadfly.Aesthetics, by::Vector{Symbol}, togroupvar::Symbol)
    # Number of data positions; every defined `by` aesthetic must match this length.
    n = length(getfield(aes, togroupvar))

    # Element type of each `by` aesthetic (`Nothing` when the aesthetic is not set).
    # This must be a `Vector{Type}`: `fill(Nothing, k)` would be a `Vector{DataType}`,
    # which cannot hold eltypes such as `Union{Missing, Int}` or an abstract
    # parametric type and would throw on assignment.
    types = Type[Nothing for i in eachindex(by)]
    isconcrete = fill(false, length(by))
    for i in eachindex(by)
        column = getfield(aes, by[i])
        isconcrete[i] = column !== nothing
        isconcrete[i] || continue
        types[i] = eltype(column)
        @assert n == length(column) "$togroupvar and $(by[i]) aesthetics must have same length"
    end

    T = Tuple{types...}
    grouped = Dict{T, Vector{Bool}}()

    # Candidate values for each `by` aesthetic; an aesthetic that is not set
    # contributes the single placeholder `nothing`.
    opt = [isconcrete[i] ? unique(getfield(aes, by[i])) : [nothing]
           for i in eachindex(by)]

    # Identify the positions where the `by` aesthetics take a given combination of
    # values. Start by assuming every position belongs to the combination, then
    # whittle the mask down one aesthetic at a time.
    for combo in product(opt...)
        belongs = fill(true, n)
        for i in eachindex(by)
            # Skip on the flag rather than on the value, so that a defined aesthetic
            # which happens to contain `nothing` is still filtered.
            isconcrete[i] || continue
            value = combo[i]
            # `unique` compares with `isequal`, so membership must as well; `==`
            # would drop NaN groups, merge 0.0 with -0.0 and fail on `missing`.
            belongs .&= [isequal(v, value) for v in getfield(aes, by[i])]
        end
        # With several `by` variables many combinations never co-occur; only keep
        # those that cover at least one position.
        any(belongs) && (grouped[combo] = belongs)
    end
    return grouped
end
70 changes: 52 additions & 18 deletions src/geom/density.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,29 @@
# Geometry element holding the statistic it is paired with, plus the draw
# `order` and an identifying `tag`.
struct DensityGeometry <: Gadfly.GeometryElement
    stat::Gadfly.StatisticElement
    order::Int
    tag::Symbol
end

# Keyword-only constructor: any keyword other than `order` and `tag` is forwarded
# to `Gadfly.Stat.DensityStatistic` to build the stored statistic.
DensityGeometry(; order=1, tag=empty_tag, kwargs...) =
    DensityGeometry(Gadfly.Stat.DensityStatistic(; kwargs...), order, tag)

# Constructor taking an already-built statistic as its only positional argument.
function DensityGeometry(stat; order=1, tag=empty_tag)
    return DensityGeometry(stat, order, tag)
end

const density = DensityGeometry

# Deliberately empty. Per the PR discussion, listing aesthetics here causes them
# to be filled with autogenerated values, which defeats the error messages from
# `Gadfly.assert_aesthetics_defined`.
function element_aesthetics(::DensityGeometry)
    return Symbol[]
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

element_aesthetics should contain :x, :y, and :color, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I don't leave this blank, they are filled with autogenerated values so it's impossible to give useful error messages using Gadfly.assert_aesthetics_defined. I wasn't sure how to get around this so I leave this blank and figure out errors later: https://github.com/GiovineItalia/Gadfly.jl/pull/1157/files#diff-9ec506bf78232ae17d082c22c2e66449R616

# The default statistic is obtained by passing the geometry's stored statistic to
# the `Gadfly.Stat.DensityStatistic` constructor.
function default_statistic(geom::DensityGeometry)
    return Gadfly.Stat.DensityStatistic(geom.stat)
end

# Geometry element for violin plots.
#   stat  -- the statistic paired with this geometry
#   split -- when true, `render` draws one half-violin per color
#   order -- draw order passed to the rendering context
#   tag   -- identifying tag attached to the drawn polygons
struct ViolinGeometry <: Gadfly.GeometryElement
    stat::Gadfly.StatisticElement
    split::Bool
    order::Int
    tag::Symbol
end

# Keyword-only constructor: any keyword other than `order`, `tag` and `split` is
# forwarded to `Gadfly.Stat.DensityStatistic` to build the stored statistic.
# The stale `ViolinGeometry(; order, tag) = ViolinGeometry(order, tag)` method is
# removed: it had the same (keyword-only) signature, so it was overwritten by this
# definition, and it called a two-argument constructor that the four-field struct
# does not provide.
function ViolinGeometry(; order=1, tag=empty_tag, split=false, kwargs...)
    return ViolinGeometry(Gadfly.Stat.DensityStatistic(; kwargs...), split, order, tag)
end

"""
Geom.violin[(; order=1)]
@@ -15,29 +36,42 @@ const violin = ViolinGeometry

# Aesthetics consumed by the violin geometry.
element_aesthetics(::ViolinGeometry) = [:x, :y, :color]

# The default statistic is obtained by passing the geometry's stored statistic to
# the `Gadfly.Stat.DensityStatistic` constructor. The stale method returning
# `Gadfly.Stat.violin()` is removed: it had the same dispatch signature and was
# overwritten by this definition.
default_statistic(geom::ViolinGeometry) = Gadfly.Stat.DensityStatistic(geom.stat)

function render(geom::ViolinGeometry, theme::Gadfly.Theme, aes::Gadfly.Aesthetics)
# TODO: What should we do with the color aesthetic?

Gadfly.assert_aesthetics_defined("Geom.violin", aes, :y, :width)
Gadfly.assert_aesthetics_equal_length("Geom.violin", aes, :y, :width)

default_aes = Gadfly.Aesthetics()
default_aes.color = fill(theme.default_color, length(aes.y))
aes = Gadfly.inherit(aes, default_aes)

# Group y, width and color by x
ux = unique(aes.x)
grouped_color = Dict(x => first(aes.color[aes.x.==x]) for x in ux)
grouped_y = Dict(x => aes.y[aes.x.==x] for x in ux)
grouped_width = Dict(x => aes.width[aes.x.==x] for x in ux)

kgy = keys(grouped_y)
violins = [vcat([(x - w/2, y) for (y, w) in zip(grouped_y[x], grouped_width[x])],
reverse!([(x + w/2, y) for (y, w) in zip(grouped_y[x], grouped_width[x])]))
for x in kgy]
colors = [grouped_color[x] for x in kgy]
grouped_data = Gadfly.groupby(aes, [:x, :color], :y)
violins = Array{NTuple{2, Float64}}[]

colors = []
(aes.color == nothing) && (aes.color = fill(theme.default_color, length(aes.x)))
color_opts = unique(aes.color)
if geom.split && length(color_opts) > 2
error("Split violins require 2 colors, not more")
end

for (keys, belongs) in grouped_data
x, color = keys
ys = aes.y[belongs]
ws = aes.width[belongs]

if geom.split
pos = findfirst(color_opts, color)
if pos == 1
push!(violins, [(x - w/2, y) for (y, w) in zip(ys, ws)])
else
push!(violins, reverse!([(x + w/2, y) for (y, w) in zip(ys, ws)]))
end
push!(colors, color)
else
push!(violins, vcat([(x - w/2, y) for (y, w) in zip(ys, ws)],
reverse!([(x + w/2, y) for (y, w) in zip(ys, ws)])))
push!(colors, color != nothing ? color : theme.default_color)
end
end

ctx = context(order=geom.order)
compose!(ctx, Compose.polygon(violins, geom.tag), fill(colors))
10 changes: 0 additions & 10 deletions src/geom/line.jl
Original file line number Diff line number Diff line change
@@ -51,16 +51,6 @@ geometry is equivalent to [`Geom.line`](@ref) with `preserve_order=true`.
"""
function path()
    # A line geometry with `preserve_order` switched on.
    return LineGeometry(preserve_order=true)
end

"""
Geom.density[(; bandwidth=-Inf)]

Draw a line showing the density estimate of the `x` aesthetic.
This geometry is equivalent to [`Geom.line`](@ref) with
[`Stat.density`](@ref); see the latter for more information.
"""
function density(; bandwidth::Real=-Inf)
    # Build the density statistic first, then wrap it in a line geometry.
    stat = Gadfly.Stat.density(bandwidth=bandwidth)
    return LineGeometry(stat)
end

"""
Geom.density2d[(; bandwidth=(-Inf,-Inf), levels=15)]

Loading