Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added support for dictionary and fixed bugs #95

Merged
merged 10 commits into from
Jun 18, 2024
149 changes: 135 additions & 14 deletions c_src/adbc_arrow_array.hpp

Large diffs are not rendered by default.

756 changes: 336 additions & 420 deletions c_src/adbc_column.hpp

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions c_src/adbc_consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ static ERL_NIF_TERM kAtomAdbcError;
static ERL_NIF_TERM kAtomNil;
static ERL_NIF_TERM kAtomTrue;
static ERL_NIF_TERM kAtomFalse;
static ERL_NIF_TERM kAtomKey;
static ERL_NIF_TERM kAtomValue;
static ERL_NIF_TERM kAtomInfinity;
static ERL_NIF_TERM kAtomNegInfinity;
static ERL_NIF_TERM kAtomNaN;
Expand Down Expand Up @@ -120,17 +122,19 @@ static ERL_NIF_TERM kAdbcColumnTypeMap;
static ERL_NIF_TERM kAdbcColumnTypeDenseUnion;
static ERL_NIF_TERM kAdbcColumnTypeSparseUnion;
static ERL_NIF_TERM kAdbcColumnTypeRunEndEncoded;
static ERL_NIF_TERM kAdbcColumnTypeDictionary;

// error codes
constexpr int kErrorBufferIsNotAMap = 1;
constexpr int kErrorBufferGetDataListLength = 2;
constexpr int kErrorBufferGetMapValue = 3;
constexpr int kErrorBufferWrongStruct = 4;
constexpr int kErrorBufferDataIsNotAList = 5;
constexpr int kErrorBufferUnknownType = 6;
constexpr int kErrorBufferGetMetadataKey = 7;
constexpr int kErrorBufferGetMetadataValue = 8;
constexpr int kErrorExpectedCalendarISO = 9;
constexpr int kErrorInternalError = 10;
constexpr int kErrorBufferDataIsNotAMap = 6;
constexpr int kErrorBufferUnknownType = 7;
constexpr int kErrorBufferGetMetadataKey = 8;
constexpr int kErrorBufferGetMetadataValue = 9;
constexpr int kErrorExpectedCalendarISO = 10;
constexpr int kErrorInternalError = 11;

#endif // ADBC_CONSTS_H
3 changes: 3 additions & 0 deletions c_src/adbc_nif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,8 @@ static int on_load(ErlNifEnv *env, void **, ERL_NIF_TERM) {
kAtomNil = erlang::nif::atom(env, "nil");
kAtomTrue = erlang::nif::atom(env, "true");
kAtomFalse = erlang::nif::atom(env, "false");
kAtomKey = erlang::nif::atom(env, "key");
kAtomValue = erlang::nif::atom(env, "value");
kAtomInfinity = erlang::nif::atom(env, "infinity");
kAtomNegInfinity = erlang::nif::atom(env, "neg_infinity");
kAtomNaN = erlang::nif::atom(env, "nan");
Expand Down Expand Up @@ -864,6 +866,7 @@ static int on_load(ErlNifEnv *env, void **, ERL_NIF_TERM) {
kAdbcColumnTypeDenseUnion = erlang::nif::atom(env, "dense_union");
kAdbcColumnTypeSparseUnion = erlang::nif::atom(env, "sparse_union");
kAdbcColumnTypeRunEndEncoded = erlang::nif::atom(env, "run_end_encoded");
kAdbcColumnTypeDictionary = erlang::nif::atom(env, "dictionary");

return 0;
}
Expand Down
84 changes: 79 additions & 5 deletions lib/adbc_column.ex
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ defmodule Adbc.Column do
values: %Adbc.Column{}
}
@valid_run_end_types [:i16, :i32, :i64]
@type dictionary_data_t :: %{
key: %Adbc.Column{},
value: %Adbc.Column{}
}
@type data_type ::
:boolean
| signed_integer
Expand All @@ -106,10 +110,13 @@ defmodule Adbc.Column do
| duration_t
| interval_t
| :run_end_encoded
@spec column(data_type(), list() | list_view_data_t(), Keyword.t()) :: %Adbc.Column{}
| :dictionary
@spec column(data_type(), list() | list_view_data_t() | dictionary_data_t(), Keyword.t()) ::
%Adbc.Column{}
def column(type, data, opts \\ [])
when (is_atom(type) or is_tuple(type)) and
(is_list(data) or (type in @list_view_types and is_map(data))) and is_list(opts) do
(is_list(data) or (type in @list_view_types and is_map(data)) or
(type == :dictionary and is_map(data))) and is_list(opts) do
name = opts[:name]
nullable = opts[:nullable] || false
metadata = opts[:metadata] || nil
Expand Down Expand Up @@ -785,7 +792,8 @@ defmodule Adbc.Column do

"""
@spec fixed_size_binary([iodata() | nil], non_neg_integer(), Keyword.t()) :: %Adbc.Column{}
def fixed_size_binary(data, nbytes, opts \\ []) when is_list(data) and is_list(opts) do
def fixed_size_binary(data, nbytes, opts \\ [])
when is_list(data) and is_integer(nbytes) and is_list(opts) do
column({:fixed_size_binary, nbytes}, data, opts)
end

Expand Down Expand Up @@ -1090,7 +1098,57 @@ defmodule Adbc.Column do
end

@doc """
Convert a list view to a list.
Construct an array using dictionary encoding.

Dictionary encoding is a data representation technique to represent values by integers
referencing a dictionary usually consisting of unique values. It can be effective when
you have data with many repeated values.

Any array can be dictionary-encoded. The dictionary is stored as an optional property
of an array. When a field is dictionary encoded, the values are represented by an array
of non-negative integers representing the index of the value in the dictionary. The memory
layout for a dictionary-encoded array is the same as that of a primitive integer layout.
The dictionary is handled as a separate columnar array with its own respective layout.

As an example, you could have the following data:

```elixir
Adbc.Column.string(["foo", "bar", "foo", "bar", nil, "baz"], nullable: true)
```

In dictionary-encoded form, this could appear as:

```elixir
Adbc.Column.dictionary(
Adbc.Column.i32([0, 1, 0, 1, nil, 2], nullable: true),
Adbc.Column.string(["foo", "bar", "baz"], nullable: true)
)
```

## Arguments

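* `key`: an `Adbc.Column` of an integer type (`:i8`, `:u8`, `:i16`, `:u16`, `:i32`,
`:u32`, `:i64` or `:u64`). Each element is the zero-based index of the row's value
in `value`, or `nil` for a null row.

* `value`: an `Adbc.Column` holding the dictionary, i.e. the values that the
indices in `key` refer to.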

* `opts`: A keyword list of options

## Options

* `:name` - The name of the column
* `:nullable` - A boolean value indicating whether the column is nullable
* `:metadata` - A map of metadata
"""
# Column types accepted for the index (`key`) column of a dictionary. Kept as a
# module attribute, like `@valid_run_end_types`, so the guard below stays readable.
@valid_dictionary_index_types [:i8, :u8, :i16, :u16, :i32, :u32, :i64, :u64]

@spec dictionary(%Adbc.Column{}, %Adbc.Column{}, Keyword.t()) :: %Adbc.Column{}
def dictionary(%Adbc.Column{type: index_type} = key, %Adbc.Column{} = value, opts \\ [])
    when index_type in @valid_dictionary_index_types and is_list(opts) do
  # `key` carries the per-row indices and `value` the dictionary entries; both
  # are stored together as the data of a single `:dictionary` column.
  column(:dictionary, %{key: key, value: value}, opts)
end

@doc """
Convert a list view, run-end encoding array or a dictionary to a list.

## Examples

Expand Down Expand Up @@ -1285,10 +1343,26 @@ defmodule Adbc.Column do
name: column.name,
type: values.type,
nullable: column.nullable,
data: Enum.reverse(decoded)
data: Enum.reverse(decoded),
metadata: nil
}
end

# Decodes a dictionary-encoded column into a plain column.
#
# Every index in `key.data` is replaced by the dictionary entry at that
# position; a `nil` index stays `nil`. The returned column keeps the fields of
# the dictionary column itself, except that `data` holds the decoded values and
# `type` becomes the type of the (decoded) dictionary.
def to_list(%Adbc.Column{data: %{key: key, value: value}, type: :dictionary} = column) do
  # Normalise the dictionary through `to_list/1` before indexing into it.
  value = to_list(value)

  # A tuple gives constant-time positional access. `Enum.at/2` on a list walks
  # the list for every row, which makes decoding O(rows * dictionary size).
  entries = List.to_tuple(value.data)
  entry_count = tuple_size(entries)

  decoded =
    Enum.map(key.data, fn
      nil ->
        nil

      index when is_integer(index) and index >= 0 and index < entry_count ->
        elem(entries, index)

      # Out-of-range indices decode to `nil`. This includes negative ones, which
      # `Enum.at/2` would have silently resolved from the end of the dictionary.
      index when is_integer(index) ->
        nil
    end)

  %{column | data: decoded, type: value.type}
end

# Converts a list-backed column by applying `to_list/1` to each element of its
# data; every other field of the column is left as it is.
def to_list(%Adbc.Column{data: data} = column) when is_list(data) do
  converted = for item <- data, do: Adbc.Column.to_list(item)
  %{column | data: converted}
end
Expand Down
28 changes: 28 additions & 0 deletions test/adbc_column_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -557,4 +557,32 @@ defmodule Adbc.Column.Test do
} == Adbc.Column.to_list(run_end_array)
end
end

describe "dictionary" do
  test "to list" do
    # Logical column (VarBinary):
    #   ['foo', 'bar', 'foo', 'bar', null, 'baz']
    #
    # The same column in dictionary-encoded form:
    #   indices    (Int32):     [0, 1, 0, 1, null, 2]
    #   dictionary (VarBinary): ['foo', 'bar', 'baz']
    indices = Adbc.Column.i32([0, 1, 0, 1, nil, 2], name: "key", nullable: true)
    values = Adbc.Column.string(["foo", "bar", "baz"], name: "value", nullable: false)

    decoded =
      indices
      |> Adbc.Column.dictionary(values)
      |> Adbc.Column.to_list()

    # The decoded column takes its type from the dictionary values and keeps
    # the (default) name, nullability and metadata of the dictionary column.
    assert %Adbc.Column{
             name: nil,
             type: :string,
             nullable: false,
             metadata: nil,
             data: ["foo", "bar", "foo", "bar", nil, "baz"]
           } = decoded
  end
end
end
110 changes: 55 additions & 55 deletions test/adbc_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -273,62 +273,62 @@ defmodule AdbcTest do
d5 = Decimal.new("9876543210987654321098765432109876543.2")
d6 = Decimal.new("-9876543210987654321098765432109876543.2")
d7 = Decimal.new("1E-37")

assert {:ok,
%Adbc.Result{
data: [
%Adbc.Column{
name: "d1",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d1]
},
%Adbc.Column{
name: "d2",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d2]
},
%Adbc.Column{
name: "d3",
type: :f64,
nullable: true,
metadata: nil,
data: [1.234567891234568]
},
%Adbc.Column{
name: "d4",
type: :f64,
nullable: true,
metadata: nil,
data: [-1.234567891234568]
},
%Adbc.Column{
name: "d5",
type: {:decimal, 128, 38, 1},
nullable: true,
metadata: nil,
data: [^d5]
},
%Adbc.Column{
name: "d6",
type: {:decimal, 128, 38, 1},
nullable: true,
metadata: nil,
data: [^d6]
},
%Adbc.Column{
name: "d7",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d7]
}
],
num_rows: 0
}
} =
%Adbc.Result{
data: [
%Adbc.Column{
name: "d1",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d1]
},
%Adbc.Column{
name: "d2",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d2]
},
%Adbc.Column{
name: "d3",
type: :f64,
nullable: true,
metadata: nil,
data: [1.234567891234568]
},
%Adbc.Column{
name: "d4",
type: :f64,
nullable: true,
metadata: nil,
data: [-1.234567891234568]
},
%Adbc.Column{
name: "d5",
type: {:decimal, 128, 38, 1},
nullable: true,
metadata: nil,
data: [^d5]
},
%Adbc.Column{
name: "d6",
type: {:decimal, 128, 38, 1},
nullable: true,
metadata: nil,
data: [^d6]
},
%Adbc.Column{
name: "d7",
type: {:decimal, 128, 38, 37},
nullable: true,
metadata: nil,
data: [^d7]
}
],
num_rows: 0
}} =
Adbc.Connection.query(
conn,
"""
Expand Down