Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise on out of bounds range access #1061

Merged
merged 8 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,20 @@ defmodule Explorer.Shared do
names
end

def to_existing_columns(%{names: names}, %Range{} = columns, _raise?) do
def to_existing_columns(%{names: names} = df, first..last//_ = columns, raise?) do
if raise? do
# We say a range is in bounds when both limits are valid column indices.
# In certain instances this will reject a valid input to `Enum.slice/2`,
# but there were some ambiguous cases we needed to cover. See
# https://github.com/elixir-explorer/explorer/pull/1061 for a discussion.
n_cols = Explorer.DataFrame.n_columns(df)

if max(abs(first), abs(last)) >= n_cols do
raise ArgumentError,
"range #{inspect(columns)} is out of bounds for a dataframe with #{n_cols} column(s)"
end
end

Enum.slice(names, columns)
end

Expand Down
4 changes: 3 additions & 1 deletion test/explorer/data_frame/lazy_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,9 @@ defmodule Explorer.DataFrame.LazyTest do
df3 = DF.distinct(df, ..)
assert DF.names(df3) == DF.names(df)

assert df == DF.distinct(df, 100..200)
assert_raise ArgumentError,
"range 100..200 is out of bounds for a dataframe with 10 column(s)",
fn -> DF.distinct(df, 100..200) end
end
end

Expand Down
29 changes: 24 additions & 5 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2689,7 +2689,6 @@ defmodule Explorer.DataFrameTest do
assert DF.to_columns(df[["a"]]) == %{"a" => [1, 2, 3]}
assert DF.to_columns(df[[:a, :c]]) == %{"a" => [1, 2, 3], "c" => [4.0, 5.1, 6.2]}
assert DF.to_columns(df[0..-2//1]) == %{"a" => [1, 2, 3], "b" => ["a", "b", "c"]}
assert DF.to_columns(df[-3..-1]) == DF.to_columns(df)
assert DF.to_columns(df[..]) == DF.to_columns(df)

assert %Series{} = s1 = df[0]
Expand All @@ -2715,7 +2714,24 @@ defmodule Explorer.DataFrameTest do
~r"could not find column name \"class\"",
fn -> df[:class] end

assert DF.to_columns(df[0..100]) == DF.to_columns(df)
assert_raise ArgumentError,
"range 0..3 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[0..3]) end

assert_raise ArgumentError,
"range 0..-4//1 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[0..-4//1]) end

assert_raise ArgumentError,
"range -3..-1 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[-3..-1]) end

# Note: Even though `Enum.to_list(1..3//3) == [1]`, we still classify this
# range as out of bounds because we define in bounds as both limits of the
# range being in bounds.
assert_raise ArgumentError,
"range 1..3//3 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[1..3//3]) end
end

test "pop/2" do
Expand Down Expand Up @@ -2961,7 +2977,9 @@ defmodule Explorer.DataFrameTest do
df3 = DF.distinct(df, ..)
assert DF.names(df3) == DF.names(df)

assert df == DF.distinct(df, 100..200)
assert_raise ArgumentError,
"range 100..200 is out of bounds for a dataframe with 10 column(s)",
fn -> DF.drop_nil(df, 100..200) end
end
end

Expand All @@ -2983,8 +3001,9 @@ defmodule Explorer.DataFrameTest do
fn -> DF.drop_nil(df, [3, 4, 5]) end

# It takes the slice of columns in the range
df4 = DF.drop_nil(df, 0..200)
assert DF.to_columns(df4) == %{"a" => [1], "b" => [1]}
assert_raise ArgumentError,
"range 0..200 is out of bounds for a dataframe with 2 column(s)",
fn -> DF.drop_nil(df, 0..200) end
end

describe "relocate/3" do
Expand Down
Loading