Skip to content

Commit

Permalink
Skip incompatible feature ordering configuration (#131)
Browse files Browse the repository at this point in the history
Summary:

Skip incompatible feature ordering configuration to avoid writer failures in case of mismatch in the schema or flat map configuration.

Differential Revision: D68024263
  • Loading branch information
sdruzkin authored and facebook-github-bot committed Jan 13, 2025
1 parent f858d12 commit 7b3f444
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 58 deletions.
28 changes: 16 additions & 12 deletions dwio/nimble/velox/LayoutPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,19 +117,23 @@ std::vector<Stream> DefaultLayoutPlanner::getLayout(
orderedFlatMapOffsets.reserve(flatMapFeatureOrder_.size() * 3);

for (const auto& flatMapFeatures : flatMapFeatureOrder_) {
NIMBLE_CHECK(
std::get<0>(flatMapFeatures) < root.childrenCount(),
fmt::format(
"Column ordinal {} for feature ordering is out of range. "
"Top-level row has {} columns.",
std::get<0>(flatMapFeatures),
root.childrenCount()));
if (std::get<0>(flatMapFeatures) >= root.childrenCount()) {
LOG(WARNING) << fmt::format(
"Column ordinal {} for feature ordering is out of range. "
"Top-level row has {} columns.",
std::get<0>(flatMapFeatures),
root.childrenCount());
continue;
}

auto& column = root.childAt(std::get<0>(flatMapFeatures));
NIMBLE_CHECK(
column.kind() == Kind::FlatMap,
fmt::format(
"Column '{}' for feature ordering is not a flat map.",
root.nameAt(std::get<0>(flatMapFeatures))));

if (column.kind() != Kind::FlatMap) {
LOG(WARNING) << fmt::format(
"Column '{}' for feature ordering is not a flat map.",
root.nameAt(std::get<0>(flatMapFeatures)));
continue;
}

auto& flatMap = column.asFlatMap();

Expand Down
124 changes: 91 additions & 33 deletions dwio/nimble/velox/tests/LayoutPlannerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,45 +394,74 @@ TEST(DefaultLayoutPlannerTests, NoFeatureReordering) {
testStreamLayout(rng, planner, std::move(streams), std::move(expected));
}

TEST(DefaultLayoutPlannerTests, IncompatibleOrdinals) {
TEST(DefaultLayoutPlannerTests, NonFlatMapOrdinalsAreIgnored) {
auto seed = folly::Random::rand32();
LOG(INFO) << "seed: " << seed;
std::mt19937 rng(seed);

nimble::SchemaBuilder builder;

nimble::test::FlatMapChildAdder fm1;
nimble::test::FlatMapChildAdder fm2;
nimble::test::FlatMapChildAdder fm3;

SCHEMA(
builder,
ROW({
{"c1", TINYINT()},
{"c2", FLATMAP(Int8, TINYINT(), fm1)},
{"c3", ARRAY(TINYINT())},
{"c4", FLATMAP(Int8, TINYINT(), fm2)},
{"c5", TINYINT()},
{"c6", FLATMAP(Int8, ARRAY(TINYINT()), fm3)},
}));
ROW(
{{"c0", TINYINT()},
{"c1", FLATMAP(Int8, TINYINT(), fm1)},
{"c2", ARRAY(TINYINT())},
{"c3", FLATMAP(Int8, TINYINT(), fm2)}}));

fm1.addChild("2");
fm1.addChild("5");
fm1.addChild("42");
fm1.addChild("7");
fm1.addChild("42");

fm2.addChild("2");
fm2.addChild("5");

nimble::DefaultLayoutPlanner planner{
[&]() { return builder.getRoot(); },
{{{1, {3, 42, 9, 2, 21}}, {2, {3, 2, 42, 21}}}}};
try {
planner.getLayout({});
FAIL() << "Factory should have failed.";
} catch (const nimble::NimbleUserError& e) {
EXPECT_THAT(
e.what(),
::testing::HasSubstr("for feature ordering is not a flat map"));
[&]() { return builder.getRoot(); }, {{{0, {1, 2, 3}}, {1, {42, 5}}}}};

auto namedTypes = getNamedTypes(*builder.getRoot());
std::vector<nimble::Stream> streams;
streams.reserve(namedTypes.size());
for (auto i = 0; i < namedTypes.size(); ++i) {
streams.push_back(nimble::Stream{
std::get<0>(namedTypes[i]), {std::get<1>(namedTypes[i])}});
}

std::vector<std::string> expected{
// Start with row
"r",
// Ordered streams
"r.c1(1).f",
"r.c1(1).f.42(3).im",
"r.c1(1).f.42(3).s",
"r.c1(1).f.5(1).im",
"r.c1(1).f.5(1).s",
// Non-ordered streams following the schema order
"r.c0(0).s",
"r.c1(1).f.2(0).im",
"r.c1(1).f.2(0).s",
"r.c1(1).f.7(2).im",
"r.c1(1).f.7(2).s",
"r.c2(2).a",
"r.c2(2).a.s",
"r.c3(3).f",
"r.c3(3).f.2(0).im",
"r.c3(3).f.2(0).s",
"r.c3(3).f.5(1).im",
"r.c3(3).f.5(1).s"};

testStreamLayout(rng, planner, std::move(streams), std::move(expected));
}

TEST(DefaultLayoutPlannerTests, OrdinalOutOfRange) {
TEST(DefaultLayoutPlannerTests, OrdinalOutOfRangeAreIgnored) {
auto seed = folly::Random::rand32();
LOG(INFO) << "seed: " << seed;
std::mt19937 rng(seed);

nimble::SchemaBuilder builder;

nimble::test::FlatMapChildAdder fm1;
Expand All @@ -442,31 +471,60 @@ TEST(DefaultLayoutPlannerTests, OrdinalOutOfRange) {
SCHEMA(
builder,
ROW({
{"c1", TINYINT()},
{"c2", FLATMAP(Int8, TINYINT(), fm1)},
{"c3", ARRAY(TINYINT())},
{"c4", FLATMAP(Int8, TINYINT(), fm2)},
{"c5", TINYINT()},
{"c6", FLATMAP(Int8, ARRAY(TINYINT()), fm3)},
{"c0", TINYINT()},
{"c1", FLATMAP(Int8, TINYINT(), fm1)},
{"c2", ARRAY(TINYINT())},
{"c3", FLATMAP(Int8, TINYINT(), fm2)},
{"c4", TINYINT()},
{"c5", FLATMAP(Int8, ARRAY(TINYINT()), fm3)},
}));

fm1.addChild("2");
fm1.addChild("5");
fm1.addChild("42");
fm1.addChild("7");

fm2.addChild("2");
fm2.addChild("5");

nimble::DefaultLayoutPlanner planner{
[&]() { return builder.getRoot(); },
{{{6, {3, 42, 9, 2, 21}}, {3, {3, 2, 42, 21}}}}};
try {
planner.getLayout({});
FAIL() << "Factory should have failed.";
} catch (const nimble::NimbleUserError& e) {
EXPECT_THAT(
e.what(), ::testing::HasSubstr("for feature ordering is out of range"));

auto namedTypes = getNamedTypes(*builder.getRoot());
std::vector<nimble::Stream> streams;
streams.reserve(namedTypes.size());
for (auto i = 0; i < namedTypes.size(); ++i) {
streams.push_back(nimble::Stream{
std::get<0>(namedTypes[i]), {std::get<1>(namedTypes[i])}});
}

std::vector<std::string> expected{
// Start with row
"r",
// Ordered streams
"r.c3(3).f",
"r.c3(3).f.2(0).im",
"r.c3(3).f.2(0).s",
// Non-ordered streams following the schema order
"r.c0(0).s",
"r.c1(1).f",
"r.c1(1).f.2(0).im",
"r.c1(1).f.2(0).s",
"r.c1(1).f.5(1).im",
"r.c1(1).f.5(1).s",
"r.c1(1).f.42(2).im",
"r.c1(1).f.42(2).s",
"r.c1(1).f.7(3).im",
"r.c1(1).f.7(3).s",
"r.c2(2).a",
"r.c2(2).a.s",
"r.c3(3).f.5(1).im",
"r.c3(3).f.5(1).s",
"r.c4(4).s",
"r.c5(5).f"};

testStreamLayout(rng, planner, std::move(streams), std::move(expected));
}

TEST(DefaultLayoutPlannerTests, IncompatibleSchema) {
Expand Down
18 changes: 5 additions & 13 deletions dwio/nimble/velox/tests/VeloxWriterTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ TEST_F(VeloxWriterTests, RootHasNulls) {
}
}

TEST_F(VeloxWriterTests, FeatureReorderingNonFlatmapColumn) {
TEST_F(
VeloxWriterTests,
FeatureReorderingNonFlatmapColumnIgnoresMismatchedConfig) {
velox::test::VectorMaker vectorMaker{leafPool_.get()};
auto vector = vectorMaker.rowVector(
{"map", "flatmap"},
Expand Down Expand Up @@ -220,18 +222,8 @@ TEST_F(VeloxWriterTests, FeatureReorderingNonFlatmapColumn) {
.featureReordering =
std::vector<std::tuple<size_t, std::vector<int64_t>>>{
{0, {1, 2}}, {1, {3, 4}}}});

try {
writer.write(vector);
writer.close();
FAIL();
} catch (const nimble::NimbleUserError& e) {
EXPECT_EQ("USER", e.errorSource());
EXPECT_EQ("INVALID_ARGUMENT", e.errorCode());
EXPECT_EQ(
"Column 'map' for feature ordering is not a flat map.",
e.errorMessage());
}
writer.write(vector);
writer.close();
}

namespace {
Expand Down

0 comments on commit 7b3f444

Please sign in to comment.