Skip to content

Commit

Permalink
Typed wrappers around parse tree nodes (carbon-language#3534)
Browse files Browse the repository at this point in the history
These are intended to allow the structure of a parse tree node to be
described more precisely in code, to support these use cases:

- Automated checking that the parse tree conforms to the expected
structure. (Added to `Tree::Verify`.)
- Easier reading and understanding of the structure of the parse tree by
toolchain developers. (See `parse/typed_nodes.h`.)
- Easier navigation of the parse tree, for example for tooling uses and
for use when forming diagnostics.

On this last point, an object representing the file may be inspecting
using `Tree::ExtractFile`, as in:
```
auto file = tree->ExtractFile();
for (AnyDeclId decl_id : file.decls) {
  // `decl_id` is convertible to a `NodeId`.
  if (std::optional<FunctionDecl> fn_decl =
      tree->ExtractAs<FunctionDecl>(decl_id)) {
    // fn_decl->params is a `TuplePatternId` (which extends `NodeId`)
    // that is guaranteed to reference a `TuplePattern`.
    std::optional<TuplePattern> params = tree->Extract(fn_decl->params);
    // `params` has a value unless there was an error in that node.
  } else if (auto class_def = tree->ExtractAs<ClassDefinition>(decl_id)) {
    // ...
  }
}
```

The `Extract...` functions collect the child nodes into the typed parse
node's fields (internally using a `Tree::SiblingIterator`) for easy
access. However, this is not as fast as directly observing the tree
structure using the postorder strategy being used by the check stage.

These functions rely on using struct reflection on the typed parse node
definitions from `parse/typed_nodes.h` to get the expected structure of
child nodes and then populate them.

Note that validating these in `Tree::Verify` adds significant cost to
it, and is currently included in the parsing stage. Without this change,
a 10 mloc test case of lex & parse takes 4.129 s ± 0.041 s. With this
change, it takes 5.768 s ± 0.036 s.

This builds upon and completes carbon-language#3393.

Co-authored-by: Richard Smith <[email protected]>

---------

Co-authored-by: Richard Smith <[email protected]>
Co-authored-by: Chandler Carruth <[email protected]>
  • Loading branch information
3 people authored Dec 22, 2023
1 parent fe24ebc commit 2e97f27
Show file tree
Hide file tree
Showing 38 changed files with 1,865 additions and 76 deletions.
16 changes: 14 additions & 2 deletions common/struct_reflection.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
// - Only simple aggregate structs are supported. Types with base classes,
// non-public data members, constructors, or virtual functions are not
// supported.
// - Structs with more than 5 fields are not supported. This limit is easy to
// - Structs with more than 6 fields are not supported. This limit is easy to
// increase if needed, but removing it entirely is hard.
// - Structs containing a reference to the same type are not supported.

Expand Down Expand Up @@ -75,7 +75,8 @@ constexpr auto CountFields() -> int {
if constexpr (CanListInitialize<T, Fields...>(0)) {
return CountFields<T, true, Fields..., AnyField<T>>();
} else if constexpr (AnyWorkedSoFar) {
static_assert(sizeof...(Fields) <= 5,
// Note: Compare against the maximum number of fields supported *PLUS 1*.
static_assert(sizeof...(Fields) <= 7,
"Unsupported: too many fields in struct");
return sizeof...(Fields) - 1;
} else if constexpr (sizeof...(Fields) > 32) {
Expand Down Expand Up @@ -150,6 +151,17 @@ struct FieldAccessor<5> {
}
};

template <>
struct FieldAccessor<6> {
template <typename T>
static auto Get(T& value) -> auto {
auto& [field0, field1, field2, field3, field4, field5] = value;
return std::tuple<decltype(field0), decltype(field1), decltype(field2),
decltype(field3), decltype(field4), decltype(field5)>(
field0, field1, field2, field3, field4, field5);
}
};

} // namespace Internal

// Get the fields of the struct `T` as a tuple.
Expand Down
23 changes: 22 additions & 1 deletion common/struct_reflection_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ struct TwoFields {
int y;
};

struct SixFields {
int one;
int two;
int three;
int four;
int five;
int six;
};

struct ReferenceField {
int& ref;
};
Expand Down Expand Up @@ -60,6 +69,7 @@ TEST(StructReflectionTest, CountFields) {
static_assert(Internal::CountFields<ZeroFields>() == 0);
static_assert(Internal::CountFields<OneField>() == 1);
static_assert(Internal::CountFields<TwoFields>() == 2);
static_assert(Internal::CountFields<SixFields>() == 6);
static_assert(Internal::CountFields<ReferenceField>() == 1);
static_assert(Internal::CountFields<OneFieldNoDefaultConstructor>() == 1);
}
Expand All @@ -74,12 +84,23 @@ TEST(StructReflectionTest, OneField) {
EXPECT_EQ(std::get<0>(fields), 1);
}

TEST(StructReflectionTest, TwoField) {
TEST(StructReflectionTest, TwoFields) {
std::tuple<int, int> fields = AsTuple(TwoFields{.x = 1, .y = 2});
EXPECT_EQ(std::get<0>(fields), 1);
EXPECT_EQ(std::get<1>(fields), 2);
}

TEST(StructReflectionTest, SixFields) {
std::tuple<int, int, int, int, int, int> fields = AsTuple(SixFields{
.one = 1, .two = 2, .three = 3, .four = 4, .five = 5, .six = 6});
EXPECT_EQ(std::get<0>(fields), 1);
EXPECT_EQ(std::get<1>(fields), 2);
EXPECT_EQ(std::get<2>(fields), 3);
EXPECT_EQ(std::get<3>(fields), 4);
EXPECT_EQ(std::get<4>(fields), 5);
EXPECT_EQ(std::get<5>(fields), 6);
}

TEST(StructReflectionTest, NoDefaultConstructor) {
std::tuple<NoDefaultConstructor, NoDefaultConstructor> fields =
AsTuple(TwoFieldsNoDefaultConstructor{.x = NoDefaultConstructor(1),
Expand Down
1 change: 1 addition & 0 deletions toolchain/check/check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ static auto ProcessParseNodes(Context& context,
// clang warns on unhandled enum values; clang-tidy is incorrect here.
// NOLINTNEXTLINE(bugprone-switch-missing-default-case)
switch (auto parse_kind = context.parse_tree().node_kind(parse_node)) {
// TODO: Switch to `Parse::Name##Id(parse_node)` here.
#define CARBON_PARSE_NODE_KIND(Name) \
case Parse::NodeKind::Name: { \
if (!Check::Handle##Name(context, parse_node)) { \
Expand Down
2 changes: 1 addition & 1 deletion toolchain/check/decl_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ enum class KeywordModifierSet : uint32_t {
};

inline auto operator!(KeywordModifierSet k) -> bool {
return !static_cast<unsigned>(k);
return !static_cast<uint32_t>(k);
}

// State stored for each declaration we are currently in: the kind of
Expand Down
5 changes: 5 additions & 0 deletions toolchain/check/node_stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class NodeStack {
}

// Pops the top of the stack and returns the parse_node.
// TODO: return a parse::NodeIdForKind<RequiredParseKind> instead.
template <const Parse::NodeKind& RequiredParseKind>
auto PopForSoloParseNode() -> Parse::NodeId {
Entry back = PopEntry<SemIR::InstId>();
Expand All @@ -102,6 +103,7 @@ class NodeStack {

// Pops the top of the stack if it is the given kind, and returns the
// parse_node. Otherwise, returns std::nullopt.
// TODO: Return a `Parse::NodeIdForKind<RequiredParseKind>` instead.
template <const Parse::NodeKind& RequiredParseKind>
auto PopForSoloParseNodeIf() -> std::optional<Parse::NodeId> {
if (PeekIs<RequiredParseKind>()) {
Expand Down Expand Up @@ -200,6 +202,9 @@ class NodeStack {
// Pops a name from the top of the stack and returns the ID.
auto PopName() -> SemIR::NameId { return PopNameWithParseNode().second; }

// TODO: Can we add a `Pop<...>` that takes a parse node category? See
// https://github.com/carbon-language/carbon-lang/pull/3534/files#r1432067519

// Pops the top of the stack and returns the ID.
template <const Parse::NodeKind& RequiredParseKind>
auto Pop() -> auto {
Expand Down
26 changes: 25 additions & 1 deletion toolchain/parse/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,34 @@ filegroup(
cc_library(
name = "node_kind",
srcs = ["node_kind.cpp"],
hdrs = ["node_kind.h"],
hdrs = [
"node_ids.h",
"node_kind.h",
"typed_nodes.h",
],
textual_hdrs = ["node_kind.def"],
deps = [
"//common:check",
"//common:enum_base",
"//toolchain/base:index_base",
"//toolchain/lex:token_kind",
"@llvm-project//llvm:Support",
],
)

cc_test(
name = "typed_nodes_test",
size = "small",
srcs = ["typed_nodes_test.cpp"],
deps = [
":node_kind",
":tree",
"//testing/base:gtest_main",
"//toolchain/diagnostics:diagnostic_emitter",
"//toolchain/diagnostics:mocks",
"//toolchain/lex",
"//toolchain/lex:tokenized_buffer",
"@com_google_googletest//:gtest",
],
)

Expand All @@ -38,6 +60,7 @@ cc_library(
srcs = [
"context.cpp",
"context.h",
"extract.cpp",
"tree.cpp",
] +
# Glob handler files to avoid missing any.
Expand All @@ -52,6 +75,7 @@ cc_library(
"//common:check",
"//common:error",
"//common:ostream",
"//common:struct_reflection",
"//common:vlog",
"//toolchain/base:pretty_stack_trace_function",
"//toolchain/base:value_store",
Expand Down
Loading

0 comments on commit 2e97f27

Please sign in to comment.