Skip to content

Commit

Permalink
Migrating from antlr3 to antlr4 (ydb-platform#2977)
Browse files Browse the repository at this point in the history
Co-authored-by: root <[email protected]>
  • Loading branch information
OrlovPavel and root authored Sep 4, 2024
1 parent 085cd3d commit 11433e1
Show file tree
Hide file tree
Showing 50 changed files with 14,126 additions and 1,773 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ __pycache__/
.idea/
.vscode/
.clangd
.antlr/

# KDevelop IDE
*.kdev4
Expand Down
19 changes: 16 additions & 3 deletions ydb/library/yql/parser/lexer_common/ut/hints_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
using namespace NSQLTranslation;
using namespace NSQLTranslationV1;

TSQLHints CollectHints(const TString& query) {
TSQLHints CollectHints(const TString& query, bool antlr4Parser) {
bool ansi = false;
auto lexer = MakeLexer(ansi);
auto lexer = MakeLexer(ansi, antlr4Parser);
UNIT_ASSERT(lexer);
TSQLHints result;
NYql::TIssues issues;
Expand All @@ -27,7 +27,20 @@ TString SerializeHints(const TVector<TSQLHint>& hints) {
Y_UNIT_TEST_SUITE(TLexerHintsTests) {
Y_UNIT_TEST(Basic) {
TString query = "/*+ some() */ SELECT /*+ foo(one) */ --+ bar(two)";
auto hintsWithPos = CollectHints(query);
auto hintsWithPos = CollectHints(query, false);
UNIT_ASSERT(hintsWithPos.size() == 1);
NYql::TPosition pos = hintsWithPos.begin()->first;
TVector<TSQLHint> hints = hintsWithPos.begin()->second;

UNIT_ASSERT_EQUAL(pos.Row, 1);
UNIT_ASSERT_EQUAL(pos.Column, 15);

TStringBuf expected = R"raw("foo":{"one"},"bar":{"two"})raw";
UNIT_ASSERT_NO_DIFF(SerializeHints(hints), expected);
}
Y_UNIT_TEST(Antlr4) {
TString query = "/*+ some() */ SELECT /*+ foo(one) */ --+ bar(two)";
auto hintsWithPos = CollectHints(query, true);
UNIT_ASSERT(hintsWithPos.size() == 1);
NYql::TPosition pos = hintsWithPos.begin()->first;
TVector<TSQLHint> hints = hintsWithPos.begin()->second;
Expand Down
5 changes: 4 additions & 1 deletion ydb/library/yql/parser/proto_ast/gen/jsonpath/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ IF (CPP_PROTO)
SET(PROTOBUF_SUFFIX_PATH .pb.h)
SET(LEXER_PARSER_NAMESPACE NALP)


CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/protobuf/protobuf.stg.in ${antlr_templates}/protobuf/protobuf.stg)

Expand All @@ -28,6 +27,10 @@ IF (CPP_PROTO)

NO_COMPILER_WARNINGS()

ADDINCL(
GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)

RUN_ANTLR(
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v0/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ NO_COMPILER_WARNINGS()

INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)

ADDINCL(
GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

RUN_ANTLR(
${sql_grammar}
-lib .
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ ENDIF()

NO_COMPILER_WARNINGS()

ADDINCL(
GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)

RUN_ANTLR(
Expand Down
4 changes: 4 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_ansi/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ ENDIF()

NO_COMPILER_WARNINGS()

ADDINCL(
GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/codegen/templates/ya.make.incl)

RUN_ANTLR(
Expand Down
10 changes: 10 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/epilogue.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Grammar fragments for the v1_ansi_antlr4 module. They are set here so that
# configure_file() below can substitute them while rendering the grammar
# template. Bracket arguments are used so the fragments can be written
# verbatim, without escaping quotes.
set(GRAMMAR_MULTILINE_COMMENT_CORE [[MULTILINE_COMMENT | .]])
set(GRAMMAR_STRING_CORE_DOUBLE [[~(["]) | (QUOTE_DOUBLE QUOTE_DOUBLE)]])
set(GRAMMAR_STRING_CORE_SINGLE [[~([']) | (QUOTE_SINGLE QUOTE_SINGLE)]])

# Render SQLv1Antlr4.g.in from the source tree into this module's build dir.
configure_file(
  ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in
  ${CMAKE_BINARY_DIR}/ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/SQLv1Antlr4.g
)


52 changes: 52 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_ansi_antlr4/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Library with the ANTLR4-generated C++ lexer/parser for the SQLv1Antlr4
# grammar, generated into the NALPAnsiAntlr4 package.
LIBRARY()

PEERDIR(
    ydb/library/yql/parser/proto_ast/gen/v1_proto_split
)

# Everything generated by this module lives under the module's build dir.
SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)
SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)

# Values made available to the CONFIGURE_FILE() calls below.
SET(LEXER_PARSER_NAMESPACE NALPAnsiAntlr4)
SET(ANTLR_PACKAGE_NAME NSQLv1Generated)
SET(PROTOBUF_SUFFIX_PATH .pb.main.h)
SET(PROTOBUF_HEADER_PATH ydb/library/yql/parser/proto_ast/gen/v1_proto_split)

# Grammar fragments for this flavour of the grammar.
SET(GRAMMAR_MULTILINE_COMMENT_CORE "\"MULTILINE_COMMENT | .\"")
SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE#]) | (QUOTE_DOUBLE QUOTE_DOUBLE)\"")
SET(GRAMMAR_STRING_CORE_SINGLE "\"~([']) | (QUOTE_SINGLE QUOTE_SINGLE)\"")

# Render the C++ code-generation templates next to the generated sources.
CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg.in ${antlr_templates}/Cpp/Files.stg)

# Under EXPORT_CMAKE the grammar is only declared as manually generated;
# otherwise it is rendered from the .g.in template here.
IF (EXPORT_CMAKE)
    MANUAL_GENERATION(${sql_grammar})
ELSE()
    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
ENDIF()

NO_COMPILER_WARNINGS()

ADDINCL(
    GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/ya.make.incl)

RUN_ANTLR4(
    ${sql_grammar}
    -no-listener
    -package NALPAnsiAntlr4
    -lib .
    -o ${antlr_output}
    IN
        ${sql_grammar}
        ${antlr_templates}/Cpp/Cpp.stg
        ${antlr_templates}/Cpp/Files.stg
    OUT
        SQLv1Antlr4Parser.cpp
        SQLv1Antlr4Lexer.cpp
        SQLv1Antlr4Parser.h
        SQLv1Antlr4Lexer.h
    OUTPUT_INCLUDES
        ${PROTOBUF_HEADER_PATH}/SQLv1Parser.pb.main.h
        ${STG_INCLUDES}
    CWD ${antlr_output}
)

END()
9 changes: 9 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_antlr4/epilogue.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Grammar fragments for the v1_antlr4 module. They are set here so that
# configure_file() below can substitute them while rendering the grammar
# template. Bracket arguments keep the quote and backslash characters
# literal, so no escaping is needed.
set(GRAMMAR_MULTILINE_COMMENT_CORE [[.]])
set(GRAMMAR_STRING_CORE_DOUBLE [[~(["\]) | (BACKSLASH .)]])
set(GRAMMAR_STRING_CORE_SINGLE [[~(['\]) | (BACKSLASH .)]])

# Render SQLv1Antlr4.g.in from the source tree into this module's build dir.
configure_file(
  ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in
  ${CMAKE_BINARY_DIR}/ydb/library/yql/parser/proto_ast/gen/v1_antlr4/SQLv1Antlr4.g
)


52 changes: 52 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_antlr4/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Library with the ANTLR4-generated C++ lexer/parser for the SQLv1Antlr4
# grammar, generated into the NALPDefaultAntlr4 package.
LIBRARY()

PEERDIR (
    ydb/library/yql/parser/proto_ast/gen/v1_proto_split
)

# Everything generated by this module lives under the module's build dir.
SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)
SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)

# Values made available to the CONFIGURE_FILE() calls below.
SET(ANTLR_PACKAGE_NAME NSQLv1Generated)
SET(PROTOBUF_HEADER_PATH ydb/library/yql/parser/proto_ast/gen/v1_proto_split)
SET(PROTOBUF_SUFFIX_PATH .pb.main.h)

SET(LEXER_PARSER_NAMESPACE NALPDefaultAntlr4)

# Grammar fragments for this flavour of the grammar (backslash escapes
# inside string literals, plain "." for multiline comment bodies).
SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"")
SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"")
SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"")

# Render the C++ code-generation templates next to the generated sources.
CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Cpp.stg.in ${antlr_templates}/Cpp/Cpp.stg)
CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Cpp/Files.stg.in ${antlr_templates}/Cpp/Files.stg)

# Under EXPORT_CMAKE the grammar is only declared as manually generated;
# otherwise it is rendered from the .g.in template here.
IF(EXPORT_CMAKE)
    MANUAL_GENERATION(${sql_grammar})
ELSE()
    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
ENDIF()

NO_COMPILER_WARNINGS()

ADDINCL(
    GLOBAL contrib/libs/antlr4_cpp_runtime/src
)

# Use the include file that sits next to the ANTLR4 (v4) templates this
# module configures above, matching the sibling v1_ansi_antlr4 module.
# The previous path pointed at the ANTLR3 templates directory.
# NOTE(review): STG_INCLUDES used below is presumably defined by this
# include - confirm the v4 ya.make.incl provides it.
INCLUDE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/ya.make.incl)

RUN_ANTLR4(
    ${sql_grammar}
    -no-listener
    -package NALPDefaultAntlr4
    -lib .
    -o ${antlr_output}
    IN ${sql_grammar} ${antlr_templates}/Cpp/Cpp.stg ${antlr_templates}/Cpp/Files.stg
    OUT SQLv1Antlr4Parser.cpp SQLv1Antlr4Lexer.cpp SQLv1Antlr4Parser.h SQLv1Antlr4Lexer.h
    OUTPUT_INCLUDES
    ${PROTOBUF_HEADER_PATH}/SQLv1Parser.pb.main.h
    ${STG_INCLUDES}
    CWD ${antlr_output}
)

END()
38 changes: 38 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_proto_antlr4/ya.make.gen
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Proto library for SQLv1Parser.proto. When GEN_PROTO is set, the .proto
# file is declared as an output of running ANTLR4 over the SQLv1Antlr4
# grammar with the Java.stg template.
# NOTE(review): the v1_proto_split_antlr4 module declares
# SQLv1Antlr4Parser.proto as the output of the same grammar/template pair,
# while this file declares SQLv1Parser.proto - confirm which name the
# generator actually emits.
PROTO_LIBRARY()

IF (GEN_PROTO)
    # Everything generated by this module lives under the module's build dir.
    SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
    SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)
    SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)

    SET(ANTLR_PACKAGE_NAME NSQLv1Generated)

    # Grammar fragments made available to the CONFIGURE_FILE() calls below.
    SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"")
    SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"")
    SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"")

    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Java/Java.stg.in ${antlr_templates}/Java/Java.stg)

    # Under EXPORT_CMAKE the grammar is only declared as manually generated;
    # otherwise it is rendered from the .g.in template here.
    IF (EXPORT_CMAKE)
        MANUAL_GENERATION(${sql_grammar})
    ELSE()
        CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
    ENDIF()

    RUN_ANTLR4(
        ${sql_grammar}
        -no-listener
        -lib .
        -o ${antlr_output}
        -Dlanguage=Java
        IN
            ${sql_grammar}
            ${antlr_templates}/Java/Java.stg
        OUT_NOAUTO
            SQLv1Parser.proto
        CWD ${antlr_output}
    )
ENDIF()

SRCS(
    SQLv1Parser.proto
)

EXCLUDE_TAGS(GO_PROTO JAVA_PROTO)

END()
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Grammar fragments for the v1_proto_split_antlr4 module. They are set here
# so that configure_file() below can substitute them while rendering the
# grammar template. Bracket arguments keep the quote and backslash
# characters literal, so no escaping is needed.
set(GRAMMAR_MULTILINE_COMMENT_CORE [[.]])
set(GRAMMAR_STRING_CORE_DOUBLE [[~(["\]) | (BACKSLASH .)]])
set(GRAMMAR_STRING_CORE_SINGLE [[~(['\]) | (BACKSLASH .)]])

# Render SQLv1Antlr4.g.in from the source tree into this module's build dir.
configure_file(
  ${CMAKE_SOURCE_DIR}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in
  ${CMAKE_BINARY_DIR}/ydb/library/yql/parser/proto_ast/gen/v1_proto_split_antlr4/SQLv1Antlr4.g
)

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Regenerates the protobuf sources kept in this directory:
#   1. temporarily installs ya.make.gen as ya.make in the generator module,
#   2. builds that module with `yag make`, requesting the .h/.cc results,
#   3. removes the temporary ya.make,
#   4. runs multiproto.py on the SQLv1Parser outputs, targeting this directory.
#
# All paths are relative, so the script must be run from its own directory.
#
# NOTE(review): this script sits next to the antlr4 split module but still
# targets ../v1_proto and SQLv1Parser - confirm whether ../v1_proto_antlr4
# was intended.
set -eux

gen_dir=../v1_proto
tmp_ya_make="${gen_dir}/ya.make"

cp "${gen_dir}/ya.make.gen" "${tmp_ya_make}"
# With `set -e`, a failing build would abort the script before the explicit
# `rm` below and leave the temporary ya.make behind in the source tree.
# The trap is installed only after the copy succeeded, so a failed `cp`
# never deletes a file this script did not create.
trap 'rm -f "${tmp_ya_make}"' EXIT

yag make "${gen_dir}" --add-result=".h" --add-result=".cc"
rm "${tmp_ya_make}"
python3 ../multiproto.py SQLv1Parser "${gen_dir}" .

87 changes: 87 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/v1_proto_split_antlr4/ya.make
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Library built from the protobuf description generated from the
# SQLv1Antlr4 grammar. The build is a three-step pipeline:
#   RUN_ANTLR4  : grammar + Java.stg -> SQLv1Antlr4Parser.proto
#   RUN_PROGRAM : protoc             -> SQLv1Antlr4Parser.pb.h / .pb.cc
#   RUN_PYTHON3 : multiproto.py      -> .pb.code0..9.cc, .pb.data.cc and
#                                       the .pb.classes.h / .pb.main.h headers
LIBRARY()

# Everything generated by this module lives under the module's build dir.
SET(antlr_output ${ARCADIA_BUILD_ROOT}/${MODDIR})
SET(sql_grammar ${antlr_output}/SQLv1Antlr4.g)
SET(antlr_templates ${antlr_output}/org/antlr/v4/tool/templates/codegen)

SET(ANTLR_PACKAGE_NAME NSQLv1Generated)

# Grammar fragments made available to the CONFIGURE_FILE() calls below.
SET(GRAMMAR_MULTILINE_COMMENT_CORE "\".\"")
SET(GRAMMAR_STRING_CORE_DOUBLE "\"~([#DOUBLE_QUOTE##BACKSLASH#]) | (BACKSLASH .)\"")
SET(GRAMMAR_STRING_CORE_SINGLE "\"~(['#BACKSLASH#]) | (BACKSLASH .)\"")

CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/org/antlr/v4/tool/templates/codegen/Java/Java.stg.in ${antlr_templates}/Java/Java.stg)

# Under EXPORT_CMAKE the grammar is only declared as manually generated;
# otherwise it is rendered from the .g.in template here.
IF (EXPORT_CMAKE)
    MANUAL_GENERATION(${sql_grammar})
ELSE()
    CONFIGURE_FILE(${ARCADIA_ROOT}/ydb/library/yql/sql/v1/SQLv1Antlr4.g.in ${sql_grammar})
ENDIF()

# Step 1: grammar -> .proto
RUN_ANTLR4(
    ${sql_grammar}
    -lib .
    -no-listener
    -o ${antlr_output}
    -Dlanguage=Java
    IN
        ${sql_grammar}
        ${antlr_templates}/Java/Java.stg
    OUT_NOAUTO
        SQLv1Antlr4Parser.proto
    CWD ${antlr_output}
)

# Pick the protoc binary according to USE_VANILLA_PROTOC.
IF (USE_VANILLA_PROTOC)
    SET(PROTOC_PATH contrib/tools/protoc_std)
ELSE()
    SET(PROTOC_PATH contrib/tools/protoc/bin)
ENDIF()

# Step 2: .proto -> .pb.h / .pb.cc (with the cpp_styleguide plugin)
RUN_PROGRAM(
    $PROTOC_PATH
    -I=$CURDIR
    -I=$ARCADIA_ROOT
    -I=$ARCADIA_BUILD_ROOT
    -I=$ARCADIA_ROOT/contrib/libs/protobuf/src
    --cpp_out=$ARCADIA_BUILD_ROOT
    --cpp_styleguide_out=$ARCADIA_BUILD_ROOT
    --plugin=protoc-gen-cpp_styleguide=contrib/tools/protoc/plugins/cpp_styleguide
    SQLv1Antlr4Parser.proto
    IN SQLv1Antlr4Parser.proto
    TOOL contrib/tools/protoc/plugins/cpp_styleguide
    OUT_NOAUTO
        SQLv1Antlr4Parser.pb.h
        SQLv1Antlr4Parser.pb.cc
    CWD $ARCADIA_BUILD_ROOT
)

# Step 3: multiproto.py turns the single .pb.h/.pb.cc pair into the
# files listed under OUT_NOAUTO.
RUN_PYTHON3(
    ${ARCADIA_ROOT}/ydb/library/yql/parser/proto_ast/gen/multiproto.py SQLv1Antlr4Parser
    IN SQLv1Antlr4Parser.pb.h
    IN SQLv1Antlr4Parser.pb.cc
    OUT_NOAUTO
        SQLv1Antlr4Parser.pb.code0.cc
        SQLv1Antlr4Parser.pb.code1.cc
        SQLv1Antlr4Parser.pb.code2.cc
        SQLv1Antlr4Parser.pb.code3.cc
        SQLv1Antlr4Parser.pb.code4.cc
        SQLv1Antlr4Parser.pb.code5.cc
        SQLv1Antlr4Parser.pb.code6.cc
        SQLv1Antlr4Parser.pb.code7.cc
        SQLv1Antlr4Parser.pb.code8.cc
        SQLv1Antlr4Parser.pb.code9.cc
        SQLv1Antlr4Parser.pb.data.cc
        SQLv1Antlr4Parser.pb.classes.h
        SQLv1Antlr4Parser.pb.main.h
    CWD $ARCADIA_BUILD_ROOT/ydb/library/yql/parser/proto_ast/gen/v1_proto_split_antlr4
)

PEERDIR(
    contrib/libs/protobuf
)

# Only the split .cc files are compiled into the library.
SRCS(
    SQLv1Antlr4Parser.pb.code0.cc
    SQLv1Antlr4Parser.pb.code1.cc
    SQLv1Antlr4Parser.pb.code2.cc
    SQLv1Antlr4Parser.pb.code3.cc
    SQLv1Antlr4Parser.pb.code4.cc
    SQLv1Antlr4Parser.pb.code5.cc
    SQLv1Antlr4Parser.pb.code6.cc
    SQLv1Antlr4Parser.pb.code7.cc
    SQLv1Antlr4Parser.pb.code8.cc
    SQLv1Antlr4Parser.pb.code9.cc
    SQLv1Antlr4Parser.pb.data.cc
)

END()
2 changes: 2 additions & 0 deletions ydb/library/yql/parser/proto_ast/gen/ya.make
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@ RECURSE(
v0_proto_split
v1
v1_proto_split
v1_antlr4
v1_proto_split_antlr4
)
Loading

0 comments on commit 11433e1

Please sign in to comment.