From b37f762c0a973c04a755c24157c3894fc0158361 Mon Sep 17 00:00:00 2001 From: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> Date: Wed, 9 Oct 2024 12:16:52 -0300 Subject: [PATCH] #967: Added ctpg script options parser (#454) related to exasol/script-languages-release#967 --- .github/workflows/check_bazel_tests.yml | 2 +- .../base/javacontainer/script_options/BUILD | 2 +- .../script_options/parser_legacy.cc | 2 +- exaudfclient/base/script_options_parser/BUILD | 7 - .../base/script_options_parser/ctpg/BUILD | 8 + .../base/script_options_parser/ctpg/ctpg.hpp | 3434 +++++++++++++++++ .../ctpg/script_option_lines_ctpg.cc | 199 + .../ctpg/script_option_lines_ctpg.h | 58 + .../script_options_parser/ctpg/test/BUILD | 8 + .../ctpg/test/script_option_lines_test.cpp | 225 ++ .../base/script_options_parser/legacy/BUILD | 8 + .../{ => legacy}/script_option_lines.cc | 0 .../{ => legacy}/script_option_lines.h | 0 .../script_options_parser/legacy/test/BUILD | 8 + .../test/script_option_lines_test.cpp | 2 +- .../base/script_options_parser/test/BUILD | 8 - 16 files changed, 3952 insertions(+), 19 deletions(-) create mode 100644 exaudfclient/base/script_options_parser/ctpg/BUILD create mode 100644 exaudfclient/base/script_options_parser/ctpg/ctpg.hpp create mode 100644 exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc create mode 100644 exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.h create mode 100644 exaudfclient/base/script_options_parser/ctpg/test/BUILD create mode 100644 exaudfclient/base/script_options_parser/ctpg/test/script_option_lines_test.cpp create mode 100644 exaudfclient/base/script_options_parser/legacy/BUILD rename exaudfclient/base/script_options_parser/{ => legacy}/script_option_lines.cc (100%) rename exaudfclient/base/script_options_parser/{ => legacy}/script_option_lines.h (100%) create mode 100644 exaudfclient/base/script_options_parser/legacy/test/BUILD rename exaudfclient/base/script_options_parser/{ => legacy}/test/script_option_lines_test.cpp (99%) delete mode 100644 exaudfclient/base/script_options_parser/test/BUILD diff --git a/.github/workflows/check_bazel_tests.yml b/.github/workflows/check_bazel_tests.yml index 04632466..def9fd7a 100644 --- a/.github/workflows/check_bazel_tests.yml +++ b/.github/workflows/check_bazel_tests.yml @@ -38,6 +38,6 @@ jobs: working-directory: ./exaudfclient/ - name: Script Options Parser Tests run: | - bazel test //base/script_options_parser/test/... + bazel test //base/script_options_parser/... working-directory: ./exaudfclient/ diff --git a/exaudfclient/base/javacontainer/script_options/BUILD b/exaudfclient/base/javacontainer/script_options/BUILD index cf090fe9..e453ec0b 100644 --- a/exaudfclient/base/javacontainer/script_options/BUILD +++ b/exaudfclient/base/javacontainer/script_options/BUILD @@ -6,6 +6,6 @@ cc_library( hdrs = [":extractor.h", ":parser_legacy.h"], srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.cc", ":extractor.cc", ":keywords.h", ":checksum.h", ":checksum.cc"], - deps = ["//base/script_options_parser:script_option_lines_parser", "//base:debug_message_h", + deps = ["//base/script_options_parser/legacy:script_option_lines_parser_legacy", "//base:debug_message_h", "//base/exaudflib:header", "//base/exaudflib:exaudflib-deps", "//base/swig_factory:swig_factory_if"], ) diff --git a/exaudfclient/base/javacontainer/script_options/parser_legacy.cc b/exaudfclient/base/javacontainer/script_options/parser_legacy.cc index bc8e6567..212fa455 100644 --- a/exaudfclient/base/javacontainer/script_options/parser_legacy.cc +++ b/exaudfclient/base/javacontainer/script_options/parser_legacy.cc @@ -1,6 +1,6 @@ #include "base/javacontainer/script_options/parser_legacy.h" #include "base/javacontainer/script_options/checksum.h" -#include "base/script_options_parser/script_option_lines.h" +#include "base/script_options_parser/legacy/script_option_lines.h" #include "base/exaudflib/swig/swig_meta_data.h" #include "base/swig_factory/swig_factory.h" diff --git a/exaudfclient/base/script_options_parser/BUILD b/exaudfclient/base/script_options_parser/BUILD index 39d46a95..ffd0fb0c 100644 --- a/exaudfclient/base/script_options_parser/BUILD +++ b/exaudfclient/base/script_options_parser/BUILD @@ -1,8 +1 @@ package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "script_option_lines_parser", - hdrs = ["script_option_lines.h"], - srcs = ["script_option_lines.cc","script_option_lines.h"], - copts= ["-fno-lto"], -) diff --git a/exaudfclient/base/script_options_parser/ctpg/BUILD b/exaudfclient/base/script_options_parser/ctpg/BUILD new file mode 100644 index 00000000..c2da0d62 --- /dev/null +++ b/exaudfclient/base/script_options_parser/ctpg/BUILD @@ -0,0 +1,8 @@ +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "script_option_lines_parser_ctpg", + hdrs = ["script_option_lines_ctpg.h"], + srcs = ["script_option_lines_ctpg.cc","ctpg.hpp"], + copts= ["-fno-lto"], +) diff --git a/exaudfclient/base/script_options_parser/ctpg/ctpg.hpp b/exaudfclient/base/script_options_parser/ctpg/ctpg.hpp new file mode 100644 index 00000000..fc6a1237 --- /dev/null +++ b/exaudfclient/base/script_options_parser/ctpg/ctpg.hpp @@ -0,0 +1,3434 @@ +/** + +CTPG Library https://github.com/peter-winter/ctpg + +MIT License + +Copyright (c) 2021 peter-winter + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +**/ + +#ifndef CTPG_H +#define CTPG_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace exaudf_ctpg +{ + +using size_t = std::size_t; +using size8_t = std::uint8_t; +using size16_t = std::uint16_t; +using size32_t = std::uint32_t; + +template +using str_table = const char* [N]; + +constexpr size_t uninitialized = size_t(-1); +constexpr size16_t uninitialized16 = size16_t(-1); +constexpr size32_t uninitialized32 = size32_t(-1); + +namespace meta +{ + template + struct contains_type + {}; + + template + struct unique_type_list : contains_type... + { + using variant_type = std::variant; + + template + struct insert + { + static unique_type_list foo(contains_type*); + static unique_type_list foo(...); + + using type = decltype(foo(std::declval*>())); + }; + + template + typename insert::type operator + (New); + }; + + template + struct unique_types_variant + { + using unique_list = decltype((std::declval>() + ... + std::declval())); + using type = typename unique_list::variant_type; + }; + + template + using unique_types_variant_t = typename unique_types_variant::type; + + template + constexpr int sum_size = (0 + ... + sizeof(T)); + + template + constexpr size_t distinct_values_count = 1 << (sizeof(T) * 8); + + constexpr size_t distinct_chars_count = distinct_values_count; + + template + struct max + { + static const size_t value = std::max(First, max::value); + }; + + template + struct max + { + static const size_t value = X; + }; + + template + constexpr size_t max_v = max::value; + + template + constexpr size_t count_zeros = (0 + ... + (X == 0 ? 1 : 0)); +} + +namespace stdex +{ + template + struct is_cvector_compatible : std::bool_constant && std::is_trivially_destructible_v> + {}; + + template + class cvector + {}; + + template + class cvector::value>> + { + public: + using size_type = std::size_t; + + constexpr cvector() : + the_data{}, current_size(0) + {} + + constexpr cvector(const T& arg, size_t count): + the_data{}, current_size(0) + { + for (size_t i = 0; i < count; ++i) + push_back(arg); + } + + private: + template + struct iterator_base + { + using it_type = Derived; + constexpr it_type* cast() { return static_cast(this); } + constexpr const it_type* cast() const { return static_cast(this); } + + constexpr bool operator == (const it_type& other) const { return cast()->ptr == other.ptr; } + constexpr bool operator != (const it_type& other) const { return cast()->ptr != other.ptr; } + constexpr it_type operator - (size_type amount) const { return it_type{ cast()->ptr - amount }; } + constexpr size_type operator - (const it_type& other) const { return size_type(cast()->ptr - other.ptr); } + constexpr it_type operator + (size_type amount) const { return it_type{ cast()->ptr + amount }; } + constexpr it_type operator ++(int) { it_type it{ cast()->ptr }; ++(cast()->ptr); return it; } + constexpr it_type& operator ++() { ++(cast()->ptr); return *cast(); } + constexpr bool operator > (const it_type& other) const { return cast()->ptr > other.ptr; } + constexpr bool operator < (const it_type& other) const { return cast()->ptr < other.ptr; } + }; + + public: + struct iterator : iterator_base + { + T* ptr; + constexpr iterator(T* ptr) : ptr(ptr) {} + constexpr T& operator *() const { return *ptr; } + }; + + struct const_iterator : iterator_base + { + const T* ptr; + constexpr const_iterator(const T* ptr) : ptr(ptr) {} + constexpr const T& operator *() const { return *ptr; } + }; + + constexpr const T* data() const { return the_data; } + constexpr T* data() { return the_data; } + constexpr size_type size() const { return current_size; } + constexpr bool empty() const { return current_size == 0; } + constexpr void reserve(size_type) const {}; + constexpr const T& operator[](size_type idx) const { return the_data[idx]; } + constexpr T& operator[](size_type idx) { return the_data[idx]; } + constexpr void push_back(const T& v) { the_data[current_size++] = v; } + constexpr void emplace_back(T&& v) { the_data[current_size++] = std::move(v); } + constexpr const T& front() const { return the_data[0]; } + constexpr T& front() { return the_data[0]; } + constexpr T& back() { return the_data[current_size - 1]; } + constexpr const T& back() const { return the_data[current_size - 1]; } + constexpr const_iterator begin() const { return const_iterator(the_data); } + constexpr const_iterator end() const { return const_iterator(the_data + current_size); } + constexpr iterator begin() { return iterator(the_data); } + constexpr iterator end() { return iterator(the_data + current_size); } + constexpr void clear() { current_size = 0; } + constexpr void pop_back() { current_size--; } + constexpr iterator erase(iterator first, iterator last) + { + if (!(first < last)) + return end(); + auto from = first < begin() ? begin() : first; + auto to = last > end() ? end() : last; + size_type diff = to - from; + iterator it = to; + while (!(it == end())) + { + *from = std::move(*it); + ++from; ++it; + } + current_size -= diff; + + return end(); + } + + private: + T the_data[N]; + size_type current_size; + }; + + template + class cbitset + { + public: + using size_type = std::size_t; + using underlying_type = std::uint64_t; + + constexpr cbitset& set(size_type idx) + { + check_idx(idx); + data[idx / underlying_size] |= (underlying_type(1) << (idx % underlying_size)); + return *this; + } + + constexpr cbitset& set(size_type idx, bool value) + { + check_idx(idx); + data[idx / underlying_size] ^= (-!!value ^ data[idx / underlying_size]) & (underlying_type(1) << (idx % underlying_size)); + return *this; + } + + constexpr cbitset& reset(size_type idx) + { + check_idx(idx); + data[idx / underlying_size] &= ~(underlying_type(1) << (idx % underlying_size)); + return *this; + } + + constexpr cbitset& flip(size_type idx) + { + check_idx(idx); + data[idx / underlying_size] ^= (underlying_type(1) << (idx % underlying_size)); + return *this; + } + + constexpr bool test(size_type idx) const + { + check_idx(idx); + return (data[idx / underlying_size] >> (idx % underlying_size)) & underlying_type(1); + } + + constexpr cbitset& flip() + { + for (auto& d : data) + d = ~d; + return *this; + } + + constexpr cbitset& set() + { + for (auto& d : data) + d = underlying_type(-1); + return *this; + } + + constexpr cbitset& reset() + { + for (auto& d : data) + d = underlying_type(0); + return *this; + } + + constexpr size_type size() const + { + return N; + } + + private: + constexpr void check_idx(size_type idx) const + { + if (idx >= N) + throw std::runtime_error("Index access out of range"); + } + + static const size_type underlying_size = sizeof(underlying_type) * 8; + static const size_type underlying_count = (N / underlying_size) + ((N % underlying_size) ? 1 : 0); + underlying_type data[underlying_count] = {}; + }; + + template + struct is_cqueue_compatible : std::bool_constant && std::is_trivially_destructible_v> + {}; + + template + class cqueue + {}; + + template + class cqueue::value>> + { + public: + constexpr cqueue(): + data{}, start{0}, end{0}, size_{0} + {} + + constexpr void push(const T& v) + { + if (size_ >= N) + throw std::runtime_error("Pushing out of range"); + + data[end++] = v; + if (end == N) + end = 0; + size_++; + } + + constexpr void pop() + { + if (empty()) + throw std::runtime_error("Pop on empty"); + + start++; + if (start == N) + start = 0; + size_--; + } + + constexpr T& top() + { + if (empty()) + throw std::runtime_error("Top on empty"); + return data[start]; + } + + constexpr const T& top() const + { + return std::as_const(*this).top(); + } + + constexpr bool empty() const + { + return size_ == 0; + } + + constexpr std::size_t size() const + { + return size_; + } + + private: + T data[N]; + std::size_t start; + std::size_t end; + std::size_t size_; + }; + + template + constexpr Container& sort(Container& c, Pred p) + { + bool swap = true; + while (swap) + { + swap = false; + for (auto i = 0u; i < std::size(c) - 1; i++) + { + if (p(c[i + 1], c[i])) + { + auto x = c[i]; + c[i] = c[i + 1]; + c[i + 1] = x; + swap = true; + } + } + } + return c; + } +} + +namespace utils +{ + template + constexpr T construct_default(Args...) + { + return T(); + } + + template + constexpr void copy_array(T *a1, const T* a2, std::index_sequence) + { + (void(a1[I] = a2[I]), ...); + } + + constexpr size_t char_to_idx(char c) + { + return static_cast(static_cast(c)) & 0xff; + } + + constexpr char idx_to_char(size_t idx) + { + return static_cast(static_cast(idx & 0xff)); + } + + constexpr bool is_printable(char c) + { + return c >= 0x20 && c <= 0x7e; + } + + constexpr bool is_hex_digit(char c) + { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + + constexpr bool is_dec_digit(char c) + { + return c >= '0' && c <= '9'; + } + + class char_names + { + public: + const static size_t name_size = 5; + + constexpr char_names() + { + for (size_t i = 0; i < meta::distinct_chars_count; ++i) + { + char d[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + if (idx_to_char(i) > 32 && idx_to_char(i) < 127) + { + arr[i][0] = idx_to_char(i); + arr[i][1] = 0; + } + else + { + arr[i][0] = '\\'; + arr[i][1] = 'x'; + arr[i][2] = d[i / 16]; + arr[i][3] = d[i % 16]; + arr[i][4] = 0; + } + } + } + + constexpr const char* name(char c) const { return arr[char_to_idx(c)]; } + + private: + char arr[meta::distinct_chars_count][name_size] = {}; + }; + + constexpr char_names c_names = {}; + + constexpr bool str_equal(const char* str1, const char* str2) + { + if ((str1 == nullptr) || (str2 == nullptr)) + throw std::runtime_error("null pointers not allowed here"); + while (*str1 == *str2) + { + if (*str1 == 0) + return true; + str1++; str2++; + } + return false; + } + + template + constexpr size_t find_str(const str_table& table, const char* str) + { + size_t res = 0; + for (const auto& n : table) + { + if (str_equal(n, str)) + return res; + res++; + } + if (res == N) + throw std::runtime_error("string not found"); + return uninitialized; + } + + constexpr size_t find_char(char c, const char* str) + { + size_t i = 0; + while (*str) + { + if (*str == c) + return i; + str++; i++; + } + return uninitialized; + } + + constexpr std::size_t str_len(const char* str) + { + std::size_t i = 0; + const char* p = str; + while (*p) { ++i; ++p; } + return i; + } + + struct slice + { + size32_t start; + size32_t n; + }; + + template + struct fake_table + { + T operator[](size_t) const { return val; } + T val; + }; +} + +namespace ftors +{ + template + struct ignore + { + template + constexpr ignore(T&&){} + }; + + template> + class element + {}; + + template + class element> + { + public: + template + constexpr decltype(auto) operator ()(ignore..., First&& arg, Rest&&...) const + { + return std::forward(arg); + } + }; + + constexpr element<1> _e1; + constexpr element<2> _e2; + constexpr element<3> _e3; + constexpr element<4> _e4; + constexpr element<5> _e5; + constexpr element<6> _e6; + constexpr element<7> _e7; + constexpr element<8> _e8; + constexpr element<9> _e9; + + template + class val + { + public: + constexpr val(T&& v): + v(std::forward(v)) + {} + + template + constexpr auto operator ()(Args&&...) const + { + return v; + } + + private: + T v; + }; + + template + val(T&&) -> val>; + + template + class create + { + public: + template + constexpr auto operator ()(Args&&...) const + { + return T{}; + } + }; + + template> + struct construct + {}; + + template< + typename T, + std::size_t FromIdx, + std::size_t... Skip + > + struct construct< + T, + FromIdx, + std::index_sequence> + { + template + constexpr auto operator()(ignore..., Arg&& arg, Rest&&...) const + { + return T{std::forward(arg)}; + } + }; + + template< + std::size_t ContIdx = 1, + std::size_t ArgIdx = 2, + typename = std::make_index_sequence, + typename = std::make_index_sequence, + bool container_first = ContIdx < ArgIdx + > + struct emplace_back + {}; + + template< + std::size_t ContIdx, + std::size_t ArgIdx, + std::size_t... Skip1, + std::size_t... Skip2 + > + struct emplace_back< + ContIdx, + ArgIdx, + std::index_sequence, + std::index_sequence, + true> + { + template + constexpr decltype(auto) operator()(ignore..., Container &&container, ignore..., Arg&& arg, Rest&&...) const + { + container.emplace_back(std::move(arg)); + return std::move(container); + } + }; + + template< + std::size_t ContIdx, + std::size_t ArgIdx, + std::size_t... Skip1, + std::size_t... Skip2 + > + struct emplace_back< + ContIdx, + ArgIdx, + std::index_sequence, + std::index_sequence, + false> + { + template + constexpr decltype(auto) operator()(ignore..., Arg&& arg, ignore..., Container &&container, Rest&&...) const + { + container.emplace_back(std::move(arg)); + return std::move(container); + } + }; + + template< + std::size_t ContIdx = 1, + std::size_t ArgIdx = 2, + typename = std::make_index_sequence, + typename = std::make_index_sequence, + bool container_first = ContIdx < ArgIdx + > + struct push_back + {}; + + template< + std::size_t ContIdx, + std::size_t ArgIdx, + std::size_t... Skip1, + std::size_t... Skip2 + > + struct push_back< + ContIdx, + ArgIdx, + std::index_sequence, + std::index_sequence, + true> + { + template + constexpr decltype(auto) operator()(ignore..., Container &&container, ignore..., const Arg& arg, Rest&&...) const + { + container.push_back(arg); + return std::move(container); + } + }; + + template< + std::size_t ContIdx, + std::size_t ArgIdx, + std::size_t... Skip1, + std::size_t... Skip2 + > + struct push_back< + ContIdx, + ArgIdx, + std::index_sequence, + std::index_sequence, + false> + { + template + constexpr decltype(auto) operator()(ignore..., const Arg& arg, ignore..., Container &&container, Rest&&...) const + { + container.push_back(arg); + return std::move(container); + } + }; +} + +namespace buffers +{ + template + class cstring_buffer + { + public: + template + constexpr cstring_buffer(const char(&source)[N1]) + { + utils::copy_array(data, source, std::make_index_sequence{}); + } + + struct iterator + { + const char* ptr; + + constexpr char operator *() const { return *ptr; } + constexpr iterator& operator ++() { ++ptr; return *this; } + constexpr iterator operator ++(int) { iterator i(*this); ++ptr; return i; } + constexpr bool operator == (const iterator& other) const { return ptr == other.ptr; } + constexpr bool operator != (const iterator& other) const { return ptr != other.ptr; } + constexpr iterator& operator += (size_t len) { ptr += len; return *this; } + }; + + constexpr iterator begin() const { return iterator{ data }; } + constexpr iterator end() const { return iterator{ data + N - 1 }; } + constexpr std::string_view get_view(iterator start, iterator end) const { return std::string_view(start.ptr, end.ptr - start.ptr); } + + private: + char data[N] = { 0 }; + }; + + template + cstring_buffer(const char(&)[N])->cstring_buffer; + + class string_buffer + { + public: + string_buffer(std::string&& str): + str(std::move(str)) + {} + + string_buffer(const char* str): + str(str) + {} + + auto begin() const { return str.cbegin(); } + auto end() const { return str.cend(); } + + using iterator = std::string::const_iterator; + + std::string_view get_view(iterator start, iterator end) const + { + return std::string_view(str.data() + (start - str.begin()), end - start); + } + + private: + std::string str; + }; + + template + using iterator_t = typename Buffer::iterator; +} + +template +class nterm +{ +public: + using value_type = ValueType; + + constexpr nterm(const char* name) : + name(name) + { + if (name[0] == 0) + throw std::runtime_error("empty name not allowed"); + } + + constexpr const char* get_name() const { return name; } + + template + constexpr auto operator()(Args&&... args) const; + +private: + const char* name; +}; + +enum class associativity { no_assoc, ltor, rtol }; + +struct source_point +{ + size32_t line = 1; + size32_t column = 1; + size_t global = 0; + + template + constexpr void update(Iterator start, Iterator end) + { + while (!(start == end)) + { + if (*start == '\n') + { + ++line; + column = 1; + } + else + ++column; + ++global; + ++start; + } + } + + friend std::ostream& operator << (std::ostream& o, const source_point& sp); +}; + +inline std::ostream& operator << (std::ostream& o, const source_point& sp) +{ + o << "[" << sp.line << ":" << sp.column << "]"; + return o; +} + +template +class term_value +{ +public: + constexpr term_value(VT v, source_point sp): + value(v), sp{ sp } + {} + + constexpr operator VT() const { return value; } + constexpr size32_t get_line() const { return sp.line; } + constexpr size32_t get_column() const { return sp.column; } + constexpr const VT& get_value() const { return value; } + constexpr source_point get_sp() const { return sp; } + +private: + VT value; + source_point sp; +}; + +class term +{ +public: + constexpr term(int precedence = 0, associativity a = associativity::no_assoc) : + precedence(precedence), ass(a) + {} + + constexpr associativity get_associativity() const { return ass; } + constexpr int get_precedence() const { return precedence; } + +protected: + int precedence; + associativity ass; +}; + +namespace detail +{ + constexpr std::string_view pass_sv(const std::string_view& sv) { return sv; } + constexpr char first_sv_char(const std::string_view& sv) { return sv[0]; } +} + +class char_term : public term +{ +public: + using internal_value_type = char; + + static const size_t dfa_size = 2; + static const bool is_trivial = true; + + constexpr char_term(char c, int precedence = 0, associativity a = associativity::no_assoc): + term(precedence, a), c(c) + { + utils::copy_array(id, utils::c_names.name(c), std::make_index_sequence{}); + } + + constexpr const char* get_id() const { return id; } + constexpr const char* get_name() const { return get_id(); } + constexpr char get_char() const { return c; } + constexpr char get_data() const { return c; } + + constexpr const auto& get_ftor() const { return detail::first_sv_char; } + +private: + char c; + char id[utils::char_names::name_size] = {}; +}; + +template +class string_term : public term +{ +public: + using internal_value_type = std::string_view; + static const size_t dfa_size = (DataSize - 1) * 2; + static const bool is_trivial = true; + + constexpr string_term(const char (&str)[DataSize], int precedence = 0, associativity a = associativity::no_assoc): + term(precedence, a) + { + utils::copy_array(data, str, std::make_index_sequence{}); + } + + constexpr const char* get_id() const { return data; } + constexpr const char* get_name() const { return get_id(); } + constexpr const auto& get_data() const { return data; } + + constexpr const auto& get_ftor() const { return detail::pass_sv; } + +private: + char data[DataSize] = {}; +}; + +template +class typed_term +{ +public: + using internal_value_type = std::invoke_result_t; + static const size_t dfa_size = Term::dfa_size; + static const bool is_trivial = Term::is_trivial; + + constexpr typed_term(Term t, Ftor f): + term(t), ftor(f) + {} + + constexpr const char* get_id() const { return term.get_id(); } + constexpr const char* get_name() const { return term.get_name(); } + constexpr decltype(auto) get_data() const { return term.get_data(); } + + constexpr associativity get_associativity() const { return term.get_associativity(); } + constexpr int get_precedence() const { return term.get_precedence(); } + + using ftor_type = Ftor; + + constexpr const ftor_type& get_ftor() const { return ftor; } + +private: + Term term; + ftor_type ftor; +}; + +template +class custom_term : public term +{ +public: + using internal_value_type = std::invoke_result_t; + static const size_t dfa_size = 0; + static const bool is_trivial = false; + + constexpr custom_term(const char* custom_name, Ftor ftor, int precedence = 0, associativity a = associativity::no_assoc): + term(precedence, a), custom_name(custom_name), ftor(ftor) + {} + + constexpr const char* get_id() const { return get_name(); } + constexpr const char* get_name() const { return custom_name; } + + using ftor_type = Ftor; + + constexpr const ftor_type& get_ftor() const { return ftor; } + +private: + const char* custom_name; + Ftor ftor; +}; + +struct error_recovery_token +{ + constexpr static const char* get_name() { return ""; } + constexpr static const char* get_id() { return get_name(); } +}; + +constexpr error_recovery_token error; + +template +struct value_type +{}; + +template +struct value_type>> +{ + using type = term_value; +}; + +template +struct value_type> +{ + using type = ValueType; +}; + +template +struct value_type> +{ + using type = term_value::internal_value_type>; +}; + +struct no_type {}; + +template<> +struct value_type +{ + using type = no_type; +}; + +template +using value_type_t = typename value_type::type; + +struct parse_options +{ + constexpr parse_options& set_verbose(bool val = true) { verbose = val; return *this; } + constexpr parse_options& set_skip_whitespace(bool val = true) { skip_whitespace = val; return *this; } + constexpr parse_options& set_skip_newline(bool val = true) { skip_newline = val; return *this; } + + bool verbose = false; + bool skip_whitespace = true; + bool skip_newline = true; +}; + +struct match_options +{ + bool verbose = false; + constexpr match_options& set_verbose(bool val = true) { verbose = val; return *this; } +}; + +namespace detail +{ + struct no_stream + { + template + constexpr const no_stream& operator <<(T&&) const { return *this; } + }; +} + +template +struct recognized_term +{ + Iterator it; + size16_t term_idx; +}; + +namespace regex +{ + using conflicted_terms = size16_t[4]; + + constexpr void add_conflicted_term(conflicted_terms& ts, size16_t t) + { + for (size_t i = 0; i < 4; ++i) + if (ts[i] == uninitialized16) + { + ts[i] = t; + break; + } + } + + struct dfa_state + { + constexpr dfa_state() + { + for (auto& t : transitions) + t = uninitialized16; + } + + size8_t start_state = 0; + size8_t end_state = 0; + size8_t unreachable = 0; + conflicted_terms conflicted_recognition = { uninitialized16, uninitialized16, uninitialized16, uninitialized16 }; + static const size_t transitions_size = meta::distinct_values_count; + size16_t transitions[transitions_size] = {}; + }; + + struct char_range + { + constexpr char_range(char c): + start(c), end(c) + {} + + constexpr char_range(char c1, char c2): + start(c1), end(c2) + {} + char start; + char end; + }; + + constexpr char hex_digits_to_char(char d1, char d2) + { + auto dd = [](char d) -> char + { + if (d >= 'A' && d <= 'F') + return char(10) + d - 'A'; + else if (d >= 'a' && d <= 'f') + return char(10) + d - 'a'; + else + return d - '0'; + }; + return dd(d1) * 16 + dd(d2); + } + + class char_subset + { + public: + constexpr char_subset() = default; + + template + constexpr char_subset(const char (&str)[N]) + { + for (size_t i = 0; i < N - 1; ++i) // ignore trailing 0, so N - 1 + data.set(utils::char_to_idx(str[i])); + } + + constexpr char_subset(char_range r) + { + add_range(r); + } + + constexpr char_subset& add_range(char_range r) + { + for (size_t i = utils::char_to_idx(r.start); i <= utils::char_to_idx(r.end); ++i) + data.set(i); + return *this; + } + + constexpr bool test(size_t idx) const { return data.test(idx); } + constexpr size_t size() const { return data.size(); } + constexpr char_subset& set() { data.set(); return *this; } + constexpr char_subset& flip() { data.flip(); return *this; } + constexpr char_subset& set(size_t idx) { data.set(idx); return *this; } + + private: + stdex::cbitset> data = {}; + }; + + template + using dfa = stdex::cvector; + + class dfa_size_analyzer + { + public: + using slice = utils::slice; + + constexpr slice primary_char(char) { return prim(); } + constexpr slice primary_subset(char_subset&&) { return prim(); } + + constexpr slice star(slice s) { return s; } + constexpr slice plus(slice s) { return s; } + constexpr slice opt(slice s) { return s; } + constexpr slice cat(slice s1, slice s2) { return add(s1, s2); } + constexpr slice alt(slice s1, slice s2) { return add(s1, s2); } + + constexpr slice rep(slice s, size32_t n) + { + if (n == 0) + return s; + size += (s.n * (n - 1)); + return slice{ s.start, s.n * n }; + } + + private: + constexpr slice prim() { auto old = size; size += 2; return slice{ old, 2 }; } + constexpr slice add(slice s1, slice s2) { return slice{ s1.start, s1.n + s2.n }; } + + size32_t size = 0; + }; + + template + class dfa_builder + { + public: + constexpr dfa_builder(dfa& sm): + sm(sm) + {} + + using slice = utils::slice; + + constexpr dfa_state& transition(dfa_state& from, const char_subset& s) + { + for (size_t i = 0; i < s.size(); ++i) + if (s.test(i)) + { + from.transitions[i] = size16_t(sm.size()); + } + sm.push_back(dfa_state()); + return sm.back(); + } + + constexpr dfa_state& transition(dfa_state& from, char c) + { + from.transitions[utils::char_to_idx(c)] = size16_t(sm.size()); + sm.push_back(dfa_state()); + return sm.back(); + } + + constexpr slice primary_char(char c) + { + return primary_subset(char_subset(char_range(c))); + } + + constexpr slice primary_subset(const char_subset& s) + { + size_t old_size = sm.size(); + sm.push_back(dfa_state()); + sm.back().start_state = 1; + dfa_state& to = transition(sm.back(), s); + to.end_state = 1; + return slice{ size32_t(old_size), 2 }; + } + + constexpr slice star(slice s) + { + size_t b = s.start; + sm[b].end_state = 1; + for (size_t i = s.start; i < s.start + s.n; ++i) + { + if (sm[i].end_state) + merge(i, b); + } + return s; + } + + constexpr slice plus(slice s) + { + size_t b = s.start; + for (size_t i = s.start; i < s.start + s.n; ++i) + { + if (sm[i].end_state) + merge(i, b, true); + } + return s; + } + + constexpr slice opt(slice s) + { + sm[s.start].end_state = 1; + return s; + } + + constexpr slice rep(slice s, size32_t n) + { + if (n == 0) + { + for (size_t j = s.start; j < s.start + s.n; ++j) + { + if (sm[j].start_state) + { + for (auto& t : sm[j].transitions) + t = uninitialized16; + sm[j].end_state = 1; + sm[j].start_state = 0; + } + else + sm[j].unreachable = 1; + } + return s; + } + + for (size_t i = 0; i < n - 1; ++i) + { + for (size_t j = s.start; j < s.start + s.n; ++j) + { + sm.push_back(sm[j]); + auto& st = sm.back(); + for (auto& t : st.transitions) + { + if (t != uninitialized16) + t += size16_t(s.n * (i + 1)); + } + } + } + slice whole = s; + for (size_t i = 0; i < n - 1; ++i) + { + cat(whole, slice{ whole.start + whole.n, s.n }); + whole = slice{ whole.start, whole.n + s.n}; + } + return whole; + } + + constexpr slice cat(slice s1, slice s2) + { + size_t b = s2.start; + for (size_t i = s1.start; i < s1.start + s1.n; ++i) + { + if (sm[i].end_state) + merge(i, b, false, true); + } + return slice{ s1.start, s1.n + s2.n }; + } + + constexpr slice alt(slice s1, slice s2) + { + size_t b1 = s1.start; + size_t b2 = s2.start; + merge(b1, b2, true, true); + return slice{ s1.start, s1.n + s2.n }; + } + + constexpr void mark_end_states(slice s, size16_t idx) + { + for (size_t i = s.start; i < s.start + s.n; ++i) + { + mark_end_state(sm[i], idx); + } + } + + constexpr size_t size() const { return sm.size(); } + private: + constexpr void merge(size_t to, size_t from, bool keep_end_state = false, bool mark_from_as_unreachable = false) + { + if (to == from) + return; + dfa_state& s_from = sm[from]; + dfa_state& s_to = sm[to]; + s_from.start_state = 0; + if (keep_end_state) + s_to.end_state = s_to.end_state || s_from.end_state; + else + s_to.end_state = s_from.end_state; + + s_from.unreachable = mark_from_as_unreachable ? 1 : 0; + + for (size_t i = 0; i < dfa_state::transitions_size; ++i) + { + size16_t& tr_from = s_from.transitions[i]; + if (tr_from == uninitialized16) + continue; + size16_t& tr_to = s_to.transitions[i]; + if (tr_to == uninitialized16) + { + tr_to = tr_from; + sm[tr_to].unreachable = 0; + } + else + merge(tr_to, tr_from, keep_end_state, mark_from_as_unreachable); + } + + auto& cr = s_from.conflicted_recognition; + for (size_t j = 0; j < 4; ++j) + { + size16_t term_idx = cr[j]; + if (term_idx != uninitialized16) + mark_end_state(s_to, term_idx); + else + break; + } + } + + constexpr void mark_end_state(dfa_state& s, size16_t idx) + { + if (!s.end_state) + return; + add_conflicted_term(s.conflicted_recognition, idx); + } + + dfa& sm; + }; + + template + struct regex_pattern_data + { + const char (&pattern)[N]; + }; + + template + constexpr void add_term_data_to_dfa(char c, dfa_builder& b, const Parser&, size16_t idx) + { + using slice = utils::slice; + + slice prev{0, size32_t(b.size())}; + slice new_sl = b.primary_char(c); + b.mark_end_states(new_sl, idx); + b.alt(prev, new_sl); + } + + template + constexpr void add_term_data_to_dfa(const char (&str)[DataSize], dfa_builder& b, const Parser&, size16_t idx) + { + using slice = utils::slice; + slice prev{0, size32_t(b.size())}; + + slice whole = b.primary_char(str[0]); + for (size_t i = 1; i < DataSize - 1; ++i) + { + slice char_sl = b.primary_char(str[i]); + whole = b.cat(whole, char_sl); + } + + b.mark_end_states(whole, idx); + b.alt(prev, whole); + } + + template + constexpr void add_term_data_to_dfa( + const regex_pattern_data& pattern_data, + dfa_builder& b, + const Parser& p, + size16_t idx) + { + using slice = utils::slice; + detail::no_stream s{}; + std::optional res = p.parse( + parse_options{}.set_skip_whitespace(false), + buffers::cstring_buffer(pattern_data.pattern), + s + ); + + if (res.has_value()) + { + slice prev{0, size32_t(b.size())}; + b.mark_end_states(res.value(), idx); + b.alt(prev, res.value()); + } + else + throw std::runtime_error("Regex parse error"); + } + + template + constexpr auto dfa_match( + const dfa& sm, + match_options options, + source_point sp, + Iterator start, + Iterator end, + ErrorStream& error_stream) + { + size16_t state_idx = 0; + Iterator it_prev = start; + recognized_term rt{ start, uninitialized16 }; + while (true) + { + const dfa_state& state = sm[state_idx]; + size16_t rec_idx = state.conflicted_recognition[0]; + if (rec_idx != uninitialized16) + { + rt.it = start; + rt.term_idx = rec_idx; + + if (options.verbose) + { + error_stream << sp << " REGEX MATCH: Recognized " << rec_idx << "\n"; + } + } + sp.update(it_prev, start); + + if (start == end) + break; + + size16_t tr = state.transitions[utils::char_to_idx(*start)]; + if (tr == uninitialized16) + { + if (rt.term_idx == uninitialized16) + rt.it = start; + break; + } + + state_idx = tr; + + if (options.verbose) + { + error_stream << sp << " REGEX MATCH: Current char " << utils::c_names.name(*start) << "\n"; + error_stream << sp << " REGEX MATCH: New state " << state_idx << "\n"; + } + it_prev = start; + ++start; + } + return rt; + } + + template + constexpr void write_dfa_state_diag_str(const dfa_state& st, Stream& s, size16_t idx, const StrTable& term_names) + { + s << "STATE " << idx; + if (st.unreachable) + { + s << " (unreachable) \n"; + return; + } + + if (st.end_state) + s << " recognized "; + size16_t term_idx = st.conflicted_recognition[0]; + if (term_idx != uninitialized16) + s << term_names[term_idx]; + s << " "; + + auto f_range = [&s](const auto& r, size16_t state_idx) + { + if (r.size() > 2) + { + s << "["; + s << utils::c_names.name(r.front()); + s << " - "; + s << utils::c_names.name(r.back()); + s << "] -> " << state_idx << " "; + } + else + { + for (char c : r) + { + s << utils::c_names.name(c); + s << " -> " << state_idx << " "; + } + } + }; + + stdex::cvector tmp; + size16_t prev = uninitialized16; + for (size_t i = 0; i < dfa_state::transitions_size + 1; ++i) + { + size16_t to = (i == dfa_state::transitions_size ? uninitialized16 : st.transitions[i]); + if (to == prev && to != uninitialized16) + tmp.push_back(utils::idx_to_char(i)); + else + { + if (prev != uninitialized16) + { + f_range(tmp, prev); + tmp.clear(); + } + if (to != uninitialized16) + tmp.push_back(utils::idx_to_char(i)); + } + prev = to; + } + s << "\n"; + } + + template + constexpr void write_dfa_diag_str(const dfa& sm, Stream& stream, const StrTable& term_names) + { + for (size16_t i = 0; i < sm.size(); ++i) + write_dfa_state_diag_str(sm[i], stream, i, term_names); + } + + template + constexpr void write_dfa_diag_str(const dfa& sm, Stream& stream) + { + write_dfa_diag_str(sm, stream, utils::fake_table{""}); + } + + template + constexpr auto create_regex_parser(Builder& b); +} + +namespace detail +{ + template + constexpr decltype(auto) make_term(Arg&& arg) + { + return std::forward(arg); + } + + constexpr auto make_term(char c) + { + return char_term(c); + } + + template + constexpr auto make_term(const char (&str)[N]) + { + return string_term(str); + } + + template + struct fake_root + { + using value_type = ValueType; + + constexpr auto operator()(const nterm& nt) const; + + constexpr static const char* get_name() { return "##"; }; + }; + + struct eof + { + constexpr static const char* get_name() { return ""; } + }; + + template + class rule + { + public: + using f_type = F; + static const size_t n = sizeof...(R); + + constexpr rule(L l, std::tuple r) : + f(nullptr), l(l), r(r), precedence(0) + {} + + template + constexpr rule(F1&& f, L l, std::tuple r) : + f(std::move(f)), l(l), r(r), precedence(0) + {} + + template + constexpr rule(F1&& f, L l, std::tuple r, int precedence) : + f(std::move(f)), l(l), r(r), precedence(precedence) + {} + + constexpr auto operator[](int prec) + { + return rule(std::move(f), l, r, prec); + } + + template + constexpr auto operator >= (F1&& f) + { + return rule, L, R...>(std::move(f), l, r, precedence); + } + + template + constexpr auto operator >>= (F1&& f) + { + return rule, L, R...>(std::move(f), l, r, precedence); + } + + constexpr const F& get_f() const { return f; } + constexpr const L& get_l() const { return l; } + constexpr const auto& get_r() const { return r; } + constexpr int get_precedence() const { return precedence; } + + private: + F f; + L l; + std::tuple r; + int precedence; + }; + + template + rule(L l, std::tuple r) -> rule; + + template + constexpr auto make_rule_item(Arg&& arg) + { + return make_term(arg); + } + + template + constexpr auto make_rule_item(const nterm& nt) + { + return nt; + } + + template + constexpr auto fake_root::operator()(const nterm& nt) const + { + return rule(*this, std::make_tuple(nt)); + } +} + +template +template +constexpr auto nterm::operator()(Args&&... args) const +{ + return detail::rule( + *this, + std::make_tuple(detail::make_rule_item(args)...) + ); +} + +namespace detail +{ + constexpr size_t value_stack_initial_capacity = 1 << 10; + constexpr size_t cursor_stack_initial_capacity = 1 << 10; + + template + struct value_reductors + { + constexpr value_reductors(const RuleTupleType& rule_tuple): rule_tuple(rule_tuple) { + init_reductors(rule_tuple, std::make_index_sequence>{}); + } + + constexpr ValueVariantType invoke(Context&& context, size_t i, ValueVariantType* args) const { + return reductors[i](std::forward(context), rule_tuple, args); + } + + private: + template + constexpr void init_reductors(const RuleTupleType& rule_tuple, std::index_sequence) + { + (void(init_nth_reductor(std::get(rule_tuple))), ...); + } + + template + constexpr void init_nth_reductor(const detail::rule&) + { + reductors[Nr] = &reduce_value, value_type_t...>; + } + + template + constexpr static LValueType reduce_value_impl([[maybe_unused]] Context&& ctx, [[maybe_unused]] const F& f, ValueVariantType* start, std::index_sequence) + { + if constexpr (std::is_same_v) + return LValueType(std::get(std::move(*(start + I)))...); + else + { + if constexpr (RequiresContext) + return LValueType(f(std::forward(ctx), std::get(std::move(*(start + I)))...)); + else + return LValueType(f(std::get(std::move(*(start + I)))...)); + } + } + + template + constexpr static ValueVariantType reduce_value(Context&& ctx, const RuleTupleType& rules, ValueVariantType* start) + { + return ValueVariantType( + reduce_value_impl( + std::forward(ctx), std::get(rules).get_f(), start, std::index_sequence_for{}) + ); + } + + const RuleTupleType& rule_tuple; + + using value_reductor = ValueVariantType(*)(Context&&, const RuleTupleType&, ValueVariantType*); + value_reductor reductors[RuleCount] = {}; + }; + + template + struct parse_state + { + constexpr parse_state( + CursorStack& cursor_stack, + ValueStack& value_stack, + ErrorStream& error_stream, + parse_options options, + Iterator buffer_begin, + Iterator buffer_end, + const ValueReductors& reductors): + cursor_stack(cursor_stack), + value_stack(value_stack), + error_stream(error_stream), + options(options), + current_sp{1, 1}, + current_it(buffer_begin), + current_term_end_it(buffer_begin), + buffer_end(buffer_end), + reductors(reductors), + current_term_idx(uninitialized16), + recovery_mode(false), + consume_mode(false) + { + cursor_stack.reserve(cursor_stack_initial_capacity); + value_stack.reserve(value_stack_initial_capacity); + } + + constexpr void enter_recovery_mode() { recovery_mode = true; } + constexpr void leave_recovery_mode() { recovery_mode = false; } + constexpr void enter_consume_mode() { consume_mode = true; } + constexpr void leave_consume_mode() { consume_mode = false; } + constexpr bool in_recovery_mode() const { return recovery_mode; } + constexpr bool in_consume_mode() const { return consume_mode; } + + using iterator = Iterator; + + CursorStack& cursor_stack; + ValueStack& value_stack; + ErrorStream& error_stream; + parse_options options; + source_point current_sp; + iterator current_it; + iterator current_term_end_it; + iterator buffer_end; + const ValueReductors& reductors; + size16_t current_term_idx; + bool recovery_mode; + bool consume_mode; + }; + + template + struct parse_table_cursor_stack_type + { + using type = std::vector; + }; + + template + struct parse_table_cursor_stack_type, EmptyRulesCount> + { + using type = stdex::cvector; + }; + + template + using parse_table_cursor_stack_type_t = typename parse_table_cursor_stack_type::type; + + template + struct parser_value_stack_type + { + using type = std::vector; + }; + + template + struct parser_value_stack_type< + buffers::cstring_buffer, + EmptyRulesCount, + ValueVariantType, + std::enable_if_t::value> + > + { + using type = std::vector; + }; + + template + struct parser_value_stack_type< + buffers::cstring_buffer, + EmptyRulesCount, + ValueVariantType, + std::enable_if_t::value> + > + { + using type = stdex::cvector; + }; + + template + using parser_value_stack_type_t = typename parser_value_stack_type::type; +} + + +namespace detail +{ + struct deduce_max_states + { + template + static const size_t value = (0 + ... + (RuleSizes + 1)) * TermCount; + }; +} + +template +struct use_lexer +{ + using type = Lexer; +}; + +struct use_generated_lexer +{ + using type = no_type; +}; + +template +class parser +{}; + +template +class parser< + nterm, + std::tuple, + std::tuple, + std::tuple, + LexerUsage +> +{ + using term_tuple_type = std::tuple; + using nterm_tuple_type = std::tuple; + using rule_tuple_type = std::tuple; + using root_nterm_type = nterm; + using root_value_type = RootValueType; + + static const bool generate_lexer = std::is_same_v; + using lexer_type = typename LexerUsage::type; + +public: + constexpr parser( + root_nterm_type grammar_root, + term_tuple_type terms, + nterm_tuple_type nterms, + rule_tuple_type&& rules, + LexerUsage): + parser(grammar_root, terms, nterms, std::move(rules)) + {} + + constexpr parser( + root_nterm_type grammar_root, + term_tuple_type terms, + nterm_tuple_type nterms, + rule_tuple_type&& rules): + term_tuple(terms), + nterm_tuple(nterms), + rule_tuple(std::move(rules)) + { + auto seq_for_terms = std::make_index_sequence>{}; + analyze_nterms(std::make_index_sequence>{}); + analyze_nterm(detail::fake_root>{}); + analyze_terms(seq_for_terms); + analyze_eof(); + analyze_error_recovery_token(); + analyze_rules(std::make_index_sequence>{}, grammar_root); + analyze_states(); + + create_lexer(seq_for_terms); + } + + template + constexpr std::optional parse(const Buffer& buffer) const + { + detail::no_stream error_stream; + return parse(buffer, error_stream); + } + + template + constexpr std::optional context_parse(Context&& ctx, const Buffer& buffer) const + { + detail::no_stream error_stream; + return context_parse(std::forward(ctx), buffer, error_stream); + } + + template + constexpr std::optional parse(const Buffer& buffer, ErrorStream& error_stream) const + { + return parse(parse_options{}, buffer, error_stream); + } + + template + constexpr std::optional context_parse(Context&& ctx, const Buffer& buffer, ErrorStream& error_stream) const + { + return context_parse(std::forward(ctx), parse_options{}, buffer, error_stream); + } + + template + constexpr std::optional parse(parse_options options, const Buffer& buffer, ErrorStream& error_stream) const + { + return context_parse(no_type{}, options, buffer, error_stream); + } + + template + constexpr std::optional context_parse(Context&& ctx, parse_options options, const Buffer& buffer, ErrorStream& error_stream) const + { + detail::parser_value_stack_type_t value_stack{}; + detail::parse_table_cursor_stack_type_t cursor_stack{}; + + detail::value_reductors reductors(rule_tuple); + detail::parse_state ps(cursor_stack, value_stack, error_stream, options, buffer.begin(), buffer.end(), reductors); + + ps.cursor_stack.push_back(0); + + std::optional root_value; + + while (true) + { + size16_t cursor = ps.cursor_stack.back(); + + auto t_idx = get_current_term(ps); + if (t_idx == uninitialized16) + break; + + const auto& entry = parse_table[cursor][get_parse_table_idx(true, t_idx)]; + + if (entry.kind == parse_table_entry_kind::error) + { + if (ps.in_consume_mode()) + { + if (!consume_term_recovering(ps)) + break; + continue; + } + if (!ps.in_recovery_mode()) + { + syntax_error(ps); + enter_recovery_mode(ps); + } + if (!pop_stacks(ps)) + break; + continue; + } + else + { + if (ps.in_consume_mode()) + leave_consume_mode(ps); + } + + if (entry.kind == parse_table_entry_kind::shift) + { + shift(ps, buffer.get_view(ps.current_it, ps.current_term_end_it), t_idx, entry.shift); + consume_term(ps); + } + else if (entry.kind == parse_table_entry_kind::shift_error_recovery_token) + { + shift_recovery_token(ps, entry.shift); + leave_recovery_mode(ps); + enter_consume_mode(ps); + } + else if (entry.kind == parse_table_entry_kind::reduce) + reduce(std::forward(ctx), ps, entry.reduce); + else if (entry.kind == parse_table_entry_kind::rr_conflict) + rr_conflict(std::forward(ctx), ps, entry.reduce); + else if (entry.kind == parse_table_entry_kind::success) + { + root_value = std::optional(std::move(success(ps))); + break; + } + } + + return root_value; + } + + template + constexpr void write_diag_str(Stream& s) const + { + s << "PARSER" << "\n" << "\n"; + + s << "Parser Object size: " << sizeof(*this) << "\n\nRULES\n\n"; + + for (size16_t i = 0; i < rule_count; ++i) + { + s << i << " "; + write_rule_diag_str(s, i); + s << "\n"; + } + + s << "\nSTATES\n\n"; + for (size16_t i = 0; i < state_count; ++i) + { + write_state_diag_str(s, i); + s << "\n"; + } + + if constexpr (generate_lexer) + { + s << "\n" << "LEXICAL ANALYZER" << "\n" << "\n"; + regex::write_dfa_diag_str(lexer_sm, s, term_names); + } + } + +private: + static const size_t max_rule_element_count = meta::max_v<1, Rules::n...>; + static const size16_t eof_idx = sizeof...(Terms); + static const size16_t error_recovery_token_idx = sizeof...(Terms) + 1; + static const size_t term_count = sizeof...(Terms) + 2; + static const size16_t fake_root_idx = sizeof...(NTerms); + static const size_t nterm_count = sizeof...(NTerms) + 1; + static const size_t root_rule_idx = sizeof...(Rules); + static const size_t rule_count = sizeof...(Rules) + 1; + static const size_t empty_rules_count = meta::count_zeros; + static const size_t situation_size = max_rule_element_count + 1; + static const size_t situation_address_space_size = rule_count * situation_size * term_count; + static const size_t max_states = detail::deduce_max_states::template value; + static const size_t lexer_dfa_size = generate_lexer ? (0 + ... + Terms::dfa_size) : 1; + + using value_variant_type = meta::unique_types_variant_t< + std::nullptr_t, + no_type, + value_type_t..., + value_type_t... + >; + + using term_subset = stdex::cbitset; + using nterm_subset = stdex::cbitset; + using right_side_slice_subset = stdex::cbitset; + using state = stdex::cbitset; + using situation_set = stdex::cbitset; + + struct situation_queue_entry + { + size16_t state_idx; + size32_t idx; + }; + + using situation_queue_t = stdex::cqueue< + situation_queue_entry, + max_states * max_states + >; + + struct symbol + { + constexpr symbol() : + term(false), idx(uninitialized16) + {} + + constexpr symbol(bool term, size16_t idx) : + term(term), idx(idx) + {} + + constexpr size16_t get_parse_table_idx() const + { + return parser::get_parse_table_idx(term, idx); + } + + bool term; + size16_t idx; + }; + + struct situation_info + { + size16_t rule_info_idx; + size16_t after; + size16_t t; + }; + + struct rule_info + { + size16_t l_idx; + size16_t r_idx; + size16_t r_elements; + }; + + enum class parse_table_entry_kind : size16_t { error, success, shift, shift_error_recovery_token, reduce, rr_conflict }; + + constexpr static bool is_shift(parse_table_entry_kind kind) + { + return kind == parse_table_entry_kind::shift || kind == parse_table_entry_kind::shift_error_recovery_token; + } + + struct parse_table_entry + { + parse_table_entry_kind kind = parse_table_entry_kind::error; + size16_t shift = uninitialized16; + size16_t reduce = uninitialized16; + + bool has_shift = false; + bool has_reduce = false; + + constexpr void set_shift(size16_t value) { shift = value; has_shift = true; } + constexpr void set_reduce(size16_t value) { reduce = value; has_reduce = true; } + }; + + constexpr static size16_t get_parse_table_idx(bool term, size16_t idx) + { + return term ? nterm_count + idx : idx; + } + + constexpr void analyze_eof() + { + term_names[eof_idx] = detail::eof::get_name(); + term_ids[eof_idx] = detail::eof::get_name(); + term_precedences[eof_idx] = 0; + term_associativities[eof_idx] = associativity::no_assoc; + } + + constexpr void analyze_error_recovery_token() + { + term_names[error_recovery_token_idx] = error_recovery_token::get_name(); + term_ids[error_recovery_token_idx] = error_recovery_token::get_name(); + term_precedences[error_recovery_token_idx] = 0; + term_associativities[error_recovery_token_idx] = associativity::no_assoc; + } + + template + constexpr void analyze_term(const Term& t) + { + term_precedences[TermIdx] = t.get_precedence(); + term_associativities[TermIdx] = t.get_associativity(); + term_names[TermIdx] = t.get_name(); + term_ids[TermIdx] = t.get_id(); + term_ftors[TermIdx] = string_view_to_term_value; + } + + template + constexpr void analyze_nterm(const nterm& nt, size16_t idx) + { + nterm_names[idx] = nt.get_name(); + } + + template + constexpr void analyze_nterm(detail::fake_root) + { + nterm_names[fake_root_idx] = detail::fake_root::get_name(); + } + + template + constexpr auto make_symbol(const Term& t) const + { + return symbol{ true, size16_t(utils::find_str(term_ids, t.get_id())) }; + } + + template + constexpr auto make_symbol(const nterm& nt) const + { + return symbol{ false, size16_t(utils::find_str(nterm_names, nt.get_name())) }; + } + + template + constexpr void analyze_terms(std::index_sequence) + { + (void(analyze_term(std::get(term_tuple))), ...); + } + + template + constexpr void analyze_nterms(std::index_sequence) + { + (void(analyze_nterm(std::get(nterm_tuple), I)), ...); + } + + template + constexpr void analyze_rules(std::index_sequence, const root_nterm_type& root) + { + (void(analyze_rule(std::get(rule_tuple), std::make_index_sequence{})), ...); + analyze_rule(detail::fake_root>{}(root), std::index_sequence<0>{}); + stdex::sort(rule_infos, [](const auto& ri1, const auto& ri2) { return ri1.l_idx < ri2.l_idx; }); + make_nterm_rule_slices(); + } + + constexpr void make_nterm_rule_slices() + { + size16_t nt = 0; + for (size16_t i = 0u; i < rule_count; ++i) + { + if (nt != rule_infos[i].l_idx) + { + nt = rule_infos[i].l_idx; + nterm_rule_slices[nt].start = i; + nterm_rule_slices[nt].n = 1; + } + else + nterm_rule_slices[nt].n++; + } + } + + constexpr size16_t calculate_rule_last_term(size16_t rule_idx, size16_t rule_size) const + { + for (int i = int(rule_size - 1); i >= 0; --i) + { + const auto& s = right_sides[rule_idx][i]; + if (!s.term) + continue; + return s.idx; + } + return uninitialized16; + } + + constexpr int calculate_rule_precedence(int precedence, size16_t rule_idx) const + { + if (precedence != 0) + return precedence; + size16_t last_term_idx = rule_last_terms[rule_idx]; + if (last_term_idx != uninitialized16) + return term_precedences[last_term_idx]; + return 0; + } + + constexpr associativity calculate_rule_associativity(size16_t rule_idx) const + { + size16_t last_term_idx = rule_last_terms[rule_idx]; + if (last_term_idx != uninitialized16) + return term_associativities[last_term_idx]; + return associativity::no_assoc; + } + + template + constexpr void analyze_rule(const detail::rule& r, std::index_sequence) + { + size16_t l_idx = size16_t(utils::find_str(nterm_names, r.get_l().get_name())); + (void(right_sides[Nr][I] = make_symbol(std::get(r.get_r()))), ...); + constexpr size16_t rule_elements_count = size16_t(sizeof...(R)); + rule_infos[Nr] = { l_idx, size16_t(Nr), rule_elements_count }; + rule_last_terms[Nr] = calculate_rule_last_term(Nr, rule_elements_count); + rule_precedences[Nr] = calculate_rule_precedence(r.get_precedence(), Nr); + rule_associativities[Nr] = calculate_rule_associativity(Nr); + } + + struct state_analyze_info + { + situation_queue_t situation_queue = { }; + stdex::cvector closures[situation_address_space_size] = {}; + stdex::cbitset done_closures = {}; + term_subset situation_first_after[situation_address_space_size] = { }; + situation_set situation_first_after_analyzed = {}; + term_subset right_side_slice_first[situation_size * rule_count] = {}; + right_side_slice_subset right_side_slice_first_analyzed = {}; + nterm_subset nterm_empty = { }; + term_subset nterm_first[nterm_count] = { }; + nterm_subset nterm_empty_analyzed = { }; + nterm_subset nterm_first_analyzed = { }; + }; + + constexpr size32_t make_situation_idx(situation_info a) const + { + return a.rule_info_idx * situation_size * term_count + a.after * term_count + a.t; + } + + constexpr situation_info make_situation_info(size32_t idx) const + { + size16_t t = size16_t(idx % term_count); + idx /= term_count; + size16_t after = size16_t(idx % situation_size); + size16_t rule_info_idx = size16_t(idx / situation_size); + return situation_info{ rule_info_idx, after, t }; + } + + constexpr void add_term_subset(term_subset& dest, const term_subset& source) + { + for (size_t i = 0u; i < term_count; ++i) + dest.set(i, dest.test(i) || source.test(i)); + } + + constexpr const term_subset& make_right_side_slice_first(const rule_info& ri, size_t start, state_analyze_info& sai) + { + size_t right_side_slice_idx = max_rule_element_count * ri.r_idx + start; + auto& res = sai.right_side_slice_first[right_side_slice_idx]; + + if (sai.right_side_slice_first_analyzed.test(right_side_slice_idx)) + return res; + sai.right_side_slice_first_analyzed.set(right_side_slice_idx); + + for (size_t i = start; i < ri.r_elements; ++i) + { + const symbol& s = right_sides[ri.r_idx][i]; + if (s.term) + { + res.set(s.idx); + break; + } + add_term_subset(res, make_nterm_first(s.idx, sai)); + if (!make_nterm_empty(s.idx, sai)) + break; + } + return res; + } + + constexpr const term_subset& make_nterm_first(size16_t nt, state_analyze_info& sai) + { + if (sai.nterm_first_analyzed.test(nt)) + return sai.nterm_first[nt]; + sai.nterm_first_analyzed.set(nt); + + const utils::slice& s = nterm_rule_slices[nt]; + for (size_t i = 0u; i < s.n; ++i) + { + const rule_info& ri = rule_infos[s.start + i]; + add_term_subset(sai.nterm_first[nt], make_right_side_slice_first(ri, 0, sai)); + } + return sai.nterm_first[nt]; + } + + constexpr bool is_right_side_slice_empty(const rule_info& ri, size_t start, state_analyze_info& sai) + { + for (size_t i = start; i < ri.r_elements; ++i) + { + const symbol& s = right_sides[ri.r_idx][i]; + if (s.term) + return false; + if (!make_nterm_empty(s.idx, sai)) + return false; + } + return true; + } + + constexpr bool is_right_side_empty(const rule_info& ri, state_analyze_info& sai) + { + return is_right_side_slice_empty(ri, 0, sai); + } + + constexpr bool make_nterm_empty(size16_t nt, state_analyze_info& sai) + { + if (sai.nterm_empty_analyzed.test(nt)) + return sai.nterm_empty.test(nt); + sai.nterm_empty_analyzed.set(nt); + + const utils::slice& s = nterm_rule_slices[nt]; + for (size_t i = 0u; i < s.n; ++i) + { + if (is_right_side_empty(rule_infos[s.start + i], sai)) + { + return (sai.nterm_empty.set(nt), true); + } + } + return (sai.nterm_empty.reset(nt), false); + } + + constexpr const term_subset& make_situation_first_after(const situation_info& addr, size32_t idx, state_analyze_info& sai) + { + if (sai.situation_first_after_analyzed.test(idx)) + return sai.situation_first_after[idx]; + sai.situation_first_after_analyzed.set(idx); + + const rule_info& ri = rule_infos[addr.rule_info_idx]; + add_term_subset(sai.situation_first_after[idx], make_right_side_slice_first(ri, addr.after + 1, sai)); + if (is_right_side_slice_empty(ri, addr.after + 1, sai)) + sai.situation_first_after[idx].set(addr.t); + return sai.situation_first_after[idx]; + } + + constexpr void analyze_states() + { + situation_info root_situation_info{ root_rule_idx, 0, eof_idx }; + size32_t idx = make_situation_idx(root_situation_info); + state_count = 1; + + state_analyze_info sai; + + add_situation_to_state(0, idx, sai); + + while (!sai.situation_queue.empty()) + { + const auto& entry = sai.situation_queue.top(); + sai.situation_queue.pop(); + + situation_closure(entry.state_idx, entry.idx, sai); + situation_transition(entry.state_idx, entry.idx, sai); + } + } + + constexpr void add_situation_to_state(size16_t state_idx, size32_t idx, state_analyze_info& sai) + { + state& s = states[state_idx]; + if (!s.test(idx)) + { + s.set(idx); + sai.situation_queue.push({ state_idx, idx }); + } + } + + constexpr void situation_closure(size16_t state_idx, size32_t idx, state_analyze_info& sai) + { + if (sai.done_closures.test(idx)) + { + for(auto i = 0u; i < sai.closures[idx].size(); ++i) + { + add_situation_to_state(state_idx, sai.closures[idx][i], sai); + } + return; + } + + sai.done_closures.set(idx); + + situation_info addr = make_situation_info(idx); + const rule_info& ri = rule_infos[addr.rule_info_idx]; + if (addr.after >= ri.r_elements) + return; + + const symbol& s = right_sides[ri.r_idx][addr.after]; + if (!s.term) + { + size16_t nt = s.idx; + const term_subset& first = make_situation_first_after(addr, idx, sai); + const utils::slice& sl = nterm_rule_slices[nt]; + for (auto i = 0u; i < sl.n; ++i) + { + for (size16_t t = 0; t < term_count; ++t) + { + if (first.test(t)) + { + size32_t new_s_idx = make_situation_idx(situation_info{ size16_t(sl.start + i), 0, t }); + add_situation_to_state(state_idx, new_s_idx, sai); + sai.closures[idx].push_back(new_s_idx); + } + } + } + } + } + + constexpr auto solve_conflict(size16_t rule_info_idx, size16_t term_idx) const + { + size16_t rule_idx = rule_infos[rule_info_idx].r_idx; + int r_p = rule_precedences[rule_idx]; + int t_p = term_precedences[term_idx]; + if (r_p > t_p) + return parse_table_entry_kind::reduce; + + if (r_p == t_p) + { + if (rule_associativities[rule_idx] == associativity::ltor) + return parse_table_entry_kind::reduce; + } + return parse_table_entry_kind::shift; + } + + constexpr void situation_transition(size16_t state_idx, size32_t idx, state_analyze_info& sai) + { + situation_info addr = make_situation_info(idx); + const rule_info& ri = rule_infos[addr.rule_info_idx]; + bool reduction = addr.after >= ri.r_elements; + + const auto& s = right_sides[ri.r_idx][addr.after]; + size16_t symbol_idx = reduction ? get_parse_table_idx(true, addr.t) : s.get_parse_table_idx(); + auto& entry = parse_table[state_idx][symbol_idx]; + if (reduction) + { + if (ri.r_idx == root_rule_idx) + { + entry.kind = parse_table_entry_kind::success; + } + else if (entry.has_shift) + { + entry.kind = solve_conflict(addr.rule_info_idx, addr.t); + } + else if (entry.has_reduce) + { + entry.kind = parse_table_entry_kind::rr_conflict; + } + else + { + entry.kind = parse_table_entry_kind::reduce; + } + entry.set_reduce(size16_t(addr.rule_info_idx)); + return; + } + + situation_info new_addr = situation_info{ addr.rule_info_idx, size16_t(addr.after + 1), addr.t }; + size32_t new_idx = make_situation_idx(new_addr); + + if (entry.has_shift) + { + add_situation_to_state(entry.shift, new_idx, sai); + return; + } + + size16_t new_state_idx = uninitialized16; + for (size16_t i = 0; i < state_count; ++i) + { + if (states[i].test(new_idx)) + { + new_state_idx = i; + break; + } + } + if (new_state_idx == uninitialized16) + { + new_state_idx = state_count; + ++state_count; + } + + if (entry.has_reduce) + { + entry.kind = solve_conflict(entry.reduce, s.idx); + } + else + { + entry.kind = parse_table_entry_kind::shift; + }; + entry.set_shift(new_state_idx); + + if (entry.kind == parse_table_entry_kind::shift && s.idx == error_recovery_token_idx) + entry.kind = parse_table_entry_kind::shift_error_recovery_token; + + add_situation_to_state(new_state_idx, new_idx, sai); + } + + constexpr const char* get_symbol_name(const symbol& s) const + { + return s.term ? term_names[s.idx] : nterm_names[s.idx]; + } + + template + constexpr void write_rule_diag_str(Stream& s, size16_t rule_info_idx) const + { + const rule_info& ri = rule_infos[rule_info_idx]; + s << nterm_names[ri.l_idx] << " <- "; + if (ri.r_elements > 0) + s << get_symbol_name(right_sides[ri.r_idx][0]); + for (size_t i = 1u; i < ri.r_elements; ++i) + { + s << " " << get_symbol_name(right_sides[ri.r_idx][i]); + } + } + + template + constexpr void write_situation_diag_str(Stream& s, size32_t idx) const + { + const situation_info addr = make_situation_info(idx); + const rule_info& ri = rule_infos[addr.rule_info_idx]; + s << nterm_names[ri.l_idx] << " <- "; + for (size_t i = 0u; i < addr.after; ++i) + { + s << get_symbol_name(right_sides[ri.r_idx][i]) << " "; + } + s << ". "; + for (size_t i = addr.after; i < ri.r_elements; ++i) + { + s << get_symbol_name(right_sides[ri.r_idx][i]) << " "; + } + s << "==> " << term_names[addr.t]; + } + + template + constexpr void write_state_diag_str(Stream& s, size16_t idx) const + { + s << "STATE " << idx << "\n"; + + for (size32_t i = 0u; i < situation_address_space_size; ++i) + { + if (states[idx].test(i)) + { + write_situation_diag_str(s, i); + s << "\n"; + } + } + + s << "\n"; + + for (size_t i = 0; i < nterm_count; ++i) + { + const auto& entry = parse_table[idx][i]; + if (is_shift(entry.kind)) + s << "On " << nterm_names[i] << " go to " << entry.shift << "\n"; + } + for (size_t i = nterm_count; i < nterm_count + term_count; ++i) + { + const auto& entry = parse_table[idx][i]; + if (entry.kind == parse_table_entry_kind::error) + continue; + + size_t term_idx = i - nterm_count; + s << "On " << term_names[term_idx]; + if (entry.kind == parse_table_entry_kind::success) + s << " success \n"; + else if (entry.kind == parse_table_entry_kind::reduce && entry.has_shift) + s << " shift to " << entry.shift << " S/R CONFLICT, prefer reduce(" << rule_infos[entry.reduce].r_idx << ") over shift\n"; + else if (is_shift(entry.kind) && entry.has_reduce) + s << " shift to " << entry.shift << " S/R CONFLICT, prefer shift over reduce(" << rule_infos[entry.reduce].r_idx << ")\n"; + else if (is_shift(entry.kind)) + s << " shift to " << entry.shift << "\n"; + else if (entry.kind == parse_table_entry_kind::reduce) + s << " reduce using (" << rule_infos[entry.reduce].r_idx << ")\n"; + else if (entry.kind == parse_table_entry_kind::rr_conflict) + s << " R/R CONFLICT - !!! FIX IT !!! \n"; + } + } + + template + constexpr static value_variant_type string_view_to_term_value(const term_tuple_type& term_tuple, const std::string_view& sv, source_point sp) + { + const auto &t = std::get(term_tuple); + using term_value_type = value_type_t>; + return value_variant_type(term_value_type(t.get_ftor()(sv), sp)); + } + + template + constexpr void shift_recovery_token(ParseState& ps, size16_t new_cursor_value) const + { + if (ps.options.verbose) + ps.error_stream << ps.current_sp << " PARSE: Shift to " << new_cursor_value << ", term: " << term_names[error_recovery_token_idx] << "\n"; + ps.cursor_stack.push_back(new_cursor_value); + + ps.value_stack.emplace_back(term_value(no_type{}, ps.current_sp)); + } + + template + constexpr void shift(ParseState& ps, const std::string_view& sv, size16_t term_idx, size16_t new_cursor_value) const + { + if (ps.options.verbose) + ps.error_stream << ps.current_sp << " PARSE: Shift to " << new_cursor_value << ", term: " << sv << "\n"; + ps.cursor_stack.push_back(new_cursor_value); + + const auto& ftor = term_ftors[term_idx]; + ps.value_stack.emplace_back(ftor(term_tuple, sv, ps.current_sp)); + } + + template + constexpr void reduce(Context&& ctx, ParseState& ps, size16_t rule_info_idx) const + { + const auto& ri = rule_infos[rule_info_idx]; + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Reduced using rule " << ri.r_idx << " "; + write_rule_diag_str(ps.error_stream, rule_info_idx); + ps.error_stream << "\n"; + } + + ps.cursor_stack.erase(ps.cursor_stack.end() - ri.r_elements, ps.cursor_stack.end()); + size16_t new_cursor_value = parse_table[ps.cursor_stack.back()][ri.l_idx].shift; + + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Go to " << new_cursor_value << "\n"; + } + + ps.cursor_stack.push_back(new_cursor_value); + value_variant_type* start = ps.value_stack.data() + ps.value_stack.size() - ri.r_elements; + value_variant_type lvalue(ps.reductors.invoke(std::forward(ctx), ri.r_idx, start)); + ps.value_stack.erase(ps.value_stack.end() - ri.r_elements, ps.value_stack.end()); + ps.value_stack.emplace_back(std::move(lvalue)); + } + + template + constexpr void rr_conflict(Context&& ctx, ParseState& ps, size16_t rule_idx) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: R/R conflict encountered \n"; + } + reduce(std::forward(ctx), ps, rule_idx); + } + + template + constexpr bool pop_stacks(ParseState& ps) const + { + ps.cursor_stack.pop_back(); + if (ps.value_stack.size() != 0) + ps.value_stack.pop_back(); + + if (ps.cursor_stack.size() == 0) + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Could not recover from error \n"; + } + return false; + } + + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Recovering to state " << ps.cursor_stack.back() << "\n"; + } + + return true; + } + + template + constexpr void syntax_error(ParseState& ps) const + { + ps.error_stream << ps.current_sp << " PARSE: Syntax error: " << + "Unexpected '" << term_names[ps.current_term_idx] << "'" << "\n"; + } + + template + constexpr root_value_type& success(ParseState& ps) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Success \n"; + } + return std::get(ps.value_stack.front()); + } + + template + constexpr void consume_term(ParseState& ps) const + { + ps.current_sp.update(ps.current_it, ps.current_term_end_it); + ps.current_it = ps.current_term_end_it; + } + + template + constexpr void enter_recovery_mode(ParseState& ps) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Entering recovery mode \n"; + } + ps.enter_recovery_mode(); + } + + template + constexpr void leave_recovery_mode(ParseState& ps) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Leaving recovery mode \n"; + } + ps.leave_recovery_mode(); + } + + template + constexpr void enter_consume_mode(ParseState& ps) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Entering consume mode \n"; + } + ps.enter_consume_mode(); + } + + template + constexpr void leave_consume_mode(ParseState& ps) const + { + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Leaving consume mode \n"; + } + ps.leave_consume_mode(); + } + + template + constexpr bool consume_term_recovering(ParseState& ps) const + { + if (ps.current_term_idx == eof_idx) + return false; + if (ps.options.verbose) + { + ps.error_stream << ps.current_sp << " PARSE: Recovery, consuming term " << term_names[ps.current_term_idx] << " \n"; + } + consume_term(ps); + return true; + } + + template + constexpr size16_t get_current_term(ParseState& ps) const + { + if (ps.in_recovery_mode()) + return error_recovery_token_idx; + + if (ps.current_it != ps.current_term_end_it) + return ps.current_term_idx; + + if (ps.options.skip_whitespace) + { + auto after_ws = skip_whitespace(ps); + ps.current_sp.update(ps.current_it, after_ws); + ps.current_it = after_ws; + } + + using res_type = recognized_term; + + if (ps.current_it == ps.buffer_end) + { + ps.current_term_idx = eof_idx; + trace_recognized_term(ps); + return eof_idx; + } + + res_type res{}; + match_options opts; + opts.set_verbose(ps.options.verbose); + + if constexpr (generate_lexer) + { + res = regex::dfa_match(lexer_sm, opts, ps.current_sp, ps.current_it, ps.buffer_end, ps.error_stream); + } + else + { + lexer_type custom_lexer; + res = custom_lexer.match(opts, ps.current_sp, ps.current_it, ps.buffer_end, ps.error_stream); + } + + ps.current_term_end_it = res.it; + ps.current_term_idx = res.term_idx; + + if (ps.current_term_idx == uninitialized16) + { + unexpected_char(ps); + return uninitialized16; + } + else + { + trace_recognized_term(ps); + } + + return ps.current_term_idx; + } + + template + constexpr auto skip_whitespace(ParseState& ps) const + { + constexpr char space_chars_newline[] = { 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x20, 0x00 }; + constexpr char space_chars_no_newline[] = { 0x09, 0x0b, 0x0c, 0x0d, 0x20, 0x00 }; + + const auto space_chars = ps.options.skip_newline + ? space_chars_newline + : space_chars_no_newline; + + auto start = ps.current_it; + while (true) + { + if (start == ps.buffer_end) + break; + if (utils::find_char(*start, space_chars) == uninitialized) + break; + ++start; + } + return start; + } + + template + constexpr void unexpected_char(ParseState& ps) const + { + ps.error_stream << ps.current_sp << " PARSE: Unexpected character: " << *ps.current_it << "\n"; + } + + template + constexpr void trace_recognized_term(ParseState& ps) const + { + if (ps.options.verbose) + ps.error_stream << ps.current_sp << " PARSE: Recognized " << term_names[ps.current_term_idx] << " \n"; + } + + struct no_parser{}; + + template + constexpr void create_lexer(std::index_sequence) + { + if constexpr (generate_lexer) + { + regex::dfa_builder b(lexer_sm); + constexpr bool trivial_lexer = (true && ... && std::tuple_element_t::is_trivial); + if constexpr (trivial_lexer) + (void(regex::add_term_data_to_dfa(std::get(term_tuple).get_data(), b, no_parser{}, size16_t(I))), ...); + else + { + auto p = regex::create_regex_parser(b); + (void(regex::add_term_data_to_dfa(std::get(term_tuple).get_data(), b, p, size16_t(I))), ...); + } + } + } + + str_table term_names = { }; + str_table term_ids = { }; + str_table nterm_names = { }; + symbol right_sides[rule_count][max_rule_element_count] = { }; + rule_info rule_infos[rule_count] = { }; + utils::slice nterm_rule_slices[nterm_count] = { }; + state states[max_states] = { }; + parse_table_entry parse_table[max_states][term_count + nterm_count] = {}; + size16_t state_count = 0; + int term_precedences[term_count] = { }; + associativity term_associativities[term_count] = { }; + int rule_precedences[rule_count] = { }; + associativity rule_associativities[rule_count] = { }; + size16_t rule_last_terms[rule_count] = { }; + term_tuple_type term_tuple; + nterm_tuple_type nterm_tuple; + rule_tuple_type rule_tuple; + + using string_view_to_term_value_t = value_variant_type(*)(const term_tuple_type&, const std::string_view&, source_point); + string_view_to_term_value_t term_ftors[term_count] = {}; + + using dfa_type = regex::dfa; + dfa_type lexer_sm = {}; +}; + +template +parser(Root, Terms, NTerms, Rules&&) -> parser; + +template +parser(Root, Terms, NTerms, Rules&&, LexerUsage) -> parser; + +template +constexpr auto terms(const Terms&... terms) +{ + return std::make_tuple(detail::make_term(terms)...); +} + +template +constexpr auto nterms(NTerms... nterms) +{ + return std::make_tuple(nterms...); +} + +template +constexpr auto rules(Rules&&... rules) +{ + return std::make_tuple(std::move(rules)...); +} + +struct skip +{ + template + constexpr skip(T&&) {} +}; + +namespace regex +{ + class regex_lexer + { + public: + constexpr regex_lexer() + { + //0 1 2 3 4 5 6 7 8 9 + //regex_digit_09 regex_primary * + ? | ( ) { } + + specials[utils::char_to_idx('*')] = 2; + specials[utils::char_to_idx('+')] = 3; + specials[utils::char_to_idx('?')] = 4; + specials[utils::char_to_idx('|')] = 5; + specials[utils::char_to_idx('(')] = 6; + specials[utils::char_to_idx(')')] = 7; + specials[utils::char_to_idx('{')] = 8; + specials[utils::char_to_idx('}')] = 9; + } + + template + constexpr auto match( + match_options options, + source_point sp, + Iterator start, + Iterator end, + ErrorStream& error_stream) + { + if (start == end) + return not_recognized(start); + + char c = *start; + + auto res = [&](size16_t idx, size_t len) + { return recognized(idx, len, options, sp, start, error_stream); }; + + if (specials[utils::char_to_idx(c)] != 0) + return res(specials[utils::char_to_idx(c)], 1); + + if (utils::is_dec_digit(c)) + return res(0, 1); + + size_t len = 0; + bool ok = match_primary(start, end, len); + if (ok) + return res(1, len); + + return not_recognized(start); + } + + private: + size16_t specials[meta::distinct_chars_count] = {}; + + template + constexpr bool match_primary(Iterator start, Iterator end, size_t& len) + { + len = 0; + bool ok = match_escaped(start, end, len); + if (!ok) + return false; + if (len != 0) + return true; + + ok = match_range(start, end, len); + if (!ok) + return false; + if (len != 0) + return true; + + ok = utils::is_printable(*start); + if (ok) + len = 1; + return ok; + } + + template + constexpr bool match_range(Iterator start, Iterator end, size_t& len) + { + char c = *start; + if (c == '[') + { + len = 1; + ++start; + if (start == end) + { + len = 0; + return false; + } + c = *start; + if (c == '^') + { + len = 2; + ++start; + } + if (start == end) + { + len = 0; + return false; + } + while (start != end && *start != ']') + { + size_t item_len = 0; + bool ok = match_range_item(start, end, item_len); + if (!ok) + { + len = 0; + return false; + } + len += item_len; + start += item_len; + } + if (start == end) + { + len = 0; + return false; + } + len++; + } + return true; + } + + template + constexpr bool match_range_item(Iterator start, Iterator end, size_t& len) + { + len = 0; + bool ok = match_escaped(start, end, len); + if (!ok) + return false; + + if (len == 0) + { + ok = utils::is_printable(*start); + if (!ok) + return false; + len = 1; + } + + start += len; + + if (*start == '-') + { + len++; + + ++start; + if (start == end || *start == ']') + return false; + + size_t range_end_len = 0; + bool escaped_ok = match_escaped(start, end, range_end_len); + if (!escaped_ok) + return false; + + if (range_end_len == 0) + { + ok = utils::is_printable(*start); + if (!ok) + return false; + len += 1; + } + else + len += range_end_len; + } + + return ok; + } + + template + constexpr bool match_escaped(Iterator start, Iterator end, size_t& len) + { + char c = *start; + + if (c == '\\') + { + ++start; + if (start == end) + return false; + c = *start; + len = 2; + if (c == 'x') + { + ++start; + if (start == end) + return true; + c = *start; + if (!utils::is_hex_digit(c)) + return true; + ++start; + len = 3; + if (start == end) + return true; + c = *start; + if (!utils::is_hex_digit(c)) + return true; + len = 4; + return true; + } + return utils::is_printable(c); + } + return true; + } + + template + constexpr auto recognized( + size16_t idx, + size_t len, + match_options options, + source_point sp, + Iterator start, + ErrorStream& error_stream) + { + Iterator term_end = start; + if (options.verbose) + error_stream << sp << " LEXER MATCH: Recognized " << idx << " \n"; + term_end += len; + sp.update(start, term_end); + return recognized_term{term_end, idx}; + } + + template + constexpr auto not_recognized(Iterator start) const + { + return recognized_term{start, uninitialized16}; + } + }; + + constexpr char regex_char(std::string_view sv, size_t& len) + { + if (sv[0] == '\\') + { + if (sv[1] == 'x') + { + if (sv.size() == 2 || !utils::is_hex_digit(sv[2])) + { + len = 2; + return 0; + } + else if (sv.size() == 3 || !utils::is_hex_digit(sv[3])) + { + len = 3; + return hex_digits_to_char('0', sv[2]); + } + else + { + len = 4; + return hex_digits_to_char(sv[2], sv[3]); + } + } + else + { + len = 2; + return sv[1]; + } + } + else + { + len = 1; + return sv[0]; + } + } + + constexpr char_subset string_view_to_subset(std::string_view sv) + { + char_subset cs; + + if (sv[0] == '.') + cs.flip(); + else if (sv[0] == '[') + { + bool flip = false; + size_t i = 1; + if (sv[1] == '^') + { + i++; + flip = true; + } + while (sv[i] != ']') + { + size_t len = 0; + char c1 = regex_char(sv.substr(i), len); + i += len; + if (sv[i] == '-') + { + ++i; + char c2 = regex_char(sv.substr(i), len); + cs.add_range(char_range{c1, c2}); + } + else + cs.set(utils::char_to_idx(c1)); + } + if (flip) + cs.flip(); + } + else + { + size_t len = 0; + cs.set(utils::char_to_idx(regex_char(sv, len))); + } + + return cs; + } + + template + constexpr auto create_regex_parser(Builder& b) + { + using slice = utils::slice; + using namespace ftors; + + constexpr nterm expr("expr"); + constexpr nterm alt("alt"); + constexpr nterm concat("concat"); + constexpr nterm q_expr("q_expr"); + constexpr nterm primary("primary"); + constexpr nterm number("number"); + + constexpr custom_term regex_digit_09("regex_digit_09", [](auto sv) { return size32_t(sv[0]) - '0'; }); + constexpr custom_term regex_primary("regex_primary", string_view_to_subset); + + return parser( + expr, + terms(regex_digit_09, regex_primary, '*', '+', '?', '|', '(', ')', '{', '}'), + nterms(expr, alt, concat, q_expr, primary, number), + rules( + number(regex_digit_09), + number(number, regex_digit_09) >= [](size32_t n, size32_t x){ return n * 10 + x; }, + primary(regex_digit_09) >= [&b](size32_t number){ return b.primary_char(char(number + '0')); }, + primary(regex_primary) >= [&b](const auto& s){ return b.primary_subset(s); }, + primary('(', expr, ')') >= _e2, + q_expr(primary), + q_expr(primary, '*') >= [&b](slice s, skip) { return b.star(s); }, + q_expr(primary, '+') >= [&b](slice s, skip) { return b.plus(s); }, + q_expr(primary, '?') >= [&b](slice s, skip) { return b.opt(s); }, + q_expr(primary, '{', number, '}') >= [&b](slice s, skip, size32_t n, skip) { return b.rep(s, n); }, + concat(q_expr), + concat(concat, q_expr) >= [&b](slice s1, slice s2) { return b.cat(s1, s2); }, + alt(concat), + alt(alt, '|', alt) >= [&b](slice s1, skip, slice s2) { return b.alt(s1, s2); }, + expr(alt) + ), + use_lexer{} + ); + } + + template + constexpr size32_t analyze_dfa_size(const char (&pattern)[N]) + { + dfa_size_analyzer a; + auto p = create_regex_parser(a); + buffers::cstring_buffer buffer(pattern); + detail::no_stream s{}; + auto res = p.parse( + parse_options{}.set_skip_whitespace(false), + buffer, + s); + if (!res.has_value()) + throw std::runtime_error("invalid regex"); + return res.value().n; + } + + template + constexpr void write_regex_parser_diag_msg(Stream& s) + { + regex::dfa_size_analyzer a; + auto p = regex::create_regex_parser(a); + p.write_diag_str(s); + } + + template + class expr + { + public: + static const size32_t dfa_size = analyze_dfa_size(Pattern); + + constexpr expr() + { + dfa_builder b(sm); + auto p = create_regex_parser(b); + detail::no_stream stream{}; + auto s = p.parse( + parse_options{}.set_skip_whitespace(false), + buffers::cstring_buffer(Pattern), + stream + ); + if (!s.has_value()) + throw std::runtime_error("invalid regex"); + b.mark_end_states(s.value(), 0); + } + + template + constexpr void debug_parse(Stream& s) const + { + regex::dfa_size_analyzer a; + auto p = regex::create_regex_parser(a); + p.parse( + parse_options{}.set_skip_whitespace(false).set_verbose(), + buffers::cstring_buffer(Pattern), + s + ); + } + + template + constexpr bool match(const char (&str)[N]) const + { + return match(buffers::cstring_buffer(str)); + } + + template + constexpr bool match(const Buffer& buf) const + { + detail::no_stream s; + return match(buf, s); + } + + template + constexpr bool match(const Buffer& buf, Stream& s) const + { + return match(match_options{}, buf, s); + } + + template + constexpr bool match(match_options opts, const Buffer& buf, Stream& s) const + { + auto res = dfa_match(sm, opts, source_point{}, buf.begin(), buf.end(), s); + if (res.term_idx == 0 && res.it == buf.end()) + return true; + else + { + if (res.term_idx == 0) + s << "Leftover text after recognition: " << buf.get_view(res.it, buf.end()) << "\n"; + else + s << "Unexpected char: " << utils::c_names.name(*res.it) << "\n"; + return false; + } + } + + template + constexpr void write_diag_str(Stream& stream) const + { + regex::write_dfa_diag_str(sm, stream); + } + + private: + dfa sm; + }; +} + +template +class regex_term : public term +{ +public: + using internal_value_type = std::string_view; + + static const size_t dfa_size = regex::analyze_dfa_size(Pattern); + static const bool is_trivial = false; + + static const size_t pattern_size = std::size(Pattern); + + constexpr regex_term(associativity a = associativity::no_assoc) : + regex_term(nullptr, 0, a) + {} + + constexpr regex_term(int precedence = 0, associativity a = associativity::no_assoc) : + regex_term(nullptr, precedence, a) + {} + + constexpr regex_term(const char *custom_name, int precedence = 0, associativity a = associativity::no_assoc) : + term(precedence, a), + custom_name(custom_name) + { + id[0] = 'r'; + id[1] = '_'; + utils::copy_array(&id[2], Pattern, std::make_index_sequence{}); + } + + constexpr const char* get_name() const { return custom_name ? custom_name : id; } + constexpr const char* get_id() const { return id; } + constexpr auto get_data() const { return regex::regex_pattern_data{ Pattern }; } + + constexpr const auto& get_ftor() const { return detail::pass_sv; } + +private: + char id[pattern_size + 2] = {}; + const char* custom_name = nullptr; +}; + +} // namespace exaudf_ctpg + +#endif diff --git a/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc b/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc new file mode 100644 index 00000000..f5584a33 --- /dev/null +++ b/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc @@ -0,0 +1,199 @@ +#include "script_option_lines_ctpg.h" +#include "ctpg.hpp" +#include +#include +#include + +using namespace exaudf_ctpg; +using namespace exaudf_ctpg::ftors; + + +namespace ExecutionGraph +{ + +namespace OptionsLineParser +{ + + +namespace CTPG { + + +struct Option { + std::string key; + std::string value; + source_point start; + source_point end; +}; + +using options_type = std::vector