-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.cpp
135 lines (127 loc) · 4.32 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
// LICENSE
// This software is free for use and redistribution while including this
// license notice, unless:
// 1. is used for commercial or non-personal purposes, or
// 2. used for a product which includes or associated with a blockchain or other
// decentralized database technology, or
// 3. used for a product which includes or associated with the issuance or use
// of cryptographic or electronic currencies/coins/tokens.
// On all of the mentioned cases, an explicit and written permission is required
// from the Author (Ohad Asor).
// Contact [email protected] for requesting a permission. This license may be
// modified over time by the Author.
// CSV parser tutorial - part 8
//
// In this part we replace the programmatically defined grammar with a grammar
// written in TGF.
#include <optional>
#include <limits>
#include "parser.h"
#ifdef min
# undef min
#endif
#ifdef max
# undef max
#endif
using namespace std;
using namespace idni;
// CSV parser whose grammar is supplied as a TGF string instead of being built
// programmatically. Parsing yields rows of values, where each value is an
// integer, a string, or a bool marker (true for a null field).
struct csv_parser {
	// The CSV grammar in TGF. Adjacent C++ string literals are merged by the
	// compiler into one string; inside TGF, symbols separated by whitespace
	// are concatenated.
	const char* csv_tgf =
		" @use char classes digit, printable. "
		// esc and quote give the literals a name so that other rules can
		// refer to them
		" esc => '\\\\'. "
		" quote => '\"'. "
		" digits => digit | digits digit. "
		" integer => digits | '-' digits. "
		" escaping => quote | esc. "
		" unescaped => printable & ~escaping. "
		" escaped => esc escaping. "
		" strchar => unescaped | escaped. "
		" strchars => strchar strchars | null. "
		" str => quote strchars quote. "
		// `null` is a reserved TGF word denoting the null literal
		" nullvalue => null. "
		" val => integer | str | nullvalue. "
		// TGF names live apart from C++ names, so plain `row` / `rows`
		// can be used here without a trailing underscore
		" row => val row_rest. "
		" row_rest => ',' val row_rest | null. "
		" eol => '\\n' | \"\\r\\n\". "
		" rows => row rows_rest. "
		" rows_rest => eol row rows_rest | null. "
		" start => rows. "
	;

	// A single field: bool (null marker), integer, or string.
	using value = variant<bool, int_t, string>;
	// One CSV line.
	using row = vector<value>;
	// The whole parsed document.
	using rows = vector<row>;

	// Builds the grammar from the TGF string and hands it to the parser.
	csv_parser() : g(tgf<>::from_string(nts, csv_tgf)), p(g) {}

	// Parses `size` characters starting at `data`.
	// Returns the extracted rows, or an empty optional after printing the
	// parser's error message to cerr when the input does not match.
	optional<rows> parse(const char* data, size_t size) {
		auto outcome = p.parse(data, size);
		if (outcome.found) return get_rows(outcome);
		cerr << outcome.parse_error << '\n';
		return nullopt;
	}

private:
	// Declaration order matters: the constructor initializes g from nts and
	// p from g.
	nonterminals<> nts;
	grammar<> g;
	parser<> p;

	// Walks the parse forest and collects the values of every row.
	rows get_rows(typename parser<>::result& res) {
		rows table;

		// Turns the terminals below an `integer` node into a value. When the
		// conversion fails, reports the allowed range on cerr and yields the
		// bool `false` instead.
		auto to_int_value = [&res](const auto& node) -> value {
			auto number = res.get_terminals_to_int(node);
			if (!number) {
				cerr << "out of range, allowed range is from: "
					<< numeric_limits<int_t>::min() << " to: "
					<< numeric_limits<int_t>::max() << '\n';
				return false;
			}
			return number.value();
		};

		// Called on entering each node of the forest.
		auto on_enter = [&table, &to_int_value, &res, this](
			const parser<>::pnode& node)
		{
			// terminals carry no structure we care about
			if (!node.first.nt()) return;

			// dispatch on the nonterminal's name
			const string name = nts.get(node.first.n());
			if (name == "row") {
				// every row node opens a fresh row in the result
				table.emplace_back();
				return;
			}
			if (name == "integer") {
				table.back().push_back(to_int_value(node));
				return;
			}
			if (name == "str") {
				table.back().push_back(res.get_terminals(node));
				return;
			}
			if (name == "nullvalue") {
				// `true` marks a null field
				table.back().push_back(true);
			}
		};

		res.get_forest()->traverse(on_enter);
		return table;
	}
};
// Streams a CSV value: integers are printed as numbers, any bool alternative
// is printed as NULL, and everything else is printed as its string.
ostream& operator<<(ostream& os, const csv_parser::value& v) {
	if (const int_t* number = get_if<int_t>(&v))
		return os << *number;
	if (holds_alternative<bool>(v))
		return os << "NULL";
	// remaining alternative is the string
	return os << get<string>(v);
}
int main() {
cout << "CSV parser. Use comma separated values of string, "
<< "integer or NULL. Use a new line to separate rows. "
<< "Enter a CSV and use Ctrl-D to start parsing\n";
csv_parser p;
istreambuf_iterator<char> begin(cin), end;
string input(begin, end);
cout << "entered: `" << input << "`\n";
optional<csv_parser::rows> rsopt = p.parse(input.c_str(), input.size());
if (!rsopt) return 1;
for (const csv_parser::row& r : rsopt.value()) {
cout << "parsed row: ";
for (size_t i = 0; i != r.size(); ++i)
cout << (i ? ", " : "") << r[i];
cout << '\n';
}
return 0;
}