-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.cpp
143 lines (135 loc) · 4.75 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// LICENSE
// This software is free for use and redistribution while including this
// license notice, unless:
// 1. is used for commercial or non-personal purposes, or
// 2. used for a product which includes or associated with a blockchain or other
// decentralized database technology, or
// 3. used for a product which includes or associated with the issuance or use
// of cryptographic or electronic currencies/coins/tokens.
// On all of the mentioned cases, an explicit and written permission is required
// from the Author (Ohad Asor).
// Contact [email protected] for requesting a permission. This license may be
// modified over time by the Author.
// CSV parser tutorial - part 6
//
// In this part we introduce parsing values separated by a comma.
#include <optional>
#include <limits>
#include "parser.h"
#ifdef min
# undef min
#endif
#ifdef max
# undef max
#endif
using namespace std;
using namespace idni;
// Parser for a single line of comma separated values. Each value is an
// integer, a double-quoted string or an empty (null) value.
struct csv_parser {
	// one parsed cell: bool marks a null value (it is also what an
	// out-of-range integer degrades to), int_t an integer, string a string
	using value = variant<bool, int_t, string>;
	// a parsed line: the cells in the order they were read
	using row = vector<value>;

	// Registers every nonterminal name in `nts`, builds the grammar from
	// rules() with "start" as the start symbol and hands it to the parser.
	csv_parser() :
		cc(predefined_char_classes({ "digit", "printable" }, nts)),
		start(nts("start")),
		digit(nts("digit")),
		digits(nts("digits")),
		integer(nts("integer")),
		printable(nts("printable")),
		val(nts("val")),
		nullvalue(nts("nullvalue")),
		escaping(nts("escaping")),
		escaped(nts("escaped")),
		unescaped(nts("unescaped")),
		strchar(nts("strchar")),
		strchars(nts("strchars")),
		str(nts("string")),
		row_(nts("row")),
		row_rest(nts("row_rest")),
		g(nts, rules(), start, cc),
		p(g) {}

	// Parses `size` characters beginning at `data` as one row.
	// On success returns the parsed row. On failure writes the parser's
	// error message to cerr and returns an empty optional.
	optional<row> parse(const char* data, size_t size) {
		auto result = p.parse(data, size);
		if (result.found) return get_row(result);
		cerr << result.parse_error << '\n';
		return nullopt;
	}

private:
	// Members are initialized in declaration order: the nonterminals must
	// exist before `g`, because constructing `g` calls rules(), which
	// reads them.
	nonterminals<> nts;
	char_class_fns<> cc;
	prods<> start, digit, digits, integer, printable, val, nullvalue;
	prods<> escaping, escaped, unescaped, strchar, strchars, str;
	prods<> row_, row_rest;
	grammar<> g;
	parser<> p;

	// Builds the production rules of the grammar.
	prods<> rules() {
		prods<> out;
		// terminals used by the rules below
		prods<> dash('-'), dquote('"'), backslash('\\'), separator(',');
		// the empty production
		prods<> empty{ lit() };

		// one or more digits, optionally preceded by a minus sign
		out(digits, digit | (digits + digit));
		out(integer, digits | (dash + digits));
		// characters which have to be escaped inside a string
		out(escaping, dquote | backslash);
		// presumably: any printable character which is not an escaping one
		out(unescaped, printable & ~escaping);
		// a backslash followed by an escaping character
		out(escaped, backslash + escaping);
		out(strchar, unescaped | escaped);
		// zero or more string characters
		out(strchars, (strchar + strchars) | empty);
		// a string is a sequence of string characters in double quotes
		out(str, dquote + strchars + dquote);
		// a null value is the empty input
		out(nullvalue, empty);
		out(val, integer | str | nullvalue);
		// a row is a first value followed by the rest of the row
		out(row_, val + row_rest);
		// the rest is empty or a comma, a value and again the rest
		out(row_rest, (separator + val + row_rest) | empty);
		// the whole input is a single row
		out(start, row_);
		return out;
	}

	// Walks the parsed forest in `res` and collects one value for every
	// integer, string and nullvalue node entered during the traversal.
	row get_row(typename parser<>::result& res) {
		row cells;

		// converts the terminals of an integer node into a value; when the
		// conversion yields nothing, reports the allowed range on cerr and
		// falls back to the bool `false`
		auto to_int = [&res](const auto& node) -> value {
			auto number = res.get_terminals_to_int(node);
			if (!number) {
				cerr << "out of range, allowed range is from: "
					<< numeric_limits<int_t>::min() << " to: "
					<< numeric_limits<int_t>::max() << '\n';
				return false;
			}
			return number.value();
		};

		// called when the traversal enters a node; node.first is the
		// node's literal, which is compared with the nonterminals kept as
		// members of this object
		auto on_enter = [&](const parser<>::pnode& node) {
			if (node.first == integer) {
				cells.push_back(to_int(node));
				return;
			}
			if (node.first == str) {
				cells.push_back(res.get_terminals(node));
				return;
			}
			// a null value is stored as the bool `true`
			if (node.first == nullvalue) cells.push_back(true);
		};

		res.get_forest()->traverse(on_enter);
		return cells;
	}
};
// Stream output of a single parsed value: an integer is printed as a
// number, a bool (the null marker) as the text NULL and a string as is.
ostream& operator<<(ostream& os, const csv_parser::value& v) {
	if (const int_t* number = get_if<int_t>(&v)) return os << *number;
	if (get_if<bool>(&v)) return os << "NULL";
	return os << get<string>(v);
}
int main() {
cout << "Validator for rows of comma separated values of string, "
<<"integer or NULL. Enter a value per line or Ctrl-D to quit\n";
csv_parser p;
string line;
while (getline(cin, line)) {
cout << "entered: `" << line << "`\n";
// instead of getting just a value we now get a row of values
optional<csv_parser::row> ropt =
p.parse(line.c_str(), line.size());
if (!ropt) continue;
auto& r = ropt.value();
cout << "parsed row: "; // print parsed values
for (size_t i = 0; i != r.size(); ++i)
cout << (i ? ", " : "") << r[i];
cout << '\n';
}
}