From 87dca53010d922454fb611202d5e1096aeb89ea7 Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Mon, 6 May 2024 21:03:56 -0600 Subject: [PATCH 1/2] Correct error message to "colon" instead of "semi-colon" --- yaml.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yaml.lua b/yaml.lua index 55211dc..4fc0972 100644 --- a/yaml.lua +++ b/yaml.lua @@ -471,7 +471,7 @@ Parser.parseHash = function (self, hash) if self:isInline() then local id = self:advanceValue() - self:expect(":", "expected semi-colon after id") + self:expect(":", "expected colon after id") self:ignoreSpace() if self:accept("indent") then indents = indents + 1 @@ -487,7 +487,7 @@ Parser.parseHash = function (self, hash) while self:peekType("id") do local id = self:advanceValue() - self:expect(":","expected semi-colon after id") + self:expect(":","expected colon after id") self:ignoreSpace() hash[id] = self:parse() self:ignoreSpace(); From ff47c72bf30c74c9a29b101cc186837a16175e0f Mon Sep 17 00:00:00 2001 From: Kyle Butt Date: Mon, 6 May 2024 21:05:04 -0600 Subject: [PATCH 2/2] Improve parsing of list-object combinations Use bounded lookahead to improve parsing of lists of objects and empty keys that should parse as null. Previously an empty key would parse as an object that contained keys that should have been siblings. Includes an expanded test of null parsing and a hie.yaml file that occurred in the wild and did not parse correctly. 
--- samples/hie.lua | 120 ++++++++++++++++++++++++++++++++++++++++++++++ samples/hie.yaml | 68 ++++++++++++++++++++++++++ samples/null.lua | 13 +++++ samples/null.yaml | 10 ++++ yaml.lua | 101 +++++++++++++++++++++++++++++++++++--- 5 files changed, 305 insertions(+), 7 deletions(-) create mode 100644 samples/hie.lua create mode 100644 samples/hie.yaml diff --git a/samples/hie.lua b/samples/hie.lua new file mode 100644 index 0000000..3e1ffdb --- /dev/null +++ b/samples/hie.lua @@ -0,0 +1,120 @@ +return { + ["cradle"] = { + ["multi"] = { + [1] = { + ["path"] = "./bazel-bin", + ["config"] = { + ["cradle"] = {} + } + }, + [2] = { + ["path"] = "./bazel-out", + ["config"] = { + ["cradle"] = {} + } + }, + [3] = { + ["path"] = "./bazel-testlogs", + ["config"] = { + ["cradle"] = {} + } + }, + [4] = { + ["path"] = "./bazel-yesod-bridge", + ["config"] = { + ["cradle"] = {} + } + }, + [5] = { + ["path"] = "./bazel-hls-bin", + ["config"] = { + ["cradle"] = {} + } + }, + [6] = { + ["path"] = "./bazel-hls-out", + ["config"] = { + ["cradle"] = {} + } + }, + [7] = { + ["path"] = "./bazel-hls-testlogs", + ["config"] = { + ["cradle"] = {} + } + }, + [8] = { + ["path"] = "./bazel-hls-yesod-bridge", + ["config"] = { + ["cradle"] = {} + } + }, + [9] = { + ["path"] = "./bridge-site", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./bridge-site/.hie-bios" + } + } + } + }, + [10] = { + ["path"] = "./cassandra-util", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./cassandra-util/.hie-bios" + } + } + } + }, + [11] = { + ["path"] = "./conduit-util", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./conduit-util/.hie-bios" + } + } + } + }, + [12] = { + ["path"] = "./hsx-util", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./hsx-util/.hie-bios" + } + } + } + }, + [13] = { + ["path"] = "./page", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./page/.hie-bios" + } + } + } + }, + [14] = { + ["path"] = 
"./wai-practice", + ["config"] = { + ["cradle"] = { + ["bios"] = { + ["program"] = "./wai-practice/.hie-bios" + } + } + } + }, + [15] = { + ["path"] = "./", + ["config"] = { + ["cradle"] = {} + } + } + } + } +} diff --git a/samples/hie.yaml b/samples/hie.yaml new file mode 100644 index 0000000..c09d4ac --- /dev/null +++ b/samples/hie.yaml @@ -0,0 +1,68 @@ +cradle: + multi: + - path: "./bazel-bin" + config: + cradle: + none: + - path: "./bazel-out" + config: + cradle: + none: + - path: "./bazel-testlogs" + config: + cradle: + none: + - path: "./bazel-yesod-bridge" + config: + cradle: + none: + - path: "./bazel-hls-bin" + config: + cradle: + none: + - path: "./bazel-hls-out" + config: + cradle: + none: + - path: "./bazel-hls-testlogs" + config: + cradle: + none: + - path: "./bazel-hls-yesod-bridge" + config: + cradle: + none: + - path: "./bridge-site" + config: + cradle: + bios: + program: "./bridge-site/.hie-bios" + - path: "./cassandra-util" + config: + cradle: + bios: + program: "./cassandra-util/.hie-bios" + - path: "./conduit-util" + config: + cradle: + bios: + program: "./conduit-util/.hie-bios" + - path: "./hsx-util" + config: + cradle: + bios: + program: "./hsx-util/.hie-bios" + - path: "./page" + config: + cradle: + bios: + program: "./page/.hie-bios" + - path: "./wai-practice" + config: + cradle: + bios: + program: "./wai-practice/.hie-bios" + - path: "./" + config: + cradle: + none: diff --git a/samples/null.lua b/samples/null.lua index 3988251..a943310 100644 --- a/samples/null.lua +++ b/samples/null.lua @@ -1,4 +1,17 @@ return { + ["empties"] = { + [1] = {}, + [2] = { + ["nonEmptyInitial"] = 7, + ["nonEmptyFinal"] = 8, + }, + [3] = { + ["nonEmptyFinal"] = 9, + }, + [4] = { + ["nonEmptyInitial"] = 10, + } + }, ["end"] = "test passed?", ["notnull"] = true, ["test"] = "A test for null values" diff --git a/samples/null.yaml b/samples/null.yaml index 26a7068..af0a294 100644 --- a/samples/null.yaml +++ b/samples/null.yaml @@ -3,6 +3,16 @@ thisis: null thistoo: 
NULL notnull: yes + empties: + - emptySolo: + - nonEmptyInitial: 7 + emptyCentral: + nonEmptyFinal: 8 + - emptyInitial: + nonEmptyFinal: 9 + - nonEmptyInitial: 10 + emptyFinal: + - emptyTerminal: tildeis: ~ capitalized: Null end: test passed? diff --git a/yaml.lua b/yaml.lua index 4fc0972..d568c42 100644 --- a/yaml.lua +++ b/yaml.lua @@ -336,6 +336,10 @@ Parser.parse = function (self) elseif c.token.const == true then self:advanceValue(); result = c.token.value + -- handle the case where a label is followed by a dedent and should parse as + -- null + elseif c.token[1] == "dedent" then + result = nil else error("ParseError: unexpected token '" .. c.token[1] .. "'" .. context(c.token.input)) end @@ -465,32 +469,109 @@ Parser.parseTextBlock = function (self, sep) return result end -Parser.parseHash = function (self, hash) +-- @param hash table|nil Table to populate with labels +-- @param listHash boolean True if this is a hash that occurs at the +-- start of a list +-- @return the fully parsed hash +Parser.parseHash = function (self, hash, listHash) hash = hash or {} local indents = 0 + local acceptedImpliedIndent = false if self:isInline() then local id = self:advanceValue() self:expect(":", "expected colon after id") self:ignoreSpace() - if self:accept("indent") then + if not listHash and self:accept("indent") then indents = indents + 1 hash[id] = self:parse() - else + elseif not listHash then hash[id] = self:parse() if self:accept("indent") then indents = indents + 1 end + else -- listHash == true + -- If the next token is another identifier *without* an indent, this key is null, as in the case below. NOTE(review): the bare `return` that follows yields nil rather than `hash` (unlike the "-" case), so parseList appends nothing for this item; confirm that is intended. + --[[ + listof: + - nonNullValue: 7 + - nullValue: + scalarValue: + --]] + if self:peekType("id") then + hash[id] = nil + return + -- If the next token is another list item at the same indent level, then + -- parse this value as null. 
+ --[[ + listof: + - nullValue: + - nonNullValue: 7 + --]] + elseif listHash and self:peekType("-") then + hash[id] = nil + return hash + -- if the next token is a single indent level and then an identifier + -- parse this key as null. + --[[ + listof: + - nullValue: + nonNullValue: 7 + --]] + elseif listHash and self:peekType("indent") and self:peekType("id", 2) then + self:advance() + acceptedImpliedIndent = true + indents = indents + 1 + hash[id] = nil + -- if the next token is a single indent level and then something other + -- than an identifier, consume the indent, and then parse recursively. + -- Handles these cases among others + --[[ + listof: + - nonNullValue: + subKey: 7 + --]] + --[[ + listof: + - keyedSubList: + - listItem: 1 + --]] + elseif listHash and self:peekType("indent") then + self:advance() + acceptedImpliedIndent = true + indents = indents + 1 + hash[id] = self:parse() + self:ignoreSpace(); + -- if the next token is not an indent, and is not one of the above cases + -- we likely have something on the same line like this: + --[[ + value: 7 + --]] + else + hash[id] = self:parse() + self:ignoreSpace(); + end end - self:ignoreSpace(); + end + + -- Consume the implied indent if it wasn't already consumed above. + if listHash and not acceptedImpliedIndent and self:peekType("indent") then + self:advance() + indents = indents + 1 end while self:peekType("id") do local id = self:advanceValue() self:expect(":","expected colon after id") self:ignoreSpace() - hash[id] = self:parse() - self:ignoreSpace(); + -- If the next token is another id at the same indent level, this key should + -- parse as null. 
+ if self:peekType("id") then + hash[id] = nil + else + hash[id] = self:parse() + self:ignoreSpace(); + end end while indents > 0 do @@ -533,7 +614,13 @@ Parser.parseList = function (self) local list = {} while self:accept("-") do self:ignoreSpace() - list[#list + 1] = self:parse() + -- Check for the case of a hash starting as a list item, and pass that on + -- to the parseHash function directly. + if self:peekType("id") then + list[#list + 1] = self:parseHash({}, true) + else + list[#list + 1] = self:parse() + end self:ignoreSpace() end