Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: JSON block string encoding #843

Merged
merged 13 commits into from
Jul 15, 2024
94 changes: 94 additions & 0 deletions pkg/ast/ast_val_string_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,100 @@ func (d *Document) StringValueIsBlockString(ref int) bool {
return d.StringValues[ref].BlockString
}

// BlockStringValueContentRawBytes returns the part of the input that lies
// between the quote characters surrounding the block string ref.
//
// The recorded content range of a string value does not cover the whitespace
// around it (the lexer ignores it), but that whitespace is needed to work out
// the indentation of a block string. The range is therefore widened to the
// nearest '"' on either side. When no quote is found before the content the
// result starts at the beginning of the input; when none is found after it the
// result extends to d.Input.Length.
//
// The returned slice is a sub-slice of d.Input.RawBytes, not a copy.
func (d *Document) BlockStringValueContentRawBytes(ref int) []byte {
	raw := d.Input.RawBytes
	content := d.StringValues[ref].Content

	// Walk backwards from the byte preceding the content to the closest quote;
	// the block begins right after it.
	from := 0
	for pos := int(content.Start) - 1; pos >= 0; pos-- {
		if raw[pos] == '"' {
			from = pos + 1
			break
		}
	}

	// Walk forwards from the end of the content to the closest quote;
	// the block ends right before it.
	to := d.Input.Length
	for pos := int(content.End); pos < d.Input.Length; pos++ {
		if raw[pos] == '"' {
			to = pos
			break
		}
	}

	return raw[from:to]
}

// BlockStringValueContentRawString is the string variant of
// BlockStringValueContentRawBytes: it returns the same raw block string
// content, converted with unsafebytes.BytesToString.
//
// NOTE(review): the conversion presumably does not copy, so the string would
// share memory with the document input - confirm before retaining the result.
func (d *Document) BlockStringValueContentRawString(ref int) string {
	raw := d.BlockStringValueContentRawBytes(ref)
	return unsafebytes.BytesToString(raw)
}

// BlockStringValueContentBytes returns the value of the block string ref,
// computed from the raw text between its quotes as described by
// https://spec.graphql.org/October2021/#BlockStringValue():
//
//  1. the raw text is split into lines,
//  2. the smallest indentation of any line after the first that is not
//     whitespace-only is removed from every line after the first,
//  3. leading and trailing whitespace-only lines are dropped,
//  4. the remaining lines are joined with '\n'.
//
// A block string that has no line with non-whitespace content yields an empty
// (non-nil) result.
//
// NOTE(review): the `\"""` escape sequence is not unescaped here - confirm
// whether that is handled elsewhere.
func (d *Document) BlockStringValueContentBytes(ref int) []byte {
	// NOTE: This implementation follows the steps of the spec one by one.
	// It likely could be optimized for performance.

	// split the raw value into lines
	rawValue := d.BlockStringValueContentRawBytes(ref)
	lines := splitBytesIntoLines(rawValue)

	// find the common indent size (-1 means no common indent);
	// the first line and whitespace-only lines do not take part in it
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		indent := leadingWhitespaceCount(lines[i])
		if indent == len(lines[i]) {
			continue
		}
		if commonIndent == -1 || indent < commonIndent {
			commonIndent = indent
		}
	}

	// remove the common indent from every line but the first; a line shorter
	// than the common indent can only be whitespace and becomes empty
	if commonIndent != -1 {
		for i := 1; i < len(lines); i++ {
			indent := commonIndent
			if len(lines[i]) < indent {
				indent = len(lines[i])
			}
			lines[i] = lines[i][indent:]
		}
	}

	// find the first and last lines that are not whitespace-only
	// (-1 means that there is no such line)
	firstLine, lastLine := -1, -1
	for i, line := range lines {
		if leadingWhitespaceCount(line) == len(line) {
			continue
		}
		if firstLine == -1 {
			firstLine = i
		}
		lastLine = i
	}

	// every line is blank: the spec strips all of them, so nothing is left
	if firstLine == -1 {
		return []byte{}
	}

	// join the lines to keep and return the result
	return bytes.Join(lines[firstLine:lastLine+1], []byte{'\n'})
}

// BlockStringValueContentString is the string variant of
// BlockStringValueContentBytes: it returns the processed block string value,
// converted with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentString(ref int) string {
	content := d.BlockStringValueContentBytes(ref)
	return unsafebytes.BytesToString(content)
}

func (d *Document) StringValuesAreEquals(left, right int) bool {
return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) &&
bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right))
Expand Down
16 changes: 15 additions & 1 deletion pkg/ast/ast_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ast

import (
"bytes"
"encoding/json"
"fmt"
"io"

Expand Down Expand Up @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error {
buf.Write(literal.TRUE)
}
case ValueKindString:
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
if d.StringValueIsBlockString(value.Ref) {
content := d.BlockStringValueContentString(value.Ref)

enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(content); err != nil {
return err
}

// Remove the extra newline that Encode adds
buf.Truncate(buf.Len() - 1)
} else {
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
}
case ValueKindList:
buf.WriteByte(literal.LBRACK_BYTE)
for ii, ref := range d.ListValues[value.Ref].Refs {
Expand Down
52 changes: 45 additions & 7 deletions pkg/ast/ast_value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,53 @@ func TestDocument_ValueToJSON(t *testing.T) {
Ref: 1,
}
}, `true`))
t.Run("ValueKindString", run(func(doc *Document) Value {
t.Run("ValueKindString - non-block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo"`))
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"),
})
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block with indent", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.Input.AppendInputString("\n")
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString(" foo\n bar"),
})
doc.Input.AppendInputString("\n")
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar"`))
t.Run("ValueKindString - block with mixed indent", run(func(doc *Document) Value {
doc.Input.AppendInputString(`"""`)
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo\n\t bar\n\t baz"),
})
doc.Input.AppendInputString(`"""`)
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo\nbar\n baz"`))
t.Run("ValueKindList", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Expand Down Expand Up @@ -187,21 +225,21 @@ func TestDocument_PrintValue(t *testing.T) {
}
t.Run("ValueKindString - non-block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString(`foo\nbar\tbaz\"qux`),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"foo"`))
}, `"foo\nbar\tbaz\"qux"`))
t.Run("ValueKindString - block", run(func(doc *Document) Value {
doc.StringValues = append(doc.StringValues, StringValue{
BlockString: true,
Content: doc.Input.AppendInputString("foo"),
Content: doc.Input.AppendInputString("foo\nbar\tbaz\"qux"),
})
return Value{
Kind: ValueKindString,
Ref: 0,
}
}, `"""foo"""`))
}, "\"\"\"foo\nbar\tbaz\"qux\"\"\""))
}
41 changes: 41 additions & 0 deletions pkg/ast/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,44 @@ func indexOf(refs []int, ref int) (int, bool) {
// deleteRef removes the element at index from *refs. The elements behind it
// are shifted one position to the left inside the existing backing array and
// the slice is shortened by one; no new slice is allocated.
func deleteRef(refs *[]int, index int) {
	s := *refs
	copy(s[index:], s[index+1:])
	*refs = s[:len(s)-1]
}

// splitBytesIntoLines cuts data into lines at every line terminator
// (\n, \r or \r\n) as defined by
// https://spec.graphql.org/October2021/#sec-Line-Terminators
//
// The terminators are not part of the returned lines. The result always holds
// at least one line: input without a terminator (including empty input) comes
// back as a single line, and a terminator at the very end produces a final
// empty line. The lines are sub-slices of data, not copies.
func splitBytesIntoLines(data []byte) [][]byte {
	var result [][]byte
	lineStart := 0

	for pos := 0; pos < len(data); pos++ {
		b := data[pos]
		if b != '\n' && b != '\r' {
			continue
		}

		result = append(result, data[lineStart:pos])

		// \r\n counts as one terminator, so step over the \n as well
		if b == '\r' && pos+1 < len(data) && data[pos+1] == '\n' {
			pos++
		}

		lineStart = pos + 1
	}

	// whatever follows the last terminator is the final line
	return append(result, data[lineStart:])
}

// leadingWhitespaceCount reports how many bytes at the start of line are
// spaces or tabs. For a line that consists of nothing but spaces and tabs
// (or is empty) the result equals len(line).
func leadingWhitespaceCount(line []byte) int {
	for i, b := range line {
		switch b {
		case ' ', '\t':
			// still inside the leading whitespace
		default:
			return i
		}
	}
	return len(line)
}
94 changes: 94 additions & 0 deletions v2/pkg/ast/ast_val_string_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,100 @@ func (d *Document) StringValueIsBlockString(ref int) bool {
return d.StringValues[ref].BlockString
}

// BlockStringValueContentRawBytes returns the part of the input that lies
// between the quote characters surrounding the block string ref.
//
// The recorded content range of a string value does not cover the whitespace
// around it (the lexer ignores it), but that whitespace is needed to work out
// the indentation of a block string. The range is therefore widened to the
// nearest '"' on either side. When no quote is found before the content the
// result starts at the beginning of the input; when none is found after it the
// result extends to d.Input.Length.
//
// The returned slice is a sub-slice of d.Input.RawBytes, not a copy.
func (d *Document) BlockStringValueContentRawBytes(ref int) []byte {
	raw := d.Input.RawBytes
	content := d.StringValues[ref].Content

	// Walk backwards from the byte preceding the content to the closest quote;
	// the block begins right after it.
	from := 0
	for pos := int(content.Start) - 1; pos >= 0; pos-- {
		if raw[pos] == '"' {
			from = pos + 1
			break
		}
	}

	// Walk forwards from the end of the content to the closest quote;
	// the block ends right before it.
	to := d.Input.Length
	for pos := int(content.End); pos < d.Input.Length; pos++ {
		if raw[pos] == '"' {
			to = pos
			break
		}
	}

	return raw[from:to]
}

// BlockStringValueContentRawString is the string variant of
// BlockStringValueContentRawBytes: it returns the same raw block string
// content, converted with unsafebytes.BytesToString.
//
// NOTE(review): the conversion presumably does not copy, so the string would
// share memory with the document input - confirm before retaining the result.
func (d *Document) BlockStringValueContentRawString(ref int) string {
	raw := d.BlockStringValueContentRawBytes(ref)
	return unsafebytes.BytesToString(raw)
}

// BlockStringValueContentBytes returns the value of the block string ref,
// computed from the raw text between its quotes as described by
// https://spec.graphql.org/October2021/#BlockStringValue():
//
//  1. the raw text is split into lines,
//  2. the smallest indentation of any line after the first that is not
//     whitespace-only is removed from every line after the first,
//  3. leading and trailing whitespace-only lines are dropped,
//  4. the remaining lines are joined with '\n'.
//
// A block string that has no line with non-whitespace content yields an empty
// (non-nil) result.
//
// NOTE(review): the `\"""` escape sequence is not unescaped here - confirm
// whether that is handled elsewhere.
func (d *Document) BlockStringValueContentBytes(ref int) []byte {
	// NOTE: This implementation follows the steps of the spec one by one.
	// It likely could be optimized for performance.

	// split the raw value into lines
	rawValue := d.BlockStringValueContentRawBytes(ref)
	lines := splitBytesIntoLines(rawValue)

	// find the common indent size (-1 means no common indent);
	// the first line and whitespace-only lines do not take part in it
	commonIndent := -1
	for i := 1; i < len(lines); i++ {
		indent := leadingWhitespaceCount(lines[i])
		if indent == len(lines[i]) {
			continue
		}
		if commonIndent == -1 || indent < commonIndent {
			commonIndent = indent
		}
	}

	// remove the common indent from every line but the first; a line shorter
	// than the common indent can only be whitespace and becomes empty
	if commonIndent != -1 {
		for i := 1; i < len(lines); i++ {
			indent := commonIndent
			if len(lines[i]) < indent {
				indent = len(lines[i])
			}
			lines[i] = lines[i][indent:]
		}
	}

	// find the first and last lines that are not whitespace-only
	// (-1 means that there is no such line)
	firstLine, lastLine := -1, -1
	for i, line := range lines {
		if leadingWhitespaceCount(line) == len(line) {
			continue
		}
		if firstLine == -1 {
			firstLine = i
		}
		lastLine = i
	}

	// every line is blank: the spec strips all of them, so nothing is left
	if firstLine == -1 {
		return []byte{}
	}

	// join the lines to keep and return the result
	return bytes.Join(lines[firstLine:lastLine+1], []byte{'\n'})
}

// BlockStringValueContentString is the string variant of
// BlockStringValueContentBytes: it returns the processed block string value,
// converted with unsafebytes.BytesToString.
func (d *Document) BlockStringValueContentString(ref int) string {
	content := d.BlockStringValueContentBytes(ref)
	return unsafebytes.BytesToString(content)
}

func (d *Document) StringValuesAreEquals(left, right int) bool {
return d.StringValueIsBlockString(left) == d.StringValueIsBlockString(right) &&
bytes.Equal(d.StringValueContentBytes(left), d.StringValueContentBytes(right))
Expand Down
16 changes: 15 additions & 1 deletion v2/pkg/ast/ast_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package ast

import (
"bytes"
"encoding/json"
"fmt"
"io"

Expand Down Expand Up @@ -144,7 +145,20 @@ func (d *Document) writeJSONValue(buf *bytes.Buffer, value Value) error {
buf.Write(literal.TRUE)
}
case ValueKindString:
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
if d.StringValueIsBlockString(value.Ref) {
content := d.BlockStringValueContentString(value.Ref)

enc := json.NewEncoder(buf)
enc.SetEscapeHTML(false)
if err := enc.Encode(content); err != nil {
return err
}

// Remove the extra newline that Encode adds
buf.Truncate(buf.Len() - 1)
} else {
buf.Write(quotes.WrapBytes(d.StringValueContentBytes(value.Ref)))
}
case ValueKindList:
buf.WriteByte(literal.LBRACK_BYTE)
for ii, ref := range d.ListValues[value.Ref].Refs {
Expand Down
Loading
Loading