Skip to content

Commit

Permalink
feat: add grammar helper (#93)
Browse files Browse the repository at this point in the history
Resolves #93

---------

Co-authored-by: Vladislav Mamon <[email protected]>
  • Loading branch information
mindplay-dk and norskeld authored Aug 22, 2023
1 parent a9df3e8 commit e8074be
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 2 deletions.
6 changes: 4 additions & 2 deletions benchmarks/src/json/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import { handlers } from '../@helpers'

import { SAMPLE } from './@sample'
import { parse as parseParjs } from './parjs'
import { parse as parseSigma } from './sigma'
import { parse as parseSigmaDefer } from './sigma'
import { parse as parseSigmaGrammar } from './sigma-grammar'

suite(
'JSON :: sigma vs parjs',

add('sigma', () => parseSigma(SAMPLE)),
add('sigma:defer', () => parseSigmaDefer(SAMPLE)),
add('sigma:grammar', () => parseSigmaGrammar(SAMPLE)),
add('parjs', () => parseParjs(SAMPLE)),

...handlers
Expand Down
169 changes: 169 additions & 0 deletions benchmarks/src/json/sigma-grammar.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import { choice, sepBy, optional, map, takeMid, sequence } from '@combinators'
import { Parser } from '@lib'
import { float, grammar, integer, regexp, run, string, whitespace } from '@parsers'

import * as Ast from './ast'

/* Tokens. */

// Literal keyword spellings recognised by the grammar. `as const` keeps every
// value typed as its exact string literal rather than widening it to `string`.
const Keywords = {
True: 'true',
False: 'false',
Null: 'null'
} as const

// Single-character punctuation tokens that delimit objects, arrays, properties
// and list items. Also declared `as const` so the values stay literal-typed.
const Terminals = {
OpenBrace: '{',
CloseBrace: '}',
OpenSquare: '[',
CloseSquare: ']',
Colon: ':',
Comma: ','
} as const

/* Mapping functions to turn parsed string values into AST nodes. */

/**
 * Wraps a list of parsed properties into an `object` AST node.
 *
 * @param values Properties in the order they were parsed.
 * @returns A node tagged `'object'` that holds `values` unchanged.
 */
function toObject(values: Array<Ast.JsonObjectProp>): Ast.JsonObject {
  const node: Ast.JsonObject = { type: 'object', values }

  return node
}

/**
 * Builds a `property` AST node from the `[key, separator, value]` tuple
 * produced for a single object member.
 *
 * @param tuple The parsed key string node, the separator text (ignored) and the value node.
 * @returns A node tagged `'property'` whose `name` is the key's string value.
 */
function toObjectProp(tuple: [Ast.JsonString, string, Ast.JsonValue]): Ast.JsonObjectProp {
  const key = tuple[0]
  const value = tuple[2]

  return { type: 'property', name: key.value, value }
}

/**
 * Wraps a list of parsed values into an `array` AST node.
 *
 * @param values Element nodes in the order they were parsed.
 * @returns A node tagged `'array'` that holds `values` unchanged.
 */
function toArray(values: Array<Ast.JsonValue>): Ast.JsonArray {
  const node: Ast.JsonArray = { type: 'array', values }

  return node
}

/**
 * Turns the raw matched text of a string literal into a `string` AST node.
 *
 * @param text The matched text including its first and last character (the quotes).
 * @returns A node tagged `'string'` holding `text` without its first and last character.
 */
function toString(text: string): Ast.JsonString {
  // Drop the first and the last character, i.e. the surrounding quotes.
  const unquoted = text.slice(1, -1)

  return { type: 'string', value: unquoted }
}

/**
 * Wraps an already-parsed numeric value into a `number` AST node.
 *
 * @param value The numeric value produced by the number parser.
 * @returns A node tagged `'number'` holding `value`.
 */
function toNumber(value: number): Ast.JsonNumber {
  const node: Ast.JsonNumber = { type: 'number', value }

  return node
}

/**
 * Converts a matched keyword into a `boolean` AST node.
 *
 * @param kind The matched keyword text.
 * @returns A node tagged `'boolean'` whose value is `true` only when `kind`
 *   equals `Keywords.True`; every other input (including `Keywords.False`)
 *   yields `false`.
 */
function toBoolean(kind: string): Ast.JsonBoolean {
  const value = kind === Keywords.True

  return { type: 'boolean', value }
}

/**
 * Produces the `null` AST node.
 *
 * @returns A fresh node tagged `'null'` with a `null` value.
 */
function toNull(): Ast.JsonNull {
  const node: Ast.JsonNull = { type: 'null', value: null }

  return node
}

/* Parsers. */

// Non-Terminals.

// A number is a float when possible, otherwise an integer.
const NumberLiteral = choice(float(), integer())

// Optional run of whitespace, used to pad punctuation and keywords.
const Space = optional(whitespace())

// A double-quoted string literal, quotes included.
//
// The character class must exclude the backslash: with `[^"]` listed first the
// backslash of an escape was consumed on its own, so the `\\.` alternative
// never got to consume the escaped character and a literal such as `"a\"b"`
// ended at the escaped quote.
const StringLiteral = regexp(/"([^"\\]|\\.)*"/g, 'string')

// Utility.

/** Matches the exact text `s`, ignoring optional whitespace on both sides. */
const match = (s: string) => takeMid(Space, string(s), Space)

// Grammar.

// Each rule is an initializer that builds its parser; sibling rules are
// reached through `this`, which is what allows the rules to be recursive.
const Json = grammar({
  // Entry point: the root is either an object or an array.
  Root(): Parser<Ast.JsonRoot> {
    return choice(this.Object, this.Array)
  },

  // `{` comma-separated properties `}`.
  Object(): Parser<Ast.JsonObject> {
    const props = sepBy(this.ObjectProp, match(Terminals.Comma))
    const braced = takeMid(match(Terminals.OpenBrace), props, match(Terminals.CloseBrace))

    return map(braced, toObject)
  },

  // string `:` value.
  ObjectProp(): Parser<Ast.JsonObjectProp> {
    const entry = sequence(this.String, match(Terminals.Colon), this.Value)

    return map(entry, toObjectProp)
  },

  // `[` comma-separated values `]`.
  Array(): Parser<Ast.JsonArray> {
    const items = sepBy(this.Value, match(Terminals.Comma))
    const bracketed = takeMid(match(Terminals.OpenSquare), items, match(Terminals.CloseSquare))

    return map(bracketed, toArray)
  },

  String(): Parser<Ast.JsonString> {
    return map(StringLiteral, toString)
  },

  Number(): Parser<Ast.JsonNumber> {
    return map(NumberLiteral, toNumber)
  },

  Boolean(): Parser<Ast.JsonBoolean> {
    const keyword = choice(match(Keywords.True), match(Keywords.False))

    return map(keyword, toBoolean)
  },

  Null(): Parser<Ast.JsonNull> {
    return map(match(Keywords.Null), toNull)
  },

  // Any value; alternatives are listed in the order they are tried.
  Value(): Parser<Ast.JsonValue> {
    return choice(this.Object, this.Array, this.String, this.Number, this.Boolean, this.Null)
  }
})

/* Wrapper for bench runner. */

/**
 * Parses `text` with the grammar's `Root` rule.
 *
 * @param text Source text to parse.
 * @returns The parsed root node, or an empty `object` node when parsing fails
 *   (the bench runner only needs a value of the right shape, not an error).
 */
export function parse(text: string): Ast.JsonRoot {
  const outcome = run(Json.Root).with(text)

  if (outcome.isOk) {
    return outcome.value
  }

  return { type: 'object', values: [] }
}
70 changes: 70 additions & 0 deletions docs/docs/content/core/grammar.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
---
title: 'grammar'
kind: 'core'
description: 'This simplifies the creation of a self-contained grammar.'
---

# grammar

## Description

`grammar` creates a self-contained grammar: a set of named parsers that can reference one another.

Similarly to [defer], this allows for the creation of mutually recursive parsers,
but lets you define all of the component parsers within a single call.

The function takes an object with parser initializers, and returns an object with
all of those parsers initialized. Within the parser initializers, use `this` to
reference other initialized parsers, as in the example below.

The properties of the resulting object are just regular parsers - you can freely
destructure these, pass them around individually, or compose them with other
grammars, parsers or combinators as needed.

## Usage

Here is an example of a simple grammar that recursively matches `ab` sequences:

```ts
const ab = grammar({
a(): Parser<string> {
return map(
sequence(
string('a'),
optional(this.b)
),
([a, b]) => a + (b ?? '')
)
},
b(): Parser<string> {
return map(
sequence(
string('b'),
choice(this.a, this.b)
),
([b, a]) => b + a
)
}
})
```

The individual properties are just regular parsers:

```ts
const { a } = ab

console.log(run(a).with('abba'))
```

::: tip Success
```ts
{
isOk: true,
span: [0, 4],
pos: 4,
value: 'abba'
}
```
:::

[defer]: ../parsers/defer
23 changes: 23 additions & 0 deletions src/__tests__/core/grammar.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { grammar } from '@core'
import { choice, map, optional, sequence, string } from '@lib'
import { describe, should, it, run, result } from '@testing'

describe('grammar', () => {
  it('should create self-contained grammar', () => {
    const input = 'abba'

    // Two mutually recursive rules: `a` may be followed by `b`, and `b` must
    // be followed by either `a` or `b`.
    const { a: start } = grammar({
      a() {
        const parts = sequence(string('a'), optional(this.b))

        return map(parts, ([head, tail]) => head + (tail ?? ''))
      },
      b() {
        const parts = sequence(string('b'), choice(this.a, this.b))

        return map(parts, ([head, tail]) => head + tail)
      }
    })

    // The whole input must be matched and returned as the parsed value.
    should.matchState(run(start, input), result(true, input))
  })
})
1 change: 1 addition & 0 deletions src/core.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from '@core/run'
export * from '@core/tryRun'
export * from '@core/grammar'
86 changes: 86 additions & 0 deletions src/core/grammar.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import type { Parser } from '@lib/types'

/**
 * Maps an object of parser initializers to an object of initialized parsers.
 *
 * Every key of `T` is kept. A property that is a function callable with no
 * arguments is replaced by that function's return type; any other property
 * becomes `never`.
 */
export type Grammar<T> = {
[P in keyof T]: T[P] extends () => unknown ? ReturnType<T[P]> : never
}

/**
 * The initializer object `T` with `this` inside its methods typed as
 * `Grammar<T>`, i.e. as the object of already-initialized parsers. This is
 * what makes `this.someRule` type-check inside an initializer, allowing for
 * type-safe self-contained and mutually recursive grammars.
 */
export type GrammarInit<T> = T & ThisType<Grammar<T>>

/**
 * Constraint for the input of the `grammar` function: a string-keyed object
 * whose every property is a zero-argument function returning a parser.
 */
export type GrammarType = {
[name: string]: () => Parser<unknown>
}

/**
 * This is a utility function to simplify the creation of a self-contained grammar.
 *
 * Similarly to `defer`, this allows for the creation of mutually recursive parsers,
 * but lets you define all of the component parsers within a single call.
 *
 * The function takes an object with parser initializers, and returns an object with
 * all of those parsers initialized. Within the parser initializers, use `this` to
 * reference other initialized parsers, as in the example below.
 *
 * The properties of the resulting object are just regular parsers - you can freely
 * destructure these, pass them around individually, or compose them with other
 * grammars, parsers or combinators as needed.
 *
 * A rule may also simply return a sibling rule (e.g. `root() { return this.list }`);
 * such aliases work regardless of the order in which the rules are declared.
 *
 * @param init Object whose methods each build and return one parser of the grammar.
 * @returns An object with the same keys as `init`, holding the initialized parsers.
 * @throws {Error} If rules alias each other in a cycle (e.g. `a` returns `this.b`
 *   and `b` returns `this.a`), since no actual parser can be resolved for them.
 *
 * @example
 *
 * ```typescript
 * interface NumberNode {
 *   type: 'number'
 *   value: number
 * }
 *
 * interface ListNode {
 *   type: 'list'
 *   value: Array<NumberNode | ListNode>
 * }
 *
 * const tupleGrammar = grammar({
 *   tupleNumber(): Parser<NumberNode> {
 *     return map(integer(), (value, span) => ({ type: 'number', span, value }))
 *   },
 *   tupleList(): Parser<ListNode> {
 *     return map(
 *       takeMid(
 *         string('('),
 *         sepBy(choice(this.tupleList, this.tupleNumber), string(',')),
 *         string(')')
 *       ),
 *       (value, span) => ({ type: 'list', span, value })
 *     )
 *   }
 * })
 *
 * const result = run(tupleGrammar.tupleList).with('(1,2,(3,4))')
 * ```
 */
export function grammar<T extends GrammarType>(init: GrammarInit<T>): Grammar<T> {
  const parsers = {} as { [key: string]: Parser<unknown> }

  // Remembers which rule each placeholder `parse` function belongs to, so that
  // a placeholder copied into another rule can be recognised and resolved.
  const placeholderOwners = new Map<Parser<unknown>['parse'], string>()

  // Pass 1: create a placeholder object per rule. Initializers receive these
  // objects through `this` and may capture them before the real parsers exist.
  for (const key in init) {
    const placeholder: Parser<unknown> = {
      // istanbul ignore next
      parse() {
        throw new Error(`internal error`) // this line should never execute
      }
    }

    placeholderOwners.set(placeholder.parse, key)
    parsers[key] = placeholder
  }

  // Pass 2: run every initializer and patch the real `parse` into the
  // placeholder object, so references captured earlier see the real parser.
  for (const key in init) {
    parsers[key].parse = init[key].apply(parsers).parse
  }

  // Pass 3: a rule that returned a sibling declared later has copied that
  // sibling's placeholder `parse`. Follow such aliases to the actual parser.
  for (const key in init) {
    const visited = new Set<string>([key])

    let parse = parsers[key].parse
    let owner = placeholderOwners.get(parse)

    while (owner !== undefined) {
      if (visited.has(owner)) {
        throw new Error(`grammar rule "${key}" is a circular alias and never resolves to a parser`)
      }

      visited.add(owner)
      parse = parsers[owner].parse
      owner = placeholderOwners.get(parse)
    }

    parsers[key].parse = parse
  }

  return parsers as Grammar<T>
}

1 comment on commit e8074be

@vercel
Copy link

@vercel vercel bot commented on e8074be Aug 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.