diff --git a/benchmarks/src/json/index.ts b/benchmarks/src/json/index.ts
index adb672d..f1551b0 100644
--- a/benchmarks/src/json/index.ts
+++ b/benchmarks/src/json/index.ts
@@ -4,12 +4,14 @@ import { handlers } from '../@helpers'
 
 import { SAMPLE } from './@sample'
 import { parse as parseParjs } from './parjs'
-import { parse as parseSigma } from './sigma'
+import { parse as parseSigmaDefer } from './sigma'
+import { parse as parseSigmaGrammar } from './sigma-grammar'
 
 suite(
   'JSON :: sigma vs parjs',
 
-  add('sigma', () => parseSigma(SAMPLE)),
+  add('sigma:defer', () => parseSigmaDefer(SAMPLE)),
+  add('sigma:grammar', () => parseSigmaGrammar(SAMPLE)),
   add('parjs', () => parseParjs(SAMPLE)),
 
   ...handlers
diff --git a/benchmarks/src/json/sigma-grammar.ts b/benchmarks/src/json/sigma-grammar.ts
new file mode 100644
index 0000000..17bfa37
--- /dev/null
+++ b/benchmarks/src/json/sigma-grammar.ts
@@ -0,0 +1,169 @@
+import { choice, sepBy, optional, map, takeMid, sequence } from '@combinators'
+import { Parser } from '@lib'
+import { float, grammar, integer, regexp, run, string, whitespace } from '@parsers'
+
+import * as Ast from './ast'
+
+/* Tokens. */
+
+const Keywords = {
+  True: 'true',
+  False: 'false',
+  Null: 'null'
+} as const
+
+const Terminals = {
+  OpenBrace: '{',
+  CloseBrace: '}',
+  OpenSquare: '[',
+  CloseSquare: ']',
+  Colon: ':',
+  Comma: ','
+} as const
+
+/* Mapping functions to turn parsed string values into AST nodes. */
+
+function toObject(values: Array<Ast.JsonObjectProp>): Ast.JsonObject {
+  return {
+    type: 'object',
+    values
+  }
+}
+
+function toObjectProp(tuple: [Ast.JsonString, string, Ast.JsonValue]): Ast.JsonObjectProp {
+  const [{ value: name }, _, value] = tuple
+
+  return {
+    type: 'property',
+    name,
+    value
+  }
+}
+
+function toArray(values: Array<Ast.JsonValue>): Ast.JsonArray {
+  return {
+    type: 'array',
+    values
+  }
+}
+
+function toString(text: string): Ast.JsonString {
+  return {
+    type: 'string',
+    value: text.slice(1, -1)
+  }
+}
+
+function toNumber(value: number): Ast.JsonNumber {
+  return {
+    type: 'number',
+    value: value
+  }
+}
+
+function toBoolean(kind: string): Ast.JsonBoolean {
+  switch (kind) {
+    case Keywords.True: {
+      return {
+        type: 'boolean',
+        value: true
+      }
+    }
+
+    case Keywords.False: {
+      return {
+        type: 'boolean',
+        value: false
+      }
+    }
+
+    default: {
+      return {
+        type: 'boolean',
+        value: false
+      }
+    }
+  }
+}
+
+function toNull(): Ast.JsonNull {
+  return {
+    type: 'null',
+    value: null
+  }
+}
+
+/* Parsers. */
+
+// Non-Terminals.
+const NumberLiteral = choice(float(), integer())
+
+const Space = optional(whitespace())
+const StringLiteral = regexp(/"([^"\\]|\\.)*"/g, 'string') // backslash is excluded from the plain-char class so an escaped \" cannot end the string early
+
+// Utility.
+const match = (s: string) => takeMid(Space, string(s), Space)
+
+// Grammar.
+const Json = grammar({
+  Root(): Parser<Ast.JsonRoot> {
+    return choice(this.Object, this.Array)
+  },
+  Object(): Parser<Ast.JsonObject> {
+    return map(
+      takeMid(
+        match(Terminals.OpenBrace),
+        sepBy(this.ObjectProp, match(Terminals.Comma)),
+        match(Terminals.CloseBrace)
+      ),
+      toObject
+    )
+  },
+  ObjectProp(): Parser<Ast.JsonObjectProp> {
+    return map(sequence(this.String, match(Terminals.Colon), this.Value), toObjectProp)
+  },
+  Array(): Parser<Ast.JsonArray> {
+    return map(
+      takeMid(
+        match(Terminals.OpenSquare),
+        sepBy(this.Value, match(Terminals.Comma)),
+        match(Terminals.CloseSquare)
+      ),
+      toArray
+    )
+  },
+  String(): Parser<Ast.JsonString> {
+    return map(StringLiteral, toString)
+  },
+  Number(): Parser<Ast.JsonNumber> {
+    return map(NumberLiteral, toNumber)
+  },
+  Boolean(): Parser<Ast.JsonBoolean> {
+    return map(choice(match(Keywords.True), match(Keywords.False)), toBoolean)
+  },
+  Null(): Parser<Ast.JsonNull> {
+    return map(match(Keywords.Null), toNull)
+  },
+  Value(): Parser<Ast.JsonValue> {
+    return choice(this.Object, this.Array, this.String, this.Number, this.Boolean, this.Null)
+  }
+})
+
+/* Wrapper for bench runner. */
+
+export function parse(text: string): Ast.JsonRoot {
+  const result = run(Json.Root).with(text)
+
+  switch (result.isOk) {
+    case true: {
+      return result.value
+    }
+
+    case false: {
+      return {
+        type: 'object',
+        values: []
+      }
+    }
+  }
+}
diff --git a/docs/docs/content/core/grammar.md b/docs/docs/content/core/grammar.md
new file mode 100644
index 0000000..7816cf5
--- /dev/null
+++ b/docs/docs/content/core/grammar.md
@@ -0,0 +1,70 @@
+---
+title: 'grammar'
+kind: 'core'
+description: 'This simplifies the creation of a self-contained grammar.'
+---
+
+# grammar
+
+## Description
+
+`grammar` is used to create a self-contained grammar.
+
+Similarly to [defer], this allows for the creation of mutually recursive parsers,
+but lets you define all of the component parsers within a single call.
+
+The function takes an object with parser initializers, and returns an object with
+all of those parsers initialized. Within the parser initializers, use `this` to
+reference other initialized parsers, as in the example below.
+
+The properties of the resulting object are just regular parsers - you can freely
+destructure these, pass them around individually, or compose them with other
+grammars, parsers or combinators as needed.
+
+## Usage
+
+Here is an example of a simple grammar that recursively matches `ab` sequences:
+
+```ts
+const ab = grammar({
+  a(): Parser<string> {
+    return map(
+      sequence(
+        string('a'),
+        optional(this.b)
+      ),
+      ([a, b]) => a + (b ?? '')
+    )
+  },
+  b(): Parser<string> {
+    return map(
+      sequence(
+        string('b'),
+        choice(this.a, this.b)
+      ),
+      ([b, a]) => b + a
+    )
+  }
+})
+```
+
+The individual properties are just regular parsers:
+
+```ts
+const { a } = ab
+
+console.log(run(a).with('abba'))
+```
+
+::: tip Success
+```ts
+{
+  isOk: true,
+  span: [0, 4],
+  pos: 4,
+  value: 'abba'
+}
+```
+:::
+
+[defer]: ../parsers/defer
diff --git a/src/__tests__/core/grammar.spec.ts b/src/__tests__/core/grammar.spec.ts
new file mode 100644
index 0000000..6dc3072
--- /dev/null
+++ b/src/__tests__/core/grammar.spec.ts
@@ -0,0 +1,23 @@
+import { grammar } from '@core'
+import { choice, map, optional, sequence, string } from '@lib'
+import { describe, should, it, run, result } from '@testing'
+
+describe('grammar', () => {
+  it('should create self-contained grammar', () => {
+    const ab = grammar({
+      a() {
+        return map(sequence(string('a'), optional(this.b)), ([a, b]) => a + (b ?? ''))
+      },
+      b() {
+        return map(sequence(string('b'), choice(this.a, this.b)), ([b, a]) => b + a)
+      }
+    })
+
+    const value = 'abba'
+
+    const actual = run(ab.a, value)
+    const expected = result(true, value)
+
+    should.matchState(actual, expected)
+  })
+})
diff --git a/src/core.ts b/src/core.ts
index 2db6579..506bf93 100644
--- a/src/core.ts
+++ b/src/core.ts
@@ -1,2 +1,3 @@
 export * from '@core/run'
 export * from '@core/tryRun'
+export * from '@core/grammar'
diff --git a/src/core/grammar.ts b/src/core/grammar.ts
new file mode 100644
index 0000000..167f335
--- /dev/null
+++ b/src/core/grammar.ts
@@ -0,0 +1,91 @@
+import type { Parser } from '@lib/types'
+
+/**
+ * This type extracts the return-types from the parser initializers.
+ */
+export type Grammar<T> = {
+  [P in keyof T]: T[P] extends () => unknown ? ReturnType<T[P]> : never
+}
+
+/**
+ * This type injects the initialized parser types into `this`, allowing for
+ * type-safe self-contained and mutually recursive grammars.
+ */
+export type GrammarInit<T> = T & ThisType<Grammar<T>>
+
+/**
+ * This defines the input to the `grammar` function - the parser initializers.
+ */
+export type GrammarType = {
+  [name: string]: () => Parser<unknown>
+}
+
+/**
+ * This is a utility function to simplify the creation of a self-contained grammar.
+ *
+ * Similarly to `defer`, this allows for the creation of mutually recursive parsers,
+ * but lets you define all of the component parsers within a single call.
+ *
+ * The function takes an object with parser initializers, and returns an object with
+ * all of those parsers initialized. Within the parser initializers, use `this` to
+ * reference other initialized parsers, as in the example below.
+ *
+ * The properties of the resulting object are just regular parsers - you can freely
+ * destructure these, pass them around individually, or compose them with other
+ * grammars, parsers or combinators as needed.
+ *
+ * @param init Object of parser initializers; each is invoked once with `this` bound to the grammar being built.
+ * @returns Object with the same keys as `init`, each holding the parser produced by its initializer.
+ *
+ * @example
+ *
+ * ```typescript
+ * interface NumberNode {
+ *   type: 'number'
+ *   value: number
+ * }
+ *
+ * interface ListNode {
+ *   type: 'list'
+ *   value: Array<ListNode | NumberNode>
+ * }
+ *
+ * const tupleGrammar = grammar({
+ *   tupleNumber(): Parser<NumberNode> {
+ *     return map(integer(), (value, span) => ({ type: 'number', span, value }))
+ *   },
+ *   tupleList(): Parser<ListNode> {
+ *     return map(
+ *       takeMid(
+ *         string('('),
+ *         sepBy(choice(this.tupleList, this.tupleNumber), string(',')),
+ *         string(')')
+ *       ),
+ *       (value, span) => ({ type: 'list', span, value })
+ *     )
+ *   }
+ * })
+ *
+ * const result = run(tupleGrammar.tupleList).with('(1,2,(3,4))')
+ * ```
+ */
+export function grammar<T extends GrammarType>(init: GrammarInit<T>): Grammar<T> {
+  const grammar = {} as { [key: string]: Parser<unknown> }
+
+  // Pass 1: create a placeholder parser per key so initializers can reference any rule through `this`.
+  for (const key in init) {
+    grammar[key] = {
+      // istanbul ignore next
+      parse() {
+        throw new Error(`internal error`) // placeholder, overwritten in the loop below - this line should never execute
+      }
+    }
+  }
+
+  // Pass 2: run each initializer with `this` bound to the grammar and adopt the `parse` of the parser it returns.
+  for (const key in init) {
+    grammar[key].parse = init[key].apply(grammar).parse
+  }
+
+  return grammar as Grammar<T>
+}