-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
created base tokenizer functionality
- Loading branch information
1 parent
b0540c3
commit d1470f7
Showing
5 changed files
with
168 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,36 @@ | ||
// https://github.com/EthanThatOneKid/fart/blob/main/lib/tokenize/tokenize.test.ts | ||
import { T } from "./t.ts"; | ||
import { Token } from "./token.ts"; | ||
import { tokenize } from "./tokenize.ts"; | ||
import { assertEquals } from "../../deps/std/testing.ts"; | ||
// import { LEXICON, Lexicon } from "./lexicon.ts"; | ||
|
||
// An empty input must produce an empty token stream.
Deno.test("yields no tokens given an empty string", () => {
  const input = "";
  const expectation: Token[] = [];
  const reality = [...tokenize(input)];
  // assertEquals takes (actual, expected); this order keeps the failure
  // diff labelled correctly.
  assertEquals(reality, expectation);
});
|
||
// A lone keyword with no trailing delimiter is still flushed as a token,
// positioned at line 1, column 1.
Deno.test("yields a single token `type`", () => {
  const input = "type";
  const expectation = [T.type(1, 1)];
  const reality = [...tokenize(input)];
  // assertEquals takes (actual, expected); this order keeps the failure
  // diff labelled correctly.
  assertEquals(reality, expectation);
});
|
||
// Tokenizes a multi-line struct definition and checks the 1-based
// line/column recorded for every token.
Deno.test("yields a full `type` definition", () => {
  // The property line is indented by two spaces, which is why
  // `testProperty` is expected at column 3 below.
  const input = `type Example {
  testProperty: string
}`;
  const expectation = [
    T.type(1, 1),
    T.id("Example", 1, 6),
    T.nest(1, 14),
    T.id("testProperty", 2, 3),
    T.setter_1(2, 15),
    T.id("string", 2, 17),
    T.denest(3, 1),
  ];
  const reality = [...tokenize(input)];
  // assertEquals takes (actual, expected); this order keeps the failure
  // diff labelled correctly.
  assertEquals(reality, expectation);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,109 @@ | ||
// https://github.com/EthanThatOneKid/fart/blob/main/lib/tokenize/tokenize.ts | ||
import { LEXICON, Lexicon } from "./lexicon.ts"; | ||
import { Token } from "./token.ts"; | ||
import { findInLexicon } from "./utils.ts"; | ||
|
||
/**
 * Mutable bookkeeping carried from one character to the next while
 * tokenizing Fart syntax.
 */
interface TokenizationState {
  /** Character examined in the current iteration (`null` before the first one). */
  char: string | null;
  /** Character examined in the previous iteration (`null` before the first one). */
  prevChar: string | null;
  /** Characters accumulated so far that have not been yielded as a token yet. */
  substr: string;
  /** The most recently yielded substring. */
  prevSubstr: string;
  /** Current line number. */
  line: number;
  /** Current column number. */
  column: number;
  /** If true, yields character as token at end of iteration. */
  yieldingChar: boolean;
  /** If true, yields substring as token at end of iteration. */
  yieldingSubstr: boolean;
  /** If true, updates line and column counts at end of iteration. */
  breakingLine: boolean;
}
|
||
/**
 * Generator shape produced by `tokenize`: it yields `Token`s, completes with
 * `undefined`, and accepts an optional `Token` through `next()`.
 */
type FartTokenGenerator = Generator<Token, undefined, Token | undefined>;
|
||
/**
 * Frozen starting values for a tokenization run. `tokenize` copies this
 * object to seed its working state and reads it again whenever a field has
 * to be reset to its default.
 */
const INITIAL_TOKENIZATION_STATE: Readonly<TokenizationState> = Object.freeze({
  // no character has been read and nothing has been accumulated yet
  char: null,
  prevChar: null,
  substr: "",
  prevSubstr: "",
  // positions are 1-based
  line: 1,
  column: 1,
  // per-iteration flags all start cleared
  yieldingChar: false,
  yieldingSubstr: false,
  breakingLine: false,
});
|
||
/**
 * Lazily splits Fart source text into tokens.
 *
 * The input is scanned one UTF-16 code unit at a time. Each character is
 * classified with `findInLexicon`; depending on the result it is appended to
 * the pending substring, causes the pending substring to be yielded, and/or
 * is yielded as a token of its own. Every token is constructed with its
 * text, the current line, and the column at which the text starts.
 *
 * @param input source text to tokenize; it is only read, never modified
 * @param lex lexicon handed to `findInLexicon` to classify each character
 *   (defaults to `LEXICON`)
 * @returns a generator of `Token`s in source order
 */
export function* tokenize(
  input: string,
  lex: ReadonlyMap<Lexicon, string | string[] | null> = LEXICON,
): FartTokenGenerator {
  const memo = { ...INITIAL_TOKENIZATION_STATE };

  // Walk the input by index instead of repeatedly slicing the parameter:
  // this leaves `input` untouched and avoids building a new string for
  // every character.
  for (let index = 0; index < input.length; index++) {
    // charAt always returns a string for an in-range index
    const char = input.charAt(index);
    memo.char = char;
    memo.yieldingChar = INITIAL_TOKENIZATION_STATE.yieldingChar;
    memo.yieldingSubstr = INITIAL_TOKENIZATION_STATE.yieldingSubstr;
    memo.breakingLine = INITIAL_TOKENIZATION_STATE.breakingLine;

    switch (findInLexicon(char, lex)) {
      // when a line break occurs, increment the line count, set column back
      // to initial, and the current substring should become a token.
      // NOTE(review): the lexicon entry is named `EOF` but is handled here
      // as a line break - confirm against the lexicon definition.
      case Lexicon.EOF: {
        memo.breakingLine = true;
        memo.yieldingSubstr = true;
        break;
      }
      // these characters end the pending substring and are tokens themselves
      case Lexicon.StructOpener:
      case Lexicon.StructCloser:
      case Lexicon.TupleOpener:
      case Lexicon.TupleCloser:
      case Lexicon.PropertyDefiner: {
        memo.yieldingChar = true;
        memo.yieldingSubstr = true;
        break;
      }
      // these characters end the pending substring but are not yielded
      case Lexicon.PropertyOptionalMarker:
      case Lexicon.Whitespace: {
        memo.yieldingSubstr = true;
        break;
      }
      // anything else extends the pending substring
      default: {
        memo.substr += char;
        break;
      }
    }

    // yield and reset substring if substring is to be yielded; the column
    // still points at the terminating character, so the substring started
    // `substr.length` columns earlier.
    if (memo.yieldingSubstr && memo.substr.length > 0) {
      yield new Token(memo.substr, memo.line, memo.column - memo.substr.length);
      memo.prevSubstr = memo.substr;
      memo.substr = INITIAL_TOKENIZATION_STATE.substr;
    }

    // if the current character is to be yielded, it must be yielded
    // _after_ the substring.
    if (memo.yieldingChar) {
      yield new Token(char, memo.line, memo.column);
    }

    // when a line is broken, set the column count to one below its initial
    // value (the unconditional increment below brings it back up) and
    // increment the line count by one.
    if (memo.breakingLine) {
      memo.column = INITIAL_TOKENIZATION_STATE.column - 1;
      memo.line++;
    }

    // column count is incremented per iteration
    memo.column++;

    // current character is remembered as the previous one.
    memo.prevChar = char;
  }

  // yield substring if one is left unresolved; the column is now one past
  // the last character, so the same start-column arithmetic applies.
  if (memo.substr.length > 0) {
    yield new Token(memo.substr, memo.line, memo.column - memo.substr.length);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
d1470f7
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Failed to deploy: