Skip to content

Commit

Permalink
created base tokenizer functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
EthanThatOneKid committed Nov 24, 2021
1 parent b0540c3 commit d1470f7
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 17 deletions.
23 changes: 14 additions & 9 deletions lib/tokenize/lexicon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,21 @@ export enum Lexicon {
Comment,
CommentOpener,
CommentCloser,
Whitespace,
Unknown,
EOF,
}

export const LEXICON = new Map<Lexicon, string | string[] | null>([
export const LEXICON: ReadonlyMap<Lexicon, string | string[] | null> = new Map<
Lexicon,
string | string[] | null
>([
[Lexicon.Identifier, null],
[Lexicon.StructOpener, "{"],
[Lexicon.StructCloser, "}"],
[Lexicon.TupleOpener, "("],
[Lexicon.TupleCloser, ")"],
[Lexicon.TypeDefiner, ["type", "struct", "interface"]],
[Lexicon.TypeDefiner, ["type", "spec"]],
[Lexicon.PropertyDefiner, ":"],
[Lexicon.PropertyOptionalMarker, "?"],
[Lexicon.PropertyOptionalDefiner, "?:"],
Expand All @@ -34,17 +38,18 @@ export const LEXICON = new Map<Lexicon, string | string[] | null>([
[Lexicon.Comment, [";", "//"]],
[Lexicon.CommentOpener, "/*"],
[Lexicon.CommentCloser, "*/"],
[Lexicon.Whitespace, " "],
[Lexicon.Unknown, null],
[Lexicon.EOF, null],
[Lexicon.EOF, "\n"],
]);

// freezing LEXICON map into place, courtesy of https://stackoverflow.com/a/35776333
LEXICON.set = function (key) {
throw new Error("Can't add property " + key + ", map is not extensible");
// force-freezing LEXICON map into place, courtesy of https://stackoverflow.com/a/35776333
(LEXICON as Map<unknown, unknown>).set = function (key) {
throw new Error(`Can't add property ${key}, map is not extensible`);
};
LEXICON.delete = function (key) {
throw new Error("Can't delete property " + key + ", map is frozen");
(LEXICON as Map<unknown, unknown>).delete = function (key) {
throw new Error(`Can't delete property ${key}, map is frozen`);
};
LEXICON.clear = function () {
(LEXICON as Map<unknown, unknown>).clear = function () {
throw new Error("Can't clear map, map is frozen");
};
4 changes: 2 additions & 2 deletions lib/tokenize/token.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Lexicon } from "./lexicon.ts";
import { LEXICON, Lexicon } from "./lexicon.ts";
import {
checkIsIdentifier,
checkIsTextLiteral,
Expand Down Expand Up @@ -38,7 +38,7 @@ export class Token {
}

static getKindOf(raw: string): Lexicon {
const matchingKind = findInLexicon(raw);
const matchingKind = findInLexicon(raw, LEXICON);
if (matchingKind !== null) return matchingKind;
if (checkIsIdentifier(raw)) return Lexicon.Identifier;
if (checkIsTextLiteral(raw)) return Lexicon.TextLiteral;
Expand Down
37 changes: 36 additions & 1 deletion lib/tokenize/tokenize.test.ts
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
// https://github.com/EthanThatOneKid/fart/blob/main/lib/tokenize/tokenize.test.ts
import { T } from "./t.ts";
import { Token } from "./token.ts";
import { tokenize } from "./tokenize.ts";
import { assertEquals } from "../../deps/std/testing.ts";
// import { LEXICON, Lexicon } from "./lexicon.ts";

// An empty input has nothing to tokenize, so the generator must finish
// without yielding anything.
Deno.test("yields no tokens given an empty string", () => {
  const input = "";
  const expectation: Token[] = [];
  const reality = [...tokenize(input)];
  // `assertEquals` takes (actual, expected); passing them in that order keeps
  // the actual/expected labels in a failure diff pointing the right way.
  assertEquals(reality, expectation);
});

// A lone keyword with no trailing delimiter is only emitted by the flush that
// happens after the last character has been consumed.
Deno.test("yields a single token `type`", () => {
  const input = "type";
  const expectation = [T.type(1, 1)];
  const reality = [...tokenize(input)];
  // `assertEquals` takes (actual, expected); passing them in that order keeps
  // the actual/expected labels in a failure diff pointing the right way.
  assertEquals(reality, expectation);
});

// Exercises a multi-line definition: keyword, identifier, struct opener, a
// property on its own (indented) line, and the struct closer.
Deno.test("yields a full `type` definition", () => {
  // The property line is indented by two spaces; the expected columns below
  // (3 for the name, 15 for `:`, 17 for the type) depend on that indent.
  const input = `type Example {
  testProperty: string
}`;
  const expectation = [
    T.type(1, 1),
    T.id("Example", 1, 6),
    T.nest(1, 14),
    T.id("testProperty", 2, 3),
    T.setter_1(2, 15),
    T.id("string", 2, 17),
    T.denest(3, 1),
  ];
  const reality = [...tokenize(input)];
  // `assertEquals` takes (actual, expected); passing them in that order keeps
  // the actual/expected labels in a failure diff pointing the right way.
  assertEquals(reality, expectation);
});
110 changes: 109 additions & 1 deletion lib/tokenize/tokenize.ts
Original file line number Diff line number Diff line change
@@ -1 +1,109 @@
// https://github.com/EthanThatOneKid/fart/blob/main/lib/tokenize/tokenize.ts
import { LEXICON, Lexicon } from "./lexicon.ts";
import { Token } from "./token.ts";
import { findInLexicon } from "./utils.ts";

/**
 * Mutable bookkeeping carried by `tokenize` while it walks the input one
 * character at a time.
 */
interface TokenizationState {
  /** Character examined in the current iteration; `null` before the first one. */
  char: null | string;
  /** Character examined in the previous iteration; `null` before the first one. */
  prevChar: null | string;
  /** Characters accumulated so far that have not yet been yielded as a token. */
  substr: string;
  /** The most recently yielded substring. */
  prevSubstr: string;
  /** Line number of the current character (the initial state starts it at 1). */
  line: number;
  /** Column number of the current character (the initial state starts it at 1). */
  column: number;
  /** If true, yields the current character as a token at the end of the iteration. */
  yieldingChar: boolean;
  /** If true, yields the accumulated substring as a token at the end of the iteration. */
  yieldingSubstr: boolean;
  /** If true, updates the line and column counts at the end of the iteration. */
  breakingLine: boolean;
}

/**
 * Generator type returned by `tokenize`: it yields `Token`s, returns
 * `undefined` when exhausted, and accepts `undefined | Token` through `next()`.
 */
type FartTokenGenerator = Generator<Token, undefined, undefined | Token>;

/**
 * Starting values for a tokenization run. The object is frozen so it can serve
 * as a shared template: `tokenize` copies it to create its working state and
 * reads individual fields from it to reset per-iteration flags.
 */
const INITIAL_TOKENIZATION_STATE: Readonly<TokenizationState> = Object.freeze({
  // no character has been read yet
  char: null,
  prevChar: null,
  // nothing accumulated or yielded yet
  substr: "",
  prevSubstr: "",
  // positions are 1-based
  line: 1,
  column: 1,
  // per-iteration flags all start cleared
  yieldingChar: false,
  yieldingSubstr: false,
  breakingLine: false,
});

/**
 * Lazily splits source text into `Token`s, examining one character (UTF-16
 * code unit) at a time.
 *
 * Characters are appended to a pending substring until one of them is
 * classified by `findInLexicon` as a delimiter:
 * - struct/tuple openers and closers and the property definer flush the
 *   pending substring and are then yielded as tokens of their own;
 * - whitespace and the optional marker flush the pending substring and are
 *   themselves discarded;
 * - the `Lexicon.EOF` entry flushes the pending substring and moves the
 *   position to the start of the next line.
 * Whatever is still pending after the last character is yielded as a final
 * token.
 *
 * NOTE(review): the optional marker is currently dropped rather than yielded
 * as a token; confirm whether that is intended.
 *
 * @param input source text to tokenize; it is only read, never modified
 * @param lex lexicon used to classify single characters; defaults to `LEXICON`
 * @returns a generator of tokens carrying the line and column at which each
 *   token starts
 */
export function* tokenize(
  input: string,
  lex: ReadonlyMap<Lexicon, string | string[] | null> = LEXICON,
): FartTokenGenerator {
  const memo = { ...INITIAL_TOKENIZATION_STATE };

  // Walk the input by index instead of re-slicing the remaining text on every
  // character; this leaves the `input` parameter untouched.
  for (let index = 0; index < input.length; index++) {
    const char = input.charAt(index);
    memo.char = char;
    memo.yieldingChar = INITIAL_TOKENIZATION_STATE.yieldingChar;
    memo.yieldingSubstr = INITIAL_TOKENIZATION_STATE.yieldingSubstr;
    memo.breakingLine = INITIAL_TOKENIZATION_STATE.breakingLine;

    switch (findInLexicon(char, lex)) {
      // when a line break occurs, increment the line count, set column back to
      // initial, and the current substring should become a token.
      case Lexicon.EOF: {
        memo.breakingLine = true;
        memo.yieldingSubstr = true;
        break;
      }
      // single-character delimiters end the pending substring and are tokens
      // in their own right.
      case Lexicon.StructOpener:
      case Lexicon.StructCloser:
      case Lexicon.TupleOpener:
      case Lexicon.TupleCloser:
      case Lexicon.PropertyDefiner: {
        memo.yieldingChar = true;
        memo.yieldingSubstr = true;
        break;
      }
      // these end the pending substring; the character itself is discarded.
      case Lexicon.PropertyOptionalMarker:
      case Lexicon.Whitespace: {
        memo.yieldingSubstr = true;
        break;
      }
      // anything else is part of the substring being accumulated.
      default: {
        memo.substr += char;
        break;
      }
    }

    // yield and reset substring if substring is to be yielded. The substring
    // ended just before the current column, so it started `length` columns back.
    if (memo.yieldingSubstr && memo.substr.length > 0) {
      yield new Token(memo.substr, memo.line, memo.column - memo.substr.length);
      memo.prevSubstr = memo.substr;
      memo.substr = INITIAL_TOKENIZATION_STATE.substr;
    }

    // if the current character is to be yielded, it must be yielded
    // _after_ the substring.
    if (memo.yieldingChar) {
      yield new Token(char, memo.line, memo.column);
    }

    // when a line is broken, set the column count to one less than its
    // initial value (the increment below brings it back to the initial value)
    // and increment the line count by one.
    if (memo.breakingLine) {
      memo.column = INITIAL_TOKENIZATION_STATE.column - 1;
      memo.line++;
    }

    // column count is incremented per iteration
    memo.column++;

    // current character is done with; remember it as the previous one.
    memo.prevChar = char;
  }

  // yield substring if one is left unresolved
  if (memo.substr.length > 0) {
    yield new Token(memo.substr, memo.line, memo.column - memo.substr.length);
  }

  return;
}
11 changes: 7 additions & 4 deletions lib/tokenize/utils.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import { LEXICON, Lexicon } from "./lexicon.ts";
import { Lexicon } from "./lexicon.ts";

export const findInLexicon = (raw: string): Lexicon | null => {
for (const [kind, value] of LEXICON) {
if ((Array.isArray(value) && value.includes(raw) || (raw === value))) {
export const findInLexicon = (
raw: string,
lex: ReadonlyMap<Lexicon, string | string[] | null>,
): Lexicon | null => {
for (const [kind, value] of lex) {
if (Array.isArray(value) && value.includes(raw) || (raw === value)) {
return kind;
}
}
Expand Down

1 comment on commit d1470f7

@deno-deploy
Copy link

@deno-deploy deno-deploy bot commented on d1470f7 Nov 24, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Failed to deploy:

failed to fetch 'https://raw.githubusercontent.com/EthanThatOneKid/fart/d1470f788c81d2c657e80659826631fe8674bfd1/std/server/worker.ts': HTTP status client error (404 Not Found) for url (https://raw.githubusercontent.com/EthanThatOneKid/fart/d1470f788c81d2c657e80659826631fe8674bfd1/std/server/worker.ts)

Please sign in to comment.