Refactor tokenizer

kashin.aleksandr committed Dec 11, 2024
1 parent 593897b commit 6f02ada
Showing 9 changed files with 209 additions and 160 deletions.
9 changes: 8 additions & 1 deletion cs/Markdown/MarkdownRenderer.cs
@@ -1,15 +1,22 @@
using Markdown.Render;
using Markdown.Tokenizer;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Nodes;
using Markdown.Tokenizer.Tags;

namespace Markdown;

public class MarkdownRenderer : IMarkdown
{
private readonly List<IHandler> handlers = new()
{
new HeaderHandler(),
new ItalicHandler(),
new BoldHandler(),
};
public string Render(string markdown)
{
var tokenizer = new MarkdownTokenizer();
var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor());
var renderer = new HtmlRenderer();
var tokens = tokenizer.Tokenize(markdown);
var tree = ToTree(tokens);
4 changes: 3 additions & 1 deletion cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs
@@ -1,5 +1,6 @@
using FluentAssertions;
using Markdown.Tokenizer;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Tags;

namespace Markdown.Tests.Tokenizer;
@@ -10,7 +11,8 @@ public class BoldHandlerTests
[TestCaseSource(nameof(BoldTokenSource))]
public void BoldTokenizerTests((string input, Token[] tags) testCase)
{
var tokenizer = new MarkdownTokenizer();
var handlers = new List<IHandler>() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() };
var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor());
var res = tokenizer.Tokenize(testCase.input).ToArray();

for (var i = 0; i < testCase.tags.Length; i++)
4 changes: 3 additions & 1 deletion cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs
@@ -1,5 +1,6 @@
using FluentAssertions;
using Markdown.Tokenizer;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Tags;

namespace Markdown.Tests.Tokenizer;
@@ -10,7 +11,8 @@ public class HeaderHandlerTests
[TestCaseSource(nameof(HeaderTokenSource))]
public void HeaderTokenizerTests((string input, Token[] tags) testCase)
{
var tokenizer = new MarkdownTokenizer();
var handlers = new List<IHandler>() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() };
var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor());
var res = tokenizer.Tokenize(testCase.input).ToArray();

for (var i = 0; i < testCase.tags.Length; i++)
4 changes: 3 additions & 1 deletion cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs
@@ -1,5 +1,6 @@
using FluentAssertions;
using Markdown.Tokenizer;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Tags;


@@ -11,7 +12,8 @@ public class ItalicParserTests
[TestCaseSource(nameof(ItalicTokenSource))]
public void ItalicTokenizerTests((string input, Token[] tags) testCase)
{
var tokenizer = new MarkdownTokenizer();
var handlers = new List<IHandler>() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() };
var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor());
var res = tokenizer.Tokenize(testCase.input).ToArray();

for (var i = 0; i < testCase.tags.Length; i++)
33 changes: 33 additions & 0 deletions cs/Markdown/Tokenizer/HandlerManager.cs
@@ -0,0 +1,33 @@
using System.Text;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Tags;

namespace Markdown.Tokenizer;

public class HandlerManager(IEnumerable<IHandler> handlers) : IHandlerManager
{
    private readonly List<IHandler> handlers = handlers.ToList();

    public void TryHandle(TokenizerContext context, StringBuilder buffer, List<Token> tags, Stack<Token> tagStack)
    {
        foreach (var handler in handlers)
        {
            var tag = handler.ProceedSymbol(context);
            if (tag != null)
            {
                if (buffer.Length > 0)
                {
                    var token = new TextToken(buffer.ToString());
                    tags.Add(token);
                    buffer.Clear();
                }

                tags.Add(tag);
                tagStack.Push(tag);
                return;
            }
        }

        buffer.Append(context.Current);
    }
}
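
For orientation, here is a small, hypothetical driver for the new HandlerManager. It is not part of this commit; it only assumes the TokenizerContext members (IsEnd, Current, Advance) and the handler and token types that appear elsewhere in this diff, and it omits the newline/space/backslash special-casing and the final TagProcessor pass that the real MarkdownTokenizer performs.

using System.Text;
using Markdown.Tokenizer;
using Markdown.Tokenizer.Handlers;
using Markdown.Tokenizer.Tags;

// Illustrative sketch only: drive HandlerManager character by character,
// the same way the refactored MarkdownTokenizer.Tokenize loop does.
var manager = new HandlerManager(new IHandler[]
{
    new HeaderHandler(), new ItalicHandler(), new BoldHandler(),
});

var context = new TokenizerContext("# _hello_");
var buffer = new StringBuilder();
var tags = new List<Token>();
var tagStack = new Stack<Token>();

while (!context.IsEnd)
{
    manager.TryHandle(context, buffer, tags, tagStack); // emits a tag or buffers the character
    context.Advance();
}

if (buffer.Length > 0) // flush trailing text, as Tokenize's FlushBuffer does
    tags.Add(new TextToken(buffer.ToString()));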
9 changes: 9 additions & 0 deletions cs/Markdown/Tokenizer/IHandlerManager.cs
@@ -0,0 +1,9 @@
using System.Text;
using Markdown.Tokenizer.Tags;

namespace Markdown.Tokenizer;

public interface IHandlerManager
{
    void TryHandle(TokenizerContext context, StringBuilder buffer, List<Token> tags, Stack<Token> tagStack);
}
8 changes: 8 additions & 0 deletions cs/Markdown/Tokenizer/ITagProcessor.cs
@@ -0,0 +1,8 @@
using Markdown.Tokenizer.Tags;

namespace Markdown.Tokenizer;

public interface ITagProcessor
{
    void Process(List<Token> tags, Stack<Token> tagStack);
}
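
The concrete TagProcessor constructed in the hunks above is not visible in the portion of the diff that loaded here; judging by the post-processing methods removed from MarkdownTokenizer.cs below (ProceedEscaped, ProceedInWords, ProceedTags), it takes over those passes. The following is a hypothetical sketch of one such pass behind this interface, reconstructed from the removed ProceedEscaped; it is not the committed file.

using Markdown.Tokenizer.Tags;

namespace Markdown.Tokenizer;

// Hypothetical sketch, not the committed TagProcessor: it shows only the
// escape pass, mirroring the ProceedEscaped method removed from
// MarkdownTokenizer.cs in this commit.
public class SketchTagProcessor : ITagProcessor
{
    public void Process(List<Token> tags, Stack<Token> tagStack)
    {
        ProceedEscaped(tags);
        // The real implementation presumably also runs the in-word fix-up and
        // the open/close pairing pass (former ProceedInWords/ProceedTags),
        // the latter consuming tagStack.
    }

    // A slash escapes a following slash or tag; escaping slashes are then
    // dropped from the token list.
    private static void ProceedEscaped(List<Token> tags)
    {
        for (var i = 0; i < tags.Count - 1; i++)
        {
            var current = tags[i];
            var next = tags[i + 1];
            if (current.TokenType is not TokenType.Slash || current.TagStatus == TagStatus.Broken)
                continue;

            if (next.TokenType is TokenType.Slash
                || next.TagStatus is TagStatus.Open or TagStatus.Closed or TagStatus.Single)
            {
                next.TagStatus = TagStatus.Broken;
                current.TagStatus = TagStatus.Escaped;
            }
        }

        tags.RemoveAll(t => t.TagStatus == TagStatus.Escaped);
    }
}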
184 changes: 28 additions & 156 deletions cs/Markdown/Tokenizer/MarkdownTokenizer.cs
@@ -5,192 +5,64 @@

namespace Markdown.Tokenizer;

public class MarkdownTokenizer : ITokenizer
public class MarkdownTokenizer(IHandlerManager handlerManager, ITagProcessor tagProcessor) : ITokenizer
{
private readonly StringBuilder buffer = new();
private List<Token> tags = new();
private readonly Stack<Token> tagStack = new();
private readonly List<IHandler> handlers = new()
{
new HeaderHandler(),
new ItalicHandler(),
new BoldHandler(),
};

public List<Token> Tokenize(string text)
{
var context = new TokenizerContext(text);
while (!context.IsEnd)
{
if (context.Current == '\n')
{
FlushBuffer();
var token = new NewLineToken();
tags.Add(token);
context.Advance();
continue;
}
if (context.Current == ' ')
{
if (buffer.Length > 0)
{
tags.Add(new TextToken(buffer.ToString()));
buffer.Clear();
}
buffer.Append(context.Current);
context.Advance();
continue;
}
if (context.Current == '\\')
{
FlushBuffer();
if(TryProceedSpecialSymbol(context)) continue;

tags.Add(new SlashToken());
context.Advance();
continue;
}

bool flag = false;
foreach (var handler in handlers)
{
var tag = handler.ProceedSymbol(context);
if (tag != null)
{
if (buffer.Length > 0)
{
var token = new TextToken(buffer.ToString());
tags.Add(token);
buffer.Clear();
}

tags.Add(tag);
tagStack.Push(tag);
flag = true;
break;
}
}

if (flag == false)
{
buffer.Append(context.Current);
}
handlerManager.TryHandle(context, buffer, tags, tagStack);

context.Advance();
}

FlushBuffer();
ProceedEscaped();
ProceedInWords();
ProceedTags();

tagProcessor.Process(tags, tagStack);

return tags;
}

private void ProceedInWords()
private bool TryProceedSpecialSymbol(TokenizerContext context)
{
for (var i = 0; i < tags.Count; i++)
switch (context.Current)
{
var current = tags[i];
if (current.TagStatus == TagStatus.InWord)
case '\n':
{
if (i - 2 >= 0)
{
if (tags[i - 1].TokenType == TokenType.String
&& tags[i - 2].TagStatus == TagStatus.Open)
{
current.TagStatus = TagStatus.Closed;
}
}
FlushBuffer();
var token = new NewLineToken();
tags.Add(token);
context.Advance();

if (i + 2 < tags.Count)
{
if (tags[i + 1].TokenType == TokenType.String)
{
if (tags[i + 2].TagStatus == TagStatus.Closed)
{
current.TagStatus = TagStatus.Open;
}
else if (tags[i + 2].TagStatus == TagStatus.InWord)
{
current.TagStatus = TagStatus.Open;
tags[i + 2].TagStatus = TagStatus.Closed;
}
}
}
return true;
}
}
}

private void ProceedEscaped()
{
for (var i = 0; i < tags.Count - 1; i++)
{
var current = tags[i];
var next = tags[i + 1];
if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken)
case ' ':
{
if (next is { TokenType: TokenType.Slash })
{
current.TagStatus = TagStatus.Escaped;
next.TagStatus = TagStatus.Broken;
}
else if (next is { TagStatus: TagStatus.Open or TagStatus.Closed or TagStatus.Single })
if (buffer.Length > 0)
{
next.TagStatus = TagStatus.Broken;
current.TagStatus = TagStatus.Escaped;
tags.Add(new TextToken(buffer.ToString()));
buffer.Clear();
}
}
}

tags = tags.Where(t => t.TagStatus != TagStatus.Escaped).ToList();
}

private void ProceedTags()
{
var tempStack = new Stack<Token>();

while (tagStack.Count > 0)
{
var current = tagStack.Pop();

if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single)
{
if (tempStack.Count > 0)
{
var previousTag = tempStack.Peek();
buffer.Append(context.Current);
context.Advance();

if (previousTag.TokenType == current.TokenType)
{
if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open)
{
tempStack.Pop();
}
else
{
tempStack.Push(current);
}
}
else
{
if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic)
{
current.TagStatus = TagStatus.Broken;
}
else
{
tempStack.Push(current);
}
}
}
else
{
tempStack.Push(current);
}
return true;
}
}
case '\\':
FlushBuffer();
tags.Add(new SlashToken());
context.Advance();

while (tempStack.Count > 0)
{
tempStack.Pop().TagStatus = TagStatus.Broken;
return true;
default:
return false;
}
}
