From 593897bd6f9ca05c290abb1135f551f0be304ae4 Mon Sep 17 00:00:00 2001 From: "kashin.aleksandr" Date: Wed, 11 Dec 2024 16:07:07 +0500 Subject: [PATCH] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=87=D0=B5=D0=B5=20?= =?UTF-8?q?=D1=80=D0=B5=D1=88=D0=B5=D0=BD=D0=B8=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/{IMd.cs => IMarkdown.cs} | 2 +- cs/Markdown/Markdown.csproj | 11 + cs/Markdown/MarkdownRenderer.cs | 94 ++++++++ cs/Markdown/Md.cs | 15 -- cs/Markdown/Render/HtmlRenderer.cs | 33 ++- cs/Markdown/Render/ITokenRenderer.cs | 4 +- cs/Markdown/Render/Renders/BoldRender.cs | 11 - cs/Markdown/Render/Renders/HeadRender.cs | 11 - cs/Markdown/Render/Renders/IItalicRender.cs | 11 - cs/Markdown/Render/Renders/ITokenRender.cs | 8 - cs/Markdown/Render/Renders/ItemListRender.cs | 11 - cs/Markdown/Render/Renders/TextRender.cs | 11 - cs/Markdown/Tests/Markdown/MarkdownTests.cs | 93 ++++++++ ...ts.SimpleHeader_Render_Verify.received.txt | 1 + ...ts.SimpleHeader_Render_Verify.verified.txt | 1 + ...ests.TwoHeaders_Render_Verify.received.txt | 1 + ...ests.TwoHeaders_Render_Verify.verified.txt | 1 + .../Tests/Tokenizer/BoldHandlerTests.cs | 40 ++++ .../Tests/Tokenizer/HeaderHandlerTests.cs | 37 ++++ .../Tests/Tokenizer/ItalicHandlerTests.cs | 63 ++++++ cs/Markdown/Tokenizer/Handlers/BoldHandler.cs | 31 +++ .../Tokenizer/Handlers/HeaderHandler.cs | 22 ++ cs/Markdown/Tokenizer/Handlers/IHandler.cs | 8 + .../Tokenizer/Handlers/ItalicHandler.cs | 35 +++ cs/Markdown/Tokenizer/ITokenizer.cs | 4 +- cs/Markdown/Tokenizer/MarkdownTokenizer.cs | 202 ++++++++++++++++-- cs/Markdown/Tokenizer/Nodes/BoldNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/HeaderNode.cs | 5 + cs/Markdown/Tokenizer/Nodes/ItalicNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/MainNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/Node.cs | 8 + cs/Markdown/Tokenizer/Nodes/NodeType.cs | 6 + cs/Markdown/Tokenizer/Nodes/TextNode.cs | 6 + cs/Markdown/Tokenizer/Parsers/BoldParser.cs | 9 - 
cs/Markdown/Tokenizer/Parsers/HeadParser.cs | 9 - cs/Markdown/Tokenizer/Parsers/ITokenParser.cs | 6 - cs/Markdown/Tokenizer/Parsers/ItalicParser.cs | 9 - .../Tokenizer/Parsers/ListItemParser.cs | 9 - cs/Markdown/Tokenizer/Parsers/TextParser.cs | 9 - cs/Markdown/Tokenizer/Tags/BoldTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/HeaderTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/ItalicTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/NewLineToken.cs | 11 + cs/Markdown/Tokenizer/Tags/SlashToken.cs | 11 + cs/Markdown/Tokenizer/Tags/TagStatus.cs | 12 ++ cs/Markdown/Tokenizer/Tags/TextToken.cs | 11 + cs/Markdown/Tokenizer/Tags/Token.cs | 8 + cs/Markdown/Tokenizer/Tags/TokenType.cs | 11 + cs/Markdown/Tokenizer/Token.cs | 16 -- cs/Markdown/Tokenizer/TokenType.cs | 10 - cs/Markdown/Tokenizer/TokenizerContext.cs | 28 +-- cs/NewMarkdown/NewMarkdown.csproj | 16 ++ cs/NewMarkdown/Node.cs | 14 ++ cs/NewMarkdown/NodeType.cs | 9 + cs/NewMarkdown/Tag.cs | 6 + cs/NewMarkdown/TextReader.cs | 17 ++ cs/NewMarkdown/Tokenizer.cs | 30 +++ cs/NewMarkdown/TokenizerTest.cs | 17 ++ 58 files changed, 903 insertions(+), 215 deletions(-) rename cs/Markdown/{IMd.cs => IMarkdown.cs} (68%) create mode 100644 cs/Markdown/MarkdownRenderer.cs delete mode 100644 cs/Markdown/Md.cs delete mode 100644 cs/Markdown/Render/Renders/BoldRender.cs delete mode 100644 cs/Markdown/Render/Renders/HeadRender.cs delete mode 100644 cs/Markdown/Render/Renders/IItalicRender.cs delete mode 100644 cs/Markdown/Render/Renders/ITokenRender.cs delete mode 100644 cs/Markdown/Render/Renders/ItemListRender.cs delete mode 100644 cs/Markdown/Render/Renders/TextRender.cs create mode 100644 cs/Markdown/Tests/Markdown/MarkdownTests.cs create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt create mode 100644 
cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt create mode 100644 cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs create mode 100644 cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs create mode 100644 cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/BoldHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/IHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/BoldNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/HeaderNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/ItalicNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/MainNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/Node.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/NodeType.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/TextNode.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/BoldParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/HeadParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ITokenParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ItalicParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ListItemParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/TextParser.cs create mode 100644 cs/Markdown/Tokenizer/Tags/BoldTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/HeaderTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/ItalicTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/NewLineToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/SlashToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TagStatus.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TextToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/Token.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TokenType.cs delete 
mode 100644 cs/Markdown/Tokenizer/Token.cs delete mode 100644 cs/Markdown/Tokenizer/TokenType.cs create mode 100644 cs/NewMarkdown/NewMarkdown.csproj create mode 100644 cs/NewMarkdown/Node.cs create mode 100644 cs/NewMarkdown/NodeType.cs create mode 100644 cs/NewMarkdown/Tag.cs create mode 100644 cs/NewMarkdown/TextReader.cs create mode 100644 cs/NewMarkdown/Tokenizer.cs create mode 100644 cs/NewMarkdown/TokenizerTest.cs diff --git a/cs/Markdown/IMd.cs b/cs/Markdown/IMarkdown.cs similarity index 68% rename from cs/Markdown/IMd.cs rename to cs/Markdown/IMarkdown.cs index 28e9118e6..7ac783395 100644 --- a/cs/Markdown/IMd.cs +++ b/cs/Markdown/IMarkdown.cs @@ -1,6 +1,6 @@ namespace Markdown; -public interface IMd +public interface IMarkdown { string Render(string markdown); } \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 20ebbe3c6..fbc0a9283 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -6,4 +6,15 @@ enable + + + + + + + + + + + diff --git a/cs/Markdown/MarkdownRenderer.cs b/cs/Markdown/MarkdownRenderer.cs new file mode 100644 index 000000000..0006e5ff3 --- /dev/null +++ b/cs/Markdown/MarkdownRenderer.cs @@ -0,0 +1,94 @@ +using Markdown.Render; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; +using Markdown.Tokenizer.Tags; + +namespace Markdown; + +public class MarkdownRenderer : IMarkdown +{ + public string Render(string markdown) + { + var tokenizer = new MarkdownTokenizer(); + var renderer = new HtmlRenderer(); + var tokens = tokenizer.Tokenize(markdown); + var tree = ToTree(tokens); + return renderer.Render(tree); + } + + private Node ToTree(List tokens) + { + Node mainNode = new MainNode(); + Node currentNode = mainNode; + for (int i = 0; i < tokens.Count; i++) + { + if (tokens[i].TagStatus == TagStatus.Broken) + { + currentNode.Children.Add(new TextNode{Value = tokens[i].Value}); + continue; + } + + if (tokens[i] is ItalicTag tag) + { + if(tag.TagStatus == 
TagStatus.Open) + { + var node = new ItalicNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (tag.TagStatus == TagStatus.Closed) + { + currentNode = currentNode.Parent; + continue; + } + } + + if (tokens[i] is BoldTag boldTag) + { + if(boldTag.TagStatus == TagStatus.Open) + { + var node = new BoldNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (boldTag.TagStatus == TagStatus.Closed) + { + currentNode = currentNode.Parent; + continue; + } + } + + if (tokens[i] is HeaderTag) + { + var node = new HeaderNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (tokens[i] is NewLineToken) + { + if (currentNode is HeaderNode) + { + currentNode = currentNode.Parent; + } + continue; + } + + if (tokens[i] is TextToken textToken) + { + currentNode.Children.Add(new TextNode { Value = textToken.Value }); + continue; + } + } + + return currentNode.Parent ?? 
currentNode; + } +} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs deleted file mode 100644 index fe7fddf65..000000000 --- a/cs/Markdown/Md.cs +++ /dev/null @@ -1,15 +0,0 @@ -using Markdown.Render; -using Markdown.Tokenizer; - -namespace Markdown; - -public class Md : IMd -{ - private readonly ITokenizer tokenizer = new MarkdownTokenizer(); - private readonly ITokenRenderer renderer = new HtmlRenderer(); - - public string Render(string markdown) - { - return renderer.Render(tokenizer.Tokenize(markdown)); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/HtmlRenderer.cs b/cs/Markdown/Render/HtmlRenderer.cs index 3039ab64f..63d9d72ac 100644 --- a/cs/Markdown/Render/HtmlRenderer.cs +++ b/cs/Markdown/Render/HtmlRenderer.cs @@ -1,35 +1,28 @@ using System.Text; -using Markdown.Render.Renders; -using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; namespace Markdown.Render; public class HtmlRenderer : ITokenRenderer { - private readonly Dictionary _renders = new() - { - { TokenType.Italic , new ItalicRender() }, - { TokenType.Bold , new BoldRender() }, - { TokenType.Header, new HeadRender() }, - { TokenType.Text, new TextRender() }, - { TokenType.ItemList, new ItemListRender() } - }; - - public string Render(List tokens) + public string Render(Node tokens) { var sb = new StringBuilder(); - foreach (var token in tokens) - { - sb.Append(Render(token)); - } + foreach (var token in tokens.Children) + sb.Append(RenderToken(token)); return sb.ToString(); } - private string Render(Token token) + private string? RenderToken(Node node) { - return _renders[token.Type].Render(token); + return node switch + { + TextNode textNode => textNode.Value, + HeaderNode => $"

{Render(node)}

", + ItalicNode => $"{Render(node)}", + BoldNode => $"{Render(node)}", + _ => throw new Exception($"Unknown token type: {node.GetType()}") + }; } - - } \ No newline at end of file diff --git a/cs/Markdown/Render/ITokenRenderer.cs b/cs/Markdown/Render/ITokenRenderer.cs index a103dcdb8..12d3d2928 100644 --- a/cs/Markdown/Render/ITokenRenderer.cs +++ b/cs/Markdown/Render/ITokenRenderer.cs @@ -1,8 +1,8 @@ -using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; namespace Markdown.Render; public interface ITokenRenderer { - string Render(List tokens); + string Render(Node tokens); } \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/BoldRender.cs b/cs/Markdown/Render/Renders/BoldRender.cs deleted file mode 100644 index 982dac8de..000000000 --- a/cs/Markdown/Render/Renders/BoldRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class BoldRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/HeadRender.cs b/cs/Markdown/Render/Renders/HeadRender.cs deleted file mode 100644 index db25a44cc..000000000 --- a/cs/Markdown/Render/Renders/HeadRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class HeadRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/IItalicRender.cs b/cs/Markdown/Render/Renders/IItalicRender.cs deleted file mode 100644 index f8df95237..000000000 --- a/cs/Markdown/Render/Renders/IItalicRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class ItalicRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git 
a/cs/Markdown/Render/Renders/ITokenRender.cs b/cs/Markdown/Render/Renders/ITokenRender.cs deleted file mode 100644 index e50543ae4..000000000 --- a/cs/Markdown/Render/Renders/ITokenRender.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public interface ITokenRender -{ - string Render(Token token); -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/ItemListRender.cs b/cs/Markdown/Render/Renders/ItemListRender.cs deleted file mode 100644 index dfca000e7..000000000 --- a/cs/Markdown/Render/Renders/ItemListRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class ItemListRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/TextRender.cs b/cs/Markdown/Render/Renders/TextRender.cs deleted file mode 100644 index 30f3ed559..000000000 --- a/cs/Markdown/Render/Renders/TextRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class TextRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/MarkdownTests.cs b/cs/Markdown/Tests/Markdown/MarkdownTests.cs new file mode 100644 index 000000000..a2e1df0c5 --- /dev/null +++ b/cs/Markdown/Tests/Markdown/MarkdownTests.cs @@ -0,0 +1,93 @@ +namespace Markdown.Tests.Markdown; + +[TestFixture] +public class MarkdownTests +{ + private static readonly VerifySettings Settings = new(); + private static readonly MarkdownRenderer Renderer = new(); + + [OneTimeSetUp] + public void OneTimeSetUp() + { + Settings.UseDirectory("snapshots"); + } + + [TestCaseSource(nameof(ItalicTestCases))] + public string Test_1(string input) => Renderer.Render(input); + + private static TestCaseData[] ItalicTestCases = + [ + 
new TestCaseData("# Header").Returns("

Header

"), + new TestCaseData("\\# Header").Returns("# Header"), + new TestCaseData("\\\\# Header").Returns("\\

Header

"), + new TestCaseData("_Italic text_").Returns("Italic text"), + new TestCaseData("\\_Text_").Returns("_Text_"), + new TestCaseData("\\\\_Italic text_").Returns("\\Italic text"), + new TestCaseData("_Italic text").Returns("_Italic text"), + new TestCaseData("Italic text_").Returns("Italic text_"), + new TestCaseData("Italic_ text_").Returns("Italic_ text_"), + new TestCaseData("_Italic _text").Returns("_Italic _text"), + new TestCaseData("_нач_але").Returns("начале"), + new TestCaseData("сер_еди_не").Returns("середине"), + new TestCaseData("цифры_1_12_3").Returns("цифры_1_12_3"), + new TestCaseData("кон_це._").Returns("конце."), + new TestCaseData("в ра_зных сл_овах не").Returns("в ра_зных сл_овах не"), + new TestCaseData("__bold__").Returns("bold"), + new TestCaseData("_Text__").Returns("_Text__"), + new TestCaseData("__Text_").Returns("__Text_"), + new TestCaseData("__Italic __text").Returns("__Italic __text"), + new TestCaseData("__два _один_ может__").Returns("два один может"), + new TestCaseData("_одинарного __двойное__ не_").Returns( "одинарного __двойное__ не") + ]; + + private static Task Verify(string target) => + Verifier.Verify(target, Settings); + + [Test] + public void SimpleText_Render_Verify() => + Verify(Renderer.Render("Text")); + + [Test] + public void EscapedCharacter_Render_Verify() => + Verify(Renderer.Render(@"\_Text_")); + + [Test] + public void ItalicText_Render_Verify() => + Verify(Renderer.Render("_Italic text_")); + + [Test] + public void BoldText_Render_Verify() => + Verify(Renderer.Render("__Bold text__")); + + [Test] + public void BoldWithItalicText_Render_Verify() => + Verify(Renderer.Render("__Bold _with italic_ text__")); + + [Test] + public void SimpleHeader_Render_Verify() => + Verify(Renderer.Render("# Header")); + + [Test] + public void TwoHeaders_Render_Verify() => + Verify(Renderer.Render("# Header one \n# Header two")); + // + // [Test] + // public void HeaderWithItalic_Render_Verify() => + // Verify(Renderer.Render("# 
Header with _italic text_")); + // + // [Test] + // public void HeaderWithBoldAndItalic_Render_Verify() => + // Verify(Renderer.Render("# Header with _italic_ and __bold__ text")); + // + // [Test] + // public void HeaderWithItalicInBold_Render_Verify() => + // Verify(Renderer.Render("# Header ___italic_ in bold__ text")); + // + // [Test] + // public void SimpleList_Render_Verify() => + // Verify(Renderer.Render("- item1\n- item2")); + // + // [Test] + // public void ListWithItalicAndBold_Render_Verify() => + // Verify(Renderer.Render("- _item1_\n- __item2__")); +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt new file mode 100644 index 000000000..35ba349aa --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt @@ -0,0 +1 @@ +

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt new file mode 100644 index 000000000..35ba349aa --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt @@ -0,0 +1 @@ +

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt new file mode 100644 index 000000000..71b2c8c9a --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt @@ -0,0 +1 @@ +

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt new file mode 100644 index 000000000..71b2c8c9a --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt @@ -0,0 +1 @@ +

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs new file mode 100644 index 000000000..1ceff7c41 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs @@ -0,0 +1,40 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class BoldHandlerTests +{ + [TestCaseSource(nameof(BoldTokenSource))] + public void BoldTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + public static IEnumerable<(string input, Token[] result)> BoldTokenSource() + { + yield return ("__abc__", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed)]); + + yield return ("_abc__", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed)]); + + yield return ("__abc_", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed)]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs new file mode 100644 index 000000000..2fc9cce02 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs @@ -0,0 +1,37 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class HeaderHandlerTests +{ + [TestCaseSource(nameof(HeaderTokenSource))] + public void HeaderTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < 
testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + private static IEnumerable<(string input, Token[] tags)> HeaderTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("# abc", [new HeaderTag(), new TextToken("abc")]); + yield return ("f# abc", [new TextToken("f#"),new TextToken(" abc")]); + yield return ("\\# abc", [new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("\\\\# abc", [new SlashToken(), new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("# abc\n# qwe", [ + new HeaderTag(), + new TextToken("abc"), + new NewLineToken(), + new HeaderTag(), + new TextToken("qwe")]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs new file mode 100644 index 000000000..a07f4f2fd --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs @@ -0,0 +1,63 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class ItalicParserTests +{ + [TestCaseSource(nameof(ItalicTokenSource))] + public void ItalicTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + private static IEnumerable<(string input, Token[] tags)> ItalicTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("_abc", [new ItalicTag(TagStatus.Open), new TextToken("abc")]); + yield return ("abc_", [new TextToken("abc"), new ItalicTag(TagStatus.Closed)]); + yield return ("a_bc_", [ + new TextToken("a"), + new 
ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed)]); + yield return ("_a_bc", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc")]); + + yield return ("_a_bc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed)]); + + yield return ("_abc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed)]); + + yield return ("\\_abc", [ + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc")]); + + yield return ("\\\\_abc", [ + new SlashToken(), + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc")]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs new file mode 100644 index 000000000..e046b40d8 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs @@ -0,0 +1,31 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class BoldHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if (symbol != '_') + return null; + + if (ctx.Next != '_') + return null; + + if ((ctx.Position == 0 || ctx.Previous == ' ') && ctx.NextNext != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Open); + } + + if (ctx.Previous != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Closed); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs new file mode 100644 index 000000000..e86a4fc24 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs @@ -0,0 +1,22 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class HeaderHandler : IHandler +{ + public Token? 
ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if(symbol != '#') + return null; + + if ((ctx.Next == ' ' && (ctx.Previous == '\n' || ctx.Position == 0)) || (ctx.Previous == '\\')) + { + ctx.Advance(); + return new HeaderTag(); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/IHandler.cs b/cs/Markdown/Tokenizer/Handlers/IHandler.cs new file mode 100644 index 000000000..8ede8da56 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/IHandler.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public interface IHandler +{ + Token? ProceedSymbol(TokenizerContext context); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs new file mode 100644 index 000000000..8af2fb70f --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs @@ -0,0 +1,35 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class ItalicHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if(symbol != '_') + return null; + + if(ctx.Next == '_') + return null; + + if (char.IsDigit(ctx.Previous ?? ' ') || char.IsDigit(ctx.Next ?? 
' ')) + return null; + + if (ctx.Position == 0 || ctx.Previous == ' ' || ctx.Previous == '\\') + { + return new ItalicTag(TagStatus.Open); + } + + if (ctx.Previous != ' ' && (ctx.Next == ' ' || ctx.Length - 1 == ctx.Position)) + { + return new ItalicTag(TagStatus.Closed); + } + + if (ctx.Previous != ' ' && ctx.Next != ' ' ) + return new ItalicTag(TagStatus.InWord); + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITokenizer.cs b/cs/Markdown/Tokenizer/ITokenizer.cs index cf10c2aef..a6c3a9d42 100644 --- a/cs/Markdown/Tokenizer/ITokenizer.cs +++ b/cs/Markdown/Tokenizer/ITokenizer.cs @@ -1,4 +1,6 @@ -namespace Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; public interface ITokenizer { diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs index 0af5da3fb..feba3617f 100644 --- a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs +++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs @@ -1,31 +1,205 @@ -using Markdown.Tokenizer.Parsers; +using System.Text; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; +using Token = Markdown.Tokenizer.Tags.Token; namespace Markdown.Tokenizer; public class MarkdownTokenizer : ITokenizer { - private static readonly List parsers - = new List + private readonly StringBuilder buffer = new(); + private List tags = new(); + private readonly Stack tagStack = new(); + private readonly List handlers = new() { - new HeadParser(), - new BoldParser(), - new ItalicParser(), - new TextParser(), - new ListItemParser() + new HeaderHandler(), + new ItalicHandler(), + new BoldHandler(), }; public List Tokenize(string text) { var context = new TokenizerContext(text); - var tokens = new List(); + while (!context.IsEnd) + { + if (context.Current == '\n') + { + FlushBuffer(); + var token = new NewLineToken(); + tags.Add(token); + context.Advance(); + continue; + } + if (context.Current == ' ') + { + if (buffer.Length > 0) + { + 
tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } + buffer.Append(context.Current); + context.Advance(); + continue; + } + if (context.Current == '\\') + { + FlushBuffer(); + + tags.Add(new SlashToken()); + context.Advance(); + continue; + } + + bool flag = false; + foreach (var handler in handlers) + { + var tag = handler.ProceedSymbol(context); + if (tag != null) + { + if (buffer.Length > 0) + { + var token = new TextToken(buffer.ToString()); + tags.Add(token); + buffer.Clear(); + } + + tags.Add(tag); + tagStack.Push(tag); + flag = true; + break; + } + } + + if (flag == false) + { + buffer.Append(context.Current); + } + + context.Advance(); + } + + FlushBuffer(); + ProceedEscaped(); + ProceedInWords(); + ProceedTags(); + return tags; + } + + private void ProceedInWords() + { + for (var i = 0; i < tags.Count; i++) + { + var current = tags[i]; + if (current.TagStatus == TagStatus.InWord) + { + if (i - 2 >= 0) + { + if (tags[i - 1].TokenType == TokenType.String + && tags[i - 2].TagStatus == TagStatus.Open) + { + current.TagStatus = TagStatus.Closed; + } + } + + if (i + 2 < tags.Count) + { + if (tags[i + 1].TokenType == TokenType.String) + { + if (tags[i + 2].TagStatus == TagStatus.Closed) + { + current.TagStatus = TagStatus.Open; + } + else if (tags[i + 2].TagStatus == TagStatus.InWord) + { + current.TagStatus = TagStatus.Open; + tags[i + 2].TagStatus = TagStatus.Closed; + } + } + } + } + } + } + + private void ProceedEscaped() + { + for (var i = 0; i < tags.Count - 1; i++) + { + var current = tags[i]; + var next = tags[i + 1]; + if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken) + { + if (next is { TokenType: TokenType.Slash }) + { + current.TagStatus = TagStatus.Escaped; + next.TagStatus = TagStatus.Broken; + } + else if (next is { TagStatus: TagStatus.Open or TagStatus.Closed or TagStatus.Single }) + { + next.TagStatus = TagStatus.Broken; + current.TagStatus = TagStatus.Escaped; + } + } + } + + tags = tags.Where(t 
=> t.TagStatus != TagStatus.Escaped).ToList(); + } + + private void ProceedTags() + { + var tempStack = new Stack(); + + while (tagStack.Count > 0) + { + var current = tagStack.Pop(); + + if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single) + { + if (tempStack.Count > 0) + { + var previousTag = tempStack.Peek(); + + if (previousTag.TokenType == current.TokenType) + { + if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open) + { + tempStack.Pop(); + } + else + { + tempStack.Push(current); + } + } + else + { + if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic) + { + current.TagStatus = TagStatus.Broken; + } + else + { + tempStack.Push(current); + } + } + } + else + { + tempStack.Push(current); + } + } + } - foreach (var parser in parsers) + while (tempStack.Count > 0) { - var token = parser.Parse(context); - if(token is not null) - tokens.Add(token); + tempStack.Pop().TagStatus = TagStatus.Broken; } + } - return tokens; + private void FlushBuffer() + { + if (buffer.Length > 0) + { + tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } } } \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/BoldNode.cs b/cs/Markdown/Tokenizer/Nodes/BoldNode.cs new file mode 100644 index 000000000..3e42a57e0 --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/BoldNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public class BoldNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs b/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs new file mode 100644 index 000000000..aaf3bfa87 --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs @@ -0,0 +1,5 @@ +namespace Markdown.Tokenizer.Nodes; + +public class HeaderNode : Node +{ +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs b/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs new file mode 100644 index 000000000..528b72f7f --- 
/dev/null
+++ b/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Tokenizer.Nodes;
+
+public class ItalicNode : Node // Empty Node subclass: declares no members of its own.
+{
+
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Nodes/MainNode.cs b/cs/Markdown/Tokenizer/Nodes/MainNode.cs
new file mode 100644
index 000000000..f1db39b3c
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Nodes/MainNode.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Tokenizer.Nodes;
+
+public class MainNode : Node // Empty Node subclass: declares no members of its own.
+{
+
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Nodes/Node.cs b/cs/Markdown/Tokenizer/Nodes/Node.cs
new file mode 100644
index 000000000..91ddd509a
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Nodes/Node.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Tokenizer.Nodes;
+
+public abstract class Node // Base type of the node classes in this namespace; holds a value, a child list and a parent link.
+{
+    public string? Value { get; set; } // optional text payload; null until assigned
+    public List<Node> Children { get; } = new(); // created eagerly, so never null; element type restored to Node
+    public Node? Parent { get; set; } // presumably the enclosing node - nothing in this file assigns it, confirm against the tree builder
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Nodes/NodeType.cs b/cs/Markdown/Tokenizer/Nodes/NodeType.cs
new file mode 100644
index 000000000..2d70e7319
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Nodes/NodeType.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Tokenizer.Nodes;
+
+public enum NodeType // NOTE(review): Node exposes no member of this type in the visible code - confirm the enum is used
+{
+    Header
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Nodes/TextNode.cs b/cs/Markdown/Tokenizer/Nodes/TextNode.cs
new file mode 100644
index 000000000..0ca76bc6f
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Nodes/TextNode.cs
@@ -0,0 +1,6 @@
+namespace Markdown.Tokenizer.Nodes;
+
+public class TextNode : Node // Empty Node subclass: declares no members of its own.
+{
+
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/BoldParser.cs b/cs/Markdown/Tokenizer/Parsers/BoldParser.cs
deleted file mode 100644
index b95789213..000000000
--- a/cs/Markdown/Tokenizer/Parsers/BoldParser.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public class BoldParser : ITokenParser
-{
-    public Token? Parse(TokenizerContext text)
-    {
-        throw new NotImplementedException();
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/HeadParser.cs b/cs/Markdown/Tokenizer/Parsers/HeadParser.cs
deleted file mode 100644
index 55d8e4486..000000000
--- a/cs/Markdown/Tokenizer/Parsers/HeadParser.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public class HeadParser : ITokenParser
-{
-    public Token? Parse(TokenizerContext context)
-    {
-        throw new NotImplementedException();
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs b/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs
deleted file mode 100644
index 225ac0447..000000000
--- a/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs
+++ /dev/null
@@ -1,6 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public interface ITokenParser
-{
-    Token? Parse(TokenizerContext text);
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs b/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs
deleted file mode 100644
index 5b6918128..000000000
--- a/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public class ItalicParser : ITokenParser
-{
-    public Token? Parse(TokenizerContext context)
-    {
-        throw new NotImplementedException();
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs b/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs
deleted file mode 100644
index 712fa1a0d..000000000
--- a/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public class ListItemParser : ITokenParser
-{
-    public Token? Parse(TokenizerContext text)
-    {
-        throw new NotImplementedException();
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Parsers/TextParser.cs b/cs/Markdown/Tokenizer/Parsers/TextParser.cs
deleted file mode 100644
index 0388a047c..000000000
--- a/cs/Markdown/Tokenizer/Parsers/TextParser.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-namespace Markdown.Tokenizer.Parsers;
-
-public class TextParser : ITokenParser
-{
-    public Token? Parse(TokenizerContext context)
-    {
-        throw new NotImplementedException();
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/BoldTag.cs b/cs/Markdown/Tokenizer/Tags/BoldTag.cs
new file mode 100644
index 000000000..37c8f150b
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/BoldTag.cs
@@ -0,0 +1,12 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class BoldTag : Token // Token of type Bold whose text is the "__" marker.
+{
+    public override TokenType TokenType => TokenType.Bold;
+
+    public BoldTag(TagStatus tagStatus) // the caller decides the initial status
+    {
+        Value = "__";
+        TagStatus = tagStatus;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/HeaderTag.cs b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs
new file mode 100644
index 000000000..6be13050e
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs
@@ -0,0 +1,12 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class HeaderTag : Token // Token of type Header whose text is the "# " marker.
+{
+    public override TokenType TokenType => TokenType.Header;
+
+    public HeaderTag()
+    {
+        TagStatus = TagStatus.Single; // always Single: the tag-pairing pass skips Single tags
+        Value = "# ";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/ItalicTag.cs b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs
new file mode 100644
index 000000000..70e251ca7
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs
@@ -0,0 +1,12 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class ItalicTag : Token // Token of type Italic whose text is the "_" marker.
+{
+    public override TokenType TokenType => TokenType.Italic;
+
+    public ItalicTag(TagStatus tagStatus) // the caller decides the initial status
+    {
+        Value = "_";
+        TagStatus = tagStatus;
+    }
+}
\ No newline at end of file
diff --git
a/cs/Markdown/Tokenizer/Tags/NewLineToken.cs b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs
new file mode 100644
index 000000000..273fa7684
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs
@@ -0,0 +1,11 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class NewLineToken : Token // Token of type NewLine whose text is "\n".
+{
+    public override TokenType TokenType => TokenType.NewLine;
+
+    public NewLineToken()
+    {
+        Value = "\n";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/SlashToken.cs b/cs/Markdown/Tokenizer/Tags/SlashToken.cs
new file mode 100644
index 000000000..a2ddd20e6
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/SlashToken.cs
@@ -0,0 +1,11 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class SlashToken : Token // Token of type Slash whose text is a single backslash.
+{
+    public override TokenType TokenType => TokenType.Slash;
+
+    public SlashToken()
+    {
+        Value = "\\";
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/TagStatus.cs b/cs/Markdown/Tokenizer/Tags/TagStatus.cs
new file mode 100644
index 000000000..4b2830560
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/TagStatus.cs
@@ -0,0 +1,12 @@
+namespace Markdown.Tokenizer.Tags;
+
+public enum TagStatus // No explicit values: Open is 0 and therefore the default of an unassigned TagStatus.
+{
+    Open,
+    Closed,
+    Broken, // assigned to escaped tokens and to tags left without a partner
+    Escaped, // assigned to backslashes that escaped something; such tokens are filtered out of the list
+    InWord, // provisional; the tokenizer later turns it into Open or Closed
+    Undefined,
+    Single // assigned by HeaderTag; skipped by the tag-pairing pass
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/TextToken.cs b/cs/Markdown/Tokenizer/Tags/TextToken.cs
new file mode 100644
index 000000000..4a5d697d4
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/TextToken.cs
@@ -0,0 +1,11 @@
+namespace Markdown.Tokenizer.Tags;
+
+public class TextToken : Token // Token of type String carrying the text passed to the constructor.
+{
+    public override TokenType TokenType => TokenType.String;
+
+    public TextToken(string value)
+    {
+        Value = value;
+    }
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/Token.cs b/cs/Markdown/Tokenizer/Tags/Token.cs
new file mode 100644
index 000000000..44fbfa9d5
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/Token.cs
@@ -0,0 +1,8 @@
+namespace Markdown.Tokenizer.Tags;
+
+public abstract class Token // Base type of everything the tokenizer emits: a type, a mutable status and the source text.
+{
+    public virtual TagStatus TagStatus { get; set; } // NOTE(review): defaults to Open (first enum member) for tokens that never assign it - confirm Undefined was not intended
+    public virtual TokenType TokenType { get; } // overridden by every concrete token in this folder
+    public string Value { get; set; } = string.Empty; // initialised so the non-nullable property is never null before a subclass sets it
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Tags/TokenType.cs b/cs/Markdown/Tokenizer/Tags/TokenType.cs
new file mode 100644
index 000000000..fea3f46ab
--- /dev/null
+++ b/cs/Markdown/Tokenizer/Tags/TokenType.cs
@@ -0,0 +1,11 @@
+namespace Markdown.Tokenizer.Tags;
+
+public enum TokenType // Kind of token; String is 0 and therefore the default.
+{
+    String,
+    Header,
+    Italic,
+    Bold,
+    Slash,
+    NewLine,
+}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/Token.cs b/cs/Markdown/Tokenizer/Token.cs
deleted file mode 100644
index 99fde149c..000000000
--- a/cs/Markdown/Tokenizer/Token.cs
+++ /dev/null
@@ -1,16 +0,0 @@
-namespace Markdown.Tokenizer;
-
-public class Token
-{
-    public TokenType Type { get; set; }
-    public string Content { get; set; } = string.Empty;
-    
-    public List<Token>? NestedTokens { get; set; }
-
-    public Token(TokenType type, string content, List<Token>? nestedTokens = null)
-    {
-        Type = type;
-        Content = content;
-        NestedTokens = nestedTokens;
-    }
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/TokenType.cs b/cs/Markdown/Tokenizer/TokenType.cs
deleted file mode 100644
index 8c7dc4e4a..000000000
--- a/cs/Markdown/Tokenizer/TokenType.cs
+++ /dev/null
@@ -1,10 +0,0 @@
-namespace Markdown.Tokenizer;
-
-public enum TokenType
-{
-    Italic,
-    Bold,
-    Header,
-    Text,
-    ItemList,
-}
\ No newline at end of file
diff --git a/cs/Markdown/Tokenizer/TokenizerContext.cs b/cs/Markdown/Tokenizer/TokenizerContext.cs
index 200c82969..7829feaf5 100644
--- a/cs/Markdown/Tokenizer/TokenizerContext.cs
+++ b/cs/Markdown/Tokenizer/TokenizerContext.cs
@@ -2,35 +2,19 @@
 
 public class TokenizerContext
 {
-    private readonly string text;
     private int position;
-    
+    private readonly string text;
     public TokenizerContext(string text)
     {
         this.text = text;
         position = 0;
     }
-    
     public bool IsEnd => position >= text.Length;
     public char Current => text[position];
-    public void MoveNext() => position++;
-
-    public string ReadWhile(Func<char, bool> predicate)
-    {
-        var start = position;
-        while (!IsEnd && predicate(Current))
-        {
-            MoveNext();
-        }
-
-        return text.Substring(start, position - start);
-    }
-
-    public bool Match(string pattern)
-    {
-        return text.Substring(position).StartsWith(pattern);
-    }
-
     public int Position => position;
-    public void ResetTo(int position) => this.position = position;
+    public int Length => text.Length; // total number of characters in the input
+    public void Advance() => position++; // no bounds check: callers are expected to test IsEnd before reading Current
+    public char? Previous => position > 0 ? text[position - 1] : null; // null at the first character
+    public char? Next => position < text.Length - 1 ? text[position + 1] : null; // null when no character follows Current
+    public char? NextNext => position < text.Length - 2 ? text[position + 2] : null; // null when fewer than two characters follow Current
 }
\ No newline at end of file
diff --git a/cs/NewMarkdown/NewMarkdown.csproj b/cs/NewMarkdown/NewMarkdown.csproj
new file mode 100644
index 000000000..c6cecb9b6
--- /dev/null
+++ b/cs/NewMarkdown/NewMarkdown.csproj
@@ -0,0 +1,16 @@
+
+
+
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
diff --git a/cs/NewMarkdown/Node.cs b/cs/NewMarkdown/Node.cs
new file mode 100644
index 000000000..9d710792f
--- /dev/null
+++ b/cs/NewMarkdown/Node.cs
@@ -0,0 +1,14 @@
+namespace NewMarkdown;
+
+public class Node // A typed node with optional text and an optional child list.
+{
+    public string? Text { get; set; }
+    public NodeType Type { get; set; }
+    public List<Node>? Children { get; set; } // stays null until a caller assigns it; element type restored to Node
+    
+    public Node(NodeType type, string? text = null)
+    {
+        Type = type;
+        Text = text;
+    }
+}
\ No newline at end of file
diff --git a/cs/NewMarkdown/NodeType.cs b/cs/NewMarkdown/NodeType.cs
new file mode 100644
index 000000000..3d0217835
--- /dev/null
+++ b/cs/NewMarkdown/NodeType.cs
@@ -0,0 +1,9 @@
+namespace NewMarkdown;
+
+public enum NodeType // Kind of a NewMarkdown.Node; Header is 0 and therefore the default.
+{
+    Header,
+    Italic,
+    Bold,
+    Text
+}
\ No newline at end of file
diff --git a/cs/NewMarkdown/Tag.cs b/cs/NewMarkdown/Tag.cs
new file mode 100644
index 000000000..14a22f5bf
--- /dev/null
+++ b/cs/NewMarkdown/Tag.cs
@@ -0,0 +1,6 @@
+namespace NewMarkdown;
+
+public class Tag // Empty placeholder: declares no members yet.
+{
+    
+}
\ No newline at end of file
diff --git a/cs/NewMarkdown/TextReader.cs b/cs/NewMarkdown/TextReader.cs
new file mode 100644
index 000000000..2706733d5
--- /dev/null
+++ b/cs/NewMarkdown/TextReader.cs
@@ -0,0 +1,17 @@
+namespace NewMarkdown.Lexer;
+
+
+public class TextReader // Forward-only cursor over a string.
+{
+    private readonly string text; // assigned only in the constructor
+    private int position;
+
+    public TextReader(string text)
+    {
+        this.text = text;
+    }
+
+    public bool IsEnd => position >= text.Length;
+    public void MoveNext(int step = 1) => position += step; // no bounds check; IsEnd becomes true once the cursor passes the last character
+    public char Current => text[position]; // throws when read at or past the end
+}
\ No newline at end of file
diff --git a/cs/NewMarkdown/Tokenizer.cs b/cs/NewMarkdown/Tokenizer.cs
new file mode 100644
index 000000000..fcc4e7ab7
--- /dev/null
+++ b/cs/NewMarkdown/Tokenizer.cs
@@ -0,0 +1,61 @@
+using System.Text;
+using TextReader = NewMarkdown.Lexer.TextReader;
+
+namespace NewMarkdown;
+
+public class Tokenizer
+{
+    private readonly List<Node> result = new List<Node>(); // NOTE(review): shared by every Tokenize call on this instance, so nodes accumulate across calls - confirm intended
+    public Tokenizer()
+    { }
+
+    // Splits the text into Header and Text nodes and returns the instance's node list.
+    public List<Node> Tokenize(string text)
+    {
+        var reader = new TextReader(text);
+        var buffer = new StringBuilder();
+        while (!reader.IsEnd)
+        {
+            if (TryParseHeader(reader, buffer))
+            {
+                continue;
+            }
+
+            // Not a header marker: keep the character as text and advance, so the loop always terminates.
+            buffer.Append(reader.Current);
+            reader.MoveNext();
+        }
+        result.Add(new Node(NodeType.Text, buffer.ToString()));
+        return result;
+    }
+
+    // If a backslash is encountered, check whether it escapes anything
+
+    // Consumes a "# " header marker at the reader position and records a Header node.
+    // Returns true when it consumed input; a '#' not followed by a space is consumed too and kept as text.
+    // NOTE(review): does not check that the marker starts a line - confirm the intended rule.
+    private bool TryParseHeader(TextReader reader, StringBuilder buffer)
+    {
+        if (reader.Current != '#')
+        {
+            return false;
+        }
+
+        reader.MoveNext();
+        if (reader.IsEnd || reader.Current != ' ')
+        {
+            buffer.Append('#');
+            return true;
+        }
+
+        reader.MoveNext();
+        if (buffer.Length > 0)
+        {
+            result.Add(new Node(NodeType.Text, buffer.ToString()));
+            buffer.Clear();
+        }
+
+        result.Add(new Node(NodeType.Header));
+        return true;
+    }
+}
\ No newline at end of file
diff --git a/cs/NewMarkdown/TokenizerTest.cs b/cs/NewMarkdown/TokenizerTest.cs
new file mode 100644
index 000000000..2395f8cf9
--- /dev/null
+++ b/cs/NewMarkdown/TokenizerTest.cs
@@ -0,0 +1,21 @@
+namespace NewMarkdown;
+
+[TestFixture]
+public class TokenizerTest
+{
+    [Test]
+    public void Test()
+    {
+        var tokenizer = new Tokenizer();
+        var result = tokenizer.Tokenize("This is a sample text.");
+        
+        foreach (var node in result)
+        {
+            Console.WriteLine($"{node.Type}: {node.Text}");
+        }
+
+        Assert.That(result, Has.Count.EqualTo(1)); // input without markup yields exactly one node
+        Assert.That(result[0].Type, Is.EqualTo(NodeType.Text));
+        Assert.That(result[0].Text, Is.EqualTo("This is a sample text."));
+    }
+}
\ No newline at end of file