From d5d124a4b29ae91a9b29d81b7894b6f5be75693c Mon Sep 17 00:00:00 2001 From: "kashin.aleksandr" Date: Sat, 23 Nov 2024 11:03:27 +0500 Subject: [PATCH 1/4] =?UTF-8?q?=D0=9D=D0=B0=D0=B1=D1=80=D0=BE=D1=81=D0=BE?= =?UTF-8?q?=D0=BA=20=D0=B0=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=82=D1=83?= =?UTF-8?q?=D1=80=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/IMd.cs | 6 ++++ cs/Markdown/Markdown.csproj | 9 +++++ cs/Markdown/Md.cs | 15 ++++++++ cs/Markdown/Render/HtmlRenderer.cs | 35 ++++++++++++++++++ cs/Markdown/Render/ITokenRenderer.cs | 8 +++++ cs/Markdown/Render/Renders/BoldRender.cs | 11 ++++++ cs/Markdown/Render/Renders/HeadRender.cs | 11 ++++++ cs/Markdown/Render/Renders/IItalicRender.cs | 11 ++++++ cs/Markdown/Render/Renders/ITokenRender.cs | 8 +++++ cs/Markdown/Render/Renders/ItemListRender.cs | 11 ++++++ cs/Markdown/Render/Renders/TextRender.cs | 11 ++++++ cs/Markdown/Tokenizer/ITokenizer.cs | 6 ++++ cs/Markdown/Tokenizer/MarkdownTokenizer.cs | 31 ++++++++++++++++ cs/Markdown/Tokenizer/Parsers/BoldParser.cs | 9 +++++ cs/Markdown/Tokenizer/Parsers/HeadParser.cs | 9 +++++ cs/Markdown/Tokenizer/Parsers/ITokenParser.cs | 6 ++++ cs/Markdown/Tokenizer/Parsers/ItalicParser.cs | 9 +++++ .../Tokenizer/Parsers/ListItemParser.cs | 9 +++++ cs/Markdown/Tokenizer/Parsers/TextParser.cs | 9 +++++ cs/Markdown/Tokenizer/Token.cs | 16 +++++++++ cs/Markdown/Tokenizer/TokenType.cs | 10 ++++++ cs/Markdown/Tokenizer/TokenizerContext.cs | 36 +++++++++++++++++++ cs/clean-code.sln | 6 ++++ 23 files changed, 292 insertions(+) create mode 100644 cs/Markdown/IMd.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/Md.cs create mode 100644 cs/Markdown/Render/HtmlRenderer.cs create mode 100644 cs/Markdown/Render/ITokenRenderer.cs create mode 100644 cs/Markdown/Render/Renders/BoldRender.cs create mode 100644 cs/Markdown/Render/Renders/HeadRender.cs create mode 100644 
cs/Markdown/Render/Renders/IItalicRender.cs create mode 100644 cs/Markdown/Render/Renders/ITokenRender.cs create mode 100644 cs/Markdown/Render/Renders/ItemListRender.cs create mode 100644 cs/Markdown/Render/Renders/TextRender.cs create mode 100644 cs/Markdown/Tokenizer/ITokenizer.cs create mode 100644 cs/Markdown/Tokenizer/MarkdownTokenizer.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/BoldParser.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/HeadParser.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/ITokenParser.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/ItalicParser.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/ListItemParser.cs create mode 100644 cs/Markdown/Tokenizer/Parsers/TextParser.cs create mode 100644 cs/Markdown/Tokenizer/Token.cs create mode 100644 cs/Markdown/Tokenizer/TokenType.cs create mode 100644 cs/Markdown/Tokenizer/TokenizerContext.cs diff --git a/cs/Markdown/IMd.cs b/cs/Markdown/IMd.cs new file mode 100644 index 000000000..28e9118e6 --- /dev/null +++ b/cs/Markdown/IMd.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface IMd +{ + string Render(string markdown); +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..20ebbe3c6 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + + diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs new file mode 100644 index 000000000..fe7fddf65 --- /dev/null +++ b/cs/Markdown/Md.cs @@ -0,0 +1,15 @@ +using Markdown.Render; +using Markdown.Tokenizer; + +namespace Markdown; + +public class Md : IMd +{ + private readonly ITokenizer tokenizer = new MarkdownTokenizer(); + private readonly ITokenRenderer renderer = new HtmlRenderer(); + + public string Render(string markdown) + { + return renderer.Render(tokenizer.Tokenize(markdown)); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/HtmlRenderer.cs b/cs/Markdown/Render/HtmlRenderer.cs 
new file mode 100644 index 000000000..3039ab64f --- /dev/null +++ b/cs/Markdown/Render/HtmlRenderer.cs @@ -0,0 +1,35 @@ +using System.Text; +using Markdown.Render.Renders; +using Markdown.Tokenizer; + +namespace Markdown.Render; + +public class HtmlRenderer : ITokenRenderer +{ + private readonly Dictionary _renders = new() + { + { TokenType.Italic , new ItalicRender() }, + { TokenType.Bold , new BoldRender() }, + { TokenType.Header, new HeadRender() }, + { TokenType.Text, new TextRender() }, + { TokenType.ItemList, new ItemListRender() } + }; + + public string Render(List tokens) + { + var sb = new StringBuilder(); + foreach (var token in tokens) + { + sb.Append(Render(token)); + } + + return sb.ToString(); + } + + private string Render(Token token) + { + return _renders[token.Type].Render(token); + } + + +} \ No newline at end of file diff --git a/cs/Markdown/Render/ITokenRenderer.cs b/cs/Markdown/Render/ITokenRenderer.cs new file mode 100644 index 000000000..a103dcdb8 --- /dev/null +++ b/cs/Markdown/Render/ITokenRenderer.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render; + +public interface ITokenRenderer +{ + string Render(List tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/BoldRender.cs b/cs/Markdown/Render/Renders/BoldRender.cs new file mode 100644 index 000000000..982dac8de --- /dev/null +++ b/cs/Markdown/Render/Renders/BoldRender.cs @@ -0,0 +1,11 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public class BoldRender : ITokenRender +{ + public string Render(Token token) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/HeadRender.cs b/cs/Markdown/Render/Renders/HeadRender.cs new file mode 100644 index 000000000..db25a44cc --- /dev/null +++ b/cs/Markdown/Render/Renders/HeadRender.cs @@ -0,0 +1,11 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public class HeadRender : ITokenRender +{ + 
public string Render(Token token) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/IItalicRender.cs b/cs/Markdown/Render/Renders/IItalicRender.cs new file mode 100644 index 000000000..f8df95237 --- /dev/null +++ b/cs/Markdown/Render/Renders/IItalicRender.cs @@ -0,0 +1,11 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public class ItalicRender : ITokenRender +{ + public string Render(Token token) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/ITokenRender.cs b/cs/Markdown/Render/Renders/ITokenRender.cs new file mode 100644 index 000000000..e50543ae4 --- /dev/null +++ b/cs/Markdown/Render/Renders/ITokenRender.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public interface ITokenRender +{ + string Render(Token token); +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/ItemListRender.cs b/cs/Markdown/Render/Renders/ItemListRender.cs new file mode 100644 index 000000000..dfca000e7 --- /dev/null +++ b/cs/Markdown/Render/Renders/ItemListRender.cs @@ -0,0 +1,11 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public class ItemListRender : ITokenRender +{ + public string Render(Token token) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/TextRender.cs b/cs/Markdown/Render/Renders/TextRender.cs new file mode 100644 index 000000000..30f3ed559 --- /dev/null +++ b/cs/Markdown/Render/Renders/TextRender.cs @@ -0,0 +1,11 @@ +using Markdown.Tokenizer; + +namespace Markdown.Render.Renders; + +public class TextRender : ITokenRender +{ + public string Render(Token token) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITokenizer.cs b/cs/Markdown/Tokenizer/ITokenizer.cs new file mode 100644 index 
000000000..cf10c2aef --- /dev/null +++ b/cs/Markdown/Tokenizer/ITokenizer.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer; + +public interface ITokenizer +{ + List Tokenize(string text); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs new file mode 100644 index 000000000..0af5da3fb --- /dev/null +++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs @@ -0,0 +1,31 @@ +using Markdown.Tokenizer.Parsers; + +namespace Markdown.Tokenizer; + +public class MarkdownTokenizer : ITokenizer +{ + private static readonly List parsers + = new List + { + new HeadParser(), + new BoldParser(), + new ItalicParser(), + new TextParser(), + new ListItemParser() + }; + + public List Tokenize(string text) + { + var context = new TokenizerContext(text); + var tokens = new List(); + + foreach (var parser in parsers) + { + var token = parser.Parse(context); + if(token is not null) + tokens.Add(token); + } + + return tokens; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/BoldParser.cs b/cs/Markdown/Tokenizer/Parsers/BoldParser.cs new file mode 100644 index 000000000..b95789213 --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/BoldParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Tokenizer.Parsers; + +public class BoldParser : ITokenParser +{ + public Token? Parse(TokenizerContext text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/HeadParser.cs b/cs/Markdown/Tokenizer/Parsers/HeadParser.cs new file mode 100644 index 000000000..55d8e4486 --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/HeadParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Tokenizer.Parsers; + +public class HeadParser : ITokenParser +{ + public Token? 
Parse(TokenizerContext context) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs b/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs new file mode 100644 index 000000000..225ac0447 --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Parsers; + +public interface ITokenParser +{ + Token? Parse(TokenizerContext text); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs b/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs new file mode 100644 index 000000000..5b6918128 --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Tokenizer.Parsers; + +public class ItalicParser : ITokenParser +{ + public Token? Parse(TokenizerContext context) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs b/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs new file mode 100644 index 000000000..712fa1a0d --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Tokenizer.Parsers; + +public class ListItemParser : ITokenParser +{ + public Token? Parse(TokenizerContext text) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/TextParser.cs b/cs/Markdown/Tokenizer/Parsers/TextParser.cs new file mode 100644 index 000000000..0388a047c --- /dev/null +++ b/cs/Markdown/Tokenizer/Parsers/TextParser.cs @@ -0,0 +1,9 @@ +namespace Markdown.Tokenizer.Parsers; + +public class TextParser : ITokenParser +{ + public Token? 
Parse(TokenizerContext context) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Token.cs b/cs/Markdown/Tokenizer/Token.cs new file mode 100644 index 000000000..99fde149c --- /dev/null +++ b/cs/Markdown/Tokenizer/Token.cs @@ -0,0 +1,16 @@ +namespace Markdown.Tokenizer; + +public class Token +{ + public TokenType Type { get; set; } + public string Content { get; set; } = string.Empty; + + public List? NestedTokens { get; set; } + + public Token(TokenType type, string content, List? nestedTokens = null) + { + Type = type; + Content = content; + NestedTokens = nestedTokens; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenType.cs b/cs/Markdown/Tokenizer/TokenType.cs new file mode 100644 index 000000000..8c7dc4e4a --- /dev/null +++ b/cs/Markdown/Tokenizer/TokenType.cs @@ -0,0 +1,10 @@ +namespace Markdown.Tokenizer; + +public enum TokenType +{ + Italic, + Bold, + Header, + Text, + ItemList, +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenizerContext.cs b/cs/Markdown/Tokenizer/TokenizerContext.cs new file mode 100644 index 000000000..200c82969 --- /dev/null +++ b/cs/Markdown/Tokenizer/TokenizerContext.cs @@ -0,0 +1,36 @@ +namespace Markdown.Tokenizer; + +public class TokenizerContext +{ + private readonly string text; + private int position; + + public TokenizerContext(string text) + { + this.text = text; + position = 0; + } + + public bool IsEnd => position >= text.Length; + public char Current => text[position]; + public void MoveNext() => position++; + + public string ReadWhile(Func predicate) + { + var start = position; + while (!IsEnd && predicate(Current)) + { + MoveNext(); + } + + return text.Substring(start, position - start); + } + + public bool Match(string pattern) + { + return text.Substring(position).StartsWith(pattern); + } + + public int Position => position; + public void ResetTo(int position) => this.position = position; +} \ No newline at end 
of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..253798549 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{BB96272E-4341-4D87-9C3F-98CCFBFF7332}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal From 593897bd6f9ca05c290abb1135f551f0be304ae4 Mon Sep 17 00:00:00 2001 From: "kashin.aleksandr" Date: Wed, 11 Dec 2024 16:07:07 +0500 Subject: [PATCH 2/4] =?UTF-8?q?=D0=A0=D0=B0=D0=B1=D0=BE=D1=87=D0=B5=D0=B5?= =?UTF-8?q?=20=D1=80=D0=B5=D1=88=D0=B5=D0=BD=D0=B8=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/{IMd.cs => IMarkdown.cs} | 2 +- cs/Markdown/Markdown.csproj | 11 + cs/Markdown/MarkdownRenderer.cs | 94 ++++++++ cs/Markdown/Md.cs | 15 -- cs/Markdown/Render/HtmlRenderer.cs | 33 ++- cs/Markdown/Render/ITokenRenderer.cs | 4 +- cs/Markdown/Render/Renders/BoldRender.cs | 11 - cs/Markdown/Render/Renders/HeadRender.cs | 11 - cs/Markdown/Render/Renders/IItalicRender.cs | 11 
- cs/Markdown/Render/Renders/ITokenRender.cs | 8 - cs/Markdown/Render/Renders/ItemListRender.cs | 11 - cs/Markdown/Render/Renders/TextRender.cs | 11 - cs/Markdown/Tests/Markdown/MarkdownTests.cs | 93 ++++++++ ...ts.SimpleHeader_Render_Verify.received.txt | 1 + ...ts.SimpleHeader_Render_Verify.verified.txt | 1 + ...ests.TwoHeaders_Render_Verify.received.txt | 1 + ...ests.TwoHeaders_Render_Verify.verified.txt | 1 + .../Tests/Tokenizer/BoldHandlerTests.cs | 40 ++++ .../Tests/Tokenizer/HeaderHandlerTests.cs | 37 ++++ .../Tests/Tokenizer/ItalicHandlerTests.cs | 63 ++++++ cs/Markdown/Tokenizer/Handlers/BoldHandler.cs | 31 +++ .../Tokenizer/Handlers/HeaderHandler.cs | 22 ++ cs/Markdown/Tokenizer/Handlers/IHandler.cs | 8 + .../Tokenizer/Handlers/ItalicHandler.cs | 35 +++ cs/Markdown/Tokenizer/ITokenizer.cs | 4 +- cs/Markdown/Tokenizer/MarkdownTokenizer.cs | 202 ++++++++++++++++-- cs/Markdown/Tokenizer/Nodes/BoldNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/HeaderNode.cs | 5 + cs/Markdown/Tokenizer/Nodes/ItalicNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/MainNode.cs | 6 + cs/Markdown/Tokenizer/Nodes/Node.cs | 8 + cs/Markdown/Tokenizer/Nodes/NodeType.cs | 6 + cs/Markdown/Tokenizer/Nodes/TextNode.cs | 6 + cs/Markdown/Tokenizer/Parsers/BoldParser.cs | 9 - cs/Markdown/Tokenizer/Parsers/HeadParser.cs | 9 - cs/Markdown/Tokenizer/Parsers/ITokenParser.cs | 6 - cs/Markdown/Tokenizer/Parsers/ItalicParser.cs | 9 - .../Tokenizer/Parsers/ListItemParser.cs | 9 - cs/Markdown/Tokenizer/Parsers/TextParser.cs | 9 - cs/Markdown/Tokenizer/Tags/BoldTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/HeaderTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/ItalicTag.cs | 12 ++ cs/Markdown/Tokenizer/Tags/NewLineToken.cs | 11 + cs/Markdown/Tokenizer/Tags/SlashToken.cs | 11 + cs/Markdown/Tokenizer/Tags/TagStatus.cs | 12 ++ cs/Markdown/Tokenizer/Tags/TextToken.cs | 11 + cs/Markdown/Tokenizer/Tags/Token.cs | 8 + cs/Markdown/Tokenizer/Tags/TokenType.cs | 11 + cs/Markdown/Tokenizer/Token.cs | 16 -- 
cs/Markdown/Tokenizer/TokenType.cs | 10 - cs/Markdown/Tokenizer/TokenizerContext.cs | 28 +-- cs/NewMarkdown/NewMarkdown.csproj | 16 ++ cs/NewMarkdown/Node.cs | 14 ++ cs/NewMarkdown/NodeType.cs | 9 + cs/NewMarkdown/Tag.cs | 6 + cs/NewMarkdown/TextReader.cs | 17 ++ cs/NewMarkdown/Tokenizer.cs | 30 +++ cs/NewMarkdown/TokenizerTest.cs | 17 ++ 58 files changed, 903 insertions(+), 215 deletions(-) rename cs/Markdown/{IMd.cs => IMarkdown.cs} (68%) create mode 100644 cs/Markdown/MarkdownRenderer.cs delete mode 100644 cs/Markdown/Md.cs delete mode 100644 cs/Markdown/Render/Renders/BoldRender.cs delete mode 100644 cs/Markdown/Render/Renders/HeadRender.cs delete mode 100644 cs/Markdown/Render/Renders/IItalicRender.cs delete mode 100644 cs/Markdown/Render/Renders/ITokenRender.cs delete mode 100644 cs/Markdown/Render/Renders/ItemListRender.cs delete mode 100644 cs/Markdown/Render/Renders/TextRender.cs create mode 100644 cs/Markdown/Tests/Markdown/MarkdownTests.cs create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt create mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt create mode 100644 cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs create mode 100644 cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs create mode 100644 cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/BoldHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/IHandler.cs create mode 100644 cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/BoldNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/HeaderNode.cs create mode 
100644 cs/Markdown/Tokenizer/Nodes/ItalicNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/MainNode.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/Node.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/NodeType.cs create mode 100644 cs/Markdown/Tokenizer/Nodes/TextNode.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/BoldParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/HeadParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ITokenParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ItalicParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/ListItemParser.cs delete mode 100644 cs/Markdown/Tokenizer/Parsers/TextParser.cs create mode 100644 cs/Markdown/Tokenizer/Tags/BoldTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/HeaderTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/ItalicTag.cs create mode 100644 cs/Markdown/Tokenizer/Tags/NewLineToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/SlashToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TagStatus.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TextToken.cs create mode 100644 cs/Markdown/Tokenizer/Tags/Token.cs create mode 100644 cs/Markdown/Tokenizer/Tags/TokenType.cs delete mode 100644 cs/Markdown/Tokenizer/Token.cs delete mode 100644 cs/Markdown/Tokenizer/TokenType.cs create mode 100644 cs/NewMarkdown/NewMarkdown.csproj create mode 100644 cs/NewMarkdown/Node.cs create mode 100644 cs/NewMarkdown/NodeType.cs create mode 100644 cs/NewMarkdown/Tag.cs create mode 100644 cs/NewMarkdown/TextReader.cs create mode 100644 cs/NewMarkdown/Tokenizer.cs create mode 100644 cs/NewMarkdown/TokenizerTest.cs diff --git a/cs/Markdown/IMd.cs b/cs/Markdown/IMarkdown.cs similarity index 68% rename from cs/Markdown/IMd.cs rename to cs/Markdown/IMarkdown.cs index 28e9118e6..7ac783395 100644 --- a/cs/Markdown/IMd.cs +++ b/cs/Markdown/IMarkdown.cs @@ -1,6 +1,6 @@ namespace Markdown; -public interface IMd +public interface IMarkdown { string Render(string markdown); } \ No newline 
at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 20ebbe3c6..fbc0a9283 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -6,4 +6,15 @@ enable + + + + + + + + + + + diff --git a/cs/Markdown/MarkdownRenderer.cs b/cs/Markdown/MarkdownRenderer.cs new file mode 100644 index 000000000..0006e5ff3 --- /dev/null +++ b/cs/Markdown/MarkdownRenderer.cs @@ -0,0 +1,94 @@ +using Markdown.Render; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; +using Markdown.Tokenizer.Tags; + +namespace Markdown; + +public class MarkdownRenderer : IMarkdown +{ + public string Render(string markdown) + { + var tokenizer = new MarkdownTokenizer(); + var renderer = new HtmlRenderer(); + var tokens = tokenizer.Tokenize(markdown); + var tree = ToTree(tokens); + return renderer.Render(tree); + } + + private Node ToTree(List tokens) + { + Node mainNode = new MainNode(); + Node currentNode = mainNode; + for (int i = 0; i < tokens.Count; i++) + { + if (tokens[i].TagStatus == TagStatus.Broken) + { + currentNode.Children.Add(new TextNode{Value = tokens[i].Value}); + continue; + } + + if (tokens[i] is ItalicTag tag) + { + if(tag.TagStatus == TagStatus.Open) + { + var node = new ItalicNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (tag.TagStatus == TagStatus.Closed) + { + currentNode = currentNode.Parent; + continue; + } + } + + if (tokens[i] is BoldTag boldTag) + { + if(boldTag.TagStatus == TagStatus.Open) + { + var node = new BoldNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (boldTag.TagStatus == TagStatus.Closed) + { + currentNode = currentNode.Parent; + continue; + } + } + + if (tokens[i] is HeaderTag) + { + var node = new HeaderNode(); + currentNode.Children.Add(node); + node.Parent = currentNode; + currentNode = node; + continue; + } + + if (tokens[i] is NewLineToken) + { + if 
(currentNode is HeaderNode) + { + currentNode = currentNode.Parent; + } + continue; + } + + if (tokens[i] is TextToken textToken) + { + currentNode.Children.Add(new TextNode { Value = textToken.Value }); + continue; + } + } + + return currentNode.Parent ?? currentNode; + } +} \ No newline at end of file diff --git a/cs/Markdown/Md.cs b/cs/Markdown/Md.cs deleted file mode 100644 index fe7fddf65..000000000 --- a/cs/Markdown/Md.cs +++ /dev/null @@ -1,15 +0,0 @@ -using Markdown.Render; -using Markdown.Tokenizer; - -namespace Markdown; - -public class Md : IMd -{ - private readonly ITokenizer tokenizer = new MarkdownTokenizer(); - private readonly ITokenRenderer renderer = new HtmlRenderer(); - - public string Render(string markdown) - { - return renderer.Render(tokenizer.Tokenize(markdown)); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/HtmlRenderer.cs b/cs/Markdown/Render/HtmlRenderer.cs index 3039ab64f..63d9d72ac 100644 --- a/cs/Markdown/Render/HtmlRenderer.cs +++ b/cs/Markdown/Render/HtmlRenderer.cs @@ -1,35 +1,28 @@ using System.Text; -using Markdown.Render.Renders; -using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; namespace Markdown.Render; public class HtmlRenderer : ITokenRenderer { - private readonly Dictionary _renders = new() - { - { TokenType.Italic , new ItalicRender() }, - { TokenType.Bold , new BoldRender() }, - { TokenType.Header, new HeadRender() }, - { TokenType.Text, new TextRender() }, - { TokenType.ItemList, new ItemListRender() } - }; - - public string Render(List tokens) + public string Render(Node tokens) { var sb = new StringBuilder(); - foreach (var token in tokens) - { - sb.Append(Render(token)); - } + foreach (var token in tokens.Children) + sb.Append(RenderToken(token)); return sb.ToString(); } - private string Render(Token token) + private string? RenderToken(Node node) { - return _renders[token.Type].Render(token); + return node switch + { + TextNode textNode => textNode.Value, + HeaderNode => $"
<h1>{Render(node)}</h1>
", + ItalicNode => $"{Render(node)}", + BoldNode => $"{Render(node)}", + _ => throw new Exception($"Unknown token type: {node.GetType()}") + }; } - - } \ No newline at end of file diff --git a/cs/Markdown/Render/ITokenRenderer.cs b/cs/Markdown/Render/ITokenRenderer.cs index a103dcdb8..12d3d2928 100644 --- a/cs/Markdown/Render/ITokenRenderer.cs +++ b/cs/Markdown/Render/ITokenRenderer.cs @@ -1,8 +1,8 @@ -using Markdown.Tokenizer; +using Markdown.Tokenizer.Nodes; namespace Markdown.Render; public interface ITokenRenderer { - string Render(List tokens); + string Render(Node tokens); } \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/BoldRender.cs b/cs/Markdown/Render/Renders/BoldRender.cs deleted file mode 100644 index 982dac8de..000000000 --- a/cs/Markdown/Render/Renders/BoldRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class BoldRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/HeadRender.cs b/cs/Markdown/Render/Renders/HeadRender.cs deleted file mode 100644 index db25a44cc..000000000 --- a/cs/Markdown/Render/Renders/HeadRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class HeadRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/IItalicRender.cs b/cs/Markdown/Render/Renders/IItalicRender.cs deleted file mode 100644 index f8df95237..000000000 --- a/cs/Markdown/Render/Renders/IItalicRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class ItalicRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git 
a/cs/Markdown/Render/Renders/ITokenRender.cs b/cs/Markdown/Render/Renders/ITokenRender.cs deleted file mode 100644 index e50543ae4..000000000 --- a/cs/Markdown/Render/Renders/ITokenRender.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public interface ITokenRender -{ - string Render(Token token); -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/ItemListRender.cs b/cs/Markdown/Render/Renders/ItemListRender.cs deleted file mode 100644 index dfca000e7..000000000 --- a/cs/Markdown/Render/Renders/ItemListRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class ItemListRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Render/Renders/TextRender.cs b/cs/Markdown/Render/Renders/TextRender.cs deleted file mode 100644 index 30f3ed559..000000000 --- a/cs/Markdown/Render/Renders/TextRender.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Tokenizer; - -namespace Markdown.Render.Renders; - -public class TextRender : ITokenRender -{ - public string Render(Token token) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/MarkdownTests.cs b/cs/Markdown/Tests/Markdown/MarkdownTests.cs new file mode 100644 index 000000000..a2e1df0c5 --- /dev/null +++ b/cs/Markdown/Tests/Markdown/MarkdownTests.cs @@ -0,0 +1,93 @@ +namespace Markdown.Tests.Markdown; + +[TestFixture] +public class MarkdownTests +{ + private static readonly VerifySettings Settings = new(); + private static readonly MarkdownRenderer Renderer = new(); + + [OneTimeSetUp] + public void OneTimeSetUp() + { + Settings.UseDirectory("snapshots"); + } + + [TestCaseSource(nameof(ItalicTestCases))] + public string Test_1(string input) => Renderer.Render(input); + + private static TestCaseData[] ItalicTestCases = + [ + 
new TestCaseData("# Header").Returns("
<h1>Header</h1>
"), + new TestCaseData("\\# Header").Returns("# Header"), + new TestCaseData("\\\\# Header").Returns("\\
<h1>Header</h1>
"), + new TestCaseData("_Italic text_").Returns("Italic text"), + new TestCaseData("\\_Text_").Returns("_Text_"), + new TestCaseData("\\\\_Italic text_").Returns("\\Italic text"), + new TestCaseData("_Italic text").Returns("_Italic text"), + new TestCaseData("Italic text_").Returns("Italic text_"), + new TestCaseData("Italic_ text_").Returns("Italic_ text_"), + new TestCaseData("_Italic _text").Returns("_Italic _text"), + new TestCaseData("_нач_але").Returns("начале"), + new TestCaseData("сер_еди_не").Returns("середине"), + new TestCaseData("цифры_1_12_3").Returns("цифры_1_12_3"), + new TestCaseData("кон_це._").Returns("конце."), + new TestCaseData("в ра_зных сл_овах не").Returns("в ра_зных сл_овах не"), + new TestCaseData("__bold__").Returns("bold"), + new TestCaseData("_Text__").Returns("_Text__"), + new TestCaseData("__Text_").Returns("__Text_"), + new TestCaseData("__Italic __text").Returns("__Italic __text"), + new TestCaseData("__два _один_ может__").Returns("два один может"), + new TestCaseData("_одинарного __двойное__ не_").Returns( "одинарного __двойное__ не") + ]; + + private static Task Verify(string target) => + Verifier.Verify(target, Settings); + + [Test] + public void SimpleText_Render_Verify() => + Verify(Renderer.Render("Text")); + + [Test] + public void EscapedCharacter_Render_Verify() => + Verify(Renderer.Render(@"\_Text_")); + + [Test] + public void ItalicText_Render_Verify() => + Verify(Renderer.Render("_Italic text_")); + + [Test] + public void BoldText_Render_Verify() => + Verify(Renderer.Render("__Bold text__")); + + [Test] + public void BoldWithItalicText_Render_Verify() => + Verify(Renderer.Render("__Bold _with italic_ text__")); + + [Test] + public void SimpleHeader_Render_Verify() => + Verify(Renderer.Render("# Header")); + + [Test] + public void TwoHeaders_Render_Verify() => + Verify(Renderer.Render("# Header one \n# Header two")); + // + // [Test] + // public void HeaderWithItalic_Render_Verify() => + // Verify(Renderer.Render("# 
Header with _italic text_")); + // + // [Test] + // public void HeaderWithBoldAndItalic_Render_Verify() => + // Verify(Renderer.Render("# Header with _italic_ and __bold__ text")); + // + // [Test] + // public void HeaderWithItalicInBold_Render_Verify() => + // Verify(Renderer.Render("# Header ___italic_ in bold__ text")); + // + // [Test] + // public void SimpleList_Render_Verify() => + // Verify(Renderer.Render("- item1\n- item2")); + // + // [Test] + // public void ListWithItalicAndBold_Render_Verify() => + // Verify(Renderer.Render("- _item1_\n- __item2__")); +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt new file mode 100644 index 000000000..35ba349aa --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt @@ -0,0 +1 @@ +

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt new file mode 100644 index 000000000..35ba349aa --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt @@ -0,0 +1 @@ +

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt new file mode 100644 index 000000000..71b2c8c9a --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt @@ -0,0 +1 @@ +

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt new file mode 100644 index 000000000..71b2c8c9a --- /dev/null +++ b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt @@ -0,0 +1 @@ +

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs new file mode 100644 index 000000000..1ceff7c41 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs @@ -0,0 +1,40 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class BoldHandlerTests +{ + [TestCaseSource(nameof(BoldTokenSource))] + public void BoldTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + public static IEnumerable<(string input, Token[] result)> BoldTokenSource() + { + yield return ("__abc__", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed)]); + + yield return ("_abc__", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed)]); + + yield return ("__abc_", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed)]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs new file mode 100644 index 000000000..2fc9cce02 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs @@ -0,0 +1,37 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class HeaderHandlerTests +{ + [TestCaseSource(nameof(HeaderTokenSource))] + public void HeaderTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < 
testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + private static IEnumerable<(string input, Token[] tags)> HeaderTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("# abc", [new HeaderTag(), new TextToken("abc")]); + yield return ("f# abc", [new TextToken("f#"),new TextToken(" abc")]); + yield return ("\\# abc", [new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("\\\\# abc", [new SlashToken(), new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("# abc\n# qwe", [ + new HeaderTag(), + new TextToken("abc"), + new NewLineToken(), + new HeaderTag(), + new TextToken("qwe")]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs new file mode 100644 index 000000000..a07f4f2fd --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs @@ -0,0 +1,63 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + + +namespace Markdown.Tests.Tokenizer; + +[TestFixture] +public class ItalicParserTests +{ + [TestCaseSource(nameof(ItalicTokenSource))] + public void ItalicTokenizerTests((string input, Token[] tags) testCase) + { + var tokenizer = new MarkdownTokenizer(); + var res = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + res[i].Value.Should().Be(testCase.tags[i].Value); + res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + private static IEnumerable<(string input, Token[] tags)> ItalicTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("_abc", [new ItalicTag(TagStatus.Open), new TextToken("abc")]); + yield return ("abc_", [new TextToken("abc"), new ItalicTag(TagStatus.Closed)]); + yield return ("a_bc_", [ + new TextToken("a"), + new 
ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed)]); + yield return ("_a_bc", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc")]); + + yield return ("_a_bc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed)]); + + yield return ("_abc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed)]); + + yield return ("\\_abc", [ + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc")]); + + yield return ("\\\\_abc", [ + new SlashToken(), + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc")]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs new file mode 100644 index 000000000..e046b40d8 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs @@ -0,0 +1,31 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class BoldHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if (symbol != '_') + return null; + + if (ctx.Next != '_') + return null; + + if ((ctx.Position == 0 || ctx.Previous == ' ') && ctx.NextNext != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Open); + } + + if (ctx.Previous != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Closed); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs new file mode 100644 index 000000000..e86a4fc24 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs @@ -0,0 +1,22 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class HeaderHandler : IHandler +{ + public Token? 
ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if(symbol != '#') + return null; + + if ((ctx.Next == ' ' && (ctx.Previous == '\n' || ctx.Position == 0)) || (ctx.Previous == '\\')) + { + ctx.Advance(); + return new HeaderTag(); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/IHandler.cs b/cs/Markdown/Tokenizer/Handlers/IHandler.cs new file mode 100644 index 000000000..8ede8da56 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/IHandler.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public interface IHandler +{ + Token? ProceedSymbol(TokenizerContext context); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs new file mode 100644 index 000000000..8af2fb70f --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs @@ -0,0 +1,35 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +public class ItalicHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if(symbol != '_') + return null; + + if(ctx.Next == '_') + return null; + + if (char.IsDigit(ctx.Previous ?? ' ') || char.IsDigit(ctx.Next ?? 
' ')) + return null; + + if (ctx.Position == 0 || ctx.Previous == ' ' || ctx.Previous == '\\') + { + return new ItalicTag(TagStatus.Open); + } + + if (ctx.Previous != ' ' && (ctx.Next == ' ' || ctx.Length - 1 == ctx.Position)) + { + return new ItalicTag(TagStatus.Closed); + } + + if (ctx.Previous != ' ' && ctx.Next != ' ' ) + return new ItalicTag(TagStatus.InWord); + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITokenizer.cs b/cs/Markdown/Tokenizer/ITokenizer.cs index cf10c2aef..a6c3a9d42 100644 --- a/cs/Markdown/Tokenizer/ITokenizer.cs +++ b/cs/Markdown/Tokenizer/ITokenizer.cs @@ -1,4 +1,6 @@ -namespace Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; public interface ITokenizer { diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs index 0af5da3fb..feba3617f 100644 --- a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs +++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs @@ -1,31 +1,205 @@ -using Markdown.Tokenizer.Parsers; +using System.Text; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; +using Token = Markdown.Tokenizer.Tags.Token; namespace Markdown.Tokenizer; public class MarkdownTokenizer : ITokenizer { - private static readonly List parsers - = new List + private readonly StringBuilder buffer = new(); + private List tags = new(); + private readonly Stack tagStack = new(); + private readonly List handlers = new() { - new HeadParser(), - new BoldParser(), - new ItalicParser(), - new TextParser(), - new ListItemParser() + new HeaderHandler(), + new ItalicHandler(), + new BoldHandler(), }; public List Tokenize(string text) { var context = new TokenizerContext(text); - var tokens = new List(); + while (!context.IsEnd) + { + if (context.Current == '\n') + { + FlushBuffer(); + var token = new NewLineToken(); + tags.Add(token); + context.Advance(); + continue; + } + if (context.Current == ' ') + { + if (buffer.Length > 0) + { + 
tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } + buffer.Append(context.Current); + context.Advance(); + continue; + } + if (context.Current == '\\') + { + FlushBuffer(); + + tags.Add(new SlashToken()); + context.Advance(); + continue; + } + + bool flag = false; + foreach (var handler in handlers) + { + var tag = handler.ProceedSymbol(context); + if (tag != null) + { + if (buffer.Length > 0) + { + var token = new TextToken(buffer.ToString()); + tags.Add(token); + buffer.Clear(); + } + + tags.Add(tag); + tagStack.Push(tag); + flag = true; + break; + } + } + + if (flag == false) + { + buffer.Append(context.Current); + } + + context.Advance(); + } + + FlushBuffer(); + ProceedEscaped(); + ProceedInWords(); + ProceedTags(); + return tags; + } + + private void ProceedInWords() + { + for (var i = 0; i < tags.Count; i++) + { + var current = tags[i]; + if (current.TagStatus == TagStatus.InWord) + { + if (i - 2 >= 0) + { + if (tags[i - 1].TokenType == TokenType.String + && tags[i - 2].TagStatus == TagStatus.Open) + { + current.TagStatus = TagStatus.Closed; + } + } + + if (i + 2 < tags.Count) + { + if (tags[i + 1].TokenType == TokenType.String) + { + if (tags[i + 2].TagStatus == TagStatus.Closed) + { + current.TagStatus = TagStatus.Open; + } + else if (tags[i + 2].TagStatus == TagStatus.InWord) + { + current.TagStatus = TagStatus.Open; + tags[i + 2].TagStatus = TagStatus.Closed; + } + } + } + } + } + } + + private void ProceedEscaped() + { + for (var i = 0; i < tags.Count - 1; i++) + { + var current = tags[i]; + var next = tags[i + 1]; + if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken) + { + if (next is { TokenType: TokenType.Slash }) + { + current.TagStatus = TagStatus.Escaped; + next.TagStatus = TagStatus.Broken; + } + else if (next is { TagStatus: TagStatus.Open or TagStatus.Closed or TagStatus.Single }) + { + next.TagStatus = TagStatus.Broken; + current.TagStatus = TagStatus.Escaped; + } + } + } + + tags = tags.Where(t 
=> t.TagStatus != TagStatus.Escaped).ToList(); + } + + private void ProceedTags() + { + var tempStack = new Stack(); + + while (tagStack.Count > 0) + { + var current = tagStack.Pop(); + + if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single) + { + if (tempStack.Count > 0) + { + var previousTag = tempStack.Peek(); + + if (previousTag.TokenType == current.TokenType) + { + if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open) + { + tempStack.Pop(); + } + else + { + tempStack.Push(current); + } + } + else + { + if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic) + { + current.TagStatus = TagStatus.Broken; + } + else + { + tempStack.Push(current); + } + } + } + else + { + tempStack.Push(current); + } + } + } - foreach (var parser in parsers) + while (tempStack.Count > 0) { - var token = parser.Parse(context); - if(token is not null) - tokens.Add(token); + tempStack.Pop().TagStatus = TagStatus.Broken; } + } - return tokens; + private void FlushBuffer() + { + if (buffer.Length > 0) + { + tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } } } \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/BoldNode.cs b/cs/Markdown/Tokenizer/Nodes/BoldNode.cs new file mode 100644 index 000000000..3e42a57e0 --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/BoldNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public class BoldNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs b/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs new file mode 100644 index 000000000..aaf3bfa87 --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs @@ -0,0 +1,5 @@ +namespace Markdown.Tokenizer.Nodes; + +public class HeaderNode : Node +{ +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs b/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs new file mode 100644 index 000000000..528b72f7f --- 
/dev/null +++ b/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public class ItalicNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/MainNode.cs b/cs/Markdown/Tokenizer/Nodes/MainNode.cs new file mode 100644 index 000000000..f1db39b3c --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/MainNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public class MainNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/Node.cs b/cs/Markdown/Tokenizer/Nodes/Node.cs new file mode 100644 index 000000000..91ddd509a --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/Node.cs @@ -0,0 +1,8 @@ +namespace Markdown.Tokenizer.Nodes; + +public abstract class Node +{ + public string? Value { get; set; } + public List Children { get; } = new(); + public Node? Parent { get; set; } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/NodeType.cs b/cs/Markdown/Tokenizer/Nodes/NodeType.cs new file mode 100644 index 000000000..2d70e7319 --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/NodeType.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public enum NodeType +{ + Header +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/TextNode.cs b/cs/Markdown/Tokenizer/Nodes/TextNode.cs new file mode 100644 index 000000000..0ca76bc6f --- /dev/null +++ b/cs/Markdown/Tokenizer/Nodes/TextNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.Tokenizer.Nodes; + +public class TextNode : Node +{ + +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/BoldParser.cs b/cs/Markdown/Tokenizer/Parsers/BoldParser.cs deleted file mode 100644 index b95789213..000000000 --- a/cs/Markdown/Tokenizer/Parsers/BoldParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public class BoldParser : ITokenParser -{ - public Token? 
Parse(TokenizerContext text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/HeadParser.cs b/cs/Markdown/Tokenizer/Parsers/HeadParser.cs deleted file mode 100644 index 55d8e4486..000000000 --- a/cs/Markdown/Tokenizer/Parsers/HeadParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public class HeadParser : ITokenParser -{ - public Token? Parse(TokenizerContext context) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs b/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs deleted file mode 100644 index 225ac0447..000000000 --- a/cs/Markdown/Tokenizer/Parsers/ITokenParser.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public interface ITokenParser -{ - Token? Parse(TokenizerContext text); -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs b/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs deleted file mode 100644 index 5b6918128..000000000 --- a/cs/Markdown/Tokenizer/Parsers/ItalicParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public class ItalicParser : ITokenParser -{ - public Token? Parse(TokenizerContext context) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs b/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs deleted file mode 100644 index 712fa1a0d..000000000 --- a/cs/Markdown/Tokenizer/Parsers/ListItemParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public class ListItemParser : ITokenParser -{ - public Token? 
Parse(TokenizerContext text) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Parsers/TextParser.cs b/cs/Markdown/Tokenizer/Parsers/TextParser.cs deleted file mode 100644 index 0388a047c..000000000 --- a/cs/Markdown/Tokenizer/Parsers/TextParser.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown.Tokenizer.Parsers; - -public class TextParser : ITokenParser -{ - public Token? Parse(TokenizerContext context) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/BoldTag.cs b/cs/Markdown/Tokenizer/Tags/BoldTag.cs new file mode 100644 index 000000000..37c8f150b --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/BoldTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +public class BoldTag : Token +{ + public override TokenType TokenType => TokenType.Bold; + + public BoldTag(TagStatus tagStatus) + { + Value = "__"; + TagStatus = tagStatus; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/HeaderTag.cs b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs new file mode 100644 index 000000000..6be13050e --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +public class HeaderTag : Token +{ + public override TokenType TokenType => TokenType.Header; + + public HeaderTag() + { + TagStatus = TagStatus.Single; + Value = "# "; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/ItalicTag.cs b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs new file mode 100644 index 000000000..70e251ca7 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +public class ItalicTag : Token +{ + public override TokenType TokenType => TokenType.Italic; + + public ItalicTag(TagStatus tagStatus) + { + Value = "_"; + TagStatus = tagStatus; + } +} \ No newline at end of file diff --git 
a/cs/Markdown/Tokenizer/Tags/NewLineToken.cs b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs new file mode 100644 index 000000000..273fa7684 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +public class NewLineToken : Token +{ + public override TokenType TokenType => TokenType.NewLine; + + public NewLineToken() + { + Value = "\n"; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/SlashToken.cs b/cs/Markdown/Tokenizer/Tags/SlashToken.cs new file mode 100644 index 000000000..a2ddd20e6 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/SlashToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +public class SlashToken : Token +{ + public override TokenType TokenType => TokenType.Slash; + + public SlashToken() + { + Value = "\\"; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TagStatus.cs b/cs/Markdown/Tokenizer/Tags/TagStatus.cs new file mode 100644 index 000000000..4b2830560 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TagStatus.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +public enum TagStatus +{ + Open, + Closed, + Broken, + Escaped, + InWord, + Undefined, + Single +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TextToken.cs b/cs/Markdown/Tokenizer/Tags/TextToken.cs new file mode 100644 index 000000000..4a5d697d4 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TextToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +public class TextToken : Token +{ + public override TokenType TokenType => TokenType.String; + + public TextToken(string value) + { + Value = value; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/Token.cs b/cs/Markdown/Tokenizer/Tags/Token.cs new file mode 100644 index 000000000..44fbfa9d5 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/Token.cs @@ -0,0 +1,8 @@ +namespace Markdown.Tokenizer.Tags; + +public abstract class Token +{ + public virtual 
TagStatus TagStatus { get; set; } + public virtual TokenType TokenType { get; } + public string Value { get; set; } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TokenType.cs b/cs/Markdown/Tokenizer/Tags/TokenType.cs new file mode 100644 index 000000000..fea3f46ab --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TokenType.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +public enum TokenType +{ + String, + Header, + Italic, + Bold, + Slash, + NewLine, +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Token.cs b/cs/Markdown/Tokenizer/Token.cs deleted file mode 100644 index 99fde149c..000000000 --- a/cs/Markdown/Tokenizer/Token.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace Markdown.Tokenizer; - -public class Token -{ - public TokenType Type { get; set; } - public string Content { get; set; } = string.Empty; - - public List? NestedTokens { get; set; } - - public Token(TokenType type, string content, List? nestedTokens = null) - { - Type = type; - Content = content; - NestedTokens = nestedTokens; - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenType.cs b/cs/Markdown/Tokenizer/TokenType.cs deleted file mode 100644 index 8c7dc4e4a..000000000 --- a/cs/Markdown/Tokenizer/TokenType.cs +++ /dev/null @@ -1,10 +0,0 @@ -namespace Markdown.Tokenizer; - -public enum TokenType -{ - Italic, - Bold, - Header, - Text, - ItemList, -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenizerContext.cs b/cs/Markdown/Tokenizer/TokenizerContext.cs index 200c82969..7829feaf5 100644 --- a/cs/Markdown/Tokenizer/TokenizerContext.cs +++ b/cs/Markdown/Tokenizer/TokenizerContext.cs @@ -2,35 +2,19 @@ public class TokenizerContext { - private readonly string text; private int position; - + private readonly string text; public TokenizerContext(string text) { this.text = text; position = 0; } - public bool IsEnd => position >= text.Length; public char Current => text[position]; - public void MoveNext() => 
position++; - - public string ReadWhile(Func predicate) - { - var start = position; - while (!IsEnd && predicate(Current)) - { - MoveNext(); - } - - return text.Substring(start, position - start); - } - - public bool Match(string pattern) - { - return text.Substring(position).StartsWith(pattern); - } - public int Position => position; - public void ResetTo(int position) => this.position = position; + public int Length => text.Length; + public void Advance() => position++; + public char? Previous => position > 0 ? text[position - 1] : null; + public char? Next => position < text.Length - 1 ? text[position + 1] : null; + public char? NextNext => position < text.Length - 2 ? text[position + 2] : null; } \ No newline at end of file diff --git a/cs/NewMarkdown/NewMarkdown.csproj b/cs/NewMarkdown/NewMarkdown.csproj new file mode 100644 index 000000000..c6cecb9b6 --- /dev/null +++ b/cs/NewMarkdown/NewMarkdown.csproj @@ -0,0 +1,16 @@ + + + + net8.0 + enable + enable + + + + + + + + + + diff --git a/cs/NewMarkdown/Node.cs b/cs/NewMarkdown/Node.cs new file mode 100644 index 000000000..9d710792f --- /dev/null +++ b/cs/NewMarkdown/Node.cs @@ -0,0 +1,14 @@ +namespace NewMarkdown; + +public class Node +{ + public string? Text { get; set; } + public NodeType Type { get; set; } + public List? Children { get; set; } + + public Node(NodeType type, string? 
text = null) + { + Type = type; + Text = text; + } +} \ No newline at end of file diff --git a/cs/NewMarkdown/NodeType.cs b/cs/NewMarkdown/NodeType.cs new file mode 100644 index 000000000..3d0217835 --- /dev/null +++ b/cs/NewMarkdown/NodeType.cs @@ -0,0 +1,9 @@ +namespace NewMarkdown; + +public enum NodeType +{ + Header, + Italic, + Bold, + Text +} \ No newline at end of file diff --git a/cs/NewMarkdown/Tag.cs b/cs/NewMarkdown/Tag.cs new file mode 100644 index 000000000..14a22f5bf --- /dev/null +++ b/cs/NewMarkdown/Tag.cs @@ -0,0 +1,6 @@ +namespace NewMarkdown; + +public class Tag +{ + +} \ No newline at end of file diff --git a/cs/NewMarkdown/TextReader.cs b/cs/NewMarkdown/TextReader.cs new file mode 100644 index 000000000..2706733d5 --- /dev/null +++ b/cs/NewMarkdown/TextReader.cs @@ -0,0 +1,17 @@ +namespace NewMarkdown.Lexer; + + +public class TextReader +{ + private string text; + private int position; + + public TextReader(string text) + { + this.text = text; + } + + public bool IsEnd => position >= text.Length; + public void MoveNext(int step = 1) => position += step; + public char Current => text[position]; +} \ No newline at end of file diff --git a/cs/NewMarkdown/Tokenizer.cs b/cs/NewMarkdown/Tokenizer.cs new file mode 100644 index 000000000..fcc4e7ab7 --- /dev/null +++ b/cs/NewMarkdown/Tokenizer.cs @@ -0,0 +1,30 @@ +using System.Text; +using TextReader = NewMarkdown.Lexer.TextReader; + +namespace NewMarkdown; + +public class Tokenizer +{ + private readonly List result = new List(); + public Tokenizer() + { } + + public List Tokenize(string text) + { + var reader = new TextReader(text); + var buffer = new StringBuilder(); + while (!reader.IsEnd) + { + TryParseHeader(reader); + } + result.Add(new Node(NodeType.Text, buffer.ToString())); + return result; + } + + // Если попадается слэш, то проверяем экранирует ли он + + private void TryParseHeader(TextReader reader) + { + if(reader.Current == "#" && reader.) 
+ } +} \ No newline at end of file diff --git a/cs/NewMarkdown/TokenizerTest.cs b/cs/NewMarkdown/TokenizerTest.cs new file mode 100644 index 000000000..2395f8cf9 --- /dev/null +++ b/cs/NewMarkdown/TokenizerTest.cs @@ -0,0 +1,17 @@ +namespace NewMarkdown; + +[TestFixture] +public class TokenizerTest +{ + [Test] + public void Test() + { + var tokenizer = new Tokenizer(); + var result = tokenizer.Tokenize("This is a sample text."); + + foreach (var node in result) + { + Console.WriteLine($"{node.Type}: {node.Text}"); + } + } +} \ No newline at end of file From 6f02ada4b9de454b1386f0bc1dab3864373868e4 Mon Sep 17 00:00:00 2001 From: "kashin.aleksandr" Date: Wed, 11 Dec 2024 17:42:19 +0500 Subject: [PATCH 3/4] Refactor tokenizer --- cs/Markdown/MarkdownRenderer.cs | 9 +- .../Tests/Tokenizer/BoldHandlerTests.cs | 4 +- .../Tests/Tokenizer/HeaderHandlerTests.cs | 4 +- .../Tests/Tokenizer/ItalicHandlerTests.cs | 4 +- cs/Markdown/Tokenizer/HandlerManager.cs | 33 ++++ cs/Markdown/Tokenizer/IHandlerManager.cs | 9 + cs/Markdown/Tokenizer/ITagProcessor.cs | 8 + cs/Markdown/Tokenizer/MarkdownTokenizer.cs | 184 +++--------------- cs/Markdown/Tokenizer/TagProcessor.cs | 114 +++++++++++ 9 files changed, 209 insertions(+), 160 deletions(-) create mode 100644 cs/Markdown/Tokenizer/HandlerManager.cs create mode 100644 cs/Markdown/Tokenizer/IHandlerManager.cs create mode 100644 cs/Markdown/Tokenizer/ITagProcessor.cs create mode 100644 cs/Markdown/Tokenizer/TagProcessor.cs diff --git a/cs/Markdown/MarkdownRenderer.cs b/cs/Markdown/MarkdownRenderer.cs index 0006e5ff3..1327312d8 100644 --- a/cs/Markdown/MarkdownRenderer.cs +++ b/cs/Markdown/MarkdownRenderer.cs @@ -1,5 +1,6 @@ using Markdown.Render; using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Nodes; using Markdown.Tokenizer.Tags; @@ -7,9 +8,15 @@ namespace Markdown; public class MarkdownRenderer : IMarkdown { + private readonly List handlers = new() + { + new HeaderHandler(), + new ItalicHandler(), 
+ new BoldHandler(), + }; public string Render(string markdown) { - var tokenizer = new MarkdownTokenizer(); + var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); var renderer = new HtmlRenderer(); var tokens = tokenizer.Tokenize(markdown); var tree = ToTree(tokens); diff --git a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs index 1ceff7c41..a1519b5a0 100644 --- a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs @@ -1,5 +1,6 @@ using FluentAssertions; using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; namespace Markdown.Tests.Tokenizer; @@ -10,7 +11,8 @@ public class BoldHandlerTests [TestCaseSource(nameof(BoldTokenSource))] public void BoldTokenizerTests((string input, Token[] tags) testCase) { - var tokenizer = new MarkdownTokenizer(); + var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); var res = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) diff --git a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs index 2fc9cce02..5f81ce304 100644 --- a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs @@ -1,5 +1,6 @@ using FluentAssertions; using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; namespace Markdown.Tests.Tokenizer; @@ -10,7 +11,8 @@ public class HeaderHandlerTests [TestCaseSource(nameof(HeaderTokenSource))] public void HeaderTokenizerTests((string input, Token[] tags) testCase) { - var tokenizer = new MarkdownTokenizer(); + var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + var tokenizer = new MarkdownTokenizer(new 
HandlerManager(handlers), new TagProcessor()); var res = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) diff --git a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs index a07f4f2fd..c2b68e3c5 100644 --- a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs @@ -1,5 +1,6 @@ using FluentAssertions; using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; @@ -11,7 +12,8 @@ public class ItalicParserTests [TestCaseSource(nameof(ItalicTokenSource))] public void ItalicTokenizerTests((string input, Token[] tags) testCase) { - var tokenizer = new MarkdownTokenizer(); + var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); var res = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) diff --git a/cs/Markdown/Tokenizer/HandlerManager.cs b/cs/Markdown/Tokenizer/HandlerManager.cs new file mode 100644 index 000000000..acc5f8969 --- /dev/null +++ b/cs/Markdown/Tokenizer/HandlerManager.cs @@ -0,0 +1,33 @@ +using System.Text; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +public class HandlerManager(IEnumerable handlers) : IHandlerManager +{ + private readonly List handlers = handlers.ToList(); + + public void TryHandle(TokenizerContext context, StringBuilder buffer, List tags, Stack tagStack) + { + foreach (var handler in handlers) + { + var tag = handler.ProceedSymbol(context); + if (tag != null) + { + if (buffer.Length > 0) + { + var token = new TextToken(buffer.ToString()); + tags.Add(token); + buffer.Clear(); + } + + tags.Add(tag); + tagStack.Push(tag); + return; + } + } + + buffer.Append(context.Current); + } +} \ No newline at end of file diff --git 
a/cs/Markdown/Tokenizer/IHandlerManager.cs b/cs/Markdown/Tokenizer/IHandlerManager.cs new file mode 100644 index 000000000..accf44f2c --- /dev/null +++ b/cs/Markdown/Tokenizer/IHandlerManager.cs @@ -0,0 +1,9 @@ +using System.Text; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +public interface IHandlerManager +{ + void TryHandle(TokenizerContext context, StringBuilder buffer, List tags, Stack tagStack); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITagProcessor.cs b/cs/Markdown/Tokenizer/ITagProcessor.cs new file mode 100644 index 000000000..7dd70d539 --- /dev/null +++ b/cs/Markdown/Tokenizer/ITagProcessor.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +public interface ITagProcessor +{ + void Process(List tags, Stack tagStack); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs index feba3617f..1d30c8779 100644 --- a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs +++ b/cs/Markdown/Tokenizer/MarkdownTokenizer.cs @@ -5,192 +5,64 @@ namespace Markdown.Tokenizer; -public class MarkdownTokenizer : ITokenizer +public class MarkdownTokenizer(IHandlerManager handlerManager, ITagProcessor tagProcessor) : ITokenizer { private readonly StringBuilder buffer = new(); private List tags = new(); private readonly Stack tagStack = new(); - private readonly List handlers = new() - { - new HeaderHandler(), - new ItalicHandler(), - new BoldHandler(), - }; public List Tokenize(string text) { var context = new TokenizerContext(text); while (!context.IsEnd) { - if (context.Current == '\n') - { - FlushBuffer(); - var token = new NewLineToken(); - tags.Add(token); - context.Advance(); - continue; - } - if (context.Current == ' ') - { - if (buffer.Length > 0) - { - tags.Add(new TextToken(buffer.ToString())); - buffer.Clear(); - } - buffer.Append(context.Current); - context.Advance(); - continue; - } - if (context.Current == '\\') - { 
- FlushBuffer(); + if(TryProceedSpecialSymbol(context)) continue; - tags.Add(new SlashToken()); - context.Advance(); - continue; - } - - bool flag = false; - foreach (var handler in handlers) - { - var tag = handler.ProceedSymbol(context); - if (tag != null) - { - if (buffer.Length > 0) - { - var token = new TextToken(buffer.ToString()); - tags.Add(token); - buffer.Clear(); - } - - tags.Add(tag); - tagStack.Push(tag); - flag = true; - break; - } - } - - if (flag == false) - { - buffer.Append(context.Current); - } + handlerManager.TryHandle(context, buffer, tags, tagStack); context.Advance(); } FlushBuffer(); - ProceedEscaped(); - ProceedInWords(); - ProceedTags(); + + tagProcessor.Process(tags, tagStack); + return tags; } - private void ProceedInWords() + private bool TryProceedSpecialSymbol(TokenizerContext context) { - for (var i = 0; i < tags.Count; i++) + switch (context.Current) { - var current = tags[i]; - if (current.TagStatus == TagStatus.InWord) + case '\n': { - if (i - 2 >= 0) - { - if (tags[i - 1].TokenType == TokenType.String - && tags[i - 2].TagStatus == TagStatus.Open) - { - current.TagStatus = TagStatus.Closed; - } - } + FlushBuffer(); + var token = new NewLineToken(); + tags.Add(token); + context.Advance(); - if (i + 2 < tags.Count) - { - if (tags[i + 1].TokenType == TokenType.String) - { - if (tags[i + 2].TagStatus == TagStatus.Closed) - { - current.TagStatus = TagStatus.Open; - } - else if (tags[i + 2].TagStatus == TagStatus.InWord) - { - current.TagStatus = TagStatus.Open; - tags[i + 2].TagStatus = TagStatus.Closed; - } - } - } + return true; } - } - } - - private void ProceedEscaped() - { - for (var i = 0; i < tags.Count - 1; i++) - { - var current = tags[i]; - var next = tags[i + 1]; - if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken) + case ' ': { - if (next is { TokenType: TokenType.Slash }) - { - current.TagStatus = TagStatus.Escaped; - next.TagStatus = TagStatus.Broken; - } - else if (next is { TagStatus: 
TagStatus.Open or TagStatus.Closed or TagStatus.Single }) + if (buffer.Length > 0) { - next.TagStatus = TagStatus.Broken; - current.TagStatus = TagStatus.Escaped; + tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); } - } - } - - tags = tags.Where(t => t.TagStatus != TagStatus.Escaped).ToList(); - } - - private void ProceedTags() - { - var tempStack = new Stack(); - - while (tagStack.Count > 0) - { - var current = tagStack.Pop(); - - if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single) - { - if (tempStack.Count > 0) - { - var previousTag = tempStack.Peek(); + buffer.Append(context.Current); + context.Advance(); - if (previousTag.TokenType == current.TokenType) - { - if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open) - { - tempStack.Pop(); - } - else - { - tempStack.Push(current); - } - } - else - { - if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic) - { - current.TagStatus = TagStatus.Broken; - } - else - { - tempStack.Push(current); - } - } - } - else - { - tempStack.Push(current); - } + return true; } - } + case '\\': + FlushBuffer(); + tags.Add(new SlashToken()); + context.Advance(); - while (tempStack.Count > 0) - { - tempStack.Pop().TagStatus = TagStatus.Broken; + return true; + default: + return false; } } diff --git a/cs/Markdown/Tokenizer/TagProcessor.cs b/cs/Markdown/Tokenizer/TagProcessor.cs new file mode 100644 index 000000000..8fe8747c4 --- /dev/null +++ b/cs/Markdown/Tokenizer/TagProcessor.cs @@ -0,0 +1,114 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +public class TagProcessor : ITagProcessor +{ + public void Process(List tags, Stack tagStack) + { + ProceedEscaped(tags); + ProceedInWords(tags); + ProceedTags(tagStack); + } + + private void ProceedInWords(List tags) + { + for (var i = 0; i < tags.Count; i++) + { + var current = tags[i]; + if (current.TagStatus == TagStatus.InWord) + { + if (i - 2 >= 0) + { + if 
(tags[i - 1].TokenType == TokenType.String + && tags[i - 2].TagStatus == TagStatus.Open) + { + current.TagStatus = TagStatus.Closed; + } + } + + if (i + 2 >= tags.Count) continue; + if (tags[i + 1].TokenType != TokenType.String) continue; + if (tags[i + 2].TagStatus == TagStatus.Closed) + { + current.TagStatus = TagStatus.Open; + } + else if (tags[i + 2].TagStatus == TagStatus.InWord) + { + current.TagStatus = TagStatus.Open; + tags[i + 2].TagStatus = TagStatus.Closed; + } + } + } + } + + private void ProceedEscaped(List tags) + { + for (var i = 0; i < tags.Count - 1; i++) + { + var current = tags[i]; + var next = tags[i + 1]; + if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken) + { + if (next is { TokenType: TokenType.Slash }) + { + current.TagStatus = TagStatus.Escaped; + next.TagStatus = TagStatus.Broken; + } + else if (next is { TagStatus: TagStatus.Open or TagStatus.Closed or TagStatus.Single }) + { + next.TagStatus = TagStatus.Broken; + current.TagStatus = TagStatus.Escaped; + } + } + } + } + + private void ProceedTags(Stack tagStack) + { + var tempStack = new Stack(); + + while (tagStack.Count > 0) + { + var current = tagStack.Pop(); + + if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single) + { + if (tempStack.Count > 0) + { + var previousTag = tempStack.Peek(); + + if (previousTag.TokenType == current.TokenType) + { + if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open) + { + tempStack.Pop(); + } + else + { + tempStack.Push(current); + } + } + else + { + if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic) + { + current.TagStatus = TagStatus.Broken; + } + else + { + tempStack.Push(current); + } + } + } + else + { + tempStack.Push(current); + } + } + } + + while (tempStack.Count > 0) + tempStack.Pop().TagStatus = TagStatus.Broken; + } +} \ No newline at end of file From 3c2aa5328fd380d92b4586f53f6870a97cdae95b Mon Sep 17 00:00:00 
2001 From: "kashin.aleksandr" Date: Wed, 11 Dec 2024 18:48:11 +0500 Subject: [PATCH 4/4] =?UTF-8?q?=D0=95=D1=89=D0=B5=20=D1=80=D0=B5=D1=84?= =?UTF-8?q?=D0=B0=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../{IMarkdown.cs => IMarkdownRenderer.cs} | 2 +- cs/Markdown/Markdown.csproj | 11 +- cs/Markdown/MarkdownRenderer.cs | 93 ++------------ cs/Markdown/Render/ITokenRenderer.cs | 8 -- cs/Markdown/Render/ITreeRenderer.cs | 8 ++ .../{HtmlRenderer.cs => TreeRenderer.cs} | 9 +- cs/Markdown/Tests/Markdown/MarkdownTests.cs | 116 ++++++------------ ...ts.SimpleHeader_Render_Verify.received.txt | 1 - ...ts.SimpleHeader_Render_Verify.verified.txt | 1 - ...ests.TwoHeaders_Render_Verify.received.txt | 1 - ...ests.TwoHeaders_Render_Verify.verified.txt | 1 - .../Tests/Tokenizer/BoldHandlerTests.cs | 28 +++-- .../Tests/Tokenizer/HeaderHandlerTests.cs | 24 ++-- .../Tests/Tokenizer/ItalicHandlerTests.cs | 37 ++++-- .../Tokenizer/Handlers/HeaderHandler.cs | 2 +- .../Tokenizer/Handlers/ItalicHandler.cs | 7 +- cs/Markdown/Tokenizer/Nodes/BoldNode.cs | 6 - cs/Markdown/Tokenizer/Nodes/HeaderNode.cs | 5 - cs/Markdown/Tokenizer/Nodes/ItalicNode.cs | 6 - cs/Markdown/Tokenizer/Nodes/MainNode.cs | 6 - cs/Markdown/Tokenizer/Nodes/Node.cs | 8 -- cs/Markdown/Tokenizer/Nodes/NodeType.cs | 6 - cs/Markdown/Tokenizer/Nodes/TextNode.cs | 6 - cs/Markdown/Tokenizer/TagProcessor.cs | 12 +- cs/Markdown/Tokenizer/Tags/TagStatus.cs | 1 - cs/Markdown/Tokenizer/Tags/Token.cs | 6 +- cs/Markdown/Tokenizer/TokenizerContext.cs | 10 +- cs/Markdown/TreeBuilder/INodeFactory.cs | 8 ++ .../MarkdownTokenizer.cs | 11 +- cs/Markdown/TreeBuilder/NodeAction.cs | 20 +++ cs/Markdown/TreeBuilder/NodeFactory.cs | 23 ++++ cs/Markdown/TreeBuilder/Nodes/BoldNode.cs | 7 ++ cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs | 7 ++ cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs | 7 ++ cs/Markdown/TreeBuilder/Nodes/MainNode.cs | 5 + 
cs/Markdown/TreeBuilder/Nodes/Node.cs | 10 ++ cs/Markdown/TreeBuilder/Nodes/TextNode.cs | 6 + cs/Markdown/TreeBuilder/TreeBuilder.cs | 42 +++++++ 38 files changed, 280 insertions(+), 287 deletions(-) rename cs/Markdown/{IMarkdown.cs => IMarkdownRenderer.cs} (63%) delete mode 100644 cs/Markdown/Render/ITokenRenderer.cs create mode 100644 cs/Markdown/Render/ITreeRenderer.cs rename cs/Markdown/Render/{HtmlRenderer.cs => TreeRenderer.cs} (54%) delete mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt delete mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt delete mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt delete mode 100644 cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt delete mode 100644 cs/Markdown/Tokenizer/Nodes/BoldNode.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/HeaderNode.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/ItalicNode.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/MainNode.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/Node.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/NodeType.cs delete mode 100644 cs/Markdown/Tokenizer/Nodes/TextNode.cs create mode 100644 cs/Markdown/TreeBuilder/INodeFactory.cs rename cs/Markdown/{Tokenizer => TreeBuilder}/MarkdownTokenizer.cs (86%) create mode 100644 cs/Markdown/TreeBuilder/NodeAction.cs create mode 100644 cs/Markdown/TreeBuilder/NodeFactory.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/BoldNode.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/MainNode.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/Node.cs create mode 100644 cs/Markdown/TreeBuilder/Nodes/TextNode.cs create mode 100644 cs/Markdown/TreeBuilder/TreeBuilder.cs diff --git 
a/cs/Markdown/IMarkdown.cs b/cs/Markdown/IMarkdownRenderer.cs similarity index 63% rename from cs/Markdown/IMarkdown.cs rename to cs/Markdown/IMarkdownRenderer.cs index 7ac783395..ead0a1aae 100644 --- a/cs/Markdown/IMarkdown.cs +++ b/cs/Markdown/IMarkdownRenderer.cs @@ -1,6 +1,6 @@ namespace Markdown; -public interface IMarkdown +public interface IMarkdownRenderer { string Render(string markdown); } \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index fbc0a9283..85278278c 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -7,14 +7,9 @@ - - - - - - - - + + + diff --git a/cs/Markdown/MarkdownRenderer.cs b/cs/Markdown/MarkdownRenderer.cs index 1327312d8..b46a594a1 100644 --- a/cs/Markdown/MarkdownRenderer.cs +++ b/cs/Markdown/MarkdownRenderer.cs @@ -1,101 +1,32 @@ using Markdown.Render; using Markdown.Tokenizer; using Markdown.Tokenizer.Handlers; -using Markdown.Tokenizer.Nodes; -using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; namespace Markdown; -public class MarkdownRenderer : IMarkdown +public class MarkdownRenderer : IMarkdownRenderer { + private readonly ITreeRenderer treeRenderer = new TreeRenderer(); + private readonly ITokenizer tokenizer; + private readonly List handlers = new() { new HeaderHandler(), new ItalicHandler(), new BoldHandler(), }; - public string Render(string markdown) + + public MarkdownRenderer() { - var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); - var renderer = new HtmlRenderer(); - var tokens = tokenizer.Tokenize(markdown); - var tree = ToTree(tokens); - return renderer.Render(tree); + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); } - private Node ToTree(List tokens) + public string Render(string markdown) { - Node mainNode = new MainNode(); - Node currentNode = mainNode; - for (int i = 0; i < tokens.Count; i++) - { - if (tokens[i].TagStatus == TagStatus.Broken) - { - 
currentNode.Children.Add(new TextNode{Value = tokens[i].Value}); - continue; - } - - if (tokens[i] is ItalicTag tag) - { - if(tag.TagStatus == TagStatus.Open) - { - var node = new ItalicNode(); - currentNode.Children.Add(node); - node.Parent = currentNode; - currentNode = node; - continue; - } - - if (tag.TagStatus == TagStatus.Closed) - { - currentNode = currentNode.Parent; - continue; - } - } - - if (tokens[i] is BoldTag boldTag) - { - if(boldTag.TagStatus == TagStatus.Open) - { - var node = new BoldNode(); - currentNode.Children.Add(node); - node.Parent = currentNode; - currentNode = node; - continue; - } - - if (boldTag.TagStatus == TagStatus.Closed) - { - currentNode = currentNode.Parent; - continue; - } - } - - if (tokens[i] is HeaderTag) - { - var node = new HeaderNode(); - currentNode.Children.Add(node); - node.Parent = currentNode; - currentNode = node; - continue; - } - - if (tokens[i] is NewLineToken) - { - if (currentNode is HeaderNode) - { - currentNode = currentNode.Parent; - } - continue; - } - - if (tokens[i] is TextToken textToken) - { - currentNode.Children.Add(new TextNode { Value = textToken.Value }); - continue; - } - } + var tokens = tokenizer.Tokenize(markdown); + var tree = new TreeBuilder.TreeBuilder(new NodeFactory()).Build(tokens); - return currentNode.Parent ?? 
currentNode; + return treeRenderer.Render(tree); } } \ No newline at end of file diff --git a/cs/Markdown/Render/ITokenRenderer.cs b/cs/Markdown/Render/ITokenRenderer.cs deleted file mode 100644 index 12d3d2928..000000000 --- a/cs/Markdown/Render/ITokenRenderer.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Tokenizer.Nodes; - -namespace Markdown.Render; - -public interface ITokenRenderer -{ - string Render(Node tokens); -} \ No newline at end of file diff --git a/cs/Markdown/Render/ITreeRenderer.cs b/cs/Markdown/Render/ITreeRenderer.cs new file mode 100644 index 000000000..ffcc74676 --- /dev/null +++ b/cs/Markdown/Render/ITreeRenderer.cs @@ -0,0 +1,8 @@ +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.Render; + +public interface ITreeRenderer +{ + string Render(Node tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Render/HtmlRenderer.cs b/cs/Markdown/Render/TreeRenderer.cs similarity index 54% rename from cs/Markdown/Render/HtmlRenderer.cs rename to cs/Markdown/Render/TreeRenderer.cs index 63d9d72ac..74eda954f 100644 --- a/cs/Markdown/Render/HtmlRenderer.cs +++ b/cs/Markdown/Render/TreeRenderer.cs @@ -1,9 +1,9 @@ using System.Text; -using Markdown.Tokenizer.Nodes; +using Markdown.TreeBuilder.Nodes; namespace Markdown.Render; -public class HtmlRenderer : ITokenRenderer +public class TreeRenderer : ITreeRenderer { public string Render(Node tokens) { @@ -19,10 +19,7 @@ public string Render(Node tokens) return node switch { TextNode textNode => textNode.Value, - HeaderNode => $"

{Render(node)}

", - ItalicNode => $"{Render(node)}", - BoldNode => $"{Render(node)}", - _ => throw new Exception($"Unknown token type: {node.GetType()}") + _ => $"{node.OpenTag}{Render(node)}{node.CloseTag}" }; } } \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/MarkdownTests.cs b/cs/Markdown/Tests/Markdown/MarkdownTests.cs index a2e1df0c5..239def901 100644 --- a/cs/Markdown/Tests/Markdown/MarkdownTests.cs +++ b/cs/Markdown/Tests/Markdown/MarkdownTests.cs @@ -1,93 +1,47 @@ -namespace Markdown.Tests.Markdown; +using NUnit.Framework; + +namespace Markdown.Tests.Markdown; [TestFixture] public class MarkdownTests { - private static readonly VerifySettings Settings = new(); - private static readonly MarkdownRenderer Renderer = new(); + private IMarkdownRenderer renderer; - [OneTimeSetUp] - public void OneTimeSetUp() + [SetUp] + public void SetUp() { - Settings.UseDirectory("snapshots"); + renderer = new MarkdownRenderer(); } - [TestCaseSource(nameof(ItalicTestCases))] - public string Test_1(string input) => Renderer.Render(input); + [TestCaseSource(nameof(MarkdownRendererTestCases))] + public string MarkdownRenderer_Verify(string input) => renderer.Render(input); - private static TestCaseData[] ItalicTestCases = + private static TestCaseData[] MarkdownRendererTestCases = [ - new TestCaseData("# Header").Returns("

Header

"), - new TestCaseData("\\# Header").Returns("# Header"), - new TestCaseData("\\\\# Header").Returns("\\

Header

"), - new TestCaseData("_Italic text_").Returns("Italic text"), - new TestCaseData("\\_Text_").Returns("_Text_"), - new TestCaseData("\\\\_Italic text_").Returns("\\Italic text"), - new TestCaseData("_Italic text").Returns("_Italic text"), - new TestCaseData("Italic text_").Returns("Italic text_"), - new TestCaseData("Italic_ text_").Returns("Italic_ text_"), - new TestCaseData("_Italic _text").Returns("_Italic _text"), - new TestCaseData("_нач_але").Returns("начале"), - new TestCaseData("сер_еди_не").Returns("середине"), - new TestCaseData("цифры_1_12_3").Returns("цифры_1_12_3"), - new TestCaseData("кон_це._").Returns("конце."), - new TestCaseData("в ра_зных сл_овах не").Returns("в ра_зных сл_овах не"), - new TestCaseData("__bold__").Returns("bold"), - new TestCaseData("_Text__").Returns("_Text__"), - new TestCaseData("__Text_").Returns("__Text_"), - new TestCaseData("__Italic __text").Returns("__Italic __text"), - new TestCaseData("__два _один_ может__").Returns("два один может"), - new TestCaseData("_одинарного __двойное__ не_").Returns( "одинарного __двойное__ не") + new TestCaseData("# Header").Returns("

Header

").SetDescription("Простой заголовок."), + new TestCaseData("\\# Header").Returns("# Header").SetDescription("Экранированный заголовок."), + new TestCaseData("\\\\# Header").Returns("\\

Header

").SetDescription("Экранирован экранирования."), + new TestCaseData("_Italic text_").Returns("Italic text").SetDescription("Курсив"), + new TestCaseData("\\_Text_").Returns("_Text_").SetDescription("Экранирование курсива."), + new TestCaseData("\\\\_Italic text_").Returns("\\Italic text") + .SetDescription("Экранирование экранирования курсива."), + new TestCaseData("_Italic text").Returns("_Italic text").SetDescription("Одинокий открывающий тэг."), + new TestCaseData("Italic text_").Returns("Italic text_").SetDescription("Одинокий закрывающий тэг."), + new TestCaseData("Italic_ text_").Returns("Italic_ text_").SetDescription("Два закрывающих тэга."), + new TestCaseData("_Italic _text").Returns("_Italic _text").SetDescription("Два открывающих тэга."), + new TestCaseData("_нач_але").Returns("начале").SetDescription("Курсив в начале слова."), + new TestCaseData("сер_еди_не").Returns("середине").SetDescription("Курсив в середине слова."), + new TestCaseData("кон_це._").Returns("конце.").SetDescription("Курсив в конце слова."), + new TestCaseData("цифры_1_12_3").Returns("цифры_1_12_3").SetDescription("Между цифр - подчерки."), + new TestCaseData("в ра_зных сл_овах не").Returns("в ра_зных сл_овах не") + .SetDescription("В разных словах - не работает."), + new TestCaseData("__bold__").Returns("bold").SetDescription("Полужирный"), + new TestCaseData("_Text__").Returns("_Text__").SetDescription("Разные тэги 1"), + new TestCaseData("__Text_").Returns("__Text_").SetDescription("Разные тэги 2"), + new TestCaseData("__Italic __text").Returns("__Italic __text").SetDescription("Два открывающих тэга."), + new TestCaseData("__два _один_ может__").Returns("два один может") + .SetDescription("Курсив в полужирном."), + new TestCaseData("_одинарного __двойное__ не_").Returns("одинарного __двойное__ не") + .SetDescription("Полужирный в курсиве - не работает."), ]; - - private static Task Verify(string target) => - Verifier.Verify(target, Settings); - - [Test] - public void 
SimpleText_Render_Verify() => - Verify(Renderer.Render("Text")); - - [Test] - public void EscapedCharacter_Render_Verify() => - Verify(Renderer.Render(@"\_Text_")); - - [Test] - public void ItalicText_Render_Verify() => - Verify(Renderer.Render("_Italic text_")); - - [Test] - public void BoldText_Render_Verify() => - Verify(Renderer.Render("__Bold text__")); - - [Test] - public void BoldWithItalicText_Render_Verify() => - Verify(Renderer.Render("__Bold _with italic_ text__")); - - [Test] - public void SimpleHeader_Render_Verify() => - Verify(Renderer.Render("# Header")); - - [Test] - public void TwoHeaders_Render_Verify() => - Verify(Renderer.Render("# Header one \n# Header two")); - // - // [Test] - // public void HeaderWithItalic_Render_Verify() => - // Verify(Renderer.Render("# Header with _italic text_")); - // - // [Test] - // public void HeaderWithBoldAndItalic_Render_Verify() => - // Verify(Renderer.Render("# Header with _italic_ and __bold__ text")); - // - // [Test] - // public void HeaderWithItalicInBold_Render_Verify() => - // Verify(Renderer.Render("# Header ___italic_ in bold__ text")); - // - // [Test] - // public void SimpleList_Render_Verify() => - // Verify(Renderer.Render("- item1\n- item2")); - // - // [Test] - // public void ListWithItalicAndBold_Render_Verify() => - // Verify(Renderer.Render("- _item1_\n- __item2__")); } \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt deleted file mode 100644 index 35ba349aa..000000000 --- a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.received.txt +++ /dev/null @@ -1 +0,0 @@ -

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt deleted file mode 100644 index 35ba349aa..000000000 --- a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.SimpleHeader_Render_Verify.verified.txt +++ /dev/null @@ -1 +0,0 @@ -

Header

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt deleted file mode 100644 index 71b2c8c9a..000000000 --- a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.received.txt +++ /dev/null @@ -1 +0,0 @@ -

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt b/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt deleted file mode 100644 index 71b2c8c9a..000000000 --- a/cs/Markdown/Tests/Markdown/snapshots/MarkdownTests.TwoHeaders_Render_Verify.verified.txt +++ /dev/null @@ -1 +0,0 @@ -

Header one

Header two

\ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs index a1519b5a0..b87cd1505 100644 --- a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs @@ -2,23 +2,32 @@ using Markdown.Tokenizer; using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; namespace Markdown.Tests.Tokenizer; [TestFixture] public class BoldHandlerTests { + private ITokenizer tokenizer; + + [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + [TestCaseSource(nameof(BoldTokenSource))] public void BoldTokenizerTests((string input, Token[] tags) testCase) { - var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; - var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); - var res = tokenizer.Tokenize(testCase.input).ToArray(); + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) { - res[i].Value.Should().Be(testCase.tags[i].Value); - res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); } } @@ -27,16 +36,19 @@ public void BoldTokenizerTests((string input, Token[] tags) testCase) yield return ("__abc__", [ new BoldTag(TagStatus.Open), new TextToken("abc"), - new BoldTag(TagStatus.Closed)]); + new BoldTag(TagStatus.Closed) + ]); yield return ("_abc__", [ new ItalicTag(TagStatus.Open), new TextToken("abc"), - new BoldTag(TagStatus.Closed)]); + new BoldTag(TagStatus.Closed) + ]); yield return ("__abc_", [ new BoldTag(TagStatus.Open), new TextToken("abc"), - new ItalicTag(TagStatus.Closed)]); + new 
ItalicTag(TagStatus.Closed) + ]); } } \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs index 5f81ce304..72c144f08 100644 --- a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs @@ -2,23 +2,32 @@ using Markdown.Tokenizer; using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; namespace Markdown.Tests.Tokenizer; [TestFixture] public class HeaderHandlerTests { + private ITokenizer tokenizer; + + [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + [TestCaseSource(nameof(HeaderTokenSource))] public void HeaderTokenizerTests((string input, Token[] tags) testCase) { - var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; - var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); - var res = tokenizer.Tokenize(testCase.input).ToArray(); + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) { - res[i].Value.Should().Be(testCase.tags[i].Value); - res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); } } @@ -26,7 +35,7 @@ public void HeaderTokenizerTests((string input, Token[] tags) testCase) { yield return ("abc", [new TextToken("abc")]); yield return ("# abc", [new HeaderTag(), new TextToken("abc")]); - yield return ("f# abc", [new TextToken("f#"),new TextToken(" abc")]); + yield return ("f# abc", [new TextToken("f#"), new TextToken(" abc")]); yield return ("\\# abc", [new SlashToken(), new HeaderTag(), new TextToken("abc")]); yield return ("\\\\# abc", [new 
SlashToken(), new SlashToken(), new HeaderTag(), new TextToken("abc")]); yield return ("# abc\n# qwe", [ @@ -34,6 +43,7 @@ public void HeaderTokenizerTests((string input, Token[] tags) testCase) new TextToken("abc"), new NewLineToken(), new HeaderTag(), - new TextToken("qwe")]); + new TextToken("qwe") + ]); } } \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs index c2b68e3c5..eae0c758e 100644 --- a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs +++ b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs @@ -2,6 +2,8 @@ using Markdown.Tokenizer; using Markdown.Tokenizer.Handlers; using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; namespace Markdown.Tests.Tokenizer; @@ -9,17 +11,24 @@ namespace Markdown.Tests.Tokenizer; [TestFixture] public class ItalicParserTests { + private ITokenizer tokenizer; + + [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + [TestCaseSource(nameof(ItalicTokenSource))] public void ItalicTokenizerTests((string input, Token[] tags) testCase) { - var handlers = new List() { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; - var tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); - var res = tokenizer.Tokenize(testCase.input).ToArray(); + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); for (var i = 0; i < testCase.tags.Length; i++) { - res[i].Value.Should().Be(testCase.tags[i].Value); - res[i].TokenType.Should().Be(testCase.tags[i].TokenType); + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); } } @@ -32,34 +41,40 @@ public void ItalicTokenizerTests((string input, Token[] tags) testCase) new TextToken("a"), new 
ItalicTag(TagStatus.InWord), new TextToken("bc"), - new ItalicTag(TagStatus.Closed)]); + new ItalicTag(TagStatus.Closed) + ]); yield return ("_a_bc", [ new ItalicTag(TagStatus.Open), new TextToken("a"), new ItalicTag(TagStatus.InWord), - new TextToken("bc")]); + new TextToken("bc") + ]); yield return ("_a_bc_", [ new ItalicTag(TagStatus.Open), new TextToken("a"), new ItalicTag(TagStatus.InWord), new TextToken("bc"), - new ItalicTag(TagStatus.Closed)]); + new ItalicTag(TagStatus.Closed) + ]); yield return ("_abc_", [ new ItalicTag(TagStatus.Open), new TextToken("abc"), - new ItalicTag(TagStatus.Closed)]); + new ItalicTag(TagStatus.Closed) + ]); yield return ("\\_abc", [ new SlashToken(), new ItalicTag(TagStatus.Open), - new TextToken("abc")]); + new TextToken("abc") + ]); yield return ("\\\\_abc", [ new SlashToken(), new SlashToken(), new ItalicTag(TagStatus.Open), - new TextToken("abc")]); + new TextToken("abc") + ]); } } \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs index e86a4fc24..900f40deb 100644 --- a/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs +++ b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs @@ -8,7 +8,7 @@ public class HeaderHandler : IHandler { var symbol = ctx.Current; - if(symbol != '#') + if (symbol != '#') return null; if ((ctx.Next == ' ' && (ctx.Previous == '\n' || ctx.Position == 0)) || (ctx.Previous == '\\')) diff --git a/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs index 8af2fb70f..d1abaabdd 100644 --- a/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs +++ b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs @@ -8,10 +8,7 @@ public class ItalicHandler : IHandler { var symbol = ctx.Current; - if(symbol != '_') - return null; - - if(ctx.Next == '_') + if (symbol != '_' || ctx.Next == '_') return null; if (char.IsDigit(ctx.Previous ?? ' ') || char.IsDigit(ctx.Next ?? 
' ')) @@ -27,7 +24,7 @@ public class ItalicHandler : IHandler return new ItalicTag(TagStatus.Closed); } - if (ctx.Previous != ' ' && ctx.Next != ' ' ) + if (ctx.Previous != ' ' && ctx.Next != ' ') return new ItalicTag(TagStatus.InWord); return null; diff --git a/cs/Markdown/Tokenizer/Nodes/BoldNode.cs b/cs/Markdown/Tokenizer/Nodes/BoldNode.cs deleted file mode 100644 index 3e42a57e0..000000000 --- a/cs/Markdown/Tokenizer/Nodes/BoldNode.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public class BoldNode : Node -{ - -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs b/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs deleted file mode 100644 index aaf3bfa87..000000000 --- a/cs/Markdown/Tokenizer/Nodes/HeaderNode.cs +++ /dev/null @@ -1,5 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public class HeaderNode : Node -{ -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs b/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs deleted file mode 100644 index 528b72f7f..000000000 --- a/cs/Markdown/Tokenizer/Nodes/ItalicNode.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public class ItalicNode : Node -{ - -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/MainNode.cs b/cs/Markdown/Tokenizer/Nodes/MainNode.cs deleted file mode 100644 index f1db39b3c..000000000 --- a/cs/Markdown/Tokenizer/Nodes/MainNode.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public class MainNode : Node -{ - -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/Node.cs b/cs/Markdown/Tokenizer/Nodes/Node.cs deleted file mode 100644 index 91ddd509a..000000000 --- a/cs/Markdown/Tokenizer/Nodes/Node.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public abstract class Node -{ - public string? Value { get; set; } - public List Children { get; } = new(); - public Node? 
Parent { get; set; } -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/NodeType.cs b/cs/Markdown/Tokenizer/Nodes/NodeType.cs deleted file mode 100644 index 2d70e7319..000000000 --- a/cs/Markdown/Tokenizer/Nodes/NodeType.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public enum NodeType -{ - Header -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Nodes/TextNode.cs b/cs/Markdown/Tokenizer/Nodes/TextNode.cs deleted file mode 100644 index 0ca76bc6f..000000000 --- a/cs/Markdown/Tokenizer/Nodes/TextNode.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown.Tokenizer.Nodes; - -public class TextNode : Node -{ - -} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TagProcessor.cs b/cs/Markdown/Tokenizer/TagProcessor.cs index 8fe8747c4..5f6f9c68e 100644 --- a/cs/Markdown/Tokenizer/TagProcessor.cs +++ b/cs/Markdown/Tokenizer/TagProcessor.cs @@ -6,12 +6,12 @@ public class TagProcessor : ITagProcessor { public void Process(List tags, Stack tagStack) { - ProceedEscaped(tags); - ProceedInWords(tags); - ProceedTags(tagStack); + ProceedEscapedTags(tags); + ProceedInWordsTags(tags); + ProceedPairTags(tagStack); } - private void ProceedInWords(List tags) + private void ProceedInWordsTags(List tags) { for (var i = 0; i < tags.Count; i++) { @@ -42,7 +42,7 @@ private void ProceedInWords(List tags) } } - private void ProceedEscaped(List tags) + private void ProceedEscapedTags(List tags) { for (var i = 0; i < tags.Count - 1; i++) { @@ -64,7 +64,7 @@ private void ProceedEscaped(List tags) } } - private void ProceedTags(Stack tagStack) + private void ProceedPairTags(Stack tagStack) { var tempStack = new Stack(); diff --git a/cs/Markdown/Tokenizer/Tags/TagStatus.cs b/cs/Markdown/Tokenizer/Tags/TagStatus.cs index 4b2830560..525959910 100644 --- a/cs/Markdown/Tokenizer/Tags/TagStatus.cs +++ b/cs/Markdown/Tokenizer/Tags/TagStatus.cs @@ -7,6 +7,5 @@ public enum TagStatus Broken, Escaped, InWord, - Undefined, Single } \ 
No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/Token.cs b/cs/Markdown/Tokenizer/Tags/Token.cs index 44fbfa9d5..21279f838 100644 --- a/cs/Markdown/Tokenizer/Tags/Token.cs +++ b/cs/Markdown/Tokenizer/Tags/Token.cs @@ -1,8 +1,8 @@ namespace Markdown.Tokenizer.Tags; -public abstract class Token +public class Token { - public virtual TagStatus TagStatus { get; set; } + public TagStatus TagStatus { get; set; } public virtual TokenType TokenType { get; } - public string Value { get; set; } + public string Value = string.Empty; } \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenizerContext.cs b/cs/Markdown/Tokenizer/TokenizerContext.cs index 7829feaf5..4b3526e5e 100644 --- a/cs/Markdown/Tokenizer/TokenizerContext.cs +++ b/cs/Markdown/Tokenizer/TokenizerContext.cs @@ -1,14 +1,8 @@ namespace Markdown.Tokenizer; -public class TokenizerContext +public class TokenizerContext(string text) { - private int position; - private readonly string text; - public TokenizerContext(string text) - { - this.text = text; - position = 0; - } + private int position = 0; public bool IsEnd => position >= text.Length; public char Current => text[position]; public int Position => position; diff --git a/cs/Markdown/TreeBuilder/INodeFactory.cs b/cs/Markdown/TreeBuilder/INodeFactory.cs new file mode 100644 index 000000000..48dc44819 --- /dev/null +++ b/cs/Markdown/TreeBuilder/INodeFactory.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.TreeBuilder; + +public interface INodeFactory +{ + NodeAction? 
CreateNode(Token token); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs b/cs/Markdown/TreeBuilder/MarkdownTokenizer.cs similarity index 86% rename from cs/Markdown/Tokenizer/MarkdownTokenizer.cs rename to cs/Markdown/TreeBuilder/MarkdownTokenizer.cs index 1d30c8779..b3757925d 100644 --- a/cs/Markdown/Tokenizer/MarkdownTokenizer.cs +++ b/cs/Markdown/TreeBuilder/MarkdownTokenizer.cs @@ -1,14 +1,14 @@ using System.Text; -using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer; using Markdown.Tokenizer.Tags; using Token = Markdown.Tokenizer.Tags.Token; -namespace Markdown.Tokenizer; +namespace Markdown.TreeBuilder; public class MarkdownTokenizer(IHandlerManager handlerManager, ITagProcessor tagProcessor) : ITokenizer { - private readonly StringBuilder buffer = new(); - private List tags = new(); + private readonly StringBuilder buffer = new(); + private readonly List tags = new(); private readonly Stack tagStack = new(); public List Tokenize(string text) @@ -16,7 +16,7 @@ public List Tokenize(string text) var context = new TokenizerContext(text); while (!context.IsEnd) { - if(TryProceedSpecialSymbol(context)) continue; + if (TryProceedSpecialSymbol(context)) continue; handlerManager.TryHandle(context, buffer, tags, tagStack); @@ -50,6 +50,7 @@ private bool TryProceedSpecialSymbol(TokenizerContext context) tags.Add(new TextToken(buffer.ToString())); buffer.Clear(); } + buffer.Append(context.Current); context.Advance(); diff --git a/cs/Markdown/TreeBuilder/NodeAction.cs b/cs/Markdown/TreeBuilder/NodeAction.cs new file mode 100644 index 000000000..48f27395f --- /dev/null +++ b/cs/Markdown/TreeBuilder/NodeAction.cs @@ -0,0 +1,20 @@ +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.TreeBuilder; + +public abstract class NodeAction +{ + public class OpenNode : NodeAction + { + public Node Node { get; } + public OpenNode(Node node) => Node = node; + } + + public class CloseNode : NodeAction + { + } + + public class SkipNode : 
NodeAction + { + } +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/NodeFactory.cs b/cs/Markdown/TreeBuilder/NodeFactory.cs new file mode 100644 index 000000000..1c16920ca --- /dev/null +++ b/cs/Markdown/TreeBuilder/NodeFactory.cs @@ -0,0 +1,23 @@ +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.TreeBuilder; + +public class NodeFactory : INodeFactory +{ + public NodeAction? CreateNode(Token token) + { + return token switch + { + { TagStatus: TagStatus.Broken } => null, + ItalicTag { TagStatus: TagStatus.Open } => new NodeAction.OpenNode(new ItalicNode()), + ItalicTag { TagStatus: TagStatus.Closed } => new NodeAction.CloseNode(), + BoldTag { TagStatus: TagStatus.Open } => new NodeAction.OpenNode(new BoldNode()), + BoldTag { TagStatus: TagStatus.Closed } => new NodeAction.CloseNode(), + SlashToken { TagStatus: TagStatus.Escaped } => new NodeAction.SkipNode(), + HeaderTag => new NodeAction.OpenNode(new HeaderNode()), + NewLineToken => new NodeAction.SkipNode(), + _ => null + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs b/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs new file mode 100644 index 000000000..c83686c86 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class BoldNode : Node +{ + public override string OpenTag => ""; + public override string CloseTag => ""; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs b/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs new file mode 100644 index 000000000..f7f608ef5 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class HeaderNode : Node +{ + public override string OpenTag => "

"; + public override string CloseTag => "

"; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs b/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs new file mode 100644 index 000000000..fb8caa564 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class ItalicNode : Node +{ + public override string OpenTag => ""; + public override string CloseTag => ""; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/MainNode.cs b/cs/Markdown/TreeBuilder/Nodes/MainNode.cs new file mode 100644 index 000000000..c6d4b4869 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/MainNode.cs @@ -0,0 +1,5 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class MainNode : Node +{ +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/Node.cs b/cs/Markdown/TreeBuilder/Nodes/Node.cs new file mode 100644 index 000000000..cafc9062c --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/Node.cs @@ -0,0 +1,10 @@ +namespace Markdown.TreeBuilder.Nodes; + +public abstract class Node +{ + public List Children { get; } = new(); + public Node? Parent { get; set; } + + public virtual string OpenTag => string.Empty; + public virtual string CloseTag => string.Empty; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/TextNode.cs b/cs/Markdown/TreeBuilder/Nodes/TextNode.cs new file mode 100644 index 000000000..62a4c96b2 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/TextNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class TextNode : Node +{ + public string? 
Value { get; init; } = string.Empty; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/TreeBuilder.cs b/cs/Markdown/TreeBuilder/TreeBuilder.cs new file mode 100644 index 000000000..f5fbbb44d --- /dev/null +++ b/cs/Markdown/TreeBuilder/TreeBuilder.cs @@ -0,0 +1,42 @@ +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.TreeBuilder; + +public class TreeBuilder(INodeFactory nodeFactory) +{ + public Node Build(List tokens) + { + Node mainNode = new MainNode(); + var currentNode = mainNode; + + foreach (var token in tokens) + { + var nodeAction = nodeFactory.CreateNode(token); + + if (nodeAction == null) + { + currentNode.Children.Add(new TextNode { Value = token.Value }); + continue; + } + + switch (nodeAction) + { + case NodeAction.OpenNode openNode: + currentNode.Children.Add(openNode.Node); + openNode.Node.Parent = currentNode; + currentNode = openNode.Node; + break; + + case NodeAction.CloseNode: + currentNode = currentNode.Parent ?? currentNode; + break; + + case NodeAction.SkipNode: + break; + } + } + + return mainNode; + } +} \ No newline at end of file